Index: Configure.pl
===================================================================
RCS file: /cvs/public/parrot/Configure.pl,v
retrieving revision 1.139
diff -u -b -r1.139 Configure.pl
--- Configure.pl	9 Apr 2004 20:31:51 -0000	1.139
+++ Configure.pl	13 Apr 2004 08:16:17 -0000
@@ -54,7 +54,7 @@
 You can add and remove option values with C<< :rem{<opt>} >> and
 C<< :add{<opt>} >>. For example:
 
-    perl Configure.pl --ccflags="rem{-g} :add{-O2}"
+    perl Configure.pl --ccflags=":rem{-g} :add{-O2}"
 
 =over
 
@@ -167,6 +167,10 @@
 be one of: C<gc>, C<libc>, C<malloc> or C<malloc-trace>. The default is
 C<gc>.
 
+=item C<--icudatadir=(path)>
+
+Use the given directory to locate ICU's data file(s).
+
 =back
 
 Other Options (may not be implemented)
@@ -274,6 +278,8 @@
    --execcapable        Use JIT to emit a native executable
    --gc=(type)          Determine the type of garbage collection
                         type=(gc|libc|malloc|malloc-trace) default is gc
+
+   --icudatadir=(path)  Use the given directory to locate ICU's data file(s)
 
 Other Options (may not be implemented):
 
Index: config/gen/icu.pl
===================================================================
RCS file: /cvs/public/parrot/config/gen/icu.pl,v
retrieving revision 1.7
diff -u -b -r1.7 icu.pl
--- config/gen/icu.pl	12 Apr 2004 20:36:36 -0000	1.7
+++ config/gen/icu.pl	13 Apr 2004 08:16:17 -0000
@@ -20,10 +20,17 @@
 
 $description="Configuring ICU if requested...";
 
-@args=qw(buildicu verbose);
+@args=qw(buildicu verbose icudatadir);
 
 sub runstep {
-  my ($buildicu, $verbose) = @_;
+  my ($buildicu, $verbose, $icudatadir) = @_;
+
+  if( !defined $icudatadir )
+  {
+	  $icudatadir = 'blib/lib/icu/2.6.1';
+  }
+
+  Configure::Data->set( icudatadir => $icudatadir );
 
 #  unless ($buildicu) {
 #    print " [Skipped] " if $verbose;
Index: config/gen/config_h/config_h.in
===================================================================
RCS file: /cvs/public/parrot/config/gen/config_h/config_h.in,v
retrieving revision 1.21
diff -u -b -r1.21 config_h.in
--- config/gen/config_h/config_h.in	10 Apr 2004 09:49:15 -0000	1.21
+++ config/gen/config_h/config_h.in	13 Apr 2004 08:16:17 -0000
@@ -134,6 +134,8 @@
 #define PARROT_CORE_CG_OPLIB_INIT Parrot_DynOp_core_cg_${MAJOR}_${MINOR}_${PATCH}
 #define PARROT_CORE_CGP_OPLIB_INIT Parrot_DynOp_core_cgp_${MAJOR}_${MINOR}_${PATCH}
 
+#define DEFAULT_ICU_DATA_DIR "${icudatadir}"
+
 #define INTVAL_FMT "${intvalfmt}"
 #define FLOATVAL_FMT "${floatvalfmt}"
 
Index: include/parrot/string_primitives.h
===================================================================
RCS file: /cvs/public/parrot/include/parrot/string_primitives.h,v
retrieving revision 1.1
diff -u -b -r1.1 string_primitives.h
--- include/parrot/string_primitives.h	9 Apr 2004 20:32:24 -0000	1.1
+++ include/parrot/string_primitives.h	13 Apr 2004 08:16:18 -0000
@@ -23,6 +23,10 @@
 void string_fill_from_buffer(struct Parrot_Interp *interpreter,
 	const void *buffer, UINTVAL len, const char *encoding_name, STRING *s);
 
+/* Utility function which knows how to unwind a single escape sequence */
+Parrot_UInt4 string_unescape_single_sequence( Parrot_UInt4 *offset, 
+							Parrot_UInt4 input_length, char *cstring);
+
 UINTVAL
 Parrot_char_digit_value(struct Parrot_Interp *interpreter, UINTVAL character);
 
Index: src/string.c
===================================================================
RCS file: /cvs/public/parrot/src/string.c,v
retrieving revision 1.185
diff -u -b -r1.185 string.c
--- src/string.c	11 Apr 2004 13:14:07 -0000	1.185
+++ src/string.c	13 Apr 2004 08:16:24 -0000
@@ -243,8 +243,9 @@
 void
 string_init(void)
 {
-/* XXXX: pull out into a config */
-    string_set_data_directory("blib/lib/icu/2.6.1");
+/* DEFAULT_ICU_DATA_DIR is configured at build time. Need a way to
+    specify this at runtime as well. */
+    string_set_data_directory(DEFAULT_ICU_DATA_DIR);
 /*
     encoding_init();
     chartype_init();
@@ -3053,6 +3054,7 @@
 
 */
 
+#if 0
 STRING *
 string_unescape_cstring(struct Parrot_Interp * interpreter, 
     char *cstring, char delimiter)
@@ -3166,6 +3168,106 @@
 
     result->bufused = p - (char *)result->strstart;
     string_compute_strlen(interpreter, result);
+
+    return string_constant_copy(interpreter, result);
+}
+#endif
+
+/*
+ * Build a STRING from cstring, replacing each backslash escape sequence
+ * (decoded by string_unescape_single_sequence()) with the character it
+ * denotes. Scanning stops at the first unescaped delimiter or at the
+ * terminating NUL. Returns NULL when cstring is NULL or empty; otherwise
+ * returns string_constant_copy() of the unescaped string.
+ */
+STRING *
+string_unescape_cstring(struct Parrot_Interp *interpreter,
+    char *cstring, char delimiter)
+{
+    char *p, *string;
+    STRING *result;
+    size_t clength;
+
+    /* Check for NULL before calling strlen(); strlen(NULL) is undefined. */
+    if( !cstring ) return NULL;
+
+    clength = strlen(cstring);
+    if( !clength ) return NULL;
+
+    /* string_make() copies the input. While result stays in
+       enum_stringrep_one the unescaped bytes are written over that copy
+       through p; every step consumes at least as many input bytes as it
+       writes, so p never overtakes the end of the copy. */
+    result = string_make(interpreter, cstring, clength, "iso-8859-1", 0);
+
+    for (p = (char *)result->strstart, string = cstring; *string; )
+    {
+        if (*string == '\\' && string[1])
+        {
+            Parrot_UInt4 c32;
+            Parrot_UInt4 lenParsed = 0;
+
+            ++string; /* get past the backslash */
+            c32 = string_unescape_single_sequence(&lenParsed,
+                    (Parrot_UInt4)(cstring + clength - string), string);
+
+            if( lenParsed == 0 )
+            {
+                /* Nothing was consumed, which is treated as a parse
+                   failure: the backslash is dropped and the following
+                   characters are copied literally by later iterations. */
+            }
+            else
+            {
+                /* the parser was given the remaining length as its bound */
+                string += lenParsed;
+
+                if( result->representation == enum_stringrep_one )
+                {
+                    if( c32 <= 0xFF )
+                    {
+                        *p++ = (char)c32;
+                    }
+                    else
+                    {
+                        /* finish up the string so far */
+                        result->bufused = p - (char *)result->strstart;
+                        string_compute_strlen(interpreter, result);
+
+                        string_append_chr(interpreter, result, c32);
+                    }
+                }
+                else
+                {
+                    string_append_chr(interpreter, result, c32);
+                }
+            }
+        }
+        else if (*string == delimiter)
+        {
+            break;
+        }
+        else
+        {
+            if( result->representation == enum_stringrep_one )
+            {
+                *p++ = *string++;
+            }
+            else
+            {
+                /* Go through unsigned char so that bytes >= 0x80 are not
+                   sign-extended on platforms where plain char is signed. */
+                string_append_chr(interpreter, result,
+                        (unsigned char)*string++);
+            }
+        }
+    }
+
+    if( result->representation == enum_stringrep_one )
+    {
+        result->bufused = p - (char *)result->strstart;
+        string_compute_strlen(interpreter, result);
+    }
 
     return string_constant_copy(interpreter, result);
 }
Index: src/string_primitives.c
===================================================================
RCS file: /cvs/public/parrot/src/string_primitives.c,v
retrieving revision 1.4
diff -u -b -r1.4 string_primitives.c
--- src/string_primitives.c	11 Apr 2004 13:14:07 -0000	1.4
+++ src/string_primitives.c	13 Apr 2004 08:16:25 -0000
@@ -23,6 +23,7 @@
 #include <unicode/ucnv.h>
 #include <unicode/utypes.h>
 #include <unicode/uchar.h>
+#include <unicode/ustring.h>
 #include <assert.h>
 
 /*
@@ -41,6 +42,19 @@
 string_set_data_directory(const char *dir)
 {
     u_setDataDirectory(dir);
+
+    /* Since u_setDataDirectory doesn't have a result code, we'll spot
+       check that everything is okay by making sure that '9' has decimal
+       value 9. Using 57 rather than '9' so that the encoding of this
+       source code file isn't an issue.... (Don't want to get bitten by
+       EBCDIC.) */
+
+    if( !u_isdigit(57) || (u_charDigitValue(57) != 9) )
+    {
+            internal_exception(ICU_ERROR,
+                "string_set_data_directory: ICU data files not found "
+                "(apparently) for directory [%s]", dir);
+    }
 }
 
 /*
@@ -125,82 +139,22 @@
 	string_compute_strlen(interpreter, s);
 }
 
-/*
-{
-
-    UErrorCode myError = U_ZERO_ERROR;
-    UConverter *conv = ucnv_open("ISO-8859-1", &myError);
-
-    NSData *data = [NSData dataWithContentsOfFile:@"/var/tmp/ja.txt"];
-
-
-    UConverter *conv2 = ucnv_open("UTF-8", &myError);
-    UChar *outputBuffer = malloc(50*sizeof(UChar));
-    UChar *start = outputBuffer;
-    char *source = [data bytes];
-    UErrorCode convError = 0;
-
-    ucnv_toUnicode(conv2, &outputBuffer, &(outputBuffer[50]), &source, source + [data length], NULL, TRUE, &convError);
-
-    NSLog(@"conv error = %i", convError);
-    NSLog(@"conv length = %i", outputBuffer - start);
-
-
-    UConverter *conv3 = ucnv_open("UCS-2", &myError); //ISO-8859-1"
-    char *outbuffer = malloc(50);
-    char *outstart = outbuffer;
-    UErrorCode redoErr = 0;
-
-    NSLog(@"conv3 = %p", conv3);
-    NSLog(@"%s", ucnv_getName(conv3, &redoErr));
-
-        UErrorCode callbackStatus = 0;
-
-          ucnv_setFromUCallBack(conv3,
-                       UCNV_FROM_U_CALLBACK_STOP, //UCNV_FROM_U_CALLBACK_ESCAPE
-                       NULL,
-                       NULL,
-                       NULL,
-                       &callbackStatus);
-
-    ucnv_fromUnicode(conv3, &outbuffer, outbuffer + 50, &start, outputBuffer, NULL, TRUE, &redoErr);
-//    ucnv_fromUnicode(conv3, NULL, NULL, &start, outputBuffer, NULL, TRUE, &redoErr);
-
-    NSLog(@"redo error = %i", redoErr);
-    NSLog(@"%s", u_errorName(redoErr));
-    NSLog(@"redo length = %i", outbuffer - outstart);
-
-NSLog(@"%x", *outstart);
-NSLog(@"%d", *outstart);
-NSLog(@"%x", outstart[1]);
-    NSLog(@"%@", [[NSString alloc] initWithData:[NSData dataWithBytes:outstart length:(outbuffer - outstart)] encoding:NSISOLatin1StringEncoding]);
+/* u_unescapeAt() callback: char at offset, widened without sign extension */
 
+static Parrot_UInt2
+_string_char_at_callback(Parrot_Int4 offset, void *context)
 {
-    UConverter *jisConv = ucnv_open("shift_jis", &myError);
-    USet *set =  uset_open(0, 0);
-    UErrorCode setError = U_ZERO_ERROR;
-
-    ucnv_getUnicodeSet( jisConv, set, UCNV_ROUNDTRIP_SET, &setError);
-
-    NSLog(@"set size = %d", uset_size(set));
-    NSLog(@"contains range = %d", uset_containsRange(set, 0x10000, 0x10ffff));
-    NSLog(@"uset_containsNoneOfRange = %d", uset_containsNoneOfRange(set, 0x10000, 0x10ffff));
+    return (unsigned char)*((char *)context + offset);
 }
+
+/* Decode one escape sequence from cstring using ICU's u_unescapeAt(). */
+Parrot_UInt4
+string_unescape_single_sequence(Parrot_UInt4 *offset,
+        Parrot_UInt4 input_length, char *cstring)
 {
-    UConverter *utf8Conv = ucnv_open("UTF-8", &myError);
-    USet *set =  uset_open(0, 0);
-    UErrorCode setError = U_ZERO_ERROR;
-
-    ucnv_getUnicodeSet( utf8Conv, set, UCNV_ROUNDTRIP_SET, &setError);
-
-    NSLog(@"set size = %d", uset_size(set));
-    NSLog(@"contains range = %d", uset_containsRange(set, 0x10000, 0x10ffff));
-    NSLog(@"uset_containsNoneOfRange = %d", uset_containsNoneOfRange(set, 0x10000, 0x10ffff));
-}
-    [pool release];
-    return 0;
+    return (Parrot_UInt4)u_unescapeAt(_string_char_at_callback,
+                (int32_t *)offset, (int32_t)input_length, cstring);
 }
-*/
 
 /*
 
