Hello, all. Forgive me if I'm doing something wrong; I'm not familiar
with the patch submission procedure, or which parts of the code are OK
to touch.

I added a compiler primitive, string_index_of, for finding the index of
a substring within a string. I didn't see any other way of doing the
same thing efficiently. It's basically just a wrapper around strstr: it
returns the 1-based index of the first match, or 0 if the substring
couldn't be found. I didn't add a method to the string type (outside of
the module I'm working on) for calling it, but I'm using it in the
module for simple parsing procedures.

I also changed string.substring to more gracefully handle unbounded and
out-of-bounds range limits by taking the intersection of the passed
range and the bounding range of the string (e.g.,
"hello".substring(-5..30) == "hello"). Ranges that don't overlap the
string at all will return an empty string (e.g.,
"hello".substring(50..100) == ""). It takes striding into account as
well. I also made string_index (single character substrings) return an
empty string if the index is out of range. I don't see the danger in
handling substrings like this instead of crashing, and other languages
(e.g., Python) do this as well. All in all I think it makes string
handling a little friendlier.

Also, for edge cases that return empty strings, I simply returned an
empty string literal. I have no idea how safe that is or how it affects
garbage collection. Let me know if it's better practice to allocate a
new string instead.

I put a test case in test/trivial/bwross/string_index.chpl (expected
output in string_index.good).

-- Brandon
Index: compiler/AST/primitive.cpp
===================================================================
--- compiler/AST/primitive.cpp	(revision 22367)
+++ compiler/AST/primitive.cpp	(working copy)
@@ -540,6 +540,7 @@
   prim_def("string_length", returnInfoDefaultInt);
   prim_def("ascii", returnInfoInt32);
   prim_def("string_index", returnInfoString, true, true);
+  prim_def("string_index_of", returnInfoInt32, true, true);
   prim_def(PRIM_STRING_COPY, "string_copy", returnInfoString, false, true);
   prim_def(PRIM_STRING_NORMALIZE, "string_normalize", returnInfoVoid, true, false);
   prim_def(PRIM_STRING_FROM_C_STRING, "string_from_c_string", returnInfoString, true, true);
Index: modules/internal/ChapelRangeBase.chpl
===================================================================
--- modules/internal/ChapelRangeBase.chpl	(revision 22367)
+++ modules/internal/ChapelRangeBase.chpl	(working copy)
@@ -1014,13 +1014,9 @@
   // Return a substring of a string with a range of indices.
   inline proc string.substring(r: rangeBase(?))
   {
-    if r.boundedType != BoundedRangeType.bounded then
-      compilerError("substring indexing undefined on unbounded ranges");
-  
-    if r.stride != 1 then
-      return __primitive("string_strided_select", this, r.alignedLow, r.alignedHigh, r.stride);
-    else
-      return __primitive("string_select", this, r.low, r.high);
+    var r2 = r[(1..this.length)._base]; // Intersect with string bounds.
+    var lo:int = r2.alignedLow, hi:int = r2.alignedHigh;
+    return __primitive("string_strided_select", this, lo, hi, r2.stride);
   }
   
   
Index: runtime/include/chpltypes.h
===================================================================
--- runtime/include/chpltypes.h	(revision 22367)
+++ runtime/include/chpltypes.h	(working copy)
@@ -229,6 +229,7 @@
 void chpl_string_widen(struct chpl_chpl____wide_chpl_string_s* x, chpl_string from, int32_t lineno, chpl_string filename);
 void chpl_comm_wide_get_string(chpl_string* local, struct chpl_chpl____wide_chpl_string_s* x, int32_t tid, int32_t lineno, chpl_string filename);
 chpl_string string_concat(chpl_string x, chpl_string y, int32_t lineno, chpl_string filename);
+int32_t string_index_of(chpl_string x, chpl_string y, int32_t lineno, chpl_string filename);
 chpl_string string_index(chpl_string x, int i, int32_t lineno, chpl_string filename);
 chpl_string string_select(chpl_string x, int low, int high, int32_t lineno, chpl_string filename);
 chpl_string string_strided_select(chpl_string x, int low, int high, int stride, int32_t lineno, chpl_string filename);
Index: runtime/src/chpltypes.c
===================================================================
--- runtime/src/chpltypes.c	(revision 22367)
+++ runtime/src/chpltypes.c	(working copy)
@@ -248,17 +248,28 @@
   return z;
 }
 
+int32_t
+string_index_of(chpl_string x, chpl_string y, int32_t lineno, chpl_string filename) {
+  chpl_string z = strstr(x, y);
+  return z ? (int32_t) (z-x)+1 : 0;
+}
 
 chpl_string
 string_strided_select(chpl_string x, int low, int high, int stride, int32_t lineno, chpl_string filename) {
   int64_t length = string_length(x);
   char* result = NULL;
   char* dst = NULL;
+
+  // Clamp the range to something in the string instead of just crashing.
+  if (high > length)
+    high = length;
+  if (low < 1)
+    low = 1;
+  if (low > length || high < 1 || high < low)
+    return "";
+
   chpl_string src = stride > 0 ? x + low - 1 : x + high - 1;
-  int size = high - low >= 0 ? high - low : 0;
-  if (low < 1 || low > length || high > length) {
-    chpl_error("string index out of bounds", lineno, filename);
-  }
+  int size = high-low;
   result = chpltypes_malloc(size + 2, CHPL_RT_MD_STRING_STRIDED_SELECT_DATA,
                             lineno, filename);
   dst = result;
@@ -285,10 +296,10 @@
 
 chpl_string
 string_index(chpl_string x, int i, int32_t lineno, chpl_string filename) {
+  if (i-1 < 0 || i-1 >= string_length(x))
+    return "";
   char* buffer = chpltypes_malloc(2, CHPL_RT_MD_STRING_COPY_DATA,
                                   lineno, filename);
-  if (i-1 < 0 || i-1 >= string_length(x))
-    chpl_error("string index out of bounds", lineno, filename);
   sprintf(buffer, "%c", x[i-1]);
   return buffer;
 }
Index: test/trivial/bwross/string_index.chpl
===================================================================
--- test/trivial/bwross/string_index.chpl	(revision 0)
+++ test/trivial/bwross/string_index.chpl	(working copy)
@@ -0,0 +1,42 @@
+proc string.indexOf(token:string) {
+  return __primitive("string_index_of", this, token);
+}
+
+var s = "hello world";
+
+// string_index_of
+write(s.indexOf(" "), " ");
+write(s.indexOf("hello"), " ");
+write(s.indexOf("world"), " ");
+write(s.indexOf("nowhere"), " ");
+write(s.indexOf(s), " ");
+writeln(s.indexOf(""));
+
+// string_select
+writeln(s.substring(..));
+writeln(s.substring(..5));
+writeln(s.substring(7..));
+writeln(s.substring(4..8));
+writeln(s.substring(-50..50));
+writeln(s.substring(-100..-50));
+writeln(s.substring(50..100));
+
+// with stride
+writeln(s.substring(1..11 by 2));
+writeln(s.substring(1..11 by -1));
+writeln(s.substring(1..11 by -2));
+writeln(s.substring(1.. by 3));
+writeln(s.substring(2.. by 3));
+writeln(s.substring(-3.. by 3));
+writeln(s.substring(-1.. by 3));
+writeln(s.substring(-2.. by 3));
+
+// string_index cases
+writeln(s.substring(99));
+writeln(s.substring(3));
+writeln(s.substring(-30));
+
+// all together now!
+writeln(s.substring(1..s.indexOf(" ")),
+        "beautifu", s.substring(3),
+        s.substring(s.indexOf(" ")..));
Index: test/trivial/bwross/string_index.good
===================================================================
--- test/trivial/bwross/string_index.good	(revision 0)
+++ test/trivial/bwross/string_index.good	(working copy)
@@ -0,0 +1,20 @@
+6 1 7 0 1 1
+hello world
+hello
+world
+lo wo
+hello world
+
+
+hlowrd
+dlrow olleh
+drwolh
+hlwl
+eood
+l r
+eood
+hlwl
+
+l
+
+hello beautiful world
------------------------------------------------------------------------------
Rapidly troubleshoot problems before they affect your business. Most IT 
organizations don't have a clear picture of how application performance 
affects their revenue. With AppDynamics, you get 100% visibility into your 
Java, .NET, & PHP application. Start your 15-day FREE TRIAL of AppDynamics Pro!
http://pubads.g.doubleclick.net/gampad/clk?id=84349351&iu=/4140/ostg.clktrk
_______________________________________________
Chapel-developers mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/chapel-developers

Reply via email to