Hello, all. Forgive me if I'm doing something wrong; I'm not familiar
with the patch submission procedure, or which parts of the code are OK
to touch.
I added a compiler primitive, "string_index_of", for finding the index of
a substring within a string. I didn't see any other way of doing the same
thing efficiently. It's basically just a wrapper around strstr. It returns
the 1-based index of the first match, or 0 if the substring couldn't be
found. I didn't add a method to the string type (outside of the module I'm
working on) for calling it, but I'm using it in the module for simple
parsing procedures.
I also changed string.substring to more gracefully handle unbounded and
out-of-bounds range limits by taking the intersection of the passed
range and the bounding range of the string (e.g.,
"hello".substring(-5..30) == "hello"). Ranges that don't overlap the
string at all will return an empty string (e.g.,
"hello".substring(50..100) == ""). It takes striding into account as
well. I also made string_index (single character substrings) return an
empty string if the index is out of range. I don't see the danger in
handling substrings like this instead of crashing, and other languages
(e.g., Python) do this as well. All in all I think it makes string
handling a little friendlier.
Also, for edge cases that return empty strings, I simply returned an
empty string literal. I have no idea how safe that is or how it affects
garbage collection. Let me know if it's better practice to allocate a
new string instead.
I put a test case in test/trivial/bwross/string_index.chpl, with its
expected output in test/trivial/bwross/string_index.good.
-- Brandon
Index: compiler/AST/primitive.cpp
===================================================================
--- compiler/AST/primitive.cpp (revision 22367)
+++ compiler/AST/primitive.cpp (working copy)
@@ -540,6 +540,7 @@
prim_def("string_length", returnInfoDefaultInt);
prim_def("ascii", returnInfoInt32);
prim_def("string_index", returnInfoString, true, true);
+ prim_def("string_index_of", returnInfoInt32, true, true);
prim_def(PRIM_STRING_COPY, "string_copy", returnInfoString, false, true);
prim_def(PRIM_STRING_NORMALIZE, "string_normalize", returnInfoVoid, true, false);
prim_def(PRIM_STRING_FROM_C_STRING, "string_from_c_string", returnInfoString, true, true);
Index: modules/internal/ChapelRangeBase.chpl
===================================================================
--- modules/internal/ChapelRangeBase.chpl (revision 22367)
+++ modules/internal/ChapelRangeBase.chpl (working copy)
@@ -1014,13 +1014,9 @@
// Return a substring of a string with a range of indices.
+// The range is first intersected with 1..this.length, so unbounded or
+// out-of-bounds limits are clamped instead of being rejected.
inline proc string.substring(r: rangeBase(?))
{
- if r.boundedType != BoundedRangeType.bounded then
- compilerError("substring indexing undefined on unbounded ranges");
-
- if r.stride != 1 then
- return __primitive("string_strided_select", this, r.alignedLow, r.alignedHigh, r.stride);
- else
- return __primitive("string_select", this, r.low, r.high);
+ var r2 = r[(1..this.length)._base]; // Intersect with string bounds.
+ var lo:int = r2.alignedLow, hi:int = r2.alignedHigh;
+ // NOTE(review): the stride-1 case used to call "string_select"; every range
+ // now goes through "string_strided_select" -- confirm that is intended.
+ return __primitive("string_strided_select", this, lo, hi, r2.stride);
}
Index: runtime/include/chpltypes.h
===================================================================
--- runtime/include/chpltypes.h (revision 22367)
+++ runtime/include/chpltypes.h (working copy)
@@ -229,6 +229,7 @@
void chpl_string_widen(struct chpl_chpl____wide_chpl_string_s* x, chpl_string from, int32_t lineno, chpl_string filename);
void chpl_comm_wide_get_string(chpl_string* local, struct chpl_chpl____wide_chpl_string_s* x, int32_t tid, int32_t lineno, chpl_string filename);
chpl_string string_concat(chpl_string x, chpl_string y, int32_t lineno, chpl_string filename);
+int32_t string_index_of(chpl_string x, chpl_string y, int32_t lineno, chpl_string filename);
chpl_string string_index(chpl_string x, int i, int32_t lineno, chpl_string filename);
chpl_string string_select(chpl_string x, int low, int high, int32_t lineno, chpl_string filename);
chpl_string string_strided_select(chpl_string x, int low, int high, int stride, int32_t lineno, chpl_string filename);
Index: runtime/src/chpltypes.c
===================================================================
--- runtime/src/chpltypes.c (revision 22367)
+++ runtime/src/chpltypes.c (working copy)
@@ -248,17 +248,28 @@
return z;
}
+// Return the 1-based position of the first occurrence of y within x, or 0
+// when strstr finds no match. lineno and filename are accepted but unused.
+int32_t
+string_index_of(chpl_string x, chpl_string y, int32_t lineno, chpl_string filename) {
+  chpl_string found = strstr(x, y);
+  if (!found)
+    return 0;
+  // Pointer difference is the 0-based offset; add 1 for 1-based indexing.
+  return (int32_t) (found-x)+1;
+}
chpl_string
string_strided_select(chpl_string x, int low, int high, int stride, int32_t lineno, chpl_string filename) {
int64_t length = string_length(x);
char* result = NULL;
char* dst = NULL;
+
+ // Clamp the range to something in the string instead of just crashing.
+ if (high > length)
+ high = length;
+ if (low < 1)
+ low = 1;
+ if (low > length || high < 1 || high < low)
+ return "";
+
chpl_string src = stride > 0 ? x + low - 1 : x + high - 1;
- int size = high - low >= 0 ? high - low : 0;
- if (low < 1 || low > length || high > length) {
- chpl_error("string index out of bounds", lineno, filename);
- }
+ int size = high-low;
result = chpltypes_malloc(size + 2, CHPL_RT_MD_STRING_STRIDED_SELECT_DATA,
lineno, filename);
dst = result;
@@ -285,10 +296,10 @@
+// Return the character of x at 1-based index i as a newly allocated
+// one-character string, or "" when i is outside 1..string_length(x).
chpl_string
string_index(chpl_string x, int i, int32_t lineno, chpl_string filename) {
+ // The bounds check now runs before the allocation, so the early return
+ // allocates nothing.
+ // NOTE(review): this path returns a string literal while the in-range path
+ // returns a chpltypes_malloc'd buffer -- confirm no caller frees or writes
+ // to the result.
+ if (i-1 < 0 || i-1 >= string_length(x))
+ return "";
char* buffer = chpltypes_malloc(2, CHPL_RT_MD_STRING_COPY_DATA,
lineno, filename);
- if (i-1 < 0 || i-1 >= string_length(x))
- chpl_error("string index out of bounds", lineno, filename);
sprintf(buffer, "%c", x[i-1]);
return buffer;
}
Index: test/trivial/bwross/string_index.chpl
===================================================================
--- test/trivial/bwross/string_index.chpl (revision 0)
+++ test/trivial/bwross/string_index.chpl (working copy)
@@ -0,0 +1,42 @@
+proc string.indexOf(token:string) { // test-local wrapper over the new primitive
+ return __primitive("string_index_of", this, token);
+}
+
+var s = "hello world";
+
+// string_index_of: 1-based index of the first match, 0 when not found
+write(s.indexOf(" "), " ");
+write(s.indexOf("hello"), " ");
+write(s.indexOf("world"), " ");
+write(s.indexOf("nowhere"), " ");
+write(s.indexOf(s), " ");
+writeln(s.indexOf(""));
+
+// substring over ranges (routed through string_strided_select)
+writeln(s.substring(..));
+writeln(s.substring(..5));
+writeln(s.substring(7..));
+writeln(s.substring(4..8));
+writeln(s.substring(-50..50));
+writeln(s.substring(-100..-50));
+writeln(s.substring(50..100));
+
+// with stride
+writeln(s.substring(1..11 by 2));
+writeln(s.substring(1..11 by -1));
+writeln(s.substring(1..11 by -2));
+writeln(s.substring(1.. by 3));
+writeln(s.substring(2.. by 3));
+writeln(s.substring(-3.. by 3));
+writeln(s.substring(-1.. by 3));
+writeln(s.substring(-2.. by 3));
+
+// string_index cases: out-of-range indices give an empty string
+writeln(s.substring(99));
+writeln(s.substring(3));
+writeln(s.substring(-30));
+
+// all together now!
+writeln(s.substring(1..s.indexOf(" ")),
+ "beautifu", s.substring(3),
+ s.substring(s.indexOf(" ")..));
Index: test/trivial/bwross/string_index.good
===================================================================
--- test/trivial/bwross/string_index.good (revision 0)
+++ test/trivial/bwross/string_index.good (working copy)
@@ -0,0 +1,20 @@
+6 1 7 0 1 1
+hello world
+hello
+world
+lo wo
+hello world
+
+
+hlowrd
+dlrow olleh
+drwolh
+hlwl
+eood
+l r
+eood
+hlwl
+
+l
+
+hello beautiful world
------------------------------------------------------------------------------
Rapidly troubleshoot problems before they affect your business. Most IT
organizations don't have a clear picture of how application performance
affects their revenue. With AppDynamics, you get 100% visibility into your
Java,.NET, & PHP application. Start your 15-day FREE TRIAL of AppDynamics Pro!
http://pubads.g.doubleclick.net/gampad/clk?id=84349351&iu=/4140/ostg.clktrk
_______________________________________________
Chapel-developers mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/chapel-developers