Alfie Richards <[email protected]> writes:
> The string_slice inherits from array_slice and is used to refer to a
> substring of an array that is memory managed elsewhere without modifying
> the underlying array.
>
> For example, this is useful in cases such as when needing to refer to a
> substring of an attribute in the syntax tree.
>
> This commit also adds some minimal helper functions for string_slice,
> such as a strtok alternative, equality operators, strcmp, and a function
> to strip whitespace from the beginning and end of a string_slice.
>
> gcc/ChangeLog:
>
> * vec.cc (string_slice::tokenize): New method.
> (strcmp): Add implementation for string_slice.
> (string_slice::strip): New method.
> (test_string_slice_initializers): New test.
> (test_string_slice_strtok): Ditto.
> (test_string_slice_strcmp): Ditto.
> (test_string_slice_equality): Ditto.
> (test_string_slice_invalid): Ditto.
> (test_string_slice_strip): Ditto.
> (vec_cc_tests): Add new tests.
> * vec.h (class string_slice): New class.
> (strcmp): Add implementation for string_slice.
Thanks, mostly LGTM. Some very minor things below, and a question:
> diff --git a/gcc/vec.cc b/gcc/vec.cc
> index 55f5f3dd447..189cb492c7e 100644
> --- a/gcc/vec.cc
> +++ b/gcc/vec.cc
> @@ -176,6 +176,61 @@ dump_vec_loc_statistics (void)
> vec_mem_desc.dump (VEC_ORIGIN);
> }
>
> +string_slice
> +string_slice::tokenize (string_slice *str, string_slice delims)
> +{
> + const char *ptr = str->begin ();
> +
> + gcc_assert (str->is_valid () && delims.is_valid ());
> +
> + for (; ptr < str->end (); ptr++)
> + for (char c : delims)
> + if (*ptr == c)
> + {
> + /* Update the input string to be the remaining string. */
> + const char* str_begin = str->begin ();
Formatting nit: const char *str_begin
> + *str = string_slice (ptr + 1, str->end ());
> + return string_slice (str_begin, ptr);
> + }
> +
> + /* If there are no delimiters between the start and end, return the whole
> + string. */
> + string_slice res = *str;
> + *str = string_slice::invalid ();
> + return res;
> +}
> +
> +int
> +strcmp (string_slice str1, string_slice str2)
> +{
> + for (unsigned int i = 0; i < str1.size () && i < str2.size (); i++)
> + {
> + if (str1[i] < str2[i])
> + return -1;
> + if (str1[i] > str2[i])
> + return 1;
> + }
> +
> + if (str1.size () < str2.size ())
> + return -1;
> + if (str1.size () > str2.size ())
> + return 1;
> + return 0;
> +}
> +
> +string_slice
> +string_slice::strip ()
> +{
> + const char *start = this->begin ();
> + const char *end = this->end ();
> +
> + while (start < end && ISSPACE (*start))
> + start++;
> + while (end > start && ISSPACE (*(end-1)))
> + end--;
> +
> + return string_slice (start, end-start);
Just string_slice (start, end) should be enough.
> +}
> +
> #if CHECKING_P
> /* Report qsort comparator CMP consistency check failure with P1, P2, P3 as
> witness elements. */
> [...]
> diff --git a/gcc/vec.h b/gcc/vec.h
> index 915df06f03e..d709d339d40 100644
> --- a/gcc/vec.h
> +++ b/gcc/vec.h
> @@ -2484,4 +2484,69 @@ make_array_slice (T *base, unsigned int size)
> # pragma GCC poison m_vec m_vecpfx m_vecdata
> #endif
>
> +/* string_slice inherits from array_slice, specifically to refer to a
> + substring of a character array.
> + It includes some string-like helpers. */
> +class string_slice : public array_slice<const char>
> +{
> +public:
> + explicit string_slice () : array_slice<const char> () {}
> + explicit string_slice (const char *str) : array_slice (str, strlen (str))
> {}
> + explicit string_slice (const char *str, size_t len) :
> + array_slice (str, len) {}
> + explicit string_slice (const char *start, const char *end) :
> + array_slice (start, end-start) {}
Formatting nit: end - start.
What was the reason for making the constructors explicit? It would be nice
if string literals at least could be used implicitly.
Thanks,
Richard
> +
> + friend bool operator== (const string_slice &lhs, const string_slice &rhs)
> + {
> + if (!lhs.is_valid () || !rhs.is_valid ())
> + return false;
> + if (lhs.size () != rhs.size ())
> + return false;
> + return memcmp (lhs.begin (), rhs.begin (), lhs.size ()) == 0;
> + }
> +
> + friend bool operator== (const char *lhs, const string_slice &rhs)
> + {
> + return string_slice (lhs) == rhs;
> + }
> +
> + friend bool operator== (const string_slice &lhs, const char *rhs)
> + {
> + return lhs == string_slice (rhs);
> + }
> +
> + friend bool operator!= (const string_slice &lhs, const string_slice &rhs)
> + {
> + return !(lhs == rhs);
> + }
> +
> + friend bool operator!= (const char *lhs, const string_slice &rhs)
> + {
> + return !(string_slice (lhs) == rhs);
> + }
> +
> + friend bool operator!= (const string_slice &lhs, const char *rhs)
> + {
> + return !(lhs == string_slice (rhs));
> + }
> +
> + /* Returns an invalid string_slice. */
> + static string_slice invalid ()
> + {
> + return string_slice (nullptr, ~0U);
> + }
> +
> + /* tokenize is used to split a string by some delimiter into
> + string_slices. Similar to the POSIX strtok_r, but without modifying the
> + input string, and returning all tokens, which may be empty in the case
> + of an empty input string or consecutive delimiters. */
> + static string_slice tokenize (string_slice *str, string_slice delims);
> +
> + /* Removes white space from the front and back of the string_slice. */
> + string_slice strip ();
> +};
> +
> +int strcmp (string_slice str1, string_slice str2);
> +
> #endif // GCC_VEC_H