lidavidm commented on a change in pull request #10356: URL: https://github.com/apache/arrow/pull/10356#discussion_r635613255
##########
File path: cpp/src/arrow/compute/kernels/scalar_string.cc
##########
@@ -494,6 +494,95 @@ const FunctionDoc match_substring_regex_doc(
      "position.\n"
      "Null inputs emit null. The pattern must be given in MatchSubstringOptions."),
     {"strings"}, "MatchSubstringOptions");
+
+// SQL LIKE match
+
+/// Convert a SQL-style LIKE pattern (using '%' and '_') into a regex pattern
+std::string MakeLikeRegex(const MatchSubstringOptions& options) {
+  // Allow . to match \n
+  std::string like_pattern = "(?s:^";
+  like_pattern.reserve(options.pattern.size() + 7);
+  bool escaped = false;
+  for (const char c : options.pattern) {
+    if (!escaped && c == '%') {
+      like_pattern.append(".*");
+    } else if (!escaped && c == '_') {
+      like_pattern.append(".");
+    } else if (!escaped && c == '\\') {
+      escaped = true;
+    } else {
+      switch (c) {
+        case '.':
+        case '?':
+        case '+':
+        case '*':
+        case '^':
+        case '$':
+        case '\\':
+        case '[':
+        case '{':
+        case '(':
+        case ')':
+        case '|': {
+          like_pattern.push_back('\\');
+          like_pattern.push_back(c);
+          escaped = false;
+          break;
+        }
+        default: {
+          like_pattern.push_back(c);
+          escaped = false;
+          break;
+        }
+      }
+    }
+  }
+  like_pattern.append("$)");
+  return like_pattern;
+}
+
+// A LIKE pattern matching this regex can be translated into a substring search.
+static RE2 kLikePatternIsSubstringMatch("%+([^%_])*%+");

Review comment:
D'oh, good catch. I've fixed this and adjusted the tests to catch this.

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org