This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 5249f99e0e [arrow-string] Implement string view support for
`regexp_match` (#6849)
5249f99e0e is described below
commit 5249f99e0e6671fc06996365dd4cf22a8427348c
Author: Tai Le Manh <[email protected]>
AuthorDate: Mon Dec 30 19:42:37 2024 +0700
[arrow-string] Implement string view support for `regexp_match` (#6849)
* [arrow-string] Implement string view suport for regexp match
Signed-off-by: Tai Le Manh <[email protected]>
* update unit tests
* fix clippy warnings
* Add test cases
Signed-off-by: Tai Le Manh <[email protected]>
---------
Signed-off-by: Tai Le Manh <[email protected]>
---
arrow-string/src/regexp.rs | 640 ++++++++++++++++++++++++++++++++-------------
1 file changed, 452 insertions(+), 188 deletions(-)
diff --git a/arrow-string/src/regexp.rs b/arrow-string/src/regexp.rs
index d14662be72..f3893cd5bd 100644
--- a/arrow-string/src/regexp.rs
+++ b/arrow-string/src/regexp.rs
@@ -20,7 +20,9 @@
use crate::like::StringArrayType;
-use arrow_array::builder::{BooleanBufferBuilder, GenericStringBuilder,
ListBuilder};
+use arrow_array::builder::{
+ BooleanBufferBuilder, GenericStringBuilder, ListBuilder, StringViewBuilder,
+};
use arrow_array::cast::AsArray;
use arrow_array::*;
use arrow_buffer::NullBuffer;
@@ -243,78 +245,96 @@ where
Ok(BooleanArray::from(data))
}
-fn regexp_array_match<OffsetSize: OffsetSizeTrait>(
- array: &GenericStringArray<OffsetSize>,
- regex_array: &GenericStringArray<OffsetSize>,
- flags_array: Option<&GenericStringArray<OffsetSize>>,
-) -> Result<ArrayRef, ArrowError> {
- let mut patterns: HashMap<String, Regex> = HashMap::new();
- let builder: GenericStringBuilder<OffsetSize> =
GenericStringBuilder::with_capacity(0, 0);
- let mut list_builder = ListBuilder::new(builder);
+macro_rules! process_regexp_array_match {
+ ($array:expr, $regex_array:expr, $flags_array:expr, $list_builder:expr) =>
{
+ let mut patterns: HashMap<String, Regex> = HashMap::new();
- let complete_pattern = match flags_array {
- Some(flags) => Box::new(
- regex_array
- .iter()
- .zip(flags.iter())
- .map(|(pattern, flags)| {
+ let complete_pattern = match $flags_array {
+ Some(flags) => Box::new($regex_array.iter().zip(flags.iter()).map(
+ |(pattern, flags)| {
pattern.map(|pattern| match flags {
Some(value) => format!("(?{value}){pattern}"),
None => pattern.to_string(),
})
- }),
- ) as Box<dyn Iterator<Item = Option<String>>>,
- None => Box::new(
- regex_array
- .iter()
- .map(|pattern| pattern.map(|pattern| pattern.to_string())),
- ),
- };
+ },
+ )) as Box<dyn Iterator<Item = Option<String>>>,
+ None => Box::new(
+ $regex_array
+ .iter()
+ .map(|pattern| pattern.map(|pattern| pattern.to_string())),
+ ),
+ };
- array
- .iter()
- .zip(complete_pattern)
- .map(|(value, pattern)| {
- match (value, pattern) {
- // Required for Postgres compatibility:
- // SELECT regexp_match('foobarbequebaz', ''); = {""}
- (Some(_), Some(pattern)) if pattern == *"" => {
- list_builder.values().append_value("");
- list_builder.append(true);
- }
- (Some(value), Some(pattern)) => {
- let existing_pattern = patterns.get(&pattern);
- let re = match existing_pattern {
- Some(re) => re,
- None => {
- let re = Regex::new(pattern.as_str()).map_err(|e| {
- ArrowError::ComputeError(format!(
- "Regular expression did not compile: {e:?}"
- ))
- })?;
- patterns.entry(pattern).or_insert(re)
- }
- };
- match re.captures(value) {
- Some(caps) => {
- let mut iter = caps.iter();
- if caps.len() > 1 {
- iter.next();
- }
- for m in iter.flatten() {
- list_builder.values().append_value(m.as_str());
+ $array
+ .iter()
+ .zip(complete_pattern)
+ .map(|(value, pattern)| {
+ match (value, pattern) {
+ // Required for Postgres compatibility:
+ // SELECT regexp_match('foobarbequebaz', ''); = {""}
+ (Some(_), Some(pattern)) if pattern == *"" => {
+ $list_builder.values().append_value("");
+ $list_builder.append(true);
+ }
+ (Some(value), Some(pattern)) => {
+ let existing_pattern = patterns.get(&pattern);
+ let re = match existing_pattern {
+ Some(re) => re,
+ None => {
+ let re =
Regex::new(pattern.as_str()).map_err(|e| {
+ ArrowError::ComputeError(format!(
+ "Regular expression did not compile:
{e:?}"
+ ))
+ })?;
+ patterns.entry(pattern).or_insert(re)
}
+ };
+ match re.captures(value) {
+ Some(caps) => {
+ let mut iter = caps.iter();
+ if caps.len() > 1 {
+ iter.next();
+ }
+ for m in iter.flatten() {
+
$list_builder.values().append_value(m.as_str());
+ }
- list_builder.append(true);
+ $list_builder.append(true);
+ }
+ None => $list_builder.append(false),
}
- None => list_builder.append(false),
}
+ _ => $list_builder.append(false),
}
- _ => list_builder.append(false),
- }
- Ok(())
- })
- .collect::<Result<Vec<()>, ArrowError>>()?;
+ Ok(())
+ })
+ .collect::<Result<Vec<()>, ArrowError>>()?;
+ };
+}
+
+fn regexp_array_match<OffsetSize: OffsetSizeTrait>(
+ array: &GenericStringArray<OffsetSize>,
+ regex_array: &GenericStringArray<OffsetSize>,
+ flags_array: Option<&GenericStringArray<OffsetSize>>,
+) -> Result<ArrayRef, ArrowError> {
+ let builder: GenericStringBuilder<OffsetSize> =
GenericStringBuilder::with_capacity(0, 0);
+ let mut list_builder = ListBuilder::new(builder);
+
+ process_regexp_array_match!(array, regex_array, flags_array, list_builder);
+
+ Ok(Arc::new(list_builder.finish()))
+}
+
+fn regexp_array_match_utf8view(
+ array: &StringViewArray,
+ regex_array: &StringViewArray,
+ flags_array: Option<&StringViewArray>,
+) -> Result<ArrayRef, ArrowError> {
+ let builder = StringViewBuilder::with_capacity(0);
+ let mut list_builder = ListBuilder::new(builder);
+
+ process_regexp_array_match!(array, regex_array, flags_array, list_builder);
+
Ok(Arc::new(list_builder.finish()))
}
@@ -333,6 +353,54 @@ fn get_scalar_pattern_flag<'a, OffsetSize:
OffsetSizeTrait>(
}
}
+fn get_scalar_pattern_flag_utf8view<'a>(
+ regex_array: &'a dyn Array,
+ flag_array: Option<&'a dyn Array>,
+) -> (Option<&'a str>, Option<&'a str>) {
+ let regex = regex_array.as_string_view();
+ let regex = regex.is_valid(0).then(|| regex.value(0));
+
+ if let Some(flag_array) = flag_array {
+ let flag = flag_array.as_string_view();
+ (regex, flag.is_valid(0).then(|| flag.value(0)))
+ } else {
+ (regex, None)
+ }
+}
+
+macro_rules! process_regexp_match {
+ ($array:expr, $regex:expr, $list_builder:expr) => {
+ $array
+ .iter()
+ .map(|value| {
+ match value {
+ // Required for Postgres compatibility:
+ // SELECT regexp_match('foobarbequebaz', ''); = {""}
+ Some(_) if $regex.as_str().is_empty() => {
+ $list_builder.values().append_value("");
+ $list_builder.append(true);
+ }
+ Some(value) => match $regex.captures(value) {
+ Some(caps) => {
+ let mut iter = caps.iter();
+ if caps.len() > 1 {
+ iter.next();
+ }
+ for m in iter.flatten() {
+
$list_builder.values().append_value(m.as_str());
+ }
+ $list_builder.append(true);
+ }
+ None => $list_builder.append(false),
+ },
+ None => $list_builder.append(false),
+ }
+ Ok(())
+ })
+ .collect::<Result<Vec<()>, ArrowError>>()?
+ };
+}
+
fn regexp_scalar_match<OffsetSize: OffsetSizeTrait>(
array: &GenericStringArray<OffsetSize>,
regex: &Regex,
@@ -340,35 +408,19 @@ fn regexp_scalar_match<OffsetSize: OffsetSizeTrait>(
let builder: GenericStringBuilder<OffsetSize> =
GenericStringBuilder::with_capacity(0, 0);
let mut list_builder = ListBuilder::new(builder);
- array
- .iter()
- .map(|value| {
- match value {
- // Required for Postgres compatibility:
- // SELECT regexp_match('foobarbequebaz', ''); = {""}
- Some(_) if regex.as_str() == "" => {
- list_builder.values().append_value("");
- list_builder.append(true);
- }
- Some(value) => match regex.captures(value) {
- Some(caps) => {
- let mut iter = caps.iter();
- if caps.len() > 1 {
- iter.next();
- }
- for m in iter.flatten() {
- list_builder.values().append_value(m.as_str());
- }
+ process_regexp_match!(array, regex, list_builder);
- list_builder.append(true);
- }
- None => list_builder.append(false),
- },
- _ => list_builder.append(false),
- }
- Ok(())
- })
- .collect::<Result<Vec<()>, ArrowError>>()?;
+ Ok(Arc::new(list_builder.finish()))
+}
+
+fn regexp_scalar_match_utf8view(
+ array: &StringViewArray,
+ regex: &Regex,
+) -> Result<ArrayRef, ArrowError> {
+ let builder = StringViewBuilder::with_capacity(0);
+ let mut list_builder = ListBuilder::new(builder);
+
+ process_regexp_match!(array, regex, list_builder);
Ok(Arc::new(list_builder.finish()))
}
@@ -406,7 +458,7 @@ pub fn regexp_match(
if array.data_type() != rhs.data_type() {
return Err(ArrowError::ComputeError(
- "regexp_match() requires both array and pattern to be either Utf8
or LargeUtf8"
+ "regexp_match() requires both array and pattern to be either Utf8,
Utf8View or LargeUtf8"
.to_string(),
));
}
@@ -428,7 +480,7 @@ pub fn regexp_match(
if flags_array.is_some() && rhs.data_type() != flags.unwrap().data_type() {
return Err(ArrowError::ComputeError(
- "regexp_match() requires both pattern and flags to be either
string or largestring"
+ "regexp_match() requires both pattern and flags to be either Utf8,
Utf8View or LargeUtf8"
.to_string(),
));
}
@@ -436,11 +488,13 @@ pub fn regexp_match(
if is_rhs_scalar {
// Regex and flag is scalars
let (regex, flag) = match rhs.data_type() {
+ DataType::Utf8View => get_scalar_pattern_flag_utf8view(rhs, flags),
DataType::Utf8 => get_scalar_pattern_flag::<i32>(rhs, flags),
DataType::LargeUtf8 => get_scalar_pattern_flag::<i64>(rhs, flags),
_ => {
return Err(ArrowError::ComputeError(
- "regexp_match() requires pattern to be either Utf8 or
LargeUtf8".to_string(),
+ "regexp_match() requires pattern to be either Utf8,
Utf8View or LargeUtf8"
+ .to_string(),
));
}
};
@@ -468,14 +522,21 @@ pub fn regexp_match(
})?;
match array.data_type() {
+ DataType::Utf8View =>
regexp_scalar_match_utf8view(array.as_string_view(), &re),
DataType::Utf8 => regexp_scalar_match(array.as_string::<i32>(),
&re),
DataType::LargeUtf8 =>
regexp_scalar_match(array.as_string::<i64>(), &re),
_ => Err(ArrowError::ComputeError(
- "regexp_match() requires array to be either Utf8 or
LargeUtf8".to_string(),
+ "regexp_match() requires array to be either Utf8, Utf8View or
LargeUtf8"
+ .to_string(),
)),
}
} else {
match array.data_type() {
+ DataType::Utf8View => {
+ let regex_array = rhs.as_string_view();
+ let flags_array = flags.map(|flags| flags.as_string_view());
+ regexp_array_match_utf8view(array.as_string_view(),
regex_array, flags_array)
+ }
DataType::Utf8 => {
let regex_array = rhs.as_string();
let flags_array = flags.map(|flags| flags.as_string());
@@ -487,7 +548,8 @@ pub fn regexp_match(
regexp_array_match(array.as_string::<i64>(), regex_array,
flags_array)
}
_ => Err(ArrowError::ComputeError(
- "regexp_match() requires array to be either Utf8 or
LargeUtf8".to_string(),
+ "regexp_match() requires array to be either Utf8, Utf8View or
LargeUtf8"
+ .to_string(),
)),
}
}
@@ -497,114 +559,316 @@ pub fn regexp_match(
mod tests {
use super::*;
- #[test]
- fn match_single_group() {
- let values = vec![
+ macro_rules! test_match_single_group {
+ ($test_name:ident, $values:expr, $patterns:expr, $arr_type:ty,
$builder_type:ty, $expected:expr) => {
+ #[test]
+ fn $test_name() {
+ let array: $arr_type = <$arr_type>::from($values);
+ let pattern: $arr_type = <$arr_type>::from($patterns);
+
+ let actual = regexp_match(&array, &pattern, None).unwrap();
+
+ let elem_builder: $builder_type = <$builder_type>::new();
+ let mut expected_builder = ListBuilder::new(elem_builder);
+
+ for val in $expected {
+ match val {
+ Some(v) => {
+ expected_builder.values().append_value(v);
+ expected_builder.append(true);
+ }
+ None => expected_builder.append(false),
+ }
+ }
+
+ let expected = expected_builder.finish();
+ let result =
actual.as_any().downcast_ref::<ListArray>().unwrap();
+ assert_eq!(&expected, result);
+ }
+ };
+ }
+
+ test_match_single_group!(
+ match_single_group_string,
+ vec![
Some("abc-005-def"),
Some("X-7-5"),
Some("X545"),
None,
Some("foobarbequebaz"),
Some("foobarbequebaz"),
- ];
- let array = StringArray::from(values);
- let mut pattern_values = vec![r".*-(\d*)-.*"; 4];
- pattern_values.push(r"(bar)(bequ1e)");
- pattern_values.push("");
- let pattern = GenericStringArray::<i32>::from(pattern_values);
- let actual = regexp_match(&array, &pattern, None).unwrap();
- let elem_builder: GenericStringBuilder<i32> =
GenericStringBuilder::new();
- let mut expected_builder = ListBuilder::new(elem_builder);
- expected_builder.values().append_value("005");
- expected_builder.append(true);
- expected_builder.values().append_value("7");
- expected_builder.append(true);
- expected_builder.append(false);
- expected_builder.append(false);
- expected_builder.append(false);
- expected_builder.values().append_value("");
- expected_builder.append(true);
- let expected = expected_builder.finish();
- let result = actual.as_any().downcast_ref::<ListArray>().unwrap();
- assert_eq!(&expected, result);
- }
+ ],
+ vec![
+ r".*-(\d*)-.*",
+ r".*-(\d*)-.*",
+ r".*-(\d*)-.*",
+ r".*-(\d*)-.*",
+ r"(bar)(bequ1e)",
+ ""
+ ],
+ StringArray,
+ GenericStringBuilder<i32>,
+ [Some("005"), Some("7"), None, None, None, Some("")]
+ );
+ test_match_single_group!(
+ match_single_group_string_view,
+ vec![
+ Some("abc-005-def"),
+ Some("X-7-5"),
+ Some("X545"),
+ None,
+ Some("foobarbequebaz"),
+ Some("foobarbequebaz"),
+ ],
+ vec![
+ r".*-(\d*)-.*",
+ r".*-(\d*)-.*",
+ r".*-(\d*)-.*",
+ r".*-(\d*)-.*",
+ r"(bar)(bequ1e)",
+ ""
+ ],
+ StringViewArray,
+ StringViewBuilder,
+ [Some("005"), Some("7"), None, None, None, Some("")]
+ );
+
+ macro_rules! test_match_single_group_with_flags {
+ ($test_name:ident, $values:expr, $patterns:expr, $flags:expr,
$array_type:ty, $builder_type:ty, $expected:expr) => {
+ #[test]
+ fn $test_name() {
+ let array: $array_type = <$array_type>::from($values);
+ let pattern: $array_type = <$array_type>::from($patterns);
+ let flags: $array_type = <$array_type>::from($flags);
+
+ let actual = regexp_match(&array, &pattern,
Some(&flags)).unwrap();
- #[test]
- fn match_single_group_with_flags() {
- let values = vec![Some("abc-005-def"), Some("X-7-5"), Some("X545"),
None];
- let array = StringArray::from(values);
- let pattern = StringArray::from(vec![r"x.*-(\d*)-.*"; 4]);
- let flags = StringArray::from(vec!["i"; 4]);
- let actual = regexp_match(&array, &pattern, Some(&flags)).unwrap();
- let elem_builder: GenericStringBuilder<i32> =
GenericStringBuilder::with_capacity(0, 0);
- let mut expected_builder = ListBuilder::new(elem_builder);
- expected_builder.append(false);
- expected_builder.values().append_value("7");
- expected_builder.append(true);
- expected_builder.append(false);
- expected_builder.append(false);
- let expected = expected_builder.finish();
- let result = actual.as_any().downcast_ref::<ListArray>().unwrap();
- assert_eq!(&expected, result);
+ let elem_builder: $builder_type = <$builder_type>::new();
+ let mut expected_builder = ListBuilder::new(elem_builder);
+
+ for val in $expected {
+ match val {
+ Some(v) => {
+ expected_builder.values().append_value(v);
+ expected_builder.append(true);
+ }
+ None => {
+ expected_builder.append(false);
+ }
+ }
+ }
+
+ let expected = expected_builder.finish();
+ let result =
actual.as_any().downcast_ref::<ListArray>().unwrap();
+ assert_eq!(&expected, result);
+ }
+ };
}
- #[test]
- fn match_scalar_pattern() {
- let values = vec![Some("abc-005-def"), Some("X-7-5"), Some("X545"),
None];
- let array = StringArray::from(values);
- let pattern = Scalar::new(StringArray::from(vec![r"x.*-(\d*)-.*"; 1]));
- let flags = Scalar::new(StringArray::from(vec!["i"; 1]));
- let actual = regexp_match(&array, &pattern, Some(&flags)).unwrap();
- let elem_builder: GenericStringBuilder<i32> =
GenericStringBuilder::with_capacity(0, 0);
- let mut expected_builder = ListBuilder::new(elem_builder);
- expected_builder.append(false);
- expected_builder.values().append_value("7");
- expected_builder.append(true);
- expected_builder.append(false);
- expected_builder.append(false);
- let expected = expected_builder.finish();
- let result = actual.as_any().downcast_ref::<ListArray>().unwrap();
- assert_eq!(&expected, result);
-
- // No flag
- let values = vec![Some("abc-005-def"), Some("x-7-5"), Some("X545"),
None];
- let array = StringArray::from(values);
- let actual = regexp_match(&array, &pattern, None).unwrap();
- let result = actual.as_any().downcast_ref::<ListArray>().unwrap();
- assert_eq!(&expected, result);
+ test_match_single_group_with_flags!(
+ match_single_group_with_flags_string,
+ vec![Some("abc-005-def"), Some("X-7-5"), Some("X545"), None],
+ vec![r"x.*-(\d*)-.*"; 4],
+ vec!["i"; 4],
+ StringArray,
+ GenericStringBuilder<i32>,
+ [None, Some("7"), None, None]
+ );
+ test_match_single_group_with_flags!(
+ match_single_group_with_flags_stringview,
+ vec![Some("abc-005-def"), Some("X-7-5"), Some("X545"), None],
+ vec![r"x.*-(\d*)-.*"; 4],
+ vec!["i"; 4],
+ StringViewArray,
+ StringViewBuilder,
+ [None, Some("7"), None, None]
+ );
+
+ macro_rules! test_match_scalar_pattern {
+ ($test_name:ident, $values:expr, $pattern:expr, $flag:expr,
$array_type:ty, $builder_type:ty, $expected:expr) => {
+ #[test]
+ fn $test_name() {
+ let array: $array_type = <$array_type>::from($values);
+
+ let pattern_scalar =
Scalar::new(<$array_type>::from(vec![$pattern; 1]));
+ let flag_scalar = Scalar::new(<$array_type>::from(vec![$flag;
1]));
+
+ let actual = regexp_match(&array, &pattern_scalar,
Some(&flag_scalar)).unwrap();
+
+ let elem_builder: $builder_type = <$builder_type>::new();
+ let mut expected_builder = ListBuilder::new(elem_builder);
+
+ for val in $expected {
+ match val {
+ Some(v) => {
+ expected_builder.values().append_value(v);
+ expected_builder.append(true);
+ }
+ None => expected_builder.append(false),
+ }
+ }
+
+ let expected = expected_builder.finish();
+ let result =
actual.as_any().downcast_ref::<ListArray>().unwrap();
+ assert_eq!(&expected, result);
+ }
+ };
}
- #[test]
- fn match_scalar_no_pattern() {
- let values = vec![Some("abc-005-def"), Some("X-7-5"), Some("X545"),
None];
- let array = StringArray::from(values);
- let pattern = Scalar::new(new_null_array(&DataType::Utf8, 1));
- let actual = regexp_match(&array, &pattern, None).unwrap();
- let elem_builder: GenericStringBuilder<i32> =
GenericStringBuilder::with_capacity(0, 0);
- let mut expected_builder = ListBuilder::new(elem_builder);
- expected_builder.append(false);
- expected_builder.append(false);
- expected_builder.append(false);
- expected_builder.append(false);
- let expected = expected_builder.finish();
- let result = actual.as_any().downcast_ref::<ListArray>().unwrap();
- assert_eq!(&expected, result);
+ test_match_scalar_pattern!(
+ match_scalar_pattern_string_with_flags,
+ vec![
+ Some("abc-005-def"),
+ Some("x-7-5"),
+ Some("X-0-Y"),
+ Some("X545"),
+ None
+ ],
+ r"x.*-(\d*)-.*",
+ Some("i"),
+ StringArray,
+ GenericStringBuilder<i32>,
+ [None, Some("7"), Some("0"), None, None]
+ );
+ test_match_scalar_pattern!(
+ match_scalar_pattern_stringview_with_flags,
+ vec![
+ Some("abc-005-def"),
+ Some("x-7-5"),
+ Some("X-0-Y"),
+ Some("X545"),
+ None
+ ],
+ r"x.*-(\d*)-.*",
+ Some("i"),
+ StringViewArray,
+ StringViewBuilder,
+ [None, Some("7"), Some("0"), None, None]
+ );
+
+ test_match_scalar_pattern!(
+ match_scalar_pattern_string_no_flags,
+ vec![
+ Some("abc-005-def"),
+ Some("x-7-5"),
+ Some("X-0-Y"),
+ Some("X545"),
+ None
+ ],
+ r"x.*-(\d*)-.*",
+ None::<&str>,
+ StringArray,
+ GenericStringBuilder<i32>,
+ [None, Some("7"), None, None, None]
+ );
+ test_match_scalar_pattern!(
+ match_scalar_pattern_stringview_no_flags,
+ vec![
+ Some("abc-005-def"),
+ Some("x-7-5"),
+ Some("X-0-Y"),
+ Some("X545"),
+ None
+ ],
+ r"x.*-(\d*)-.*",
+ None::<&str>,
+ StringViewArray,
+ StringViewBuilder,
+ [None, Some("7"), None, None, None]
+ );
+
+ macro_rules! test_match_scalar_no_pattern {
+ ($test_name:ident, $values:expr, $array_type:ty, $pattern_type:expr,
$builder_type:ty, $expected:expr) => {
+ #[test]
+ fn $test_name() {
+ let array: $array_type = <$array_type>::from($values);
+ let pattern = Scalar::new(new_null_array(&$pattern_type, 1));
+
+ let actual = regexp_match(&array, &pattern, None).unwrap();
+
+ let elem_builder: $builder_type = <$builder_type>::new();
+ let mut expected_builder = ListBuilder::new(elem_builder);
+
+ for val in $expected {
+ match val {
+ Some(v) => {
+ expected_builder.values().append_value(v);
+ expected_builder.append(true);
+ }
+ None => expected_builder.append(false),
+ }
+ }
+
+ let expected = expected_builder.finish();
+ let result =
actual.as_any().downcast_ref::<ListArray>().unwrap();
+ assert_eq!(&expected, result);
+ }
+ };
}
- #[test]
- fn test_single_group_not_skip_match() {
- let array = StringArray::from(vec![Some("foo"), Some("bar")]);
- let pattern = GenericStringArray::<i32>::from(vec![r"foo"]);
- let actual = regexp_match(&array, &pattern, None).unwrap();
- let result = actual.as_any().downcast_ref::<ListArray>().unwrap();
- let elem_builder: GenericStringBuilder<i32> =
GenericStringBuilder::new();
- let mut expected_builder = ListBuilder::new(elem_builder);
- expected_builder.values().append_value("foo");
- expected_builder.append(true);
- let expected = expected_builder.finish();
- assert_eq!(&expected, result);
+ test_match_scalar_no_pattern!(
+ match_scalar_no_pattern_string,
+ vec![Some("abc-005-def"), Some("X-7-5"), Some("X545"), None],
+ StringArray,
+ DataType::Utf8,
+ GenericStringBuilder<i32>,
+ [None::<&str>, None, None, None]
+ );
+ test_match_scalar_no_pattern!(
+ match_scalar_no_pattern_stringview,
+ vec![Some("abc-005-def"), Some("X-7-5"), Some("X545"), None],
+ StringViewArray,
+ DataType::Utf8View,
+ StringViewBuilder,
+ [None::<&str>, None, None, None]
+ );
+
+ macro_rules! test_match_single_group_not_skip {
+ ($test_name:ident, $values:expr, $pattern:expr, $array_type:ty,
$builder_type:ty, $expected:expr) => {
+ #[test]
+ fn $test_name() {
+ let array: $array_type = <$array_type>::from($values);
+ let pattern: $array_type = <$array_type>::from(vec![$pattern]);
+
+ let actual = regexp_match(&array, &pattern, None).unwrap();
+
+ let elem_builder: $builder_type = <$builder_type>::new();
+ let mut expected_builder = ListBuilder::new(elem_builder);
+
+ for val in $expected {
+ match val {
+ Some(v) => {
+ expected_builder.values().append_value(v);
+ expected_builder.append(true);
+ }
+ None => expected_builder.append(false),
+ }
+ }
+
+ let expected = expected_builder.finish();
+ let result =
actual.as_any().downcast_ref::<ListArray>().unwrap();
+ assert_eq!(&expected, result);
+ }
+ };
}
+ test_match_single_group_not_skip!(
+ match_single_group_not_skip_string,
+ vec![Some("foo"), Some("bar")],
+ r"foo",
+ StringArray,
+ GenericStringBuilder<i32>,
+ [Some("foo")]
+ );
+ test_match_single_group_not_skip!(
+ match_single_group_not_skip_stringview,
+ vec![Some("foo"), Some("bar")],
+ r"foo",
+ StringViewArray,
+ StringViewBuilder,
+ [Some("foo")]
+ );
+
macro_rules! test_flag_utf8 {
($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr)
=> {
#[test]