pitrou commented on a change in pull request #11955: URL: https://github.com/apache/arrow/pull/11955#discussion_r770466612
########## File path: python/pyarrow/compute.py ########## @@ -326,243 +335,6 @@ def cast(arr, target_type, safe=True): return call_function("cast", [arr], options) -def count_substring(array, pattern, *, ignore_case=False): - """ - Count the occurrences of substring *pattern* in each value of a - string array. - - Parameters - ---------- - array : pyarrow.Array or pyarrow.ChunkedArray - pattern : str - pattern to search for exact matches - ignore_case : bool, default False - Ignore case while searching. - - Returns - ------- - result : pyarrow.Array or pyarrow.ChunkedArray - """ - return call_function("count_substring", [array], - MatchSubstringOptions(pattern, - ignore_case=ignore_case)) - - -def count_substring_regex(array, pattern, *, ignore_case=False): - """ - Count the non-overlapping matches of regex *pattern* in each value - of a string array. - - Parameters - ---------- - array : pyarrow.Array or pyarrow.ChunkedArray - pattern : str - pattern to search for exact matches - ignore_case : bool, default False - Ignore case while searching. - - Returns - ------- - result : pyarrow.Array or pyarrow.ChunkedArray - """ - return call_function("count_substring_regex", [array], - MatchSubstringOptions(pattern, - ignore_case=ignore_case)) - - -def find_substring(array, pattern, *, ignore_case=False): - """ - Find the index of the first occurrence of substring *pattern* in each - value of a string array. - - Parameters - ---------- - array : pyarrow.Array or pyarrow.ChunkedArray - pattern : str - pattern to search for exact matches - ignore_case : bool, default False - Ignore case while searching. - - Returns - ------- - result : pyarrow.Array or pyarrow.ChunkedArray - """ - return call_function("find_substring", [array], - MatchSubstringOptions(pattern, - ignore_case=ignore_case)) - - -def find_substring_regex(array, pattern, *, ignore_case=False): - """ - Find the index of the first match of regex *pattern* in each - value of a string array. - - Parameters - ---------- - array : pyarrow.Array or pyarrow.ChunkedArray - pattern : str - regex pattern to search for - ignore_case : bool, default False - Ignore case while searching. - - Returns - ------- - result : pyarrow.Array or pyarrow.ChunkedArray - """ - return call_function("find_substring_regex", [array], - MatchSubstringOptions(pattern, - ignore_case=ignore_case)) - - -def match_like(array, pattern, *, ignore_case=False): - """ - Test if the SQL-style LIKE pattern *pattern* matches a value of a - string array. - - Parameters - ---------- - array : pyarrow.Array or pyarrow.ChunkedArray - pattern : str - SQL-style LIKE pattern. '%' will match any number of - characters, '_' will match exactly one character, and all - other characters match themselves. To match a literal percent - sign or underscore, precede the character with a backslash. - ignore_case : bool, default False - Ignore case while searching. - - Returns - ------- - result : pyarrow.Array or pyarrow.ChunkedArray - - """ - return call_function("match_like", [array], - MatchSubstringOptions(pattern, - ignore_case=ignore_case)) - - -def match_substring(array, pattern, *, ignore_case=False): - """ - Test if substring *pattern* is contained within a value of a string array. - - Parameters - ---------- - array : pyarrow.Array or pyarrow.ChunkedArray - pattern : str - pattern to search for exact matches - ignore_case : bool, default False - Ignore case while searching. - - Returns - ------- - result : pyarrow.Array or pyarrow.ChunkedArray - """ - return call_function("match_substring", [array], - MatchSubstringOptions(pattern, - ignore_case=ignore_case)) - - -def match_substring_regex(array, pattern, *, ignore_case=False): - """ - Test if regex *pattern* matches at any position a value of a string array. - - Parameters - ---------- - array : pyarrow.Array or pyarrow.ChunkedArray - pattern : str - regex pattern to search - ignore_case : bool, default False - Ignore case while searching. - - Returns - ------- - result : pyarrow.Array or pyarrow.ChunkedArray - """ - return call_function("match_substring_regex", [array], - MatchSubstringOptions(pattern, - ignore_case=ignore_case)) - - -def mode(array, n=1, *, skip_nulls=True, min_count=0): - """ - Return top-n most common values and number of times they occur in a passed - numerical (chunked) array, in descending order of occurrence. If there are - multiple values with same count, the smaller one is returned first. - - Parameters - ---------- - array : pyarrow.Array or pyarrow.ChunkedArray - n : int, default 1 - Specify the top-n values. - skip_nulls : bool, default True - If True, ignore nulls in the input. Else return an empty array - if any input is null. - min_count : int, default 0 - If there are fewer than this many values in the input, return - an empty array. - - Returns - ------- - An array of <input type "Mode", int64_t "Count"> structs - - Examples - -------- - >>> import pyarrow as pa - >>> import pyarrow.compute as pc - >>> arr = pa.array([1, 1, 2, 2, 3, 2, 2, 2]) - >>> modes = pc.mode(arr, 2) - >>> modes[0] - <pyarrow.StructScalar: {'mode': 2, 'count': 5}> - >>> modes[1] - <pyarrow.StructScalar: {'mode': 1, 'count': 2}> - """ - options = ModeOptions(n, skip_nulls=skip_nulls, min_count=min_count) - return call_function("mode", [array], options) - - -def filter(data, mask, null_selection_behavior='drop'): Review comment: Hmm. I don't really like the idea of maintaining these special case wrappers, though. Or perhaps we could raise a deprecation warning before removing them. @amol- Thoughts? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org