Throne3d commented on code in PR #750:
URL: https://github.com/apache/datafusion-python/pull/750#discussion_r1671173527


##########
python/datafusion/functions.py:
##########
@@ -15,9 +15,1731 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from __future__ import annotations
 
-from ._internal import functions
+# from datafusion._internal.context import SessionContext
+# from datafusion._internal.expr import Expr
+# from datafusion._internal.expr.conditional_expr import CaseBuilder
+# from datafusion._internal.expr.window import WindowFrame
 
+from datafusion._internal import functions as f, common
+from datafusion.expr import CaseBuilder, Expr, WindowFrame
+from datafusion.context import SessionContext
 
-def __getattr__(name):
-    return getattr(functions, name)
+
+def isnan(expr: Expr) -> Expr:
+    """
+    Returns true if a given number is +NaN or -NaN otherwise returns false.
+    """
+    return Expr(f.isnan(expr.expr))
+
+
+def nullif(expr1: Expr, expr2: Expr) -> Expr:
+    """
+    Returns NULL if expr1 equals expr2; otherwise it returns expr1. This can 
be used to perform the inverse operation of the COALESCE expression.
+    """
+    return Expr(f.nullif(expr1.expr, expr2.expr))
+
+
+def encode(input: Expr, encoding: Expr) -> Expr:
+    """
+    Encode the `input`, using the `encoding`. encoding can be base64 or hex.
+    """
+    return Expr(f.encode(input.expr, encoding.expr))
+
+
+def decode(input: Expr, encoding: Expr) -> Expr:
+    """
+    Decode the `input`, using the `encoding`. encoding can be base64 or hex.
+    """
+    return Expr(f.decode(input.expr, encoding.expr))
+
+
+def array_to_string(expr: Expr, delimiter: Expr) -> Expr:
+    """
+    Converts each element to its text representation.
+    """
+    return Expr(f.array_to_string(expr.expr, delimiter.expr))
+
+
+def array_join(expr: Expr, delimiter: Expr) -> Expr:
+    """
+    Converts each element to its text representation.
+    This is an alias for :func:`array_to_string`.
+    """
+    return array_to_string(expr, delimiter)
+
+
+def list_to_string(expr: Expr, delimiter: Expr) -> Expr:
+    """
+    Converts each element to its text representation.
+    This is an alias for :func:`array_to_string`.
+    """
+    return array_to_string(expr, delimiter)
+
+
+def list_join(expr: Expr, delimiter: Expr) -> Expr:
+    """
+    Converts each element to its text representation.
+    This is an alias for :func:`array_to_string`.
+    """
+    return array_to_string(expr, delimiter)
+
+
+def in_list(arg: Expr, values: list[Expr], negated: bool = False) -> Expr:
+    """
+    Returns whether the argument is contained within the list `values`.
+    """
+    values = [v.expr for v in values]
+    return Expr(f.in_list(arg.expr, values, negated))
+
+
+def digest(value: Expr, method: Expr) -> Expr:
+    """
+    Computes the binary hash of an expression using the specified algorithm.
+    Standard algorithms are md5, sha224, sha256, sha384, sha512, blake2s, 
blake2b, and blake3.
+    """
+    return Expr(f.digest(value.expr, method.expr))
+
+
+def concat(*args: Expr) -> Expr:
+    """
+    Concatenates the text representations of all the arguments. NULL arguments 
are ignored.
+    """
+    args = [arg.expr for arg in args]
+    return Expr(f.concat(*args))
+
+
+def concat_ws(separator: str, *args: Expr) -> Expr:
+    """
+    Concatenates the list `args` with the separator. `NULL` arguments are 
ignored. `separator` should not be `NULL`.
+    """
+    args = [arg.expr for arg in args]
+    return Expr(f.concat_ws(separator, *args))
+
+
+def order_by(expr: Expr, ascending: bool = True, nulls_first: bool = True) -> 
Expr:
+    """
+    Creates a new sort expression.
+    """
+    return Expr(f.order_by(expr.expr, ascending, nulls_first))
+
+
+def alias(expr: Expr, name: str) -> Expr:
+    """
+    Creates an alias expression.
+    """
+    return Expr(f.alias(expr.expr, name))
+
+
+def col(name: str) -> Expr:
+    """
+    Creates a column reference expression.
+    """
+    return Expr(f.col(name))
+
+
+def count_star() -> Expr:
+    """
+    Create a COUNT(1) aggregate expression.
+    """
+    return Expr(f.count_star())
+
+
+def case(expr: Expr) -> CaseBuilder:
+    """
+    Create a CASE WHEN statement with literal WHEN expressions for comparison 
to the base expression.
+    """
+    return CaseBuilder(f.case(expr.expr))
+
+
+def window(
+    name: str,
+    args: list[Expr],
+    partition_by: list[Expr] | None = None,
+    order_by: list[Expr] | None = None,
+    window_frame: WindowFrame | None = None,
+    ctx: SessionContext | None = None,
+) -> Expr:
+    """
+    Creates a new Window function expression.
+    """
+    args = [a.expr for a in args]
+    partition_by = [e.expr for e in partition_by] if partition_by is not None 
else None
+    order_by = [o.expr for o in order_by] if order_by is not None else None
+    window_frame = window_frame.window_frame if window_frame is not None else 
None
+    return Expr(f.window(name, args, partition_by, order_by, window_frame, 
ctx))
+
+
+# scalar functions
+def abs(arg: Expr) -> Expr:
+    """
+    Return the absolute value of a given number.
+
+    Returns
+    -------
+    Expr
+        A new expression representing the absolute value of the input 
expression.
+    """
+    return Expr(f.abs(arg.expr))
+
+
+def acos(arg: Expr) -> Expr:
+    """
+    Returns the arc cosine or inverse cosine of a number.
+
+    Returns
+    -------
+    Expr
+        A new expression representing the arc cosine of the input expression.
+    """
+    return Expr(f.acos(arg.expr))
+
+
+def acosh(arg: Expr) -> Expr:
+    """
+    Returns inverse hyperbolic cosine.
+    """
+    return Expr(f.acosh(arg.expr))
+
+
+def ascii(arg: Expr) -> Expr:
+    """
+    Returns the numeric code of the first character of the argument.
+    """
+    return Expr(f.ascii(arg.expr))
+
+
+def asin(arg: Expr) -> Expr:
+    """
+    Returns the arc sine or inverse sine of a number.
+    """
+    return Expr(f.asin(arg.expr))
+
+
+def asinh(arg: Expr) -> Expr:
+    """
+    Returns inverse hyperbolic sine.
+    """
+    return Expr(f.asinh(arg.expr))
+
+
+def atan(arg: Expr) -> Expr:
+    """
+    Returns inverse tangent of a number.
+    """
+    return Expr(f.atan(arg.expr))
+
+
+def atanh(arg: Expr) -> Expr:
+    """
+    Returns inverse hyperbolic tangent.
+    """
+    return Expr(f.atanh(arg.expr))
+
+
+def atan2(y: Expr, x: Expr) -> Expr:
+    """
+    Returns inverse tangent of a division given in the argument.
+    """
+    return Expr(f.atan2(y.expr, x.expr))
+
+
+def bit_length(arg: Expr) -> Expr:
+    """
+    Returns the number of bits in the string argument.
+    """
+    return Expr(f.bit_length(arg.expr))
+
+
+def btrim(arg: Expr) -> Expr:
+    """
+    Removes all characters, spaces by default, from both sides of a string.
+    """
+    return Expr(f.btrim(arg.expr))
+
+
+def cbrt(arg: Expr) -> Expr:
+    """
+    Returns the cube root of a number.
+    """
+    return Expr(f.cbrt(arg.expr))
+
+
+def ceil(arg: Expr) -> Expr:
+    """
+    Returns the nearest integer greater than or equal to argument.
+    """
+    return Expr(f.ceil(arg.expr))
+
+
+def character_length(arg: Expr) -> Expr:
+    """
+    Returns the number of characters in the argument.
+    """
+    return Expr(f.character_length(arg.expr))
+
+
+def length(string: Expr) -> Expr:
+    """
+    The number of characters in the `string`
+    """
+    return Expr(f.length(string.expr))
+
+
+def char_length(string: Expr) -> Expr:
+    """
+    The number of characters in the `string`.
+    """
+    return Expr(f.char_length(string.expr))
+
+
+def chr(arg: Expr) -> Expr:
+    """
+    Converts the Unicode code point to a UTF8 character.
+    """
+    return Expr(f.chr(arg.expr))
+
+
+def coalesce(*args: Expr) -> Expr:
+    """
+    Returns `coalesce(args...)`, which evaluates to the value of the first 
expr which is not NULL.
+    """
+    args = [arg.expr for arg in args]
+    return Expr(f.coalesce(*args))
+
+
+def cos(arg: Expr) -> Expr:
+    """
+    Returns the cosine of the argument.
+    """
+    return Expr(f.cos(arg.expr))
+
+
+def cosh(arg: Expr) -> Expr:
+    """
+    Returns the hyperbolic cosine of the argument.
+    """
+    return Expr(f.cosh(arg.expr))
+
+
+def cot(arg: Expr) -> Expr:
+    """
+    Returns the cotangent of the argument.
+    """
+    return Expr(f.cot(arg.expr))
+
+
+def degrees(arg: Expr) -> Expr:
+    """
+    Converts the argument from radians to degrees.
+    """
+    return Expr(f.degrees(arg.expr))
+
+
+def ends_with(arg: Expr, suffix: Expr) -> Expr:
+    """
+    Returns true if the string `arg` ends with the `suffix`, false otherwise.
+    """
+    return Expr(f.ends_with(arg.expr, suffix.expr))
+
+
+def exp(arg: Expr) -> Expr:
+    """
+    Returns the exponential of the argument.
+    """
+    return Expr(f.exp(arg.expr))
+
+
+def factorial(arg: Expr) -> Expr:
+    """
+    Returns the factorial of the argument.
+    """
+    return Expr(f.factorial(arg.expr))
+
+
+def find_in_set(string: Expr, string_list: Expr) -> Expr:
+    """
+    Returns a value in the range of 1 to N if the string is in the string list 
`string_list` consisting of N substrings.
+    The string list is a string composed of substrings separated by `,` 
characters.
+    """
+    return Expr(f.find_in_set(string.expr, string_list.expr))
+
+
+def floor(arg: Expr) -> Expr:
+    """
+    Returns the nearest integer less than or equal to the argument.
+    """
+    return Expr(f.floor(arg.expr))
+
+
+def gcd(x: Expr, y: Expr) -> Expr:
+    """
+    Returns the greatest common divisor.
+    """
+    return Expr(f.gcd(x.expr, y.expr))
+
+
+def initcap(string: Expr) -> Expr:
+    """
+    Converts the first letter of each word in `string` to uppercase and the 
remaining characters to lowercase.
+    """
+    return Expr(f.initcap(string.expr))
+
+
+def instr(string: Expr, substring: Expr) -> Expr:
+    """
+    Finds the position from where the `substring` matches the `string`.
+    This is an alias for :func:`strpos`.
+    """
+    return strpos(string, substring)
+
+
+def iszero(arg: Expr) -> Expr:
+    """
+    Returns true if a given number is +0.0 or -0.0 otherwise returns false.
+    """
+    return Expr(f.iszero(arg.expr))
+
+
+def lcm(x: Expr, y: Expr) -> Expr:
+    """
+    Returns the least common multiple.
+    """
+    return Expr(f.lcm(x.expr, y.expr))
+
+
+def left(string: Expr, n: Expr) -> Expr:
+    """
+    Returns the first `n` characters in the `string`.
+    """
+    return Expr(f.left(string.expr, n.expr))
+
+
+def levenshtein(string1: Expr, string2: Expr) -> Expr:
+    """
+    Returns the Levenshtein distance between the two given strings
+    """
+    return Expr(f.levenshtein(string1.expr, string2.expr))
+
+
+def ln(arg: Expr) -> Expr:
+    """
+    Returns the natural logarithm (base e) of the argument.
+    """
+    return Expr(f.ln(arg.expr))
+
+
+def log(base: Expr, num: Expr) -> Expr:
+    """
+    Returns the logarithm of a number for a particular `base`
+    """
+    return Expr(f.log(base.expr, num.expr))
+
+
+def log10(arg: Expr) -> Expr:
+    """
+    Base 10 logarithm of the argument.
+    """
+    return Expr(f.log10(arg.expr))
+
+
+def log2(arg: Expr) -> Expr:
+    """
+    Base 2 logarithm of the argument.
+    """
+    return Expr(f.log2(arg.expr))
+
+
+def lower(arg: Expr) -> Expr:
+    """
+    Converts a string to lowercase.
+    """
+    return Expr(f.lower(arg.expr))
+
+
+def lpad(string: Expr, count: Expr, characters: Expr | None = None) -> Expr:
+    """
+    Extends the string to length `count` by prepending the characters 
`characters` (a space by default). If the string is already longer than 
`count` then it is truncated (on the right).
+    """
+    characters = characters if characters is not None else Expr.literal(" ")
+    return Expr(f.lpad(string.expr, count.expr, characters.expr))
+
+
+def ltrim(arg: Expr) -> Expr:
+    """
+    Removes all characters, spaces by default, from the beginning of a string.
+    """
+    return Expr(f.ltrim(arg.expr))
+
+
+def md5(arg: Expr) -> Expr:
+    """
+    Computes an MD5 128-bit checksum for a string expression.
+    """
+    return Expr(f.md5(arg.expr))
+
+
+def nanvl(x: Expr, y: Expr) -> Expr:
+    """
+    Returns `x` if `x` is not `NaN`. Otherwise returns `y`.
+    """
+    return Expr(f.nanvl(x.expr, y.expr))
+
+
+def octet_length(arg: Expr) -> Expr:
+    """
+    Returns the number of bytes of a string.
+    """
+    return Expr(f.octet_length(arg.expr))
+
+
+# TODO: `overlay` in datafusion needs to be updated from generic `args` 
definition, and then exposed in this repo.
+# def overlay(string: Expr, substring: Expr, start: Expr, length: Expr | None 
= None) -> Expr:
+#     """
+#     Replace the substring of string that starts at the `start`'th character 
and extends for `length` characters with new substring
+#     """
+#     return Expr()
+
+
+def pi() -> Expr:
+    """
+    Returns an approximate value of π.
+    """
+    return Expr(f.pi())
+
+
+def position(string: Expr, substring: Expr) -> Expr:
+    """
+    Finds the position from where the `substring` matches the `string`.
+    This is an alias for :func:`strpos`.
+    """
+    return strpos(string, substring)
+
+
+def power(base: Expr, exponent: Expr) -> Expr:
+    """
+    Returns `base` raised to the power of `exponent`.
+    """
+    return Expr(f.power(base.expr, exponent.expr))
+
+
+def pow(base: Expr, exponent: Expr) -> Expr:
+    """
+    Returns `base` raised to the power of `exponent`.
+    This is an alias of `power`.
+    """
+    return power(base, exponent)
+
+
+def radians(arg: Expr) -> Expr:
+    """
+    Converts the argument from degrees to radians.
+    """
+    return Expr(f.radians(arg.expr))
+
+
+def regexp_like(string: Expr, regex: Expr, flags: Expr | None = None) -> Expr:
+    """
+    Tests a string using a regular expression returning true if at
+    least one match, false otherwise.
+    """
+    if flags is not None:
+        flags = flags.expr
+    return Expr(f.regexp_like(string.expr, regex.expr, flags))
+
+
+def regexp_match(string: Expr, regex: Expr, flags: Expr | None = None) -> Expr:
+    """
+    Returns an array with each element containing the leftmost-first
+    match of the corresponding index in `regex` to string in `string`
+
+    If there is no match, the list element is NULL.
+
+    If a match is found, and the pattern contains no capturing parenthesized 
subexpressions,
+    then the list element is a single-element [`GenericStringArray`] 
containing the substring
+    matching the whole pattern.
+
+    If a match is found, and the pattern contains capturing parenthesized 
subexpressions, then the
+    list element is a [`GenericStringArray`] whose n'th element is the 
substring matching
+    the n'th capturing parenthesized subexpression of the pattern.
+    """
+
+    # TODO VALIDATE THIS IS CORRECT FOR DATAFRAME RESULTS
+    if flags is not None:
+        flags = flags.expr
+    return Expr(f.regexp_match(string.expr, regex.expr, flags))
+
+
+def regexp_replace(
+    string: Expr, pattern: Expr, replacement: Expr, flags: Expr | None = None
+) -> Expr:
+    """
+    Replaces substring(s) matching a PCRE-like regular expression.
+
+    The full list of supported features and syntax can be found at
+    <https://docs.rs/regex/latest/regex/#syntax>
+
+    Supported flags with the addition of 'g' can be found at
+    <https://docs.rs/regex/latest/regex/#grouping-and-flags>
+    """
+    if flags is not None:
+        flags = flags.expr
+    return Expr(f.regexp_replace(string.expr, pattern.expr, replacement.expr, 
flags))
+
+
+def repeat(string: Expr, n: Expr) -> Expr:
+    """
+    Repeats the `string` `n` times.
+    """
+    return Expr(f.repeat(string.expr, n.expr))
+
+
+def replace(string: Expr, from_val: Expr, to_val: Expr) -> Expr:
+    """
+    Replaces all occurrences of `from_val` with `to_val` in the `string`.
+    """
+    return Expr(f.replace(string.expr, from_val.expr, to_val.expr))
+
+
+def reverse(arg: Expr) -> Expr:
+    """
+    Reverse the string argument.
+    """
+    return Expr(f.reverse(arg.expr))
+
+
+def right(string: Expr, n: Expr) -> Expr:
+    """
+    Returns the last `n` characters in the `string`.
+    """
+    return Expr(f.right(string.expr, n.expr))
+
+
+def round(arg: Expr) -> Expr:
+    """
+    Round the argument to the nearest integer.
+    """
+    return Expr(f.round(arg.expr))

Review Comment:
   Looks like `round` is missing its optional second parameter, based on the 
test failures:
   
   ```
   col_name = 'revenue', col_type = Decimal128Type(decimal128(38, 4))
   
       def df_selection(col_name, col_type):
           if col_type == pa.float64() or isinstance(col_type, 
pa.Decimal128Type):
   >           return F.round(col(col_name), lit(2)).alias(col_name)
   E           TypeError: round() takes 1 positional argument but 2 were given
   ```
   
   Maybe something like this, similar to rpad?
   ```suggestion
   def round(arg: Expr, decimal_places: Expr | None = None) -> Expr:
       """
       Round the argument to the nearest integer. If decimal_places is 
specified, rounds to the given number of decimal places.
       """
       decimal_places = decimal_places if decimal_places is not None else 
Expr.literal(0)
       return Expr(f.round(arg.expr, decimal_places.expr))
   ```
   ?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to