Jefffrey commented on code in PR #9268: URL: https://github.com/apache/arrow-datafusion/pull/9268#discussion_r1501345793
########## datafusion-cli/Cargo.lock: ########## Review Comment: Maybe omit these changes from PR, now that no new dependency is added? ########## datafusion/sql/tests/sql_integration.rs: ########## @@ -4423,6 +4423,19 @@ fn test_field_not_found_window_function() { quick_test(qualified_sql, expected); } +#[test] +fn test_parse_escaped_string_literal_value() { + let sql = r"SELECT length('\r\n') AS len"; + let expected = "Projection: character_length(Utf8(\"\\r\\n\")) AS len\ + \n EmptyRelation"; + quick_test(sql, expected); + + let sql = r"SELECT length(E'\r\n') AS len"; + let expected = "Projection: character_length(Utf8(\"\r\n\")) AS len\ + \n EmptyRelation"; + quick_test(sql, expected); +} Review Comment: Could we add a negative test here too? ########## datafusion/sql/src/utils.rs: ########## @@ -246,3 +246,169 @@ pub(crate) fn normalize_ident(id: Ident) -> String { None => id.value.to_ascii_lowercase(), } } + +macro_rules! return_if_none { + ($o:expr) => { + match $o { + Some(s) => s, + None => return None, + } + }; +} Review Comment: I think you can just use `?` if the function already returns an Option ########## datafusion/sql/src/utils.rs: ########## @@ -246,3 +246,169 @@ pub(crate) fn normalize_ident(id: Ident) -> String { None => id.value.to_ascii_lowercase(), } } + +macro_rules! return_if_none { + ($o:expr) => { + match $o { + Some(s) => s, + None => return None, + } + }; +} + +pub(crate) fn unescape(s: &str) -> Option<String> { + let mut queue: VecDeque<_> = String::from(s).chars().collect(); + let mut unescaped = String::new(); + + while let Some(c) = queue.pop_front() { + if c != '\\' { + unescaped.push(c); + continue; + } + + let ch = match queue.pop_front() { + Some(c) => match c { + 'b' => '\u{0008}', + 'f' => '\u{000C}', + 'n' => '\n', + 'r' => '\r', + 't' => '\t', + 'u' => return_if_none!(unescape_unicode_16(&mut queue)), + 'U' => return_if_none!(unescape_unicode_32(&mut queue)), + 'x' => return_if_none!(unescape_byte(&mut queue)), + c if c.is_digit(8) => return_if_none!(unescape_octal(c, &mut queue)), + c => c, + }, + None => return None, + }; + unescaped.push(ch); + } + + Some(unescaped) +} + +// Hexadecimal byte value. \xh, \xhh (h = 0–9, A–F) +fn unescape_byte(queue: &mut VecDeque<char>) -> Option<char> { + let mut s = String::new(); + let mut try_get_next = || match queue.front() { + Some(c) if c.is_ascii_hexdigit() => queue.pop_front(), + _ => None, + }; Review Comment: Maybe inline this? Might make the code a bit simpler to understand -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org