eyalleshem commented on code in PR #2073:
URL:
https://github.com/apache/datafusion-sqlparser-rs/pull/2073#discussion_r2499623928
##########
src/tokenizer.rs:
##########
@@ -2176,35 +2206,82 @@ impl<'a> Tokenizer<'a> {
/// Read from `chars` until `predicate` returns `false` or EOF is hit.
/// Return the characters read as String, and keep the first non-matching
/// char available as `chars.next()`.
-fn peeking_take_while(chars: &mut State, mut predicate: impl FnMut(char) -> bool) -> String {
- let mut s = String::new();
+fn peeking_take_while(chars: &mut State, predicate: impl FnMut(char) -> bool) -> String {
+ borrow_slice_until(chars, predicate).to_string()
+}
+
+/// Borrow a slice from the original string until `predicate` returns `false` or EOF is hit.
+///
+/// # Arguments
+/// * `chars` - The character iterator state (contains reference to original source)
+/// * `predicate` - Function that returns true while we should continue taking characters
+///
+/// # Returns
+/// A borrowed slice of the source string containing the matched characters
+fn borrow_slice_until<'a>(
+ chars: &mut State<'a>,
+ mut predicate: impl FnMut(char) -> bool,
+) -> &'a str {
+ // Record the starting byte position
+ let start_pos = chars.byte_pos;
+
+ // Consume characters while predicate is true
while let Some(&ch) = chars.peek() {
if predicate(ch) {
- chars.next(); // consume
- s.push(ch);
+ chars.next(); // consume (this updates byte_pos)
} else {
break;
}
}
- s
+
+ // Get the ending byte position
+ let end_pos = chars.byte_pos;
+
+ // Return the slice from the original source
+ &chars.source[start_pos..end_pos]
}
-/// Same as peeking_take_while, but also passes the next character to the predicate.
-fn peeking_next_take_while(
- chars: &mut State,
+/// Borrow a slice from the original string until `predicate` returns `false` or EOF is hit.
+/// This version also passes the next character to the predicate for lookahead.
+/// This is a zero-copy version of `peeking_next_take_while`.
+///
+/// # Arguments
+/// * `chars` - The character iterator state (contains reference to original source)
+/// * `predicate` - Function that returns true while we should continue taking characters.
+/// Takes current char and optional next char for lookahead.
+///
+/// # Returns
+/// A borrowed slice of the source string containing the matched characters
+fn borrow_slice_until_next<'a>(
Review Comment:
Maybe, but I'm not sure what happens if EOF is reached on the next
character. I don't think I want to include that as part of this commit.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]