eyalleshem commented on code in PR #2073:
URL:
https://github.com/apache/datafusion-sqlparser-rs/pull/2073#discussion_r2499546040
##########
src/tokenizer.rs:
##########
@@ -1876,13 +1893,26 @@ impl<'a> Tokenizer<'a> {
comment
}
- /// Tokenize an identifier or keyword, after the first char is already consumed.
- fn tokenize_word(&self, first_chars: impl Into<String>, chars: &mut State) -> String {
- let mut s = first_chars.into();
- s.push_str(&peeking_take_while(chars, |ch| {
- self.dialect.is_identifier_part(ch)
- }));
- s
+ /// Tokenize an identifier or keyword, after the first char(s) have already been consumed.
+ /// `consumed_byte_len` is the byte length of the consumed character(s).
+ fn tokenize_word(&self, consumed_byte_len: usize, chars: &mut State<'a>) -> String {
+ // Calculate where the first character started
+ let first_char_byte_pos = chars.byte_pos - consumed_byte_len;
+
+ // Use the zero-copy version and convert to String
+ self.tokenize_word_borrowed(first_char_byte_pos, chars)
+ .to_string()
+ }
+
+ /// Tokenize an identifier or keyword, returning a borrowed slice when possible.
+ /// The first character position must be provided (before it was consumed).
+ /// Returns a slice with the same lifetime as the State's source.
+ fn tokenize_word_borrowed(&self, first_char_byte_pos: usize, chars: &mut State<'a>) -> &'a str {
+ // Consume the rest of the word
+ borrow_slice_until(chars, |ch| self.dialect.is_identifier_part(ch));
+
+ // Return a slice from the first char to the current position
+ &chars.source[first_char_byte_pos..chars.byte_pos]
Review Comment:
done
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]