Hello, Manuel,
I have made some additional improvements to the CSV library. I found that the
way in which I was using (possibly abusing) the lalr parser resulted in more
memory being allocated than needed. By calling it for every record in a csv
file, I was repeatedly reallocating the lalr stack. Since my parsing needs are
simple, I rewrote it and obtained a significant performance improvement (~4x)
on my test file. I introduced one change in semantics, which I don't believe
should bother anyone. Now, when we see a line consisting of just a newline, the
empty list is returned instead of a list containing an empty string. The patch
is attached.
Best Regards,Joseph Donaldson
--- bigloo4.3b/api/csv/src/Llib/csv.scm 2017-08-03 00:11:25.000000000 -0700
+++ bigloo4.3b_mod/api/csv/src/Llib/csv.scm 2017-08-12 13:05:46.149711507 -0700
@@ -35,39 +35,42 @@
(define +psv-lexer+ (make-csv-lexer #\| #\"))
-;*---------------------------------------------------------------------*/
-;* +csv-parser+ ... */
-;*---------------------------------------------------------------------*/
-(define +csv-parser+
- (lalr-grammar (separator text)
-
- (fields
- ((field)
- (list field))
- ((field separator fields)
- (cons field fields)))
-
- (field
- (()
- "")
- ((text field)
- (string-append text field)))))
-
+
+
+(define +csv-unspecified+ '(#unspecified))
+
;*---------------------------------------------------------------------*/
;* read-csv-record ... */
;*---------------------------------------------------------------------*/
(define (read-csv-record in #!optional (lexer +csv-lexer+))
- (if (input-port? in)
- (let ((pc (peek-char in)))
- (if (eof-object? pc)
- pc
- (read/lalrp +csv-parser+ lexer in
- (lambda (x) (or (eof-object? x) (eq? x 'newline))))))
- (raise
- (instantiate::&io-port-error
- (proc "read-csv-record")
- (msg "invalid input port")
- (obj in)))))
+ (when (not (input-port? in))
+ (raise (instantiate::&io-port-error (proc "read-csv-record")
+ (msg "invalid input port")
+ (obj in))))
+ (let loop ((token (read/rp lexer in))
+ (last-token +csv-unspecified+)
+ (res '()))
+ (cond ((or (eq? token 'newline)
+ (eof-object? token))
+ (if (and (eof-object? token)
+ (eq? last-token +csv-unspecified+))
+ #eof-object
+ (reverse! res)))
+ ((and (pair? token)
+ (eq? (car token) 'text))
+ (loop (read/rp lexer in)
+ (car token)
+ (if (eq? last-token 'text)
+ (cons (string-append (car res) (cdr token)) (cdr res))
+ (cons (cdr token) res))))
+ ((eq? token 'separator)
+ (loop (read/rp lexer in)
+ 'separator
+ res))
+ (else
+ (loop (read/rp lexer in)
+ 'text
+ res)))))
;*---------------------------------------------------------------------*/
;* read-csv-records ... */
--- bigloo4.3b/api/csv/src/Llib/csv.sch 2017-08-03 00:11:25.000000000 -0700
+++ bigloo4.3b_mod/api/csv/src/Llib/csv.sch 2017-08-12 13:07:49.452368223 -0700
@@ -13,31 +13,32 @@
;* make-csv-lexer */
;*---------------------------------------------------------------------*/
(define-macro (make-csv-lexer sep quot)
- (if (and (char? sep) (char? quot))
+ (if (and (char? sep)
+ (char? quot))
`(regular-grammar ((quote ,quot)
(separator ,sep))
(quote
(let loop ((curr (read-char (the-port)))
- (res ""))
+ (res '()))
(cond ((eof-object? curr)
- (raise (instantiate::&io-parse-error
- (proc "csv lexer")
- (msg "failed to parse")
- (obj curr))))
+ (raise (instantiate::&io-parse-error (proc "lexer")
+ (msg "failed to parse fail")
+ (obj curr))))
((and (char=? curr ,quot)
(not (eof-object? (peek-char (the-port))))
(char=? (peek-char (the-port)) ,quot))
(read-char (the-port))
(loop (read-char (the-port))
- (string-append res (string ,quot))))
+ (cons ,quot res)))
((char=? curr ,quot)
- (cons 'text res))
+ (cons 'text (list->string (reverse! res))))
(else
(loop (read-char (the-port))
- (string-append res (string curr)))))))
+ (cons curr res))))))
(separator
'separator)
- ((or (: #\return #\newline) #\newline)
+ ((or (: #\return #\newline)
+ #\newline)
'newline)
((+ (out quote separator #\return #\newline))
(cons 'text (the-string)))
@@ -45,7 +46,5 @@
(let ((c (the-failure)))
(if (eof-object? c)
c
- (error "csv-lexer" "Illegal character" c)))))
- (error "csv-lexer"
- "separator and quote must be a single character"
- (list sep quot))))
+ (error 'csv-lexer "Illegal character" c)))))
+ (error 'csv-lexer "separator and quote must be a single character" (list sep quot))))
\ No newline at end of file
--- bigloo4.3b/api/csv/recette/recette.scm 2017-08-03 00:11:25.000000000 -0700
+++ bigloo4.3b_mod/api/csv/recette/recette.scm 2017-08-12 13:10:16.164896480 -0700
@@ -155,8 +155,8 @@
(close-input-port in)))
:result (lambda (v)
(if (eq? v 'result)
- '("")
- (csv-record=? v '("")))))
+ '()
+ (csv-record=? v '()))))
(define-test empty
(let* ((test-string "")