https://bugs.koha-community.org/bugzilla3/show_bug.cgi?id=36831
--- Comment #14 from Martin Renvoize <[email protected]> --- I had a bit of a think about this.. I've seen the following code in use somewhere.. it tries to best guess using counts of occurrences.. maybe this is an approach we could be happy with? sub detect_delimiter_and_quote { my ($filename) = @_; my $sample_lines = 5; # Number of lines to sample for detection open my $fh, '<', $filename or die "Could not open '$filename': $!"; my @lines; while (<$fh>) { push @lines, $_; last if $. >= $sample_lines; } close $fh; my %delimiter_count; my %quote_count; foreach my $line (@lines) { foreach my $char (',', '\t', ';', '|') { my $count = () = $line =~ /\Q$char\E/g; $delimiter_count{$char} += $count if $count; } foreach my $char ('"', "'") { my $count = () = $line =~ /\Q$char\E/g; $quote_count{$char} += $count if $count; } } # Guess the delimiter with the highest count my ($delimiter) = sort { $delimiter_count{$b} <=> $delimiter_count{$a} } keys %delimiter_count; # Guess the quote character with the highest count my ($quote) = sort { $quote_count{$b} <=> $quote_count{$a} } keys %quote_count; # Fallback to common defaults if nothing is detected $delimiter //= ','; $quote //= '"'; return ($delimiter, $quote); } -- You are receiving this mail because: You are watching all bug changes. _______________________________________________ Koha-bugs mailing list [email protected] https://lists.koha-community.org/cgi-bin/mailman/listinfo/koha-bugs website : http://www.koha-community.org/ git : http://git.koha-community.org/ bugs : http://bugs.koha-community.org/
