This is an automated email from the git hooks/post-receive script. glondu pushed a commit to branch master in repository ocaml-csv.
commit a33a6f0e04bd874e3efd24e029c506d67dcddcdb Author: Stephane Glondu <st...@glondu.net> Date: Wed Aug 3 11:27:27 2016 +0200 Imported Upstream version 1.5 --- .merlin | 3 ++ Makefile | 7 ++- _oasis | 3 +- _oasis_remove_.ml | 7 +++ csv.install | 6 +++ examples/csvtool.ml | 139 ++++++++++++++++++++++++++++------------------------ opam/descr | 8 +++ opam/findlib | 1 + opam/opam | 33 +++++++++++++ src/csv.ml | 22 ++++----- src/csv.mli | 10 ++++ 11 files changed, 160 insertions(+), 79 deletions(-) diff --git a/.merlin b/.merlin new file mode 100644 index 0000000..11c5393 --- /dev/null +++ b/.merlin @@ -0,0 +1,3 @@ +S src +S examples +B _build/** diff --git a/Makefile b/Makefile index d93511a..190cb84 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ PKGVERSION = $(shell oasis query version) PKG_TARBALL = $(PKGNAME)-$(PKGVERSION).tar.gz DISTFILES = LICENSE.txt INSTALL.txt README.md _oasis \ - _tags Makefile setup.ml \ + _tags Makefile setup.ml _oasis_remove_.ml csv.install \ $(filter-out %~, $(wildcard src/*) $(wildcard examples/*) $(wildcard tests/*)) WEB = shell.forge.ocamlcore.org:/home/groups/csv/htdocs @@ -30,8 +30,11 @@ csvtool: all ./csvtool.native pastecol 1-3 2,1,2 \ tests/testcsv9.csv tests/testcsv9.csv +opam csv.install: _oasis + oasis2opam --local + # "Force" a tag to be defined for each released tarball -dist tar: setup.ml +dist tar: setup.ml opam @ if [ -z "$(PKGNAME)" ]; then echo "PKGNAME not defined"; exit 1; fi @ if [ -z "$(PKGVERSION)" ]; then \ echo "PKGVERSION not defined"; exit 1; fi diff --git a/_oasis b/_oasis index 86f74b8..4b036d0 100644 --- a/_oasis +++ b/_oasis @@ -1,7 +1,7 @@ # -*-conf-*- OASISFormat: 0.4 Name: csv -Version: 1.4.2 +Version: 1.5 Synopsis: A pure OCaml library to read and write CSV files. Description: This is a pure OCaml library to read and write CSV files, including all extensions used by Excel — e.g. quotes, newlines, @@ -63,6 +63,7 @@ Test conformity Run: true Executable test_header + Build$: flag(tests) Path: tests/ MainIs: test_header.ml BuildDepends: csv diff --git a/_oasis_remove_.ml b/_oasis_remove_.ml new file mode 100644 index 0000000..0d23853 --- /dev/null +++ b/_oasis_remove_.ml @@ -0,0 +1,7 @@ +open Printf + +let () = + let dir = Sys.argv.(1) in + (try Sys.chdir dir + with _ -> eprintf "Cannot change directory to %s\n%!" dir); + exit (Sys.command "ocaml setup.ml -uninstall") diff --git a/csv.install b/csv.install new file mode 100644 index 0000000..c6cfc2d --- /dev/null +++ b/csv.install @@ -0,0 +1,6 @@ +etc: [ + "setup.ml" + "setup.data" + "setup.log" + "_oasis_remove_.ml" +] diff --git a/examples/csvtool.ml b/examples/csvtool.ml index d2ece8a..274ffa8 100644 --- a/examples/csvtool.ml +++ b/examples/csvtool.ml @@ -71,6 +71,34 @@ let take n l = dummy.tl (*------------------------------ end of extlib code *) +(** Generic [iter] function reading a list of CSV files. The function + [f] can raise [Exit] to mean that no further processing should be done. *) +let csv_iter ~input_sep ~f files = + try + List.iter (fun filename -> + let fh = if filename = "-" then stdin else open_in filename in + let csv_in = Csv.of_channel ~separator:input_sep fh in + try (f filename csv_in : unit); + Csv.close_in csv_in; + with e -> Csv.close_in csv_in; raise e + ) files + with Exit -> () + +let iter_csv_rows ~input_sep ~f files = + csv_iter ~input_sep files ~f:(fun _ csv_in -> Csv.iter ~f csv_in) + +(** Generic [fold] function on a list of CSV files. *) +let csv_fold ~input_sep ~f ~init files = + List.fold_left (fun a filename -> + let fh = if filename = "-" then stdin else open_in filename in + let csv_in = Csv.of_channel ~separator:input_sep fh in + let a = f a csv_in in + Csv.close_in csv_in; + a + ) init files + + + (* Parse column specs. *) type colspec = range list and range = @@ -94,7 +122,7 @@ let parse_colspec ~count_zero colspec = Col (int_of_string col) ) with - Failure "int_of_string" -> + Failure _ -> failwith (colspec ^ ":" ^ col ^ ": invalid column-spec") ) cols in @@ -124,7 +152,7 @@ let cols_of_colspec colspec row = | [] -> [] | Col c :: rest -> (try List.nth row c - with Failure "nth" -> "") :: loop rest + with Failure _ -> "") :: loop rest | Range (s, e) :: rest -> let width = e-s+1 in let range = take width (drop s row) in @@ -137,73 +165,49 @@ let cols_of_colspec colspec row = (* The actual commands. *) let cmd_cols ~input_sep ~output_sep ~chan colspec files = - List.iter ( - fun filename -> - let csv = Csv.load ~separator:input_sep filename in - let csv = List.map (cols_of_colspec colspec) csv in - Csv.output_all (Csv.to_channel ~separator:output_sep chan) csv - ) files + let csv_out = Csv.to_channel ~separator:output_sep chan in + iter_csv_rows ~input_sep files ~f:(fun row -> + Csv.output_record csv_out (cols_of_colspec colspec row)); + Csv.close_out csv_out let cmd_namedcols ~input_sep ~output_sep ~chan names files = - List.iter ( - fun filename -> - let csv = Csv.load ~separator:input_sep filename in - let header, data = - match csv with - | [] -> failwith "no rows in this CSV file" - | h :: t -> h, t in - (* Do the headers requested exist in the CSV file? If not, - * throw an error. - *) - List.iter ( - fun name -> - if not (List.mem name header) then - failwith ("namedcol: requested header not in CSV file: " ^ name) - ) names; - let data = Csv.associate header data in - let data = List.map ( - fun row -> List.map (fun name -> List.assoc name row) names - ) data in - let data = names :: data in - Csv.output_all (Csv.to_channel ~separator:output_sep chan) data - ) files + let csv_out = Csv.to_channel ~separator:output_sep chan in + (* Output the header of the final file. *) + Csv.output_record csv_out names; + csv_iter ~input_sep files ~f:(fun fname csv_in -> + match (try Some(Csv.next csv_in) with End_of_file -> None) with + | None -> () + | Some header -> + (* Do the headers requested exist in the CSV file? If not, + throw an error. *) + List.iter (fun name -> + if not (List.mem name header) then + failwith (sprintf "namedcol: requested header %S not in CSV \ + file %S" name fname) + ) names; + Csv.iter csv_in ~f:(fun row -> + let row = Csv.combine ~header row in + let named = List.map (fun name -> List.assoc name row) names in + Csv.output_record csv_out named; + ) + ); + Csv.close_out csv_out let cmd_width ~input_sep ~chan files = - let width = List.fold_left ( - fun width filename -> - let csv = Csv.load ~separator:input_sep filename in - let width = max width (Csv.columns csv) in - width - ) 0 files in + let width = csv_fold ~input_sep files ~init:0 ~f:(fun w csv_in -> + Csv.fold_left csv_in ~init:w ~f:(fun w row -> + max w (List.length row))) in fprintf chan "%d\n" width let cmd_height ~input_sep ~chan files = - let height = List.fold_left ( - fun height filename -> - let csv = Csv.load ~separator:input_sep filename in - let height = height + Csv.lines csv in - height - ) 0 files in + let height = csv_fold ~input_sep files ~init:0 ~f:(fun h csv_in -> + Csv.fold_left ~f:(fun h _ -> h + 1) ~init:h csv_in) in fprintf chan "%d\n" height let cmd_readable ~input_sep ~chan files = let csv = List.concat (List.map (Csv.load ~separator:input_sep) files) in Csv.save_out_readable chan csv -let iter_csv_rows ~input_sep ~f files = - List.iter (fun filename -> - let in_chan, close = - match filename with - | "-" -> stdin, false - | filename -> open_in filename, true in - try - Csv.iter ~f (Csv.of_channel ~separator:input_sep in_chan); - if close then close_in in_chan - with Exit -> - if close then close_in in_chan - ) files - - let cmd_cat ~input_sep ~output_sep ~chan files = (* Avoid loading the whole file into memory. *) let chan = Csv.to_channel ~separator:output_sep chan in @@ -306,17 +310,24 @@ let cmd_pastecol ~input_sep ~output_sep ~chan colspec1 colspec2 file1 file2 = let cmd_set_columns ~input_sep ~output_sep ~chan cols files = (* Avoid loading the whole file into memory. *) + let csv_out = Csv.to_channel ~separator:output_sep chan in let f row = - let csv = [row] in - let csv = Csv.set_columns ~cols csv in - Csv.output_all (Csv.to_channel ~separator:output_sep chan) csv - in - iter_csv_rows ~input_sep ~f files + match Csv.set_columns ~cols [row] with + | [row] -> Csv.output_record csv_out row + | _ -> assert false in + iter_csv_rows ~input_sep ~f files; + Csv.close_out csv_out let cmd_set_rows ~input_sep ~output_sep ~chan rows files = - let csv = List.concat (List.map (Csv.load ~separator:input_sep) files) in - let csv = Csv.set_rows ~rows csv in - Csv.output_all (Csv.to_channel ~separator:output_sep chan) csv + let rows = ref rows in + let csv_out = Csv.to_channel ~separator:output_sep chan in + iter_csv_rows ~input_sep files ~f:(fun row -> + if !rows <= 0 then raise Exit; + Csv.output_record csv_out row; + decr rows; + ); + for _i = 1 to !rows do Csv.output_record csv_out [] done; + Csv.close_out csv_out let cmd_head ~input_sep ~output_sep ~chan rows files = (* Avoid loading the whole file into memory, or even loading diff --git a/opam/descr b/opam/descr new file mode 100644 index 0000000..b552980 --- /dev/null +++ b/opam/descr @@ -0,0 +1,8 @@ +A pure OCaml library to read and write CSV files. +This is a pure OCaml library to read and write CSV files, including +all extensions used by Excel — e.g. quotes, newlines, 8 bit +characters in fields, \"0 etc. A special representation of rows of CSV +files with a header is provided. The library comes with a handy +command line tool called csvtool for handling CSV files from shell +scripts. + diff --git a/opam/findlib b/opam/findlib new file mode 100644 index 0000000..efa258e --- /dev/null +++ b/opam/findlib @@ -0,0 +1 @@ +csv diff --git a/opam/opam b/opam/opam new file mode 100644 index 0000000..5141d49 --- /dev/null +++ b/opam/opam @@ -0,0 +1,33 @@ +opam-version: "1.2" +name: "csv" +version: "1.5" +maintainer: "Christophe Troestler <christophe.troest...@umons.ac.be>" +authors: [ "Richard Jones" + "Christophe Troestler" ] +license: "LGPL-2.1 with OCaml linking exception" +homepage: "https://github.com/Chris00/ocaml-csv" +dev-repo: "https://github.com/Chris00/ocaml-csv.git" +bug-reports: "https://github.com/Chris00/ocaml-csv/issues" +tags: [ "database" "science" ] +build: [ + ["oasis" "setup"] + ["ocaml" "setup.ml" "-configure" "--prefix" prefix] + ["ocaml" "setup.ml" "-build"] +] +install: ["ocaml" "setup.ml" "-install"] +remove: [ + ["ocaml" "%{etc}%/csv/_oasis_remove_.ml" "%{etc}%/csv"] +] +build-test: [ + ["oasis" "setup"] + ["ocaml" "setup.ml" "-configure" "--enable-tests"] + ["ocaml" "setup.ml" "-build"] + ["ocaml" "setup.ml" "-test"] +] +build-doc: [ "ocaml" "setup.ml" "-doc" ] +depends: [ + "base-bytes" + "oasis" {build & >= "0.4"} + "ocamlbuild" {build} + "ocamlfind" {build & >= "1.5"} +] diff --git a/src/csv.ml b/src/csv.ml index dfe70ed..5177b00 100644 --- a/src/csv.ml +++ b/src/csv.ml @@ -743,6 +743,9 @@ let to_buffer ?separator ?backslash_escape ?excel_tricks buf = method close_out () = () end) +let close_out oc = + oc.out_chan#close_out() + let rec really_output oc s ofs len = let w = oc.out_chan#output s ofs len in if w < len then really_output oc s (ofs+w) (len-w) @@ -854,7 +857,7 @@ let save ?separator ?backslash_escape ?excel_tricks fname t = let ch = open_out fname in let csv = to_channel ?separator ?backslash_escape ?excel_tricks ch in output_all csv t; - close_out ch + Pervasives.close_out ch (* * Reading rows with headers @@ -1099,18 +1102,13 @@ let to_array csv = let of_array csv = List.map Array.to_list (Array.to_list csv) +let rec combine ~header row = match header, row with + | [], _ -> [] + | _, [] -> List.map (fun h -> (h, "")) header + | h0 :: h, x :: r -> (h0, x) :: combine ~header:h r + let associate header data = - let nr_cols = List.length header in - let rec trunc = function - | 0, _ -> [] - | n, [] -> "" :: trunc (n-1, []) - | n, (x :: xs) -> x :: trunc (n-1, xs) - in - List.map ( - fun row -> - let row = trunc (nr_cols, row) in - List.combine header row - ) data + List.map (fun row -> combine header row) data let map ~f csv = List.map (fun row -> List.map (fun el -> f el) row) csv diff --git a/src/csv.mli b/src/csv.mli index e98b08d..dcfc609 100644 --- a/src/csv.mli +++ b/src/csv.mli @@ -245,6 +245,9 @@ val to_buffer : ?separator:char -> Buffer.t -> out_channel (** Same as {!Csv.to_out_obj} but output to a buffer. *) +val close_out : out_channel -> unit +(** [close_out oc] close the channel [oc]. The underlying channel is + closed as well. *) val output_record : out_channel -> string list -> unit (** [output_record oc r] write the record [r] is CSV form to the @@ -475,6 +478,13 @@ val associate : string list -> t -> (string * string) list list by the spreadsheet is not much larger. *) +val combine : header: string list -> string list -> (string * string) list +(** [combine ~header row] returns a row with elements [(h, x)] where + [h] is the header name and [x] the corresponding row entry. If + the [row] has less entries than [header], they are interpreted as + being empty. See {!associate} which applies this function to all + rows. *) + val map : f:(string -> string) -> t -> t (** [map f csv] applies [f] to all entries of [csv] and returns the resulting CSV. *) -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-ocaml-maint/packages/ocaml-csv.git _______________________________________________ Pkg-ocaml-maint-commits mailing list Pkg-ocaml-maint-commits@lists.alioth.debian.org http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/pkg-ocaml-maint-commits