Hello community, here is the log from the commit of package ghc-xlsx-tabular for openSUSE:Factory checked in at 2017-03-03 17:52:40 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/ghc-xlsx-tabular (Old) and /work/SRC/openSUSE:Factory/.ghc-xlsx-tabular.new (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "ghc-xlsx-tabular" Fri Mar 3 17:52:40 2017 rev:2 rq:461700 version:0.2.2 Changes: -------- --- /work/SRC/openSUSE:Factory/ghc-xlsx-tabular/ghc-xlsx-tabular.changes 2016-12-10 18:26:31.780362197 +0100 +++ /work/SRC/openSUSE:Factory/.ghc-xlsx-tabular.new/ghc-xlsx-tabular.changes 2017-03-03 17:52:41.220035600 +0100 @@ -1,0 +2,5 @@ +Sun Feb 12 14:17:29 UTC 2017 - psim...@suse.com + +- Update to version 0.2.2 with cabal2obs. + +------------------------------------------------------------------- Old: ---- xlsx-tabular-0.1.0.1.tar.gz New: ---- xlsx-tabular-0.2.2.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ ghc-xlsx-tabular.spec ++++++ --- /var/tmp/diff_new_pack.gk2Dxh/_old 2017-03-03 17:52:41.955931662 +0100 +++ /var/tmp/diff_new_pack.gk2Dxh/_new 2017-03-03 17:52:41.959931096 +0100 @@ -1,7 +1,7 @@ # # spec file for package ghc-xlsx-tabular # -# Copyright (c) 2016 SUSE LINUX GmbH, Nuernberg, Germany. +# Copyright (c) 2017 SUSE LINUX GmbH, Nuernberg, Germany. # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -18,15 +18,14 @@ %global pkg_name xlsx-tabular Name: ghc-%{pkg_name} -Version: 0.1.0.1 +Version: 0.2.2 Release: 0 -Summary: Xlsx table decode utility +Summary: Xlsx table cell value extraction utility License: BSD-3-Clause -Group: System/Libraries +Group: Development/Languages/Other Url: https://hackage.haskell.org/package/%{pkg_name} Source0: https://hackage.haskell.org/package/%{pkg_name}-%{version}/%{pkg_name}-%{version}.tar.gz BuildRequires: ghc-Cabal-devel -# Begin cabal-rpm deps: BuildRequires: ghc-aeson-devel BuildRequires: ghc-bytestring-devel BuildRequires: ghc-containers-devel @@ -36,10 +35,12 @@ BuildRequires: ghc-text-devel BuildRequires: ghc-xlsx-devel BuildRoot: %{_tmppath}/%{name}-%{version}-build -# End cabal-rpm deps %description -Please see README.md. 
+Convenience utility to read xlsx tabular cells. + +You can extract the values from xlsx files table rows to JSON format by the +heuristics or your custom function. %package devel Summary: Haskell %{pkg_name} library development files @@ -55,15 +56,12 @@ %prep %setup -q -n %{pkg_name}-%{version} - %build %ghc_lib_build - %install %ghc_lib_install - %post devel %ghc_pkg_recache ++++++ xlsx-tabular-0.1.0.1.tar.gz -> xlsx-tabular-0.2.2.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xlsx-tabular-0.1.0.1/src/Codec/Xlsx/Util/Tabular/Imports.hs new/xlsx-tabular-0.2.2/src/Codec/Xlsx/Util/Tabular/Imports.hs --- old/xlsx-tabular-0.1.0.1/src/Codec/Xlsx/Util/Tabular/Imports.hs 2016-06-08 14:59:26.000000000 +0200 +++ new/xlsx-tabular-0.2.2/src/Codec/Xlsx/Util/Tabular/Imports.hs 2016-10-29 17:27:57.000000000 +0200 @@ -14,6 +14,7 @@ , Text , IntSet , IntSet.fromList + , IntSet.member , FromJSON, parseJSON , ToJSON, toJSON , Value(Object), object @@ -25,7 +26,7 @@ ) where -import Codec.Xlsx as X hiding (fromList) +import Codec.Xlsx as X import Codec.Xlsx.Formatted as X import Codec.Xlsx.Util.Tabular.Types as X import Control.Applicative ((<$>), (<*>)) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xlsx-tabular-0.1.0.1/src/Codec/Xlsx/Util/Tabular/Json.hs new/xlsx-tabular-0.2.2/src/Codec/Xlsx/Util/Tabular/Json.hs --- old/xlsx-tabular-0.1.0.1/src/Codec/Xlsx/Util/Tabular/Json.hs 2016-03-09 08:43:26.000000000 +0100 +++ new/xlsx-tabular-0.2.2/src/Codec/Xlsx/Util/Tabular/Json.hs 2017-01-16 12:40:38.000000000 +0100 @@ -16,7 +16,7 @@ instance FromJSON RichTextRun where parseJSON (Object v) = - RichTextRun <$> pure Nothing <*> (v .: "text") + RichTextRun <$> return Nothing <*> (v .: "text") deriveJSON defaultOptions diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xlsx-tabular-0.1.0.1/src/Codec/Xlsx/Util/Tabular.hs 
new/xlsx-tabular-0.2.2/src/Codec/Xlsx/Util/Tabular.hs --- old/xlsx-tabular-0.1.0.1/src/Codec/Xlsx/Util/Tabular.hs 2016-03-09 08:10:01.000000000 +0100 +++ new/xlsx-tabular-0.2.2/src/Codec/Xlsx/Util/Tabular.hs 2016-11-09 15:20:31.000000000 +0100 @@ -1,7 +1,35 @@ {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} --- | Convinience utility to read Xlsx tabular cells. + +{- | Convenience utility to read Xlsx tabular cells. + +The majority of the @toTableRows*@ functions assume that the table +of interest consists of contiguous rows styled with border lines +surrounding all cells, with possible text above and below the table +that is not of interest. Like so: + +@ +Some documentation here.... +--------------------------- +| Header1 | Header2 | ... | +--------------------------- +| Value1 | Value2 | ... | +--------------------------- +| Value1 | Value2 | ... | +--------------------------- +Maybe some annoying text here, I don't care about. +@ + +The heuristic used for table row selection in these functions is +that any table rows will have a bottom border line. + +If the above heuristic is not valid for your table you can instead +provide your own row selection predicate to the `toTableRowsCustom` +function. For example, the predicate @\\_ _ -> True@ (or @(const +. const) True@) will select all contiguous rows. 
+ +-} module Codec.Xlsx.Util.Tabular ( -- * Types @@ -18,167 +46,148 @@ -- ** TabularRow , tabularRowIx , tabularRowCells - -- * Methods - , def -- * Functions , toTableRowsFromFile , toTableRows , toTableRows' + -- * Custom row predicates + , toTableRowsCustom ) where import Codec.Xlsx.Util.Tabular.Imports import qualified Data.ByteString.Lazy as ByteString -type Rows = - [(Int, Cols)] +type Row = (Int, Cols) + +type Rows = [(Int, Cols)] -- [Row] -type Cols = - [(Int, Cell)] +type Cols = [(Int, Cell)] -type RowValues = - [(Int, [(Int, Maybe CellValue)])] +type RowValues = [(Int, [(Int, Maybe CellValue)])] +-- | A @RowPredicate@ is given the Xlsx "StyleSheet" as well as the +-- row itself (consisting of the row's index and the row's cells) and +-- should return @True@ if the row is part of the table and false +-- otherwise. +type RowPredicate = StyleSheet -> Row -> Bool --- |Read from Xlsx file as tabular rows +-- |Read tabular rows from the first sheel of an Xlsx file. +-- The table is assumed to consist of all contiguous rows +-- that have bottom border lines, starting with the header. toTableRowsFromFile :: Int -- ^ Starting row index (header row) -> String -- ^ File name -> IO (Maybe Tabular) -toTableRowsFromFile offset fname = do - s <- ByteString.readFile fname - let xlsx = toXlsx s - rows = toTableRows' xlsx offset - pure rows +toTableRowsFromFile offset fname = + flip toTableRows' offset . toXlsx <$> ByteString.readFile fname -- |Decode cells as tabular rows. +-- The table is assumed to consist of all contiguous rows +-- that have bottom border lines, starting with the header. toTableRows :: Xlsx -- ^ Xlsx Workbook -> Text -- ^ Worksheet name to decode -> Int -- ^ Starting row index (header row) -> Maybe Tabular -toTableRows xlsx sheetName offset = - decodeRows <$> styles <*> Just offset <*> rows - where - styles = parseStyleSheet (xlsx ^. xlStyles) ^? _Right - rows = - xlsx - ^? ixSheet sheetName - . wsCells - . 
to toRows +toTableRows = toTableRowsCustom borderBottomPredicate --- |Decode cells as tabular rows from first sheet. +-- |Decode cells from first sheet as tabular rows. +-- The table is assumed to consist of all contiguous rows +-- that have bottom border lines, starting with the header. toTableRows' :: Xlsx -- ^ Xlsx Workbook -> Int -- ^ Starting row index (header row) -> Maybe Tabular -toTableRows' xlsx offset = - toTableRows xlsx firstSheetName offset +toTableRows' xlsx = toTableRows xlsx firstSheetName where - firstSheetName = - xlsx ^. xlSheets - & keys - & head + firstSheetName = fst $ head $ xlsx ^. xlSheets + -- ^ TODO: Is this still true with xlsx-0.3 or are sheets now + -- in alphabetical order?? + +-- | Decode cells as tabular rows. +-- The table is assumed to consist of all contiguous rows +-- that fulfill the given predicate, starting with the header. +-- +-- The predicate function is given the Xlsx @StyleSheet@ as well +-- as a row (consisting of the row's index and the row's cells) +-- and should return @True@ if the row is part of the table. +-- +-- Since 0.1.1 +toTableRowsCustom :: (StyleSheet -> (Int, [(Int, Cell)]) -> Bool) + -- ^ Predicate for row selection + -> Xlsx -- ^ Xlsx Workbook + -> Text -- ^ Worksheet name to decode + -> Int -- ^ Starting row index (header row) + -> Maybe Tabular +toTableRowsCustom predicate xlsx sheetName offset = do + styles <- parseStyleSheet (xlsx ^. xlStyles) ^? _Right + rows <- xlsx ^? ixSheet sheetName . wsCells . to toRows + decodeRows (predicate styles) offset rows -decodeRows ss offset rs = +decodeRows p offset rs = if null rs' then Nothing else Just $ def & tabularHeads .~ header' - & tabularRows .~ rows + & tabularRows .~ rows where - rs' = getCells ss offset rs + rs' = getCells p offset rs header = head rs' ^. 
_2 - header' = - header - & fmap toText - & join + header' = join $ map toText header toText (i, Just (CellText t)) = [def & tabularHeadIx .~ i & tabularHeadLabel .~ t] toText _ = [] - cix = fmap (view tabularHeadIx) header' - & fromList - rows = - fmap rowValue (tail rs') - rowValue rvs = - def - & tabularRowIx .~ (rvs ^. _1) - & tabularRowCells .~ (rvs ^. _2 & fmap f & join) + ixs = map (view tabularHeadIx) header' + rows = map rowValue (tail rs') + rowValue (ix, row) = def + & tabularRowIx .~ ix + & tabularRowCells .~ insertMissingCells ixs row where - f (i, cell) = - [cell | cix ^. contains i] + -- Insert empty cells when there is a header but no corresponding + -- cell. This can happen if cells have no content nor formatting + -- defined. + insertMissingCells :: [Int] -> [(Int, Maybe CellValue)] -> [Maybe CellValue] + insertMissingCells ixs cs = map (join . flip lookup cs) ixs + --- |行から値のあるセルを取り出す -getCells :: StyleSheet -- ^スタイルシート - -> Int -- ^開始行 - -> Rows -- ^セル行 +-- |Pickup cells that has value from line +getCells :: (Row -> Bool) -- ^ Predicate + -> Int -- ^ Start line number + -> Rows -- ^ cell rows -> RowValues -getCells ss i rs = - startAt ss i rs - & takeContiguous i - & takeUntil ss - & fmap rvs - & filter vs - where - rvs (i, cs) = - (i, rowValues cs) - filter = - Prelude.filter - vs (i, cs) = - any (\(_, v) -> isJust v) cs - -startAt :: StyleSheet -> Int -> Rows -> Rows -startAt ss i rs = - dropWhile f rs - where - f (x, _) = - x < i +getCells p i = filter (any (isJust . snd) . snd) + . (fmap . fmap) rowValues + . takeContiguous i + . takeWhile p + . startAt i + +startAt :: Int -> Rows -> Rows +startAt i = dropWhile ((< i) . fst) --- |指定の行から連続している行を取り出す +-- |Take contiguous rows that start from i takeContiguous :: Int -> Rows -> Rows -takeContiguous i rs = - [r | (x, r@(y, _)) <- zip [i..] rs, x == y] +--takeContiguous i rs = [r | (x, r@(y, _)) <- zip [i..] rs, x == y] +takeContiguous i = map snd . filter (uncurry (==) . fmap fst) . zip [i..] 
--- |有効セルのすべてに枠線(Bottom側)が存在しなくなる --- |すなわち枠囲みの欄外になるまでの行を取り出す -takeUntil :: StyleSheet -> Rows -> Rows -takeUntil ss rs = - takeWhile f rs - where - f (i, cs) = - or $ rowBordersHas borderBottom ss cs +rowValues = map (fmap _cellValue) -rowBordersHas v ss cs = - x - where - x = - fmap f cs - f (i, cell) = - cellHasBorder ss cell v +-- Predicate for at least one cell having a bottom border style. + +-- |Take rows while all valued cell has bottom border line. +-- | * no bottom border line means out of table. +borderBottomPredicate :: RowPredicate -- StyleSheet -> Row -> Bool +borderBottomPredicate ss = or . rowBordersHas borderBottom ss . snd + +rowBordersHas v ss = map (cellHasBorder v ss . snd) -rowValues cs = - x - where - x = - fmap f cs - f (i, cell) = - (i, cell ^. cellValue) -cellHasBorder ss cell v = - fromMaybe False mb +cellHasBorder v ss cell = fromMaybe False mb where - b = cellBorder ss cell - mb = borderStyleHasLine v <$> b + mb = borderStyleHasLine v <$> cellBorder ss cell cellBorder :: StyleSheet -> Cell -> Maybe Border -cellBorder ss cell = - view cellStyle cell - >>= pure . xf - >>= view cellXfBorderId - >>= pure . bd +cellBorder ss cell = fmap xf (view cellStyle cell) + >>= fmap bd . view cellXfBorderId where xf n = (ss ^. styleSheetCellXfs) !! n bd n = (ss ^. styleSheetBorders) !! n -borderStyleHasLine v b = - fromMaybe False value +borderStyleHasLine v b = fromMaybe False value where - value = - view v b - >>= view borderStyleLine - >>= pure . (/= LineStyleNone) + value = view v b >>= fmap (/= LineStyleNone) . 
view borderStyleLine diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xlsx-tabular-0.1.0.1/xlsx-tabular.cabal new/xlsx-tabular-0.2.2/xlsx-tabular.cabal --- old/xlsx-tabular-0.1.0.1/xlsx-tabular.cabal 2016-06-08 15:15:12.000000000 +0200 +++ new/xlsx-tabular-0.2.2/xlsx-tabular.cabal 2017-01-16 13:43:21.000000000 +0100 @@ -1,14 +1,22 @@ name: xlsx-tabular -version: 0.1.0.1 -synopsis: Xlsx table decode utility -description: Please see README.md -homepage: http://github.com/kkazuo/xlsx-tabular#readme +version: 0.2.2 +synopsis: Xlsx table cell value extraction utility +description: + . + Convenience utility to read xlsx tabular cells. + . + You can extract the values from xlsx files + table rows to JSON format by the heuristics or + your custom function. + +homepage: https://github.com/kkazuo/xlsx-tabular +bug-reports: https://github.com/kkazuo/xlsx-tabular/issues license: BSD3 license-file: LICENSE -author: Kazuo Koga -maintainer: obiwa...@me.com -copyright: (c) 2016 Kazuo Koga -category: Codec +author: Koga Kazuo <obiwa...@me.com> +maintainer: Koga Kazuo <obiwa...@me.com> +copyright: (c) 2016 Koga Kazuo +category: Codec, Text build-type: Simple -- extra-source-files: cabal-version: >=1.10 @@ -19,14 +27,14 @@ , Codec.Xlsx.Util.Tabular.Types , Codec.Xlsx.Util.Tabular.Imports , Codec.Xlsx.Util.Tabular.Json - build-depends: base >= 4.7 && < 5 + build-depends: base >= 4.8 && < 5 , aeson , bytestring , containers , data-default , lens , text - , xlsx + , xlsx >=0.3 default-language: Haskell2010 test-suite xlsx-tabular-test