Hello community, here is the log from the commit of package ghc-scalpel for openSUSE:Factory checked in at 2017-04-17 10:25:09 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/ghc-scalpel (Old) and /work/SRC/openSUSE:Factory/.ghc-scalpel.new (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "ghc-scalpel" Mon Apr 17 10:25:09 2017 rev:2 rq:479415 version:0.5.0 Changes: -------- --- /work/SRC/openSUSE:Factory/ghc-scalpel/ghc-scalpel.changes 2017-03-08 01:03:54.572137666 +0100 +++ /work/SRC/openSUSE:Factory/.ghc-scalpel.new/ghc-scalpel.changes 2017-04-17 10:25:21.546304814 +0200 @@ -1,0 +2,5 @@ +Sun Feb 12 14:05:43 UTC 2017 - psim...@suse.com + +- Update to version 0.5.0 with cabal2obs. + +------------------------------------------------------------------- Old: ---- scalpel-0.3.1.tar.gz New: ---- scalpel-0.5.0.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ ghc-scalpel.spec ++++++ --- /var/tmp/diff_new_pack.sgQ5Gt/_old 2017-04-17 10:25:22.322194933 +0200 +++ /var/tmp/diff_new_pack.sgQ5Gt/_new 2017-04-17 10:25:22.322194933 +0200 @@ -1,7 +1,7 @@ # # spec file for package ghc-scalpel # -# Copyright (c) 2016 SUSE LINUX GmbH, Nuernberg, Germany. +# Copyright (c) 2017 SUSE LINUX GmbH, Nuernberg, Germany. 
# # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -17,9 +17,8 @@ %global pkg_name scalpel -%bcond_with tests Name: ghc-%{pkg_name} -Version: 0.3.1 +Version: 0.5.0 Release: 0 Summary: A high level web scraping library for Haskell License: Apache-2.0 @@ -28,18 +27,13 @@ Source0: https://hackage.haskell.org/package/%{pkg_name}-%{version}/%{pkg_name}-%{version}.tar.gz BuildRequires: ghc-Cabal-devel BuildRequires: ghc-bytestring-devel -BuildRequires: ghc-containers-devel BuildRequires: ghc-curl-devel BuildRequires: ghc-data-default-devel -BuildRequires: ghc-regex-base-devel -BuildRequires: ghc-regex-tdfa-devel BuildRequires: ghc-rpm-macros +BuildRequires: ghc-scalpel-core-devel BuildRequires: ghc-tagsoup-devel BuildRequires: ghc-text-devel BuildRoot: %{_tmppath}/%{name}-%{version}-build -%if %{with tests} -BuildRequires: ghc-HUnit-devel -%endif %description Scalpel is a web scraping library inspired by libraries like Parsec and Perl's @@ -66,9 +60,6 @@ %install %ghc_lib_install -%check -%cabal_test - %post devel %ghc_pkg_recache ++++++ scalpel-0.3.1.tar.gz -> scalpel-0.5.0.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/scalpel-0.3.1/CHANGELOG.md new/scalpel-0.5.0/CHANGELOG.md --- old/scalpel-0.3.1/CHANGELOG.md 2016-05-27 05:10:27.000000000 +0200 +++ new/scalpel-0.5.0/CHANGELOG.md 2017-02-05 05:43:42.000000000 +0100 @@ -2,6 +2,29 @@ ## HEAD +## 0.5.0 + +- Split `scalpel` into two packages: `scalpel` and `scalpel-core`. The latter + does not provide networking support and does not depend on curl. + +## 0.4.1 + +- Added `notP` attribute predicate. + +## 0.4.0 + +- Add the `chroot` tricks (#23 and #25) to README.md and added examples. +- Fix backtracking that occurs when using `guard` and `chroot`. +- Fix bug where the same tag may appear in the result set multiple times. 
+- Performance optimizations when using the (//) operator.
+- Make Scraper an instance of MonadFail. Practically this means that failed
+  pattern matches in `<-` expressions within a do block will evaluate to mzero
+  instead of throwing an error and bringing down the entire script.
+- Pluralized scrapers will now return the empty list instead of mzero when
+  there are no matches.
+- Add the `position` scraper which provides the index of the current sub-tree
+  within the context of a `chroots`'s do-block.
+
 ## 0.3.1

 - Added the `innerHTML` and `innerHTMLs` scrapers.
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/scalpel-0.3.1/README.md new/scalpel-0.5.0/README.md
--- old/scalpel-0.3.1/README.md 2016-01-31 08:15:23.000000000 +0100
+++ new/scalpel-0.5.0/README.md 2017-02-05 05:03:44.000000000 +0100
@@ -109,3 +109,183 @@
     imageURL <- attr "src" $ "img" @: [hasClass "image"]
     return $ ImageComment author imageURL
 ```
+
+Tips & Tricks
+-------------
+
+The primitives provided by scalpel are intentionally minimalistic, with the
+assumption that users will be able to build up complex functionality by
+combining them with functions that work on existing type classes (Monad,
+Applicative, Alternative, etc.).
+
+This section gives examples of common tricks for building up more complex
+behavior from the simple primitives provided by this library.
+
+### OverloadedStrings
+
+`Selector`, `TagName` and `AttributeName` are all `IsString` instances, and
+thus it is convenient to use scalpel with `OverloadedStrings` enabled. If not
+using `OverloadedStrings`, all tag names must be wrapped with `tagSelector`.
+
+### Matching Wildcards
+
+Scalpel has 3 different wildcard values, each corresponding to a distinct use
+case.
+
+- `anySelector` is used to match all tags:
+
+  `textOfAllTags = texts anySelector`
+
+- `AnyTag` is used when matching all tags with some attribute constraint.
For
+  example, to match all tags with the attribute `class` equal to `"button"`:
+
+  `textOfTagsWithClassButton = texts $ AnyTag @: [hasClass "button"]`
+
+- `AnyAttribute` is used when matching tags with some arbitrary attribute equal
+  to a particular value. For example, to match all tags with some attribute
+  equal to `"button"`:
+
+  `textOfTagsWithAnAttributeWhoseValueIsButton = texts $ AnyTag @: [AnyAttribute @= "button"]`
+
+### Complex Predicates
+
+It is possible to run into scenarios where the name and attributes of a tag are
+not sufficient to isolate interesting tags, and properties of child tags need
+to be considered.
+
+In these cases the `guard` function of the `Alternative` type class can be
+combined with `chroot` and `anySelector` to implement predicates of arbitrary
+complexity.
+
+Building off the above example, consider a use case where we would like to find
+the HTML contents of a comment that mentions the word "cat".
+
+The strategy will be the following:
+
+1. Isolate the comment div using `chroot`.
+
+2. Then within the context of that div, the textual contents can be retrieved
+   with `text anySelector`. This works because the `anySelector` selector will
+   match the first tag within the current context, which is the div selected by
+   `chroot`.
+
+3. Then the predicate that `"cat"` appear in the text of the comment will be
+   enforced using `guard`. If the predicate fails, scalpel will backtrack and
+   continue the search for divs until one is found that matches the predicate.
+
+4. Return the desired HTML content of the comment div.
+
+```haskell
+catComment :: Scraper String String
+catComment =
+    -- 1. First narrow the current context to the div containing the comment's
+    --    textual content.
+    chroot ("div" @: [hasClass "comment", hasClass "text"]) $ do
+        -- 2. anySelector can be used to access the root tag of the current context.
+        contents <- text anySelector
+        -- 3.
Skip comment divs that do not contain "cat".
+        guard ("cat" `isInfixOf` contents)
+        -- 4. Generate the desired value.
+        html anySelector
+```
+
+For the full source of this example, see
+[complex-predicates](https://github.com/fimad/scalpel/tree/master/examples/complex-predicates/)
+in the examples directory.
+
+### Generalized Repetition
+
+The pluralized versions of the primitive scrapers (`texts`, `attrs`, `htmls`)
+allow the user to extract content from all of the tags matching a given
+selector. For more complex scraping tasks it will at times be desirable to
+extract multiple values from the same tag.
+
+Like the previous example, the trick here is to use a combination of the
+`chroots` function and the `anySelector` selector.
+
+Consider an extension to the original example where image comments may contain
+some alt text and the desire is to return a tuple of the alt text and the URLs
+of the images.
+
+The strategy will be the following:
+
+1. Isolate each img tag using `chroots`.
+
+2. Then within the context of each img tag, use the `anySelector` selector to
+   extract the alt and src attributes from the current tag.
+
+3. Create and return a tuple of the extracted attributes.
+
+```haskell
+altTextAndImages :: Scraper String [(String, URL)]
+altTextAndImages =
+    -- 1. First narrow the current context to each img tag.
+    chroots "img" $ do
+        -- 2. Use anySelector to access all the relevant content from the
+        --    currently selected img tag.
+        altText <- attr "alt" anySelector
+        srcUrl  <- attr "src" anySelector
+        -- 3. Combine the retrieved content into the desired final result.
+        return (altText, srcUrl)
+```
+
+For the full source of this example, see
+[generalized-repetition](https://github.com/fimad/scalpel/tree/master/examples/generalized-repetition/)
+in the examples directory.
+
+### scalpel-core
+
+The `scalpel` package relies on curl to provide networking support.
For small
+projects and one-off scraping tasks this is likely sufficient. However, when
+using scalpel in existing projects or on platforms without curl, this
+dependency can be a hindrance.
+
+For these scenarios users can instead depend on
+[scalpel-core](https://hackage.haskell.org/package/scalpel-core), which does
+not provide networking support and does not depend on curl.
+
+Troubleshooting
+---------------
+
+### My Scraping Target Doesn't Return The Markup I Expected
+
+Some websites return different markup depending on the user agent sent along
+with the request. In some cases, this even means returning no markup at all in
+an effort to prevent scraping.
+
+To work around this, you can add your own user agent string with a curl option.
+
+```haskell
+#!/usr/local/bin/stack
+-- stack runghc --resolver lts-6.24 --install-ghc --package scalpel-0.4.0
+
+import Network.Curl
+import Text.HTML.Scalpel
+
+main = do
+    html <- scrapeURLWithOpts opts url $ htmls anySelector
+    maybe printError printHtml html
+  where
+    url = "https://www.google.com"
+    opts = [ CurlUserAgent "some user agent string" ]
+    printError = putStrLn "Failed"
+    printHtml = mapM_ putStrLn
+```
+
+A list of user agent strings can be found
+[here](http://www.useragentstring.com/pages/useragentstring.php).
+
+### Building on Windows
+
+Building scalpel on Windows can be a challenge because of the dependency on
+curl. In order to successfully build scalpel you must download
+[curl](http://curl.haxx.se/download.html) and add the following to your
+stack.yaml file.
+
+```yaml
+extra-lib-dirs: ["C:/Program Files/cURL/dlls"]
+extra-include-dirs: ["C:/Program Files/cURL/dlls"]
+```
+
+If you do not require network support, you can instead depend on
+[scalpel-core](https://hackage.haskell.org/package/scalpel-core), which does
+not depend on curl.
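The scalpel-core split described in the README diff above can be sketched in a few lines. This is a minimal, hedged example, not taken from the package itself: the stack resolver named below is a placeholder (pick one that ships scalpel-core 0.5.0), while `Text.HTML.Scalpel.Core` and `scrapeStringLike` are the module and entry point that this release moves into scalpel-core. No curl or network access is involved; the markup is scraped from an in-memory string.

```haskell
#!/usr/local/bin/stack
-- stack runghc --resolver lts-8.0 --install-ghc --package scalpel-core
-- NOTE: the resolver above is a placeholder; use any snapshot providing
-- scalpel-core 0.5.0.
{-# LANGUAGE OverloadedStrings #-}

-- Scrape an in-memory string using only scalpel-core (no curl dependency).
import Text.HTML.Scalpel.Core

main :: IO ()
main = print $ scrapeStringLike page comments
  where
    page :: String
    page = "<div class='comment'>one</div><div class='comment'>two</div>"

    -- Collect the inner text of every div with class "comment".
    comments :: Scraper String [String]
    comments = texts $ "div" @: [hasClass "comment"]
```

With the markup above this should yield `Just ["one","two"]`; swapping in `scalpel` and `scrapeURL` later only changes how the tags are obtained, not the scraper itself.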
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/scalpel-0.3.1/benchmarks/Main.hs new/scalpel-0.5.0/benchmarks/Main.hs --- old/scalpel-0.3.1/benchmarks/Main.hs 2016-01-31 10:22:23.000000000 +0100 +++ new/scalpel-0.5.0/benchmarks/Main.hs 1970-01-01 01:00:00.000000000 +0100 @@ -1,41 +0,0 @@ -import Text.HTML.Scalpel - -import Control.Applicative ((<$>)) -import Control.Monad (replicateM_) -import Criterion.Main (bgroup, bench, defaultMain, nf) -import qualified Data.Text as T - - -main :: IO () -main = do - let nested100 = makeNested 100 - let nested1000 = makeNested 1000 - let nested10000 = makeNested 10000 - defaultMain [ - bgroup "nested" [ - bench "100" $ nf sumListTags nested100 - , bench "1000" $ nf sumListTags nested1000 - , bench "10000" $ nf sumListTags nested10000 - ] - , bgroup "many-selects" [ - bench "10" $ nf (manySelects 10) nested1000 - , bench "100" $ nf (manySelects 100) nested1000 - , bench "1000" $ nf (manySelects 1000) nested1000 - ] - ] - -makeNested :: Int -> T.Text -makeNested i = T.concat [T.replicate i open, one, T.replicate i close] - where - open = T.pack "<tag>" - close = T.pack "</tag>" - one = T.pack "1" - -sumListTags :: T.Text -> Maybe Integer -sumListTags testData = scrapeStringLike testData - $ (sum . map (const 1)) <$> texts "tag" - -manySelects :: Int -> T.Text -> Maybe () -manySelects i testData = scrapeStringLike testData - $ replicateM_ i - $ texts "tag" diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/scalpel-0.3.1/scalpel.cabal new/scalpel-0.5.0/scalpel.cabal --- old/scalpel-0.3.1/scalpel.cabal 2016-05-27 05:10:36.000000000 +0200 +++ new/scalpel-0.5.0/scalpel.cabal 2017-02-05 05:43:42.000000000 +0100 @@ -1,5 +1,5 @@ name: scalpel -version: 0.3.1 +version: 0.5.0 synopsis: A high level web scraping library for Haskell. 
description: Scalpel is a web scraping library inspired by libraries like Parsec and @@ -24,63 +24,27 @@ source-repository this type: git location: https://github.com/fimad/scalpel.git - tag: v0.3.1 + tag: v0.5.0 library other-extensions: FlexibleInstances , FunctionalDependencies other-modules: - Text.HTML.Scalpel.Internal.Scrape - , Text.HTML.Scalpel.Internal.Scrape.StringLike - , Text.HTML.Scalpel.Internal.Scrape.URL - , Text.HTML.Scalpel.Internal.Select - , Text.HTML.Scalpel.Internal.Select.Combinators - , Text.HTML.Scalpel.Internal.Select.Types + Text.HTML.Scalpel.Internal.Scrape.URL exposed-modules: Text.HTML.Scalpel hs-source-dirs: src/ default-language: Haskell2010 build-depends: base >= 4.6 && < 5 + , scalpel-core == 0.5.0 , bytestring - , containers , curl >= 1.3.4 , data-default - , regex-base - , regex-tdfa , tagsoup >= 0.12.2 , text default-extensions: ParallelListComp , PatternGuards ghc-options: -W - -test-suite lib-tests - type: exitcode-stdio-1.0 - main-is: TestMain.hs - hs-source-dirs: tests/ - default-language: Haskell2010 - build-depends: - HUnit - , base >= 4.6 && < 5 - , regex-base - , regex-tdfa - , scalpel - , tagsoup - default-extensions: - ParallelListComp - , PatternGuards - ghc-options: -W - -benchmark bench - type: exitcode-stdio-1.0 - default-language: Haskell2010 - hs-source-dirs: benchmarks - main-is: Main.hs - build-depends: - base >=4.7 && <5 - , criterion >=1.1 - , scalpel - , text - ghc-options: -Wall diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/scalpel-0.3.1/src/Text/HTML/Scalpel/Internal/Scrape/StringLike.hs new/scalpel-0.5.0/src/Text/HTML/Scalpel/Internal/Scrape/StringLike.hs --- old/scalpel-0.3.1/src/Text/HTML/Scalpel/Internal/Scrape/StringLike.hs 2016-01-31 06:51:30.000000000 +0100 +++ new/scalpel-0.5.0/src/Text/HTML/Scalpel/Internal/Scrape/StringLike.hs 1970-01-01 01:00:00.000000000 +0100 @@ -1,16 +0,0 @@ -{-# OPTIONS_HADDOCK hide #-} -module 
Text.HTML.Scalpel.Internal.Scrape.StringLike ( - scrapeStringLike -) where - -import Text.HTML.Scalpel.Internal.Scrape - -import qualified Text.HTML.TagSoup as TagSoup -import qualified Text.StringLike as TagSoup - - --- | The 'scrapeStringLike' function parses a 'StringLike' value into a list of --- tags and executes a 'Scraper' on it. -scrapeStringLike :: (Ord str, TagSoup.StringLike str) - => str -> Scraper str a -> Maybe a -scrapeStringLike html scraper = scrape scraper (TagSoup.parseTags html) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/scalpel-0.3.1/src/Text/HTML/Scalpel/Internal/Scrape/URL.hs new/scalpel-0.5.0/src/Text/HTML/Scalpel/Internal/Scrape/URL.hs --- old/scalpel-0.3.1/src/Text/HTML/Scalpel/Internal/Scrape/URL.hs 2016-01-31 08:15:23.000000000 +0100 +++ new/scalpel-0.5.0/src/Text/HTML/Scalpel/Internal/Scrape/URL.hs 2017-02-05 05:03:44.000000000 +0100 @@ -13,7 +13,7 @@ , scrapeURLWithConfig ) where -import Text.HTML.Scalpel.Internal.Scrape +import Text.HTML.Scalpel.Core import Control.Applicative ((<$>)) import Data.Char (toLower) @@ -52,6 +52,12 @@ -- | The 'scrapeURL' function downloads the contents of the given URL and -- executes a 'Scraper' on it. +-- +-- 'scrapeURL' makes use of curl to make HTTP requests. The dependency on curl +-- may be too heavyweight for some use cases. In which case users who do not +-- require inbuilt networking support can depend on +-- <https://hackage.haskell.org/package/scalpel-core scalpel-core> for a +-- lightweight subset of this library that does not depend on curl. 
scrapeURL :: (Ord str, TagSoup.StringLike str) => URL -> Scraper str a -> IO (Maybe a) scrapeURL = scrapeURLWithOpts [Curl.CurlFollowLocation True] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/scalpel-0.3.1/src/Text/HTML/Scalpel/Internal/Scrape.hs new/scalpel-0.5.0/src/Text/HTML/Scalpel/Internal/Scrape.hs --- old/scalpel-0.3.1/src/Text/HTML/Scalpel/Internal/Scrape.hs 2016-05-22 01:55:45.000000000 +0200 +++ new/scalpel-0.5.0/src/Text/HTML/Scalpel/Internal/Scrape.hs 1970-01-01 01:00:00.000000000 +0100 @@ -1,173 +0,0 @@ -{-# OPTIONS_HADDOCK hide #-} -module Text.HTML.Scalpel.Internal.Scrape ( - Scraper -, scrape -, attr -, attrs -, html -, htmls -, innerHTML -, innerHTMLs -, text -, texts -, chroot -, chroots -) where - -import Text.HTML.Scalpel.Internal.Select -import Text.HTML.Scalpel.Internal.Select.Types - -import Control.Applicative -import Control.Monad -import Data.Maybe - -import qualified Text.HTML.TagSoup as TagSoup -import qualified Text.StringLike as TagSoup - - --- | A value of 'Scraper' @a@ defines a web scraper that is capable of consuming --- a list of 'TagSoup.Tag's and optionally producing a value of type @a@. -newtype Scraper str a = MkScraper { - scrapeOffsets :: [(TagSoup.Tag str, CloseOffset)] -> Maybe a - } - -instance Functor (Scraper str) where - fmap f (MkScraper a) = MkScraper $ fmap (fmap f) a - -instance Applicative (Scraper str) where - pure = MkScraper . const . 
Just - (MkScraper f) <*> (MkScraper a) = MkScraper applied - where applied tags | (Just aVal) <- a tags = ($ aVal) <$> f tags - | otherwise = Nothing - -instance Alternative (Scraper str) where - empty = MkScraper $ const Nothing - (MkScraper a) <|> (MkScraper b) = MkScraper choice - where choice tags | (Just aVal) <- a tags = Just aVal - | otherwise = b tags - -instance Monad (Scraper str) where - return = pure - (MkScraper a) >>= f = MkScraper combined - where combined tags | (Just aVal) <- a tags = let (MkScraper b) = f aVal - in b tags - | otherwise = Nothing - -instance MonadPlus (Scraper str) where - mzero = empty - mplus = (<|>) - --- | The 'scrape' function executes a 'Scraper' on a list of --- 'TagSoup.Tag's and produces an optional value. -scrape :: (Ord str, TagSoup.StringLike str) - => Scraper str a -> [TagSoup.Tag str] -> Maybe a -scrape s = scrapeOffsets s . tagWithOffset . TagSoup.canonicalizeTags - --- | The 'chroot' function takes a selector and an inner scraper and executes --- the inner scraper as if it were scraping a document that consists solely of --- the tags corresponding to the selector. --- --- This function will match only the first set of tags matching the selector, to --- match every set of tags, use 'chroots'. -chroot :: (Ord str, TagSoup.StringLike str, Selectable s) - => s -> Scraper str a -> Scraper str a -chroot selector (MkScraper inner) = MkScraper - $ join . (inner <$>) - . listToMaybe . select selector - --- | The 'chroots' function takes a selector and an inner scraper and executes --- the inner scraper as if it were scraping a document that consists solely of --- the tags corresponding to the selector. The inner scraper is executed for --- each set of tags matching the given selector. -chroots :: (Ord str, TagSoup.StringLike str, Selectable s) - => s -> Scraper str a -> Scraper str [a] -chroots selector (MkScraper inner) = MkScraper - $ return . mapMaybe inner . 
select selector - --- | The 'text' function takes a selector and returns the inner text from the --- set of tags described by the given selector. --- --- This function will match only the first set of tags matching the selector, to --- match every set of tags, use 'texts'. -text :: (Ord str, TagSoup.StringLike str, Selectable s) => s -> Scraper str str -text s = MkScraper $ withHead tagsToText . select_ s - --- | The 'texts' function takes a selector and returns the inner text from every --- set of tags matching the given selector. -texts :: (Ord str, TagSoup.StringLike str, Selectable s) - => s -> Scraper str [str] -texts s = MkScraper $ withAll tagsToText . select_ s - --- | The 'html' function takes a selector and returns the html string from the --- set of tags described by the given selector. --- --- This function will match only the first set of tags matching the selector, to --- match every set of tags, use 'htmls'. -html :: (Ord str, TagSoup.StringLike str, Selectable s) => s -> Scraper str str -html s = MkScraper $ withHead tagsToHTML . select_ s - --- | The 'htmls' function takes a selector and returns the html string from --- every set of tags matching the given selector. -htmls :: (Ord str, TagSoup.StringLike str, Selectable s) - => s -> Scraper str [str] -htmls s = MkScraper $ withAll tagsToHTML . select_ s - --- | The 'innerHTML' function takes a selector and returns the inner html string --- from the set of tags described by the given selector. Inner html here meaning --- the html within but not including the selected tags. --- --- This function will match only the first set of tags matching the selector, to --- match every set of tags, use 'innerHTMLs'. -innerHTML :: (Ord str, TagSoup.StringLike str, Selectable s) - => s -> Scraper str str -innerHTML s = MkScraper $ withHead tagsToInnerHTML . select_ s - --- | The 'innerHTMLs' function takes a selector and returns the inner html --- string from every set of tags matching the given selector. 
-innerHTMLs :: (Ord str, TagSoup.StringLike str, Selectable s) - => s -> Scraper str [str] -innerHTMLs s = MkScraper $ withAll tagsToInnerHTML . select_ s - --- | The 'attr' function takes an attribute name and a selector and returns the --- value of the attribute of the given name for the first opening tag that --- matches the given selector. --- --- This function will match only the opening tag matching the selector, to match --- every tag, use 'attrs'. -attr :: (Ord str, Show str, TagSoup.StringLike str, Selectable s) - => String -> s -> Scraper str str -attr name s = MkScraper - $ join . withHead (tagsToAttr $ TagSoup.castString name) . select_ s - --- | The 'attrs' function takes an attribute name and a selector and returns the --- value of the attribute of the given name for every opening tag that matches --- the given selector. -attrs :: (Ord str, Show str, TagSoup.StringLike str, Selectable s) - => String -> s -> Scraper str [str] -attrs name s = MkScraper - $ fmap catMaybes . withAll (tagsToAttr nameStr) . select_ s - where nameStr = TagSoup.castString name - -withHead :: (a -> b) -> [a] -> Maybe b -withHead _ [] = Nothing -withHead f (x:_) = Just $ f x - -withAll :: (a -> b) -> [a] -> Maybe [b] -withAll _ [] = Nothing -withAll f xs = Just $ map f xs - -tagsToText :: TagSoup.StringLike str => [TagSoup.Tag str] -> str -tagsToText = TagSoup.innerText - -tagsToHTML :: TagSoup.StringLike str => [TagSoup.Tag str] -> str -tagsToHTML = TagSoup.renderTags - -tagsToInnerHTML :: TagSoup.StringLike str => [TagSoup.Tag str] -> str -tagsToInnerHTML = tagsToHTML . reverse . drop 1 . reverse . 
drop 1 - -tagsToAttr :: (Show str, TagSoup.StringLike str) - => str -> [TagSoup.Tag str] -> Maybe str -tagsToAttr attr tags = do - tag <- listToMaybe tags - guard $ TagSoup.isTagOpen tag - return $ TagSoup.fromAttrib attr tag diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/scalpel-0.3.1/src/Text/HTML/Scalpel/Internal/Select/Combinators.hs new/scalpel-0.5.0/src/Text/HTML/Scalpel/Internal/Select/Combinators.hs --- old/scalpel-0.3.1/src/Text/HTML/Scalpel/Internal/Select/Combinators.hs 2016-05-22 02:59:40.000000000 +0200 +++ new/scalpel-0.5.0/src/Text/HTML/Scalpel/Internal/Select/Combinators.hs 1970-01-01 01:00:00.000000000 +0100 @@ -1,76 +0,0 @@ -{-# LANGUAGE ImpredicativeTypes #-} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE MultiWayIf #-} -{-# OPTIONS_HADDOCK hide #-} -module Text.HTML.Scalpel.Internal.Select.Combinators ( - (//) -, (@:) -, (@=) -, (@=~) -, hasClass -, match -) where - -import Text.HTML.Scalpel.Internal.Select.Types - -import qualified Data.Text as T -import qualified Text.Regex.Base.RegexLike as RE -import qualified Text.StringLike as TagSoup - - --- | The '@:' operator creates a 'Selector' by combining a 'TagName' with a list --- of 'AttributePredicate's. -(@:) :: TagName tag => tag -> [AttributePredicate] -> Selector -(@:) tag attrs = MkSelector [toSelectNode tag attrs] -infixl 9 @: - --- | The '@=' operator creates an 'AttributePredicate' that will match --- attributes with the given name and value. --- --- If you are attempting to match a specific class of a tag with potentially --- multiple classes, you should use the 'hasClass' utility function. 
-(@=) :: AttributeName key => key -> String -> AttributePredicate -(@=) key value = MkAttributePredicate $ \(attrKey, attrValue) -> - matchKey key attrKey - && TagSoup.fromString value == attrValue -infixl 6 @= - --- | The '@=~' operator creates an 'AttributePredicate' that will match --- attributes with the given name and whose value matches the given regular --- expression. -(@=~) :: (AttributeName key, RE.RegexLike re String) - => key -> re -> AttributePredicate -(@=~) key re = MkAttributePredicate $ \(attrKey, attrValue) -> - matchKey key attrKey - && RE.matchTest re (TagSoup.toString attrValue) -infixl 6 @=~ - --- | The '//' operator creates an 'Selector' by nesting one 'Selector' in --- another. For example, @"div" // "a"@ will create a 'Selector' that matches --- anchor tags that are nested arbitrarily deep within a div tag. -(//) :: (Selectable a, Selectable b) => a -> b -> Selector -(//) a b = MkSelector (as ++ bs) - where (MkSelector as) = toSelector a - (MkSelector bs) = toSelector b -infixl 5 // - --- | The classes of a tag are defined in HTML as a space separated list given by --- the @class@ attribute. The 'hasClass' function will match a @class@ attribute --- if the given class appears anywhere in the space separated list of classes. -hasClass :: String -> AttributePredicate -hasClass clazz = MkAttributePredicate hasClass' - where - hasClass' (attrName, classes) - | "class" == TagSoup.toString attrName = textClass `elem` classList - | otherwise = False - where textClass = TagSoup.castString clazz - textClasses = TagSoup.castString classes - classList = T.split (== ' ') textClasses - --- | The 'match' function allows for the creation of arbitrary --- 'AttributePredicate's. The argument is a function that takes the attribute --- key followed by the attribute value and returns a boolean indicating if the --- attribute satisfies the predicate. 
-match :: (String -> String -> Bool) -> AttributePredicate -match f = MkAttributePredicate $ \(attrKey, attrValue) -> - f (TagSoup.toString attrKey) (TagSoup.toString attrValue) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/scalpel-0.3.1/src/Text/HTML/Scalpel/Internal/Select/Types.hs new/scalpel-0.5.0/src/Text/HTML/Scalpel/Internal/Select/Types.hs --- old/scalpel-0.3.1/src/Text/HTML/Scalpel/Internal/Select/Types.hs 2016-02-01 00:18:21.000000000 +0100 +++ new/scalpel-0.5.0/src/Text/HTML/Scalpel/Internal/Select/Types.hs 1970-01-01 01:00:00.000000000 +0100 @@ -1,88 +0,0 @@ -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE Rank2Types #-} -{-# LANGUAGE ImpredicativeTypes #-} -{-# OPTIONS_HADDOCK hide #-} -module Text.HTML.Scalpel.Internal.Select.Types ( - Selector (..) -, Selectable (..) -, AttributePredicate (..) -, checkPred -, Any (..) -, AttributeName (..) -, TagName (..) - -, SelectNode (..) -) where - -import Data.Char (toLower) - -import qualified Text.HTML.TagSoup as TagSoup -import qualified Text.StringLike as TagSoup - - --- | The 'Selectable' class defines a class of types that are capable of being --- cast into a 'Selector' which in turns describes a section of an HTML DOM --- tree. -class Selectable s where - toSelector :: s -> Selector - --- | The 'AttributeName' class defines a class of types that can be used when --- creating 'Selector's to specify the name of an attribute of a tag. Currently --- the only types of this class are 'String' for matching attributes exactly, --- and 'Any' for matching attributes with any name. -class AttributeName k where - matchKey :: TagSoup.StringLike str => k -> str -> Bool - --- | The 'TagName' class defines a class of types that can be used when creating --- 'Selector's to specify the name of a tag. Currently the only types of this --- class are 'String' for matching tags exactly, and 'Any' for matching tags --- with any name. 
-class TagName t where - toSelectNode :: t -> [AttributePredicate] -> SelectNode - --- | An 'AttributePredicate' is a method that takes a 'TagSoup.Attribute' and --- returns a 'Bool' indicating if the given attribute matches a predicate. -data AttributePredicate - = MkAttributePredicate - (forall str. TagSoup.StringLike str => TagSoup.Attribute str - -> Bool) - -checkPred :: TagSoup.StringLike str - => AttributePredicate -> TagSoup.Attribute str -> Bool -checkPred (MkAttributePredicate p) = p - --- | 'Any' can be used as a wildcard when constructing selectors to match tags --- and attributes with any name. --- --- For example, the selector @Any \@: [Any \@= \"foo\"]@ matches all tags that --- have any attribute where the value is @\"foo\"@. -data Any = Any - --- | 'Selector' defines a selection of an HTML DOM tree to be operated on by --- a web scraper. The selection includes the opening tag that matches the --- selection, all of the inner tags, and the corresponding closing tag. -newtype Selector = MkSelector [SelectNode] - -data SelectNode = SelectNode String [AttributePredicate] - | SelectAny [AttributePredicate] - -instance Selectable Selector where - toSelector = id - -instance Selectable String where - toSelector node = MkSelector [SelectNode (map toLower node) []] - -instance Selectable Any where - toSelector = const (MkSelector [SelectAny []]) - -instance AttributeName Any where - matchKey = const . const True - -instance AttributeName String where - matchKey = (==) . TagSoup.fromString . map toLower - -instance TagName Any where - toSelectNode = const SelectAny - -instance TagName String where - toSelectNode = SelectNode . TagSoup.fromString . 
map toLower diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/scalpel-0.3.1/src/Text/HTML/Scalpel/Internal/Select.hs new/scalpel-0.5.0/src/Text/HTML/Scalpel/Internal/Select.hs --- old/scalpel-0.3.1/src/Text/HTML/Scalpel/Internal/Select.hs 2016-05-23 01:15:18.000000000 +0200 +++ new/scalpel-0.5.0/src/Text/HTML/Scalpel/Internal/Select.hs 1970-01-01 01:00:00.000000000 +0100 @@ -1,175 +0,0 @@ -{-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE TupleSections #-} -{-# OPTIONS_HADDOCK hide #-} -module Text.HTML.Scalpel.Internal.Select ( - CloseOffset - -, select -, select_ -, tagWithOffset -) where - -import Text.HTML.Scalpel.Internal.Select.Types - -import Control.Applicative ((<$>), (<|>)) -import Control.Arrow (first) -import Data.List (tails) -import Data.Maybe (catMaybes) -import GHC.Exts (sortWith) - -import qualified Data.Map.Strict as Map -import qualified Text.HTML.TagSoup as TagSoup -import qualified Text.StringLike as TagSoup - - -type CloseOffset = Maybe Int - --- | The 'select' function takes a 'Selectable' value and a list of --- 'TagSoup.Tag's and returns a list of every subsequence of the given list of --- Tags that matches the given selector. -select :: (Ord str, TagSoup.StringLike str, Selectable s) - => s - -> [(TagSoup.Tag str, CloseOffset)] - -> [[(TagSoup.Tag str, CloseOffset)]] -select s = selectNodes nodes - where (MkSelector nodes) = toSelector s - --- | Like 'select' but strips the 'CloseOffset' from the result. -select_ :: (Ord str, TagSoup.StringLike str, Selectable s) - => s - -> [(TagSoup.Tag str, CloseOffset)] - -> [[TagSoup.Tag str]] -select_ s = map (map fst) . select s - --- | Annotate each tag with the offset to the corresponding closing tag. This --- annotating is done in O(n * log(n)). --- --- The algorithm works on a list of tags annotated with their index. It --- maintains a map of unclosed open tags keyed by tag name. 
---
--- (1) When an open tag is encountered it is pushed onto the list keyed by
---     its name.
---
--- (2) When a closing tag is encountered the corresponding opening tag is
---     popped, the offset between the two are computed, the opening tag is
---     annotated with the offset between the two, and both are added to the
---     result set.
---
--- (3) When any other tag is encountered it is added to the result set
---     immediately.
---
--- (4) After all tags are either in the result set or the state, all
---     unclosed tags from the state are added to the result set without a
---     closing offset.
---
--- (5) The result set is then sorted and the indices are stripped from the
---     tags.
-tagWithOffset :: forall str. (Ord str, TagSoup.StringLike str)
-              => [TagSoup.Tag str] -> [(TagSoup.Tag str, CloseOffset)]
-tagWithOffset tags = let indexed  = zip tags [0..]
-                         unsorted = go indexed Map.empty
-                         sorted   = sortWith snd unsorted
-                     in map fst sorted
-    where
-        go :: [(TagSoup.Tag str, Int)]
-           -> Map.Map str [(TagSoup.Tag str, Int)]
-           -> [((TagSoup.Tag str, CloseOffset), Int)]
-        go [] state = map (first (, Nothing)) $ concat $ Map.elems state
-        go (x@(tag, index) : xs) state
-            | TagSoup.isTagClose tag =
-                let maybeOpen = head <$> Map.lookup tagName state
-                    state'    = Map.alter popTag tagName state
-                    res       = catMaybes [
-                            Just ((tag, Nothing), index)
-                        ,   calcOffset <$> maybeOpen
-                        ]
-                in res ++ go xs state'
-            | TagSoup.isTagOpen tag = go xs (Map.alter appendTag tagName state)
-            | otherwise = ((tag, Nothing), index) : go xs state
-            where
-                tagName = getTagName tag
-
-                appendTag :: Maybe [(TagSoup.Tag str, Int)]
-                          -> Maybe [(TagSoup.Tag str, Int)]
-                appendTag m = (x :) <$> (m <|> Just [])
-
-                calcOffset :: (t, Int) -> ((t, Maybe Int), Int)
-                calcOffset (t, i) =
-                    let offset = index - i
-                    in offset `seq` ((t, Just offset), i)
-
-                popTag :: Maybe [a] -> Maybe [a]
-                popTag (Just (_ : y : xs)) = let s = y : xs in s `seq` Just s
-                popTag _ = Nothing
-
-selectNodes :: TagSoup.StringLike str
-            => [SelectNode]
-            -> [(TagSoup.Tag str, CloseOffset)]
-            -> [[(TagSoup.Tag str, CloseOffset)]]
-selectNodes nodes tags = head' $ reverse results
-    where results = [concatMap (selectNode s) ts | s <- nodes
-                                                 | ts <- [tags] : results]
-          head' []      = []
-          head' (x : _) = x
-
-selectNode :: TagSoup.StringLike str
-           => SelectNode
-           -> [(TagSoup.Tag str, CloseOffset)]
-           -> [[(TagSoup.Tag str, CloseOffset)]]
-selectNode (SelectNode node attributes) tags = concatMap extractTagBlock nodes
-    where nodes = filter (checkTag node attributes) $ tails tags
-selectNode (SelectAny attributes) tags = concatMap extractTagBlock nodes
-    where nodes = filter (checkPreds attributes) $ tails tags
-
--- | Given a tag name and a list of attribute predicates return a function that
--- returns true if a given tag matches the supplied name and predicates.
-checkTag :: TagSoup.StringLike str
-         => String
-         -> [AttributePredicate]
-         -> [(TagSoup.Tag str, CloseOffset)]
-         -> Bool
-checkTag name preds tags@((TagSoup.TagOpen str _, _) : _)
-    = TagSoup.fromString name == str && checkPreds preds tags
-checkTag _ _ _ = False
-
-checkPreds :: TagSoup.StringLike str
-           => [AttributePredicate] -> [(TagSoup.Tag str, CloseOffset)] -> Bool
-checkPreds preds ((TagSoup.TagOpen _ attrs, _) : _)
-    = and [or [checkPred p attr | attr <- attrs] | p <- preds]
-checkPreds _ _ = False
-
--- | Given a list of tags, return the prefix of the tags up to the closing tag
-- that corresponds to the initial tag.
-extractTagBlock :: TagSoup.StringLike str
-                => [(TagSoup.Tag str, CloseOffset)]
-                -> [[(TagSoup.Tag str, CloseOffset)]]
-extractTagBlock (ctag@(tag, maybeOffset) : tags)
-    | not $ TagSoup.isTagOpen tag = []
-    | Just offset <- maybeOffset  = [takeOrClose ctag offset tags]
-    -- To handle tags that do not have a closing tag, fake an empty block by
-    -- adding a closing tag. This function assumes that the tag is an open
-    -- tag.
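The deleted `tagWithOffset` comment above documents a five-step, O(n * log(n)) pass that pairs each open tag with its matching close tag. As a rough illustration for readers following this diff, the idea can be sketched in isolation using an ad-hoc `Tag` type and the names `tagOffsets`, `Open`, `Close`, `Text` (all hypothetical; this is not scalpel's actual code, which works over TagSoup tags):

```haskell
import qualified Data.Map.Strict as Map
import Data.List (sortOn)

-- Hypothetical stand-in for TagSoup's tag type; names are plain Strings.
data Tag = Open String | Close String | Text String
    deriving (Eq, Show)

-- Annotate each tag with the distance to its matching close tag, following
-- the five steps above: push opens onto a per-name stack (1), pop the most
-- recent open and compute the offset on a close (2), pass other tags
-- through (3), flush unmatched opens with no offset (4), and finally sort
-- by index to restore document order and strip the indices (5).
tagOffsets :: [Tag] -> [(Tag, Maybe Int)]
tagOffsets tags = map fst . sortOn snd $ go (zip tags [0 ..]) Map.empty
  where
    -- Step (4): anything left in the state never found a close tag.
    go [] stacks = [((t, Nothing), i) | (t, i) <- concat (Map.elems stacks)]
    go ((tag, i) : rest) stacks = case tag of
        Open name  -> go rest (Map.insertWith (++) name [(tag, i)] stacks)
        Close name -> case Map.findWithDefault [] name stacks of
            (open, j) : opens ->
                ((open, Just (i - j)), j)
                    : ((tag, Nothing), i)
                    : go rest (Map.insert name opens stacks)
            [] -> ((tag, Nothing), i) : go rest stacks
        _ -> ((tag, Nothing), i) : go rest stacks
```

For example, `tagOffsets [Open "a", Text "x", Close "a"]` annotates the `<a>` with `Just 2`, the distance to its `</a>`, while an unclosed `Open "b"` would come back with `Nothing`.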
-    | otherwise = [[ctag, (closeForOpen tag, Nothing)]]
-extractTagBlock _ = []
-
--- | Take offset number of elements from tags if available. If there are not
--- that many available, then fake a closing tag for the open tag. This happens
--- with malformed HTML that looks like `<a><b></a></b>`.
-takeOrClose :: TagSoup.StringLike str
-            => (TagSoup.Tag str, CloseOffset)
-            -> Int
-            -> [(TagSoup.Tag str, CloseOffset)]
-            -> [(TagSoup.Tag str, CloseOffset)]
-takeOrClose open@(tag, _) offset tags = go offset tags (open :)
-    where
-        go 0 _        f = f []
-        go _ []       _ = [open, (closeForOpen tag, Nothing)]
-        go i (x : xs) f = go (i - 1) xs (f . (x :))
-
-closeForOpen :: TagSoup.StringLike str => TagSoup.Tag str -> TagSoup.Tag str
-closeForOpen = TagSoup.TagClose . getTagName
-
-getTagName :: TagSoup.StringLike str => TagSoup.Tag str -> str
-getTagName (TagSoup.TagOpen name _) = name
-getTagName (TagSoup.TagClose name)  = name
-getTagName _ = undefined
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/scalpel-0.3.1/src/Text/HTML/Scalpel.hs new/scalpel-0.5.0/src/Text/HTML/Scalpel.hs
--- old/scalpel-0.3.1/src/Text/HTML/Scalpel.hs 2016-05-22 02:59:40.000000000 +0200
+++ new/scalpel-0.5.0/src/Text/HTML/Scalpel.hs 2017-02-05 05:03:44.000000000 +0100
@@ -100,12 +100,12 @@
 module Text.HTML.Scalpel (
 -- * Selectors
   Selector
-, Selectable (..)
 , AttributePredicate
-, AttributeName
-, TagName
+, AttributeName (..)
+, TagName (..)
+, tagSelector
 -- ** Wildcards
-, Any (..)
+, anySelector
 -- ** Tag combinators
 , (//)
 -- ** Attribute predicates
@@ -113,6 +113,7 @@
 , (@=)
 , (@=~)
 , hasClass
+, notP
 , match
 
 -- * Scrapers
@@ -128,6 +129,7 @@
 , texts
 , chroot
 , chroots
+, position
 -- ** Executing scrapers
 , scrape
 , scrapeStringLike
@@ -142,8 +144,5 @@
 , iso88591Decoder
 ) where
 
-import Text.HTML.Scalpel.Internal.Scrape
-import Text.HTML.Scalpel.Internal.Scrape.StringLike
+import Text.HTML.Scalpel.Core
 import Text.HTML.Scalpel.Internal.Scrape.URL
-import Text.HTML.Scalpel.Internal.Select.Combinators
-import Text.HTML.Scalpel.Internal.Select.Types
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/scalpel-0.3.1/tests/TestMain.hs new/scalpel-0.5.0/tests/TestMain.hs
--- old/scalpel-0.3.1/tests/TestMain.hs 2016-05-22 02:59:40.000000000 +0200
+++ new/scalpel-0.5.0/tests/TestMain.hs 1970-01-01 01:00:00.000000000 +0100
@@ -1,233 +0,0 @@
-{-# LANGUAGE FlexibleContexts #-}
-module Main (main) where
-
-import Text.HTML.Scalpel
-
-import Control.Applicative
-import System.Exit
-import Test.HUnit
-
-import qualified Text.HTML.TagSoup as TagSoup
-import qualified Text.Regex.TDFA
-
-
-main = exit . failures =<< runTestTT (TestList [scrapeTests])
-
-exit :: Int -> IO ()
-exit 0 = exitSuccess
-exit n = exitWith $ ExitFailure n
-
-re :: String -> Text.Regex.TDFA.Regex
-re = Text.Regex.TDFA.makeRegex
-
-scrapeTests = "scrapeTests" ~: TestList [
-        scrapeTest
-            "<a>foo</a>"
-            (Just ["<a>foo</a>"])
-            (htmls ("a" @: []))
-
-    ,   scrapeTest
-            "<a>foo</a><a>bar</a>"
-            (Just ["<a>foo</a>", "<a>bar</a>"])
-            (htmls ("a" @: []))
-
-    ,   scrapeTest
-            "<b><a>foo</a></b>"
-            (Just ["<a>foo</a>"])
-            (htmls ("a" @: []))
-
-    ,   scrapeTest
-            "<a><a>foo</a></a>"
-            (Just ["<a><a>foo</a></a>", "<a>foo</a>"])
-            (htmls ("a" @: []))
-
-    ,   scrapeTest
-            "<a>foo</a>"
-            Nothing
-            (htmls ("b" @: []))
-
-    ,   scrapeTest
-            "<a>foo"
-            (Just ["<a></a>"])
-            (htmls ("a" @: []))
-
-    ,   scrapeTest
-            "<a>foo</a><a key=\"value\">bar</a>"
-            (Just ["<a key=\"value\">bar</a>"])
-            (htmls ("a" @: ["key" @= "value"]))
-
-    ,   scrapeTest
-            "<a><b><c>foo</c></b></a>"
-            (Just ["<c>foo</c>"])
-            (htmls ("a" // "b" @: [] // "c"))
-
-    ,   scrapeTest
-            "<c><a><b>foo</b></a></c><c><a><d><b>bar</b></d></a></c><b>baz</b>"
-            (Just ["<b>foo</b>", "<b>bar</b>"])
-            (htmls ("a" // "b"))
-
-    ,   scrapeTest
-            "<a class=\"a b\">foo</a>"
-            (Just ["<a class=\"a b\">foo</a>"])
-            (htmls ("a" @: [hasClass "a"]))
-
-    ,   scrapeTest
-            "<a class=\"a b\">foo</a>"
-            Nothing
-            (htmls ("a" @: [hasClass "c"]))
-
-    ,   scrapeTest
-            "<a key=\"value\">foo</a>"
-            (Just ["<a key=\"value\">foo</a>"])
-            (htmls ("a" @: ["key" @=~ re "va(foo|bar|lu)e"]))
-
-    ,   scrapeTest
-            "<a foo=\"value\">foo</a><a bar=\"value\">bar</a>"
-            (Just ["<a foo=\"value\">foo</a>", "<a bar=\"value\">bar</a>"])
-            (htmls ("a" @: [Any @= "value"]))
-
-    ,   scrapeTest
-            "<a foo=\"other\">foo</a><a bar=\"value\">bar</a>"
-            (Just ["<a bar=\"value\">bar</a>"])
-            (htmls ("a" @: [Any @= "value"]))
-
-    ,   scrapeTest
-            "<a foo=\"value\">foo</a><b bar=\"value\">bar</b>"
-            (Just ["<a foo=\"value\">foo</a>", "<b bar=\"value\">bar</b>"])
-            (htmls (Any @: [Any @= "value"]))
-
-    ,   scrapeTest
-            "<a foo=\"other\">foo</a><b bar=\"value\">bar</b>"
-            (Just ["<b bar=\"value\">bar</b>"])
-            (htmls (Any @: [Any @= "value"]))
-
-    ,   scrapeTest
-            "<a foo=\"bar\">1</a><a foo=\"foo\">2</a><a bar=\"bar\">3</a>"
-            (Just ["<a foo=\"foo\">2</a>", "<a bar=\"bar\">3</a>"])
-            (htmls (Any @: [match (==)]))
-
-    ,   scrapeTest
-            "<a>foo</a>"
-            (Just "foo")
-            (text "a")
-
-    ,   scrapeTest
-            "<a>foo</a><a>bar</a>"
-            (Just "foo")
-            (text "a")
-
-    ,   scrapeTest
-            "<a>foo</a><a>bar</a>"
-            (Just ["foo", "bar"])
-            (texts "a")
-
-    ,   scrapeTest
-            "<a>foo</a><a>bar</a>"
-            (Just [True, False])
-            (map (== "foo") <$> texts "a")
-
-    ,   scrapeTest
-            "<a key=foo />"
-            (Just "foo")
-            (attr "key" "a")
-
-    ,   scrapeTest
-            "<a key1=foo/><b key1=bar key2=foo /><a key1=bar key2=baz />"
-            (Just "baz")
-            (attr "key2" $ "a" @: ["key1" @= "bar"])
-
-    ,   scrapeTest
-            "<a><b>foo</b></a><b>bar</b>"
-            (Just ["foo"])
-            (chroot "a" $ texts "b")
-
-    ,   scrapeTest
-            "<a><b>foo</b></a><a><b>bar</b></a>"
-            (Just ["foo", "bar"])
-            (chroots "a" $ text "b")
-
-    ,   scrapeTest
-            "<a><b>foo</b></a><a><c>bar</c></a>"
-            (Just "foo")
-            (text ("a" // "b") <|> text ("a" // "c"))
-
-    ,   scrapeTest
-            "<a><b>foo</b></a><a><c>bar</c></a>"
-            (Just "bar")
-            (text ("a" // "d") <|> text ("a" // "c"))
-
-    ,   scrapeTest "<img src='foobar'>" (Just "foobar") (attr "src" "img")
-
-    ,   scrapeTest "<img src='foobar' />" (Just "foobar") (attr "src" "img")
-
-    ,   scrapeTest
-            "<a>foo</a><A>bar</A>"
-            (Just ["foo", "bar"])
-            (texts "a")
-
-    ,   scrapeTest
-            "<a>foo</a><A>bar</A>"
-            (Just ["foo", "bar"])
-            (texts "A")
-
-    ,   scrapeTest
-            "<a B=C>foo</a>"
-            (Just ["foo"])
-            (texts $ "A" @: ["b" @= "C"])
-
-    ,   scrapeTest
-            "<a B=C>foo</a>"
-            Nothing
-            (texts $ "A" @: ["b" @= "c"])
-
-    ,   scrapeTest
-            "<a>foo</a>"
-            (Just "<a>foo</a>")
-            (html "a")
-
-    ,   scrapeTest
-            "<body><div><ul><li>1</li><li>2</li></ul></div></body>"
-            (Just "<li>1</li>")
-            (html "li")
-
-    ,   scrapeTest
-            "<body><div></div></body>"
-            (Just "<div></div>")
-            (html "div")
-
-    ,   scrapeTest
-            "<a>foo</a><a>bar</a>"
-            (Just ["<a>foo</a>","<a>bar</a>"])
-            (htmls "a")
-
-    ,   scrapeTest
-            "<body><div><ul><li>1</li><li>2</li></ul></div></body>"
-            (Just ["<li>1</li>", "<li>2</li>"])
-            (htmls "li")
-
-    ,   scrapeTest
-            "<body><div></div></body>"
-            (Just ["<div></div>"])
-            (htmls "div")
-
-    ,   scrapeTest
-            "<a>1<b>2</b>3</a>"
-            (Just "1<b>2</b>3")
-            (innerHTML Any)
-
-    ,   scrapeTest
-            "<a>"
-            (Just "")
-            (innerHTML Any)
-
-    ,   scrapeTest
-            "<a>foo</a><a>bar</a>"
-            (Just ["foo","bar"])
-            (innerHTMLs "a")
-    ]
-
-scrapeTest :: (Eq a, Show a) => String -> Maybe a -> Scraper String a -> Test
-scrapeTest html expected scraper = label ~: expected @=? actual
-    where
-        label  = "scrape (" ++ show html ++ ")"
-        actual = scrape scraper (TagSoup.parseTags html)