This is an automated email from the ASF dual-hosted git repository.

csantanapr pushed a commit to branch master
in repository 
https://gitbox.apache.org/repos/asf/incubator-openwhisk-utilities.git


The following commit(s) were added to refs/heads/master by this push:
     new e1faec9  Fix exclusion directory and file matching and add support for 
reading a top level gitignore file (#57)
e1faec9 is described below

commit e1faec929990f9107238c5287bae2209eca1b1a2
Author: rodric rabbah <rod...@gmail.com>
AuthorDate: Tue Apr 2 22:36:42 2019 -0400

    Fix exclusion directory and file matching and add support for reading a top 
level gitignore file (#57)
---
 .gitignore                    |   3 +
 LICENSE.txt                   |  12 ++
 README.md                     |   4 +-
 licenses/LICENSE-pathspec.txt | 374 ++++++++++++++++++++++++++++++++++++++++++
 scancode/lib/compat.py        |  45 +++++
 scancode/lib/gitwildmatch.py  | 325 ++++++++++++++++++++++++++++++++++++
 scancode/lib/pathspec.py      | 146 +++++++++++++++++
 scancode/lib/pattern.py       | 155 +++++++++++++++++
 scancode/lib/util.py          | 359 ++++++++++++++++++++++++++++++++++++++++
 scancode/scanCode.py          |  57 ++++---
 scancode/travis.cfg           |   3 +
 11 files changed, 1455 insertions(+), 28 deletions(-)

diff --git a/.gitignore b/.gitignore
index 23c57c9..fd8ce7c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,3 +26,6 @@ ansible/roles/nginx/files/*cert.pem
 
 # .zip files must be explicited whitelisted
 *.zip
+
+# .pyc files
+*.pyc
diff --git a/LICENSE.txt b/LICENSE.txt
index a2fe52f..23fba0a 100644
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -200,3 +200,15 @@
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
+
+
+========================================================================
+Mozilla Public License 2.0
+========================================================================
+
+This distribution bundles the following component, which is
+available under the Mozilla Public License 2.0
+(https://www.mozilla.org/en-US/MPL/2.0/).
+
+Pathspec 0.5.9 (https://pypi.org/project/pathspec/) under scancode/lib.
+License included at licenses/LICENSE-pathspec.txt.
diff --git a/README.md b/README.md
index 93c7de3..fecac52 100644
--- a/README.md
+++ b/README.md
@@ -47,7 +47,7 @@ Scan detected 3 error(s) in 1 file(s):
 
 To make sure this never happens to you, you can run the same tests on your 
local machine every time you commit changes.
 
-1. Clone the OpenWhisk utilities project repo.:
+1. Clone the OpenWhisk utilities project:
 ```bash
 $ git clone https://github.com/apache/incubator-openwhisk-utilities.git
 ```
@@ -66,7 +66,7 @@ $ cat /path/to/openwhisk/.git/hooks/pre-commit
 
 # determine openwhisk base directory
 root="$(git rev-parse --show-toplevel)"
-python /path/to/incubator-openwhisk-utilities/scancode/scanCode.py . --config 
$root/tools/
+python /path/to/incubator-openwhisk-utilities/scancode/scanCode.py . --config 
$root/tools/ --gitignore $root/.gitignore
 ```
 
 _Note_: A hook a locally installed, so if you check out the repository again, 
you will need to reinstall it.
diff --git a/licenses/LICENSE-pathspec.txt b/licenses/LICENSE-pathspec.txt
new file mode 100644
index 0000000..52d1351
--- /dev/null
+++ b/licenses/LICENSE-pathspec.txt
@@ -0,0 +1,374 @@
+Mozilla Public License Version 2.0
+==================================
+
+1. Definitions
+--------------
+
+1.1. "Contributor"
+    means each individual or legal entity that creates, contributes to
+    the creation of, or owns Covered Software.
+
+1.2. "Contributor Version"
+    means the combination of the Contributions of others (if any) used
+    by a Contributor and that particular Contributor's Contribution.
+
+1.3. "Contribution"
+    means Covered Software of a particular Contributor.
+
+1.4. "Covered Software"
+    means Source Code Form to which the initial Contributor has attached
+    the notice in Exhibit A, the Executable Form of such Source Code
+    Form, and Modifications of such Source Code Form, in each case
+    including portions thereof.
+
+1.5. "Incompatible With Secondary Licenses"
+    means
+
+    (a) that the initial Contributor has attached the notice described
+        in Exhibit B to the Covered Software; or
+
+    (b) that the Covered Software was made available under the terms of
+        version 1.1 or earlier of the License, but not also under the
+        terms of a Secondary License.
+
+1.6. "Executable Form"
+    means any form of the work other than Source Code Form.
+
+1.7. "Larger Work"
+    means a work that combines Covered Software with other material, in 
+    a separate file or files, that is not Covered Software.
+
+1.8. "License"
+    means this document.
+
+1.9. "Licensable"
+    means having the right to grant, to the maximum extent possible,
+    whether at the time of the initial grant or subsequently, any and
+    all of the rights conveyed by this License.
+
+1.10. "Modifications"
+    means any of the following:
+
+    (a) any file in Source Code Form that results from an addition to,
+        deletion from, or modification of the contents of Covered
+        Software; or
+
+    (b) any new file in Source Code Form that contains any Covered
+        Software.
+
+1.11. "Patent Claims" of a Contributor
+    means any patent claim(s), including without limitation, method,
+    process, and apparatus claims, in any patent Licensable by such
+    Contributor that would be infringed, but for the grant of the
+    License, by the making, using, selling, offering for sale, having
+    made, import, or transfer of either its Contributions or its
+    Contributor Version.
+
+1.12. "Secondary License"
+    means either the GNU General Public License, Version 2.0, the GNU
+    Lesser General Public License, Version 2.1, the GNU Affero General
+    Public License, Version 3.0, or any later versions of those
+    licenses.
+
+1.13. "Source Code Form"
+    means the form of the work preferred for making modifications.
+
+1.14. "You" (or "Your")
+    means an individual or a legal entity exercising rights under this
+    License. For legal entities, "You" includes any entity that
+    controls, is controlled by, or is under common control with You. For
+    purposes of this definition, "control" means (a) the power, direct
+    or indirect, to cause the direction or management of such entity,
+    whether by contract or otherwise, or (b) ownership of more than
+    fifty percent (50%) of the outstanding shares or beneficial
+    ownership of such entity.
+
+2. License Grants and Conditions
+--------------------------------
+
+2.1. Grants
+
+Each Contributor hereby grants You a world-wide, royalty-free,
+non-exclusive license:
+
+(a) under intellectual property rights (other than patent or trademark)
+    Licensable by such Contributor to use, reproduce, make available,
+    modify, display, perform, distribute, and otherwise exploit its
+    Contributions, either on an unmodified basis, with Modifications, or
+    as part of a Larger Work; and
+
+(b) under Patent Claims of such Contributor to make, use, sell, offer
+    for sale, have made, import, and otherwise transfer either its
+    Contributions or its Contributor Version.
+
+2.2. Effective Date
+
+The licenses granted in Section 2.1 with respect to any Contribution
+become effective for each Contribution on the date the Contributor first
+distributes such Contribution.
+
+2.3. Limitations on Grant Scope
+
+The licenses granted in this Section 2 are the only rights granted under
+this License. No additional rights or licenses will be implied from the
+distribution or licensing of Covered Software under this License.
+Notwithstanding Section 2.1(b) above, no patent license is granted by a
+Contributor:
+
+(a) for any code that a Contributor has removed from Covered Software;
+    or
+
+(b) for infringements caused by: (i) Your and any other third party's
+    modifications of Covered Software, or (ii) the combination of its
+    Contributions with other software (except as part of its Contributor
+    Version); or
+
+(c) under Patent Claims infringed by Covered Software in the absence of
+    its Contributions.
+
+This License does not grant any rights in the trademarks, service marks,
+or logos of any Contributor (except as may be necessary to comply with
+the notice requirements in Section 3.4).
+
+2.4. Subsequent Licenses
+
+No Contributor makes additional grants as a result of Your choice to
+distribute the Covered Software under a subsequent version of this
+License (see Section 10.2) or under the terms of a Secondary License (if
+permitted under the terms of Section 3.3).
+
+2.5. Representation
+
+Each Contributor represents that the Contributor believes its
+Contributions are its original creation(s) or it has sufficient rights
+to grant the rights to its Contributions conveyed by this License.
+
+2.6. Fair Use
+
+This License is not intended to limit any rights You have under
+applicable copyright doctrines of fair use, fair dealing, or other
+equivalents.
+
+2.7. Conditions
+
+Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted
+in Section 2.1.
+
+3. Responsibilities
+-------------------
+
+3.1. Distribution of Source Form
+
+All distribution of Covered Software in Source Code Form, including any
+Modifications that You create or to which You contribute, must be under
+the terms of this License. You must inform recipients that the Source
+Code Form of the Covered Software is governed by the terms of this
+License, and how they can obtain a copy of this License. You may not
+attempt to alter or restrict the recipients' rights in the Source Code
+Form.
+
+3.2. Distribution of Executable Form
+
+If You distribute Covered Software in Executable Form then:
+
+(a) such Covered Software must also be made available in Source Code
+    Form, as described in Section 3.1, and You must inform recipients of
+    the Executable Form how they can obtain a copy of such Source Code
+    Form by reasonable means in a timely manner, at a charge no more
+    than the cost of distribution to the recipient; and
+
+(b) You may distribute such Executable Form under the terms of this
+    License, or sublicense it under different terms, provided that the
+    license for the Executable Form does not attempt to limit or alter
+    the recipients' rights in the Source Code Form under this License.
+
+3.3. Distribution of a Larger Work
+
+You may create and distribute a Larger Work under terms of Your choice,
+provided that You also comply with the requirements of this License for
+the Covered Software. If the Larger Work is a combination of Covered
+Software with a work governed by one or more Secondary Licenses, and the
+Covered Software is not Incompatible With Secondary Licenses, this
+License permits You to additionally distribute such Covered Software
+under the terms of such Secondary License(s), so that the recipient of
+the Larger Work may, at their option, further distribute the Covered
+Software under the terms of either this License or such Secondary
+License(s).
+
+3.4. Notices
+
+You may not remove or alter the substance of any license notices
+(including copyright notices, patent notices, disclaimers of warranty,
+or limitations of liability) contained within the Source Code Form of
+the Covered Software, except that You may alter any license notices to
+the extent required to remedy known factual inaccuracies.
+
+3.5. Application of Additional Terms
+
+You may choose to offer, and to charge a fee for, warranty, support,
+indemnity or liability obligations to one or more recipients of Covered
+Software. However, You may do so only on Your own behalf, and not on
+behalf of any Contributor. You must make it absolutely clear that any
+such warranty, support, indemnity, or liability obligation is offered by
+You alone, and You hereby agree to indemnify every Contributor for any
+liability incurred by such Contributor as a result of warranty, support,
+indemnity or liability terms You offer. You may include additional
+disclaimers of warranty and limitations of liability specific to any
+jurisdiction.
+
+4. Inability to Comply Due to Statute or Regulation
+---------------------------------------------------
+
+If it is impossible for You to comply with any of the terms of this
+License with respect to some or all of the Covered Software due to
+statute, judicial order, or regulation then You must: (a) comply with
+the terms of this License to the maximum extent possible; and (b)
+describe the limitations and the code they affect. Such description must
+be placed in a text file included with all distributions of the Covered
+Software under this License. Except to the extent prohibited by statute
+or regulation, such description must be sufficiently detailed for a
+recipient of ordinary skill to be able to understand it.
+
+5. Termination
+--------------
+
+5.1. The rights granted under this License will terminate automatically
+if You fail to comply with any of its terms. However, if You become
+compliant, then the rights granted under this License from a particular
+Contributor are reinstated (a) provisionally, unless and until such
+Contributor explicitly and finally terminates Your grants, and (b) on an
+ongoing basis, if such Contributor fails to notify You of the
+non-compliance by some reasonable means prior to 60 days after You have
+come back into compliance. Moreover, Your grants from a particular
+Contributor are reinstated on an ongoing basis if such Contributor
+notifies You of the non-compliance by some reasonable means, this is the
+first time You have received notice of non-compliance with this License
+from such Contributor, and You become compliant prior to 30 days after
+Your receipt of the notice.
+
+5.2. If You initiate litigation against any entity by asserting a patent
+infringement claim (excluding declaratory judgment actions,
+counter-claims, and cross-claims) alleging that a Contributor Version
+directly or indirectly infringes any patent, then the rights granted to
+You by any and all Contributors for the Covered Software under Section
+2.1 of this License shall terminate.
+
+5.3. In the event of termination under Sections 5.1 or 5.2 above, all
+end user license agreements (excluding distributors and resellers) which
+have been validly granted by You or Your distributors under this License
+prior to termination shall survive termination.
+
+************************************************************************
+*                                                                      *
+*  6. Disclaimer of Warranty                                           *
+*  -------------------------                                           *
+*                                                                      *
+*  Covered Software is provided under this License on an "as is"       *
+*  basis, without warranty of any kind, either expressed, implied, or  *
+*  statutory, including, without limitation, warranties that the       *
+*  Covered Software is free of defects, merchantable, fit for a        *
+*  particular purpose or non-infringing. The entire risk as to the     *
+*  quality and performance of the Covered Software is with You.        *
+*  Should any Covered Software prove defective in any respect, You     *
+*  (not any Contributor) assume the cost of any necessary servicing,   *
+*  repair, or correction. This disclaimer of warranty constitutes an   *
+*  essential part of this License. No use of any Covered Software is   *
+*  authorized under this License except under this disclaimer.         *
+*                                                                      *
+************************************************************************
+
+************************************************************************
+*                                                                      *
+*  7. Limitation of Liability                                          *
+*  --------------------------                                          *
+*                                                                      *
+*  Under no circumstances and under no legal theory, whether tort      *
+*  (including negligence), contract, or otherwise, shall any           *
+*  Contributor, or anyone who distributes Covered Software as          *
+*  permitted above, be liable to You for any direct, indirect,         *
+*  special, incidental, or consequential damages of any character      *
+*  including, without limitation, damages for lost profits, loss of    *
+*  goodwill, work stoppage, computer failure or malfunction, or any    *
+*  and all other commercial damages or losses, even if such party      *
+*  shall have been informed of the possibility of such damages. This   *
+*  limitation of liability shall not apply to liability for death or   *
+*  personal injury resulting from such party's negligence to the       *
+*  extent applicable law prohibits such limitation. Some               *
+*  jurisdictions do not allow the exclusion or limitation of           *
+*  incidental or consequential damages, so this exclusion and          *
+*  limitation may not apply to You.                                    *
+*                                                                      *
+************************************************************************
+
+8. Litigation
+-------------
+
+Any litigation relating to this License may be brought only in the
+courts of a jurisdiction where the defendant maintains its principal
+place of business and such litigation shall be governed by laws of that
+jurisdiction, without reference to its conflict-of-law provisions.
+Nothing in this Section shall prevent a party's ability to bring
+cross-claims or counter-claims.
+
+9. Miscellaneous
+----------------
+
+This License represents the complete agreement concerning the subject
+matter hereof. If any provision of this License is held to be
+unenforceable, such provision shall be reformed only to the extent
+necessary to make it enforceable. Any law or regulation which provides
+that the language of a contract shall be construed against the drafter
+shall not be used to construe this License against a Contributor.
+
+10. Versions of the License
+---------------------------
+
+10.1. New Versions
+
+Mozilla Foundation is the license steward. Except as provided in Section
+10.3, no one other than the license steward has the right to modify or
+publish new versions of this License. Each version will be given a
+distinguishing version number.
+
+10.2. Effect of New Versions
+
+You may distribute the Covered Software under the terms of the version
+of the License under which You originally received the Covered Software,
+or under the terms of any subsequent version published by the license
+steward.
+
+10.3. Modified Versions
+
+If you create software not governed by this License, and you want to
+create a new license for such software, you may create and use a
+modified version of this License if you rename the license and remove
+any references to the name of the license steward (except to note that
+such modified license differs from this License).
+
+10.4. Distributing Source Code Form that is Incompatible With Secondary
+Licenses
+
+If You choose to distribute Source Code Form that is Incompatible With
+Secondary Licenses under the terms of this version of the License, the
+notice described in Exhibit B of this License must be attached.
+
+Exhibit A - Source Code Form License Notice
+-------------------------------------------
+
+  This Source Code Form is subject to the terms of the Mozilla Public
+  License, v. 2.0. If a copy of the MPL was not distributed with this
+  file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+If it is not possible or desirable to put the notice in a particular
+file, then You may include the notice in a location (such as a LICENSE
+file in a relevant directory) where a recipient would be likely to look
+for such a notice.
+
+You may add additional accurate notices of copyright ownership.
+
+Exhibit B - "Incompatible With Secondary Licenses" Notice
+---------------------------------------------------------
+
+  This Source Code Form is "Incompatible With Secondary Licenses", as
+  defined by the Mozilla Public License, v. 2.0.
+
diff --git a/scancode/lib/compat.py b/scancode/lib/compat.py
new file mode 100644
index 0000000..540599b
--- /dev/null
+++ b/scancode/lib/compat.py
@@ -0,0 +1,45 @@
+# encoding: utf-8
+#
+# This is a copy of source code from Pathspec 0.5.9
+# (https://pypi.org/project/pathspec/) which is
+# available under the Mozilla Public License 2.0
+# (https://www.mozilla.org/en-US/MPL/2.0/).
+# A copy of the license is also available in 
+# ../../licenses/LICENSE-pathspec.txt.
+#
+"""
+This module provides compatibility between Python 2 and 3. This project
+needs too little of that support to justify depending on `six`_.
+
+.. _`six`: http://pythonhosted.org/six
+"""
+
+import sys
+
+if sys.version_info[0] < 3:
+       # Python 2.
+       unicode = unicode
+       string_types = (basestring,)
+
+       from itertools import izip_longest
+
+       def iterkeys(mapping):
+               return mapping.iterkeys()
+
+else:
+       # Python 3.
+       unicode = str
+       string_types = (unicode,)
+
+       from itertools import zip_longest as izip_longest
+
+       def iterkeys(mapping):
+               return mapping.keys()
+
+try:
+       # Python 3.6+.
+       from collections.abc import Collection as collection_type
+except ImportError:
+       # Python 2.7 - 3.5.
+       from collections import Container as collection_type
+
diff --git a/scancode/lib/gitwildmatch.py b/scancode/lib/gitwildmatch.py
new file mode 100644
index 0000000..5076bd3
--- /dev/null
+++ b/scancode/lib/gitwildmatch.py
@@ -0,0 +1,325 @@
+# encoding: utf-8
+#
+# This is a copy of source code from Pathspec 0.5.9
+# (https://pypi.org/project/pathspec/) which is
+# available under the Mozilla Public License 2.0
+# (https://www.mozilla.org/en-US/MPL/2.0/).
+# A copy of the license is also available in 
+# ../../licenses/LICENSE-pathspec.txt.
+#
+"""
+This module implements Git's wildmatch pattern matching which itself is
+derived from Rsync's wildmatch. Git uses wildmatch for its ".gitignore"
+files.
+"""
+
+from __future__ import unicode_literals
+
+import re
+import warnings
+
+import util
+from compat import unicode
+from pattern import RegexPattern
+
+#: The encoding to use when parsing a byte string pattern.
+_BYTES_ENCODING = 'latin1'
+
+
+class GitWildMatchPattern(RegexPattern):
+       """
+       The :class:`GitWildMatchPattern` class represents a compiled Git
+       wildmatch pattern.
+       """
+
+       # Keep the dict-less class hierarchy.
+       __slots__ = ()
+
+       @classmethod
+       def pattern_to_regex(cls, pattern):
+               """
+               Convert the pattern into a regular expression.
+
+               *pattern* (:class:`unicode` or :class:`bytes`) is the pattern to
+               convert into a regular expression.
+
+               Returns the uncompiled regular expression (:class:`unicode`, 
:class:`bytes`,
+               or :data:`None`), and whether matched files should be included
+               (:data:`True`), excluded (:data:`False`), or if it is a
+               null-operation (:data:`None`).
+               """
+               if isinstance(pattern, unicode):
+                       return_type = unicode
+               elif isinstance(pattern, bytes):
+                       return_type = bytes
+                       pattern = pattern.decode(_BYTES_ENCODING)
+               else:
+                       raise TypeError("pattern:{!r} is not a unicode or byte 
string.".format(pattern))
+
+               pattern = pattern.strip()
+
+               if pattern.startswith('#'):
+                       # A pattern starting with a hash ('#') serves as a 
comment
+                       # (neither includes nor excludes files). Escape the 
hash with a
+                       # back-slash to match a literal hash (i.e., '\#').
+                       regex = None
+                       include = None
+
+               elif pattern == '/':
+                       # EDGE CASE: According to `git check-ignore` (v2.4.1), 
a single
+                       # '/' does not match any file.
+                       regex = None
+                       include = None
+
+               elif pattern:
+
+                       if pattern.startswith('!'):
+                               # A pattern starting with an exclamation mark 
('!') negates the
+                               # pattern (exclude instead of include). Escape 
the exclamation
+                               # mark with a back-slash to match a literal 
exclamation mark
+                               # (i.e., '\!').
+                               include = False
+                               # Remove leading exclamation mark.
+                               pattern = pattern[1:]
+                       else:
+                               include = True
+
+                       if pattern.startswith('\\'):
+                               # Remove leading back-slash escape for escaped 
hash ('#') or
+                               # exclamation mark ('!').
+                               pattern = pattern[1:]
+
+                       # Split pattern into segments.
+                       pattern_segs = pattern.split('/')
+
+                       # Normalize pattern to make processing easier.
+
+                       if not pattern_segs[0]:
+                               # A pattern beginning with a slash ('/') will 
only match paths
+                               # directly on the root directory instead of any 
descendant
+                               # paths. So, remove empty first segment to make 
pattern relative
+                               # to root.
+                               del pattern_segs[0]
+
+                       elif len(pattern_segs) == 1 or (len(pattern_segs) == 2 
and not pattern_segs[1]):
+                               # A single pattern without a beginning slash 
('/') will match
+                               # any descendant path. This is equivalent to 
"**/{pattern}". So,
+                               # prepend with double-asterisks to make pattern 
relative to
+                               # root.
+                               # EDGE CASE: This also holds for a single 
pattern with a
+                               # trailing slash (e.g. dir/).
+                               if pattern_segs[0] != '**':
+                                       pattern_segs.insert(0, '**')
+
+                       else:
+                               # EDGE CASE: A pattern without a beginning 
slash ('/') but
+                               # contains at least one prepended directory 
(e.g.
+                               # "dir/{pattern}") should not match 
"**/dir/{pattern}",
+                               # according to `git check-ignore` (v2.4.1).
+                               pass
+
+                       if not pattern_segs[-1] and len(pattern_segs) > 1:
+                               # A pattern ending with a slash ('/') will 
match all descendant
+                               # paths if it is a directory but not if it is a 
regular file.
+                               # This is equivalent to "{pattern}/**". So, set 
last segment to
+                               # double asterisks to include all descendants.
+                               pattern_segs[-1] = '**'
+
+                       # Build regular expression from pattern.
+                       output = ['^']
+                       need_slash = False
+                       end = len(pattern_segs) - 1
+                       for i, seg in enumerate(pattern_segs):
+                               if seg == '**':
+                                       if i == 0 and i == end:
+                                               # A pattern consisting solely 
of double-asterisks ('**')
+                                               # will match every path.
+                                               output.append('.+')
+                                       elif i == 0:
+                                               # A normalized pattern 
beginning with double-asterisks
+                                               # ('**') will match any leading 
path segments.
+                                               output.append('(?:.+/)?')
+                                               need_slash = False
+                                       elif i == end:
+                                               # A normalized pattern ending 
with double-asterisks ('**')
+                                               # will match any trailing path 
segments.
+                                               output.append('/.*')
+                                       else:
+                                               # A pattern with inner 
double-asterisks ('**') will match
+                                               # multiple (or zero) inner path 
segments.
+                                               output.append('(?:/.+)?')
+                                               need_slash = True
+                               elif seg == '*':
+                                       # Match single path segment.
+                                       if need_slash:
+                                               output.append('/')
+                                       output.append('[^/]+')
+                                       need_slash = True
+                               else:
+                                       # Match segment glob pattern.
+                                       if need_slash:
+                                               output.append('/')
+                                       
output.append(cls._translate_segment_glob(seg))
+                                       if i == end and include is True:
+                                               # A pattern ending without a 
slash ('/') will match a file
+                                               # or a directory (with paths 
underneath it). E.g., "foo"
+                                               # matches "foo", "foo/bar", 
"foo/bar/baz", etc.
+                                               # EDGE CASE: However, this does 
not hold for exclusion cases
+                                               # according to `git 
check-ignore` (v2.4.1).
+                                               output.append('(?:/.*)?')
+                                       need_slash = True
+                       output.append('$')
+                       regex = ''.join(output)
+
+               else:
+                       # A blank pattern is a null-operation (neither includes 
nor
+                       # excludes files).
+                       regex = None
+                       include = None
+
+               if regex is not None and return_type is bytes:
+                       regex = regex.encode(_BYTES_ENCODING)
+
+               return regex, include
+
+       @staticmethod
+       def _translate_segment_glob(pattern):
+               """
+               Translates the glob pattern to a regular expression. This is used in
+               the constructor to translate a path segment glob pattern to its
+               corresponding regular expression.
+
+               *pattern* (:class:`str`) is the glob pattern.
+
+               Returns the regular expression (:class:`str`).
+               """
+               # NOTE: This is derived from `fnmatch.translate()` and is similar to
+               # the POSIX function `fnmatch()` with the `FNM_PATHNAME` flag set.
+
+               escape = False
+               regex = ''
+               i, end = 0, len(pattern)
+               while i < end:
+                       # Get next character.
+                       char = pattern[i]
+                       i += 1
+
+                       if escape:
+                               # Escape the character.
+                               escape = False
+                               regex += re.escape(char)
+
+                       elif char == '\\':
+                               # Escape character, escape next character (a trailing backslash with nothing after it emits nothing).
+                               escape = True
+
+                       elif char == '*':
+                               # Multi-character wildcard. Match any string (except slashes),
+                               # including an empty string.
+                               regex += '[^/]*'
+
+                       elif char == '?':
+                               # Single-character wildcard. Match any single character (except
+                               # a slash).
+                               regex += '[^/]'
+
+                       elif char == '[':
+                               # Bracket expression wildcard. Except for the beginning
+                               # exclamation mark, the whole bracket expression can be used
+                               # directly as regex but we have to find where the expression
+                               # ends.
+                               # - "[][!]" matches ']', '[' and '!'.
+                               # - "[]-]" matches ']' and '-'.
+                               # - "[!]a-]" matches any character except ']', 'a' and '-'.
+                               j = i
+                               # Pass bracket expression negation.
+                               if j < end and pattern[j] == '!':
+                                       j += 1
+                               # Pass first closing bracket if it is at the beginning of the
+                               # expression.
+                               if j < end and pattern[j] == ']':
+                                       j += 1
+                               # Find closing bracket. Stop once we reach the end or find it.
+                               while j < end and pattern[j] != ']':
+                                       j += 1
+
+                               if j < end:
+                                       # Found end of bracket expression. Increment j to be one past
+                                       # the closing bracket:
+                                       #
+                                       #  [...]
+                                       #   ^   ^
+                                       #   i   j
+                                       #
+                                       j += 1
+                                       expr = '['
+
+                                       if pattern[i] == '!':
+                                               # Bracket expression needs to be negated.
+                                               expr += '^'
+                                               i += 1
+                                       elif pattern[i] == '^':
+                                               # POSIX declares that the regex bracket expression negation
+                                               # "[^...]" is undefined in a glob pattern. Python's
+                                               # `fnmatch.translate()` escapes the caret ('^') as a
+                                               # literal. To maintain consistency with undefined behavior,
+                                               # I am escaping the '^' as well.
+                                               expr += '\\^'
+                                               i += 1
+
+                                       # Build regex bracket expression. Escape backslashes so they are
+                                       # treated as literal backslashes by regex as defined by POSIX.
+                                       expr += pattern[i:j].replace('\\', '\\\\')
+
+                                       # Add regex bracket expression to regex result.
+                                       regex += expr
+
+                                       # Set i to one past the closing bracket.
+                                       i = j
+
+                               else:
+                                       # Failed to find closing bracket, treat opening bracket as a
+                                       # bracket literal instead of as an expression.
+                                       regex += '\\['
+
+                       else:
+                               # Regular character, escape it for regex.
+                               regex += re.escape(char)
+
+               return regex
+
+util.register_pattern('gitwildmatch', GitWildMatchPattern)
+
+
+class GitIgnorePattern(GitWildMatchPattern):
+       """
+       The :class:`GitIgnorePattern` class is deprecated in favor of :class:`GitWildMatchPattern`.
+       This class only exists to maintain compatibility with v0.4.
+       """
+
+       def __init__(self, *args, **kw):
+               """
+               Warn about deprecation, then initialize via the parent class.
+               """
+               self._deprecated()
+               return super(GitIgnorePattern, self).__init__(*args, **kw)
+
+       @staticmethod
+       def _deprecated():
+               """
+               Emit the :exc:`DeprecationWarning` for this class.
+               """
+               warnings.warn("GitIgnorePattern ('gitignore') is deprecated. Use GitWildMatchPattern ('gitwildmatch') instead.", DeprecationWarning, stacklevel=3)
+
+       @classmethod
+       def pattern_to_regex(cls, *args, **kw):
+               """
+               Warn about deprecation, then delegate to the parent implementation.
+               """
+               cls._deprecated()
+               return super(GitIgnorePattern, cls).pattern_to_regex(*args, **kw)
+
+# Register `GitIgnorePattern` as "gitignore" for backward compatibility
+# with v0.4.
+util.register_pattern('gitignore', GitIgnorePattern)
+
diff --git a/scancode/lib/pathspec.py b/scancode/lib/pathspec.py
new file mode 100644
index 0000000..da08db1
--- /dev/null
+++ b/scancode/lib/pathspec.py
@@ -0,0 +1,146 @@
+# encoding: utf-8
+#
+# This is a copy of source code from Pathspec 0.5.9
+# (https://pypi.org/project/pathspec/) which is
+# available under an Mozilla Public License 2.0
+# (https://www.mozilla.org/en-US/MPL/2.0/).
+# A copy of the license is also available in 
+# ../../licenses/LICENSE-pathspec.txt.
+#
+"""
+This module provides an object oriented interface for pattern matching
+of files.
+"""
+
+import util
+from compat import collection_type, iterkeys, izip_longest, string_types, unicode
+
+
+class PathSpec(object):
+       """
+       The :class:`PathSpec` class is a wrapper around a list of compiled
+       :class:`.Pattern` instances.
+       """
+
+       def __init__(self, patterns):
+               """
+               Initializes the :class:`PathSpec` instance.
+
+               *patterns* (:class:`~collections.abc.Collection` or :class:`~collections.abc.Iterable`)
+               yields each compiled pattern (:class:`.Pattern`).
+               """
+
+               self.patterns = patterns if isinstance(patterns, collection_type) else list(patterns)
+               """
+               *patterns* (:class:`~collections.abc.Collection` of :class:`.Pattern`)
+               contains the compiled patterns.
+               """
+
+       def __eq__(self, other):
+               """
+               Tests the equality of this path-spec with *other* (:class:`PathSpec`)
+               by comparing their :attr:`~PathSpec.patterns` attributes.
+               """
+               if isinstance(other, PathSpec):
+                       paired_patterns = izip_longest(self.patterns, other.patterns)
+                       return all(a == b for a, b in paired_patterns)
+               else:
+                       return NotImplemented
+
+       def __len__(self):
+               """
+               Returns the number of compiled patterns this path-spec contains
+               (:class:`int`).
+               """
+               return len(self.patterns)
+
+       @classmethod
+       def from_lines(cls, pattern_factory, lines):
+               """
+               Compiles the pattern lines.
+
+               *pattern_factory* can be either the name of a registered pattern
+               factory (:class:`str`), or a :class:`~collections.abc.Callable` used
+               to compile patterns. It must accept an uncompiled pattern (:class:`str`)
+               and return the compiled pattern (:class:`.Pattern`).
+
+               *lines* (:class:`~collections.abc.Iterable`) yields each uncompiled
+               pattern (:class:`str`). This simply has to yield each line so it can
+               be a :class:`file` (e.g., from :func:`open` or :class:`io.StringIO`)
+               or the result from :meth:`str.splitlines`.
+
+               Returns the :class:`PathSpec` instance.
+               """
+               if isinstance(pattern_factory, string_types):
+                       pattern_factory = util.lookup_pattern(pattern_factory)
+               if not callable(pattern_factory):
+                       raise TypeError("pattern_factory:{!r} is not callable.".format(pattern_factory))
+
+               if isinstance(lines, (bytes, unicode)):
+                       raise TypeError("lines:{!r} is not an iterable.".format(lines))
+
+               lines = [pattern_factory(line) for line in lines if line]
+               return cls(lines)
+
+       def match_file(self, file, separators=None):
+               """
+               Matches the file to this path-spec.
+
+               *file* (:class:`str`) is the file path to be matched against
+               :attr:`self.patterns <PathSpec.patterns>`.
+
+               *separators* (:class:`~collections.abc.Collection` of :class:`str`)
+               optionally contains the path separators to normalize. See
+               :func:`~pathspec.util.normalize_file` for more information.
+
+               Returns :data:`True` if *file* matched; otherwise, :data:`False`.
+               """
+               norm_file = util.normalize_file(file, separators=separators)
+               return util.match_file(self.patterns, norm_file)
+
+       def match_files(self, files, separators=None):
+               """
+               Matches the files to this path-spec.
+
+               *files* (:class:`~collections.abc.Iterable` of :class:`str`) contains
+               the file paths to be matched against :attr:`self.patterns
+               <PathSpec.patterns>`.
+
+               *separators* (:class:`~collections.abc.Collection` of :class:`str`;
+               or :data:`None`) optionally contains the path separators to
+               normalize. See :func:`~pathspec.util.normalize_file` for more
+               information.
+
+               Returns the matched files (:class:`~collections.abc.Iterable` of
+               :class:`str`). This method is a generator: nothing, including the *files* type check, runs until it is iterated.
+               """
+               if isinstance(files, (bytes, unicode)):
+                       raise TypeError("files:{!r} is not an iterable.".format(files))
+
+               file_map = util.normalize_files(files, separators=separators)
+               matched_files = util.match_files(self.patterns, iterkeys(file_map))
+               for path in matched_files:
+                       yield file_map[path]
+
+       def match_tree(self, root, on_error=None, follow_links=None):
+               """
+               Walks the specified root path for all files and matches them to this
+               path-spec.
+
+               *root* (:class:`str`) is the root directory to search for files.
+
+               *on_error* (:class:`~collections.abc.Callable` or :data:`None`)
+               optionally is the error handler for file-system exceptions. See
+               :func:`~pathspec.util.iter_tree` for more information.
+
+
+               *follow_links* (:class:`bool` or :data:`None`) optionally is whether
+               to walk symbolic links that resolve to directories. See
+               :func:`~pathspec.util.iter_tree` for more information.
+
+               Returns the matched files (:class:`~collections.abc.Iterable` of
+               :class:`str`).
+               """
+               files = util.iter_tree(root, on_error=on_error, follow_links=follow_links)
+               return self.match_files(files)
+
diff --git a/scancode/lib/pattern.py b/scancode/lib/pattern.py
new file mode 100644
index 0000000..b297f3c
--- /dev/null
+++ b/scancode/lib/pattern.py
@@ -0,0 +1,155 @@
+# encoding: utf-8
+#
+# This is a copy of source code from Pathspec 0.5.9
+# (https://pypi.org/project/pathspec/) which is
+# available under an Mozilla Public License 2.0
+# (https://www.mozilla.org/en-US/MPL/2.0/).
+# A copy of the license is also available in 
+# ../../licenses/LICENSE-pathspec.txt.
+#
+"""
+This module provides the base definition for patterns.
+"""
+
+import re
+
+from compat import unicode
+
+
+class Pattern(object):
+       """
+       The :class:`Pattern` class is the abstract definition of a pattern.
+       """
+
+       # Make the class dict-less.
+       __slots__ = ('include',)
+
+       def __init__(self, include):
+               """
+               Initializes the :class:`Pattern` instance.
+
+               *include* (:class:`bool` or :data:`None`) is whether the matched
+               files should be included (:data:`True`), excluded (:data:`False`),
+               or is a null-operation (:data:`None`).
+               """
+
+               self.include = include
+               """
+               *include* (:class:`bool` or :data:`None`) is whether the matched
+               files should be included (:data:`True`), excluded (:data:`False`),
+               or is a null-operation (:data:`None`).
+               """
+
+       def match(self, files):
+               """
+               Matches this pattern against the specified files. Subclasses must override this method.
+
+               *files* (:class:`~collections.abc.Iterable` of :class:`str`) contains
+               each file relative to the root directory (e.g., ``"relative/path/to/file"``).
+
+               Returns an :class:`~collections.abc.Iterable` yielding each matched
+               file path (:class:`str`).
+               """
+               raise NotImplementedError("{}.{} must override match().".format(self.__class__.__module__, self.__class__.__name__))
+
+
+class RegexPattern(Pattern):
+       """
+       The :class:`RegexPattern` class is an implementation of a pattern
+       using regular expressions.
+       """
+
+       # Make the class dict-less.
+       __slots__ = ('regex',)
+
+       def __init__(self, pattern, include=None):
+               """
+               Initializes the :class:`RegexPattern` instance.
+
+               *pattern* (:class:`unicode`, :class:`bytes`, :class:`re.RegexObject`,
+               or :data:`None`) is the pattern to compile into a regular
+               expression.
+
+               *include* (:class:`bool` or :data:`None`) must be :data:`None`
+               unless *pattern* is a precompiled regular expression (:class:`re.RegexObject`)
+               in which case it is whether matched files should be included
+               (:data:`True`), excluded (:data:`False`), or is a null operation
+               (:data:`None`).
+
+                       .. NOTE:: Subclasses do not need to support the *include*
+                          parameter.
+               """
+
+               self.regex = None
+               """
+               *regex* (:class:`re.RegexObject`) is the regular expression for the
+               pattern.
+               """
+
+               if isinstance(pattern, (unicode, bytes)):
+                       assert include is None, "include:{!r} must be null when pattern:{!r} is a string.".format(include, pattern)
+                       regex, include = self.pattern_to_regex(pattern)
+                       # NOTE: Make sure to allow a null regular expression to be
+                       # returned for a null-operation.
+                       if include is not None:
+                               regex = re.compile(regex)
+
+               elif pattern is not None and hasattr(pattern, 'match'):
+                       # Assume pattern is a precompiled regular expression.
+                       # - NOTE: Used specified *include*.
+                       regex = pattern
+
+               elif pattern is None:
+                       # NOTE: A null pattern is allowed (null-operation); bind regex so the final assignment cannot raise UnboundLocalError.
+                       regex = None
+                       assert include is None, "include:{!r} must be null when pattern:{!r} is null.".format(include, pattern)
+
+               else:
+                       raise TypeError("pattern:{!r} is not a string, RegexObject, or None.".format(pattern))
+
+               super(RegexPattern, self).__init__(include)
+               self.regex = regex
+
+       def __eq__(self, other):
+               """
+               Tests the equality of this regex pattern with *other* (:class:`RegexPattern`)
+               by comparing their :attr:`~Pattern.include` and :attr:`~RegexPattern.regex`
+               attributes.
+               """
+               if isinstance(other, RegexPattern):
+                       return self.include == other.include and self.regex == other.regex
+               else:
+                       return NotImplemented
+
+       def match(self, files):
+               """
+               Matches this pattern against the specified files.
+
+               *files* (:class:`~collections.abc.Iterable` of :class:`str`)
+               contains each file relative to the root directory (e.g., "relative/path/to/file").
+
+               Returns an :class:`~collections.abc.Iterable` yielding each matched
+               file path (:class:`str`).
+               """
+               if self.include is not None:
+                       for path in files:
+                               if self.regex.match(path) is not None:
+                                       yield path
+
+       @classmethod
+       def pattern_to_regex(cls, pattern):
+               """
+               Convert the pattern into an uncompiled regular expression.
+
+               *pattern* (:class:`str`) is the pattern to convert into a regular
+               expression.
+
+               Returns the uncompiled regular expression (:class:`str` or :data:`None`),
+               and whether matched files should be included (:data:`True`),
+               excluded (:data:`False`), or is a null-operation (:data:`None`).
+
+                       .. NOTE:: The default implementation simply returns *pattern* and
+                          :data:`True`.
+               """
+               return pattern, True
+
diff --git a/scancode/lib/util.py b/scancode/lib/util.py
new file mode 100644
index 0000000..c5bcd7b
--- /dev/null
+++ b/scancode/lib/util.py
@@ -0,0 +1,359 @@
+# encoding: utf-8
+#
+# This is a copy of source code from Pathspec 0.5.9
+# (https://pypi.org/project/pathspec/) which is
+# available under an Mozilla Public License 2.0
+# (https://www.mozilla.org/en-US/MPL/2.0/).
+# A copy of the license is also available in 
+# ../../licenses/LICENSE-pathspec.txt.
+#
+"""
+This module provides utility methods for dealing with path-specs.
+"""
+
+import os
+import os.path
+import posixpath
+import stat
+
+from compat import collection_type, string_types
+
+NORMALIZE_PATH_SEPS = [sep for sep in [os.sep, os.altsep] if sep and sep != posixpath.sep]
+"""
+*NORMALIZE_PATH_SEPS* (:class:`list` of :class:`str`) contains the path
+separators that need to be normalized to the POSIX separator for the
+current operating system. The separators are determined by examining
+:data:`os.sep` and :data:`os.altsep`.
+"""
+
+_registered_patterns = {}
+"""
+*_registered_patterns* (``dict``) maps a name (``str``) to the
+registered pattern factory (``callable``).
+"""
+
+def iter_tree(root, on_error=None, follow_links=None):
+       """
+       Walks the specified directory for all files.
+
+       *root* (:class:`str`) is the root directory to search for files.
+
+       *on_error* (:class:`~collections.abc.Callable` or :data:`None`)
+       optionally is the error handler for file-system exceptions. It will be
+       called with the exception (:exc:`OSError`). Reraise the exception to
+       abort the walk. Default is :data:`None` to ignore file-system
+       exceptions.
+
+       *follow_links* (:class:`bool` or :data:`None`) optionally is whether
+       to walk symbolic links that resolve to directories. Default is
+       :data:`None` for :data:`True`.
+
+       Raises :exc:`RecursionError` if recursion is detected.
+
+       Returns an :class:`~collections.abc.Iterable` yielding the path to
+       each file (:class:`str`) relative to *root*.
+       """
+       if on_error is not None and not callable(on_error):
+               raise TypeError("on_error:{!r} is not callable.".format(on_error))
+
+       if follow_links is None:
+               follow_links = True
+
+       for file_rel in _iter_tree_next(os.path.abspath(root), '', {}, on_error, follow_links):
+               yield file_rel
+
+def _iter_tree_next(root_full, dir_rel, memo, on_error, follow_links):
+       """
+       Scan the directory for all descendant files.
+
+       *root_full* (:class:`str`) the absolute path to the root directory.
+
+       *dir_rel* (:class:`str`) the path to the directory to scan relative to
+       *root_full*.
+
+       *memo* (:class:`dict`) keeps track of ancestor directories
+       encountered. Maps each ancestor real path (:class:`str`) to relative
+       path (:class:`str`).
+
+       *on_error* (:class:`~collections.abc.Callable` or :data:`None`)
+       optionally is the error handler for file-system exceptions.
+
+       *follow_links* (:class:`bool`) is whether to walk symbolic links that
+       resolve to directories.
+       """
+       dir_full = os.path.join(root_full, dir_rel)
+       dir_real = os.path.realpath(dir_full)
+
+       # Remember each encountered ancestor directory and its canonical
+       # (real) path. If a canonical path is encountered more than once,
+       # recursion has occurred.
+       if dir_real not in memo:
+               memo[dir_real] = dir_rel
+       else:
+               raise RecursionError(real_path=dir_real, first_path=memo[dir_real], second_path=dir_rel)
+
+       for node in os.listdir(dir_full):
+               node_rel = os.path.join(dir_rel, node)
+               node_full = os.path.join(root_full, node_rel)
+
+               # Inspect child node.
+               try:
+                       node_stat = os.lstat(node_full)
+               except OSError as e:
+                       if on_error is not None:
+                               on_error(e)
+                       continue
+
+               if stat.S_ISLNK(node_stat.st_mode):
+                       # Child node is a link, inspect the target node.
+                       is_link = True
+                       try:
+                               node_stat = os.stat(node_full)
+                       except OSError as e:
+                               if on_error is not None:
+                                       on_error(e)
+                               continue
+               else:
+                       is_link = False
+
+               if stat.S_ISDIR(node_stat.st_mode) and (follow_links or not is_link):
+                       # Child node is a directory, recurse into it and yield its
+                       # descendant files.
+                       for file_rel in _iter_tree_next(root_full, node_rel, memo, on_error, follow_links):
+                               yield file_rel
+
+               elif stat.S_ISREG(node_stat.st_mode):
+                       # Child node is a file, yield it.
+                       yield node_rel
+
+       # NOTE: Make sure to remove the canonical (real) path of the directory
+       # from the ancestors memo once we are done with it. This allows the
+       # same directory to appear multiple times. If this is not done, the
+       # second occurrence of the directory will be incorrectly interpreted as
+       # a recursion. See <https://github.com/cpburnz/python-path-specification/pull/7>.
+       del memo[dir_real]
+
+def lookup_pattern(name):
+       """
+       Looks up a registered pattern factory by name.
+
+       *name* (:class:`str`) is the name of the pattern factory.
+
+       Returns the registered pattern factory (:class:`~collections.abc.Callable`).
+       If no pattern factory is registered, raises :exc:`KeyError`.
+       """
+       return _registered_patterns[name]
+
+def match_file(patterns, file):
+       """
+       Matches the file to the patterns.
+
+       *patterns* (:class:`~collections.abc.Iterable` of :class:`~pathspec.pattern.Pattern`)
+       contains the patterns to use.
+
+       *file* (:class:`str`) is the normalized file path to be matched
+       against *patterns*.
+
+       Returns :data:`True` if *file* matched; otherwise, :data:`False`. The last pattern that matches decides the result.
+       """
+       matched = False
+       for pattern in patterns:
+               if pattern.include is not None:
+                       if file in pattern.match((file,)):
+                               matched = pattern.include
+       return matched
+
+def match_files(patterns, files):
+       """
+       Matches the files to the patterns.
+
+       *patterns* (:class:`~collections.abc.Iterable` of :class:`~pathspec.pattern.Pattern`)
+       contains the patterns to use.
+
+       *files* (:class:`~collections.abc.Iterable` of :class:`str`) contains
+       the normalized file paths to be matched against *patterns*.
+
+       Returns the matched files (:class:`set` of :class:`str`). Patterns apply in order: include patterns add their matches, exclude patterns remove them.
+       """
+       all_files = files if isinstance(files, collection_type) else list(files)
+       return_files = set()
+       for pattern in patterns:
+               if pattern.include is not None:
+                       result_files = pattern.match(all_files)
+                       if pattern.include:
+                               return_files.update(result_files)
+                       else:
+                               return_files.difference_update(result_files)
+       return return_files
+
+def normalize_file(file, separators=None):
+       """
+       Normalizes the file path to use the POSIX path separator (i.e., ``'/'``).
+
+       *file* (:class:`str`) is the file path.
+
+       *separators* (:class:`~collections.abc.Collection` of :class:`str`; or
+       :data:`None`) optionally contains the path separators to normalize.
+       This does not need to include the POSIX path separator (``'/'``), but
+       including it will not affect the results. Default is :data:`None` for
+       :data:`NORMALIZE_PATH_SEPS`. To prevent normalization, pass an empty
+       container (e.g., an empty tuple ``()``).
+
+       Returns the normalized file path (:class:`str`).
+       """
+       # Normalize path separators.
+       if separators is None:
+               separators = NORMALIZE_PATH_SEPS
+       norm_file = file
+       for sep in separators:
+               norm_file = norm_file.replace(sep, posixpath.sep)
+
+       # Remove current directory prefix (a single leading "./" only).
+       if norm_file.startswith('./'):
+               norm_file = norm_file[2:]
+
+       return norm_file
+
+def normalize_files(files, separators=None):
+       """
+       Normalizes the file paths to use the POSIX path separator.
+
+       *files* (:class:`~collections.abc.Iterable` of :class:`str`) contains
+       the file paths to be normalized.
+
+       *separators* (:class:`~collections.abc.Collection` of :class:`str`; or
+       :data:`None`) optionally contains the path separators to normalize.
+       See :func:`normalize_file` for more information.
+
+       Returns a :class:`dict` mapping each normalized file path (:class:`str`)
+       to the original file path (:class:`str`). If several paths normalize to the same key, the last one wins.
+       """
+       norm_files = {}
+       for path in files:
+               norm_files[normalize_file(path, separators=separators)] = path
+       return norm_files
+
+def register_pattern(name, pattern_factory, override=None):
+       """
+       Registers the specified pattern factory.
+
+       *name* (:class:`str`) is the name to register the pattern factory
+       under.
+
+       *pattern_factory* (:class:`~collections.abc.Callable`) is used to
+       compile patterns. It must accept an uncompiled pattern (:class:`str`)
+       and return the compiled pattern (:class:`.Pattern`).
+
+       *override* (:class:`bool` or :data:`None`) optionally is whether to
+       allow overriding an already registered pattern under the same name
+       (:data:`True`), instead of raising an :exc:`AlreadyRegisteredError`
+       (:data:`False`). Default is :data:`None` for :data:`False`.
+       """
+       if not isinstance(name, string_types):
+               raise TypeError("name:{!r} is not a string.".format(name))
+       if not callable(pattern_factory):
+               raise TypeError("pattern_factory:{!r} is not callable.".format(pattern_factory))
+       if name in _registered_patterns and not override:
+               raise AlreadyRegisteredError(name, _registered_patterns[name])
+       _registered_patterns[name] = pattern_factory
+
+
+class AlreadyRegisteredError(Exception):
+       """
+       The :exc:`AlreadyRegisteredError` exception is raised when a pattern
+       factory is registered under a name already in use.
+       """
+
+       def __init__(self, name, pattern_factory):
+               """
+               Initializes the :exc:`AlreadyRegisteredError` instance.
+
+               *name* (:class:`str`) is the name of the registered pattern.
+
+               *pattern_factory* (:class:`~collections.abc.Callable`) is the
+               registered pattern factory.
+               """
+               super(AlreadyRegisteredError, self).__init__(name, pattern_factory)
+
+       @property
+       def message(self):
+               """
+               *message* (:class:`str`) is the error message.
+               """
+               return "{name!r} is already registered for pattern factory:{pattern_factory!r}.".format(
+                       name=self.name,
+                       pattern_factory=self.pattern_factory,
+               )
+
+       @property
+       def name(self):
+               """
+               *name* (:class:`str`) is the name of the registered pattern.
+               """
+               return self.args[0]
+
+       @property
+       def pattern_factory(self):
+               """
+               *pattern_factory* (:class:`~collections.abc.Callable`) is the
+               registered pattern factory.
+               """
+               return self.args[1]
+
+
+class RecursionError(Exception):
+       """
+       The :exc:`RecursionError` exception is raised when recursion is
+       detected.
+       """
+
+       def __init__(self, real_path, first_path, second_path):
+               """
+               Initializes the :exc:`RecursionError` instance.
+
+               *real_path* (:class:`str`) is the real path that recursion was
+               encountered on.
+
+               *first_path* (:class:`str`) is the first path encountered for
+               *real_path*.
+
+               *second_path* (:class:`str`) is the second path encountered for
+               *real_path*.
+               """
+               super(RecursionError, self).__init__(real_path, first_path, second_path)
+
+       @property
+       def first_path(self):
+               """
+               *first_path* (:class:`str`) is the first path encountered for
+               :attr:`self.real_path <RecursionError.real_path>`.
+               """
+               return self.args[1]
+
+       @property
+       def message(self):
+               """
+               *message* (:class:`str`) is the error message.
+               """
+               return "Real path {real!r} was encountered at {first!r} and then {second!r}.".format(
+                       real=self.real_path,
+                       first=self.first_path,
+                       second=self.second_path,
+               )
+
+       @property
+       def real_path(self):
+               """
+               *real_path* (:class:`str`) is the real path that recursion was
+               encountered on.
+               """
+               return self.args[0]
+
+       @property
+       def second_path(self):
+               """
+               *second_path* (:class:`str`) is the second path encountered for
+               :attr:`self.real_path <RecursionError.real_path>`.
+               """
+               return self.args[2]
+
diff --git a/scancode/scanCode.py b/scancode/scanCode.py
index 029bfad..06d7dd3 100755
--- a/scancode/scanCode.py
+++ b/scancode/scanCode.py
@@ -40,6 +40,11 @@ import re
 import sys
 import textwrap
 
+# import pathspec from local lib path
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)) + '/lib')
+import pathspec
+from gitwildmatch import GitWildMatchPattern
+
 VERBOSE = False
 
 # Terminal colors
@@ -65,6 +70,7 @@ ERR_TABS = "line contains tabs."
 ERR_TRAILING_WHITESPACE = "line has trailing whitespace."
 
 HELP_CONFIG_FILE = "provide custom configuration file"
+HELP_GITIGNORE_FILE = "provide .gitignore file for additional path exclusions"
 HELP_DISPLAY_EXCLUSIONS = "display path exclusion information"
 HELP_ROOT_DIR = "starting directory for the scan"
 HELP_VERBOSE = "enable verbose output"
@@ -74,6 +80,7 @@ MSG_CHECKS_PASSED = "All checks passed."
 MSG_CONFIG_ADDING_LICENSE_FILE = "Adding valid license from: [%s], value:\n%s"
 MSG_ERROR_SUMMARY = "Scan detected %d error(s) in %d file(s):"
 MSG_READING_CONFIGURATION = "Reading configuration file [%s]..."
+MSG_READING_GITIGNORE = "Reading gitignore file [%s]..."
 MSG_READING_LICENSE_FILE = "Reading license file [%s]..."
 MSG_RUNNING_FILE_CHECKS = "    Running File Check [%s]"
 MSG_RUNNING_LINE_CHECKS = "    Running Line Check [%s]"
@@ -81,10 +88,10 @@ MSG_SCANNING_FILTER = "Scanning files with filter: [%s]:"
 MSG_SCANNING_STARTED = "Scanning files starting at [%s]..."
 
 WARN_CONFIG_SECTION_NOT_FOUND = "Configuration file section [%s] not found."
-WARN_SCAN_EXCLUDED_PATH_SUMMARY = "Scan excluded (%s) directories:"
+WARN_SCAN_EXCLUDED_PATH_SUMMARY = "Scan excluded (%s) patterns:"
 WARN_SCAN_EXCLUDED_FILE_SUMMARY = "Scan excluded (%s) files:"
 WARN_SCAN_EXCLUDED_FILE = "  Excluded file: %s"
-WARN_SCAN_EXCLUDED_PATH = "  Excluded path: %s"
+WARN_SCAN_EXCLUDED_PATH = "  Excluded pattern: %s"
 
 MSG_DESCRIPTION = "Scans all source code under specified directory for " \
                   "project compliance using provided configuration."
@@ -211,18 +218,21 @@ def read_license_files(config):
         raise Exception(ERR_REQUIRED_SECTION % SECTION_LICENSE)
 
 
-def read_path_exclusions(config):
+def read_path_exclusions(config, gitignore_file):
     """Read the list of paths to exclude from the scan."""
     path_dict = get_config_section_dict(config, SECTION_EXCLUDE)
     # vprint("path_dict: " + str(path_dict))
     if path_dict is not None:
         # each 'key' is an exclusion path
         for key in path_dict:
+            key = str.strip(key)
             if key is not None:
                 exclusion_paths.append(key)
-    else:
-        raise Exception(ERR_REQUIRED_SECTION % SECTION_LICENSE)
 
+    if gitignore_file is not None:
+        print_highlight(MSG_READING_GITIGNORE % gitignore_file.name)
+        for line in gitignore_file.read().splitlines():
+            exclusion_paths.append(line)
 
 def read_scan_options(config):
     """Read the Options from the configuration file."""
@@ -251,7 +261,7 @@ def read_regex(config):
         raise Exception(ERR_REQUIRED_SECTION % SECTION_REGEX)
 
 
-def read_config_file(file):
+def read_config_file(file, gitignore_file):
     """Read in and validate configuration file."""
     try:
         print_highlight(MSG_READING_CONFIGURATION % file.name)
@@ -263,7 +273,7 @@ def read_config_file(file):
         config.readfp(file)
         read_license_files(config)
         read_path_inclusions(config)
-        read_path_exclusions(config)
+        read_path_exclusions(config, gitignore_file)
         read_scan_options(config)
         read_regex(config)
     except Exception as e:
@@ -399,32 +409,20 @@ def run_line_checks(file_path, checks):
                     errors.append((line_number, err))
     return errors
 
-
 def all_paths(root_dir):
     """Generator that returns files with known extensions that can be scanned.
 
     Iteration is recursive beginning at the passed root directory and
     skipping directories that are listed as exception paths.
     """
-    # For every file in every directory (path) starting at "root_dir"
+    spec = pathspec.PathSpec.from_lines(GitWildMatchPattern, exclusion_paths)
+    exclusion_files_set = set(map(lambda f: os.path.join(root_dir, f), spec.match_tree(root_dir)))
+
     for dir_path, dir_names, files in os.walk(root_dir):
         for f in files:
             filename = os.path.join(dir_path, f)
-
-            # Map will contain a boolean for each exclusion path tested
-            # as input to the lambda function.
-            # only if all() values in the Map are "True" (meaning the file is
-            # not excluded) then it should yield the filename to run checks on.
-            # not dir_path.endswith(p) and
-            if all(map(lambda p: p not in dir_path, exclusion_paths)):
-               # directory not excluded, now check for any file exclusions
-               if all(map(lambda p: p not in filename, exclusion_paths)):
-                   yield filename
-               else:
-                   exclusion_files_set.add(filename)
-            else:
-                # directory is excluded
-                exclusion_files_set.add(filename)
+            if filename not in exclusion_files_set:
+                yield filename
 
 def colors():
     """Create a collection of helper functions to colorize strings."""
@@ -488,6 +486,11 @@ if __name__ == "__main__":
                         dest="config",
                         default=DEFAULT_CONFIG_FILE,
                         help=HELP_CONFIG_FILE)
+    parser.add_argument("--gitignore",
+                        type=argparse.FileType('r'),
+                        action="store",
+                        dest="gitignore",
+                        help=HELP_GITIGNORE_FILE)
     parser.add_argument("root_directory",
                         type=str,
                         default=DEFAULT_ROOT_DIR,
@@ -500,6 +503,7 @@ if __name__ == "__main__":
 
     # Config file at this point is an actual file object
     config_file = args.config
+    gitignore_file = args.gitignore
 
     # Assign supported scan functions to either file or line globals
     # These checks run once per-file
@@ -517,7 +521,7 @@ if __name__ == "__main__":
     })
 
     # Read / load configuration file from file (pointer)
-    if read_config_file(config_file) == -1:
+    if read_config_file(config_file, gitignore_file) == -1:
         exit(1)
 
     # Verify starting path parameter is valid
@@ -532,11 +536,12 @@ if __name__ == "__main__":
     # Runs all listed checks on all relevant files.
     all_errors = []
 
+    paths_to_check = set(all_paths(root_dir))
     for fltr, chks1, chks2 in FILTERS_WITH_CHECK_FUNCTIONS:
         # print_error(col.cyan(MSG_SCANNING_FILTER % fltr))
         # print_error("chks1=" + str(chks1))
         # print_error("chks2=" + str(chks2))
-        for path in fnmatch.filter(all_paths(root_dir), fltr):
+        for path in fnmatch.filter(paths_to_check, fltr):
             errors = run_file_checks(path, chks1)
             errors += run_line_checks(path, chks2)
             all_errors += map(lambda p: (path, p[0], p[1]), errors)
diff --git a/scancode/travis.cfg b/scancode/travis.cfg
index 9c10889..8f90563 100644
--- a/scancode/travis.cfg
+++ b/scancode/travis.cfg
@@ -52,6 +52,9 @@ ASFMinifiedLicenseHeaderREM.txt
 # Scancode unit tests
 tests/exclude
 
+# Pathspec library
+lib/
+
 [Options]
 # Not all code files allow licenses to appear starting at the first character
 # of the file. This option tells the scan to allow licenses to appear starting

Reply via email to