D7925: rust-matchers: add `IgnoreMatcher`

2020-03-11 Thread Raphaël Gomès
Closed by commit rHGc697638e0e91: rust-matchers: add `IgnoreMatcher` (authored 
by Alphare).
This revision was automatically updated to reflect the committed changes.
This revision was not accepted when it landed; it landed in state "Needs 
Review".

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7925?vs=20495&id=20715

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7925/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7925

AFFECTED FILES
  rust/hg-core/src/matchers.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/matchers.rs b/rust/hg-core/src/matchers.rs
--- a/rust/hg-core/src/matchers.rs
+++ b/rust/hg-core/src/matchers.rs
@@ -10,14 +10,25 @@
 #[cfg(feature = "with-re2")]
 use crate::re2::Re2;
 use crate::{
-filepatterns::{build_single_regex, PatternResult},
-utils::hg_path::{HgPath, HgPathBuf},
-DirsMultiset, DirstateMapError, IgnorePattern, PatternError,
+dirstate::dirs_multiset::DirsChildrenMultiset,
+filepatterns::{
+build_single_regex, filter_subincludes, get_patterns_from_file,
+PatternFileWarning, PatternResult, SubInclude,
+},
+utils::{
+files::find_dirs,
+hg_path::{HgPath, HgPathBuf},
+Escaped,
+},
+DirsMultiset, DirstateMapError, FastHashMap, IgnorePattern, PatternError,
 PatternSyntax,
 };
+
 use std::collections::HashSet;
+use std::fmt::{Display, Error, Formatter};
 use std::iter::FromIterator;
 use std::ops::Deref;
+use std::path::Path;
 
 #[derive(Debug, PartialEq)]
 pub enum VisitChildrenSet<'a> {
@@ -223,6 +234,87 @@
 }
 }
 
+/// Matches files that are included in the ignore rules.
+#[cfg_attr(
+feature = "with-re2",
+doc = r##"
+```
+use hg::{
+matchers::{IncludeMatcher, Matcher},
+IgnorePattern,
+PatternSyntax,
+utils::hg_path::HgPath
+};
+use std::path::Path;
+///
+let ignore_patterns =
+vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
+let (matcher, _) = IncludeMatcher::new(ignore_patterns, "").unwrap();
+///
+assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
+assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
+assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
+assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
+```
+"##
+)]
+pub struct IncludeMatcher<'a> {
+patterns: Vec<u8>,
+match_fn: Box<dyn for<'r> Fn(&'r HgPath) -> bool + 'a + Sync>,
+/// Whether all the patterns match a prefix (i.e. recursively)
+prefix: bool,
+roots: HashSet<HgPathBuf>,
+dirs: HashSet<HgPathBuf>,
+parents: HashSet<HgPathBuf>,
+}
+
+impl<'a> Matcher for IncludeMatcher<'a> {
+fn file_set(&self) -> Option<&HashSet<&HgPath>> {
+None
+}
+
+fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
+false
+}
+
+fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
+(self.match_fn)(filename.as_ref())
+}
+
+fn visit_children_set(
+&self,
+directory: impl AsRef<HgPath>,
+) -> VisitChildrenSet {
+let dir = directory.as_ref();
+if self.prefix && self.roots.contains(dir) {
+return VisitChildrenSet::Recursive;
+}
+if self.roots.contains(HgPath::new(b""))
+|| self.roots.contains(dir)
+|| self.dirs.contains(dir)
+|| find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
+{
+return VisitChildrenSet::This;
+}
+
+if self.parents.contains(directory.as_ref()) {
+let multiset = self.get_all_parents_children();
+if let Some(children) = multiset.get(dir) {
+return VisitChildrenSet::Set(children.to_owned());
+}
+}
+VisitChildrenSet::Empty
+}
+
+fn matches_everything(&self) -> bool {
+false
+}
+
+fn is_exact(&self) -> bool {
+false
+}
+}
+
 #[cfg(feature = "with-re2")]
 /// Returns a function that matches an `HgPath` against the given regex
 /// pattern.
@@ -361,6 +453,175 @@
 })
 }
 
+/// Returns a function that checks whether a given file (in the general sense)
+/// should be matched.
+fn build_match<'a, 'b>(
+ignore_patterns: &'a [IgnorePattern],
+root_dir: impl AsRef<Path>,
+) -> PatternResult<(
+Vec<u8>,
+Box<dyn Fn(&HgPath) -> bool + 'b + Sync>,
+Vec<PatternFileWarning>,
+)> {
+let mut match_funcs: Vec<Box<dyn Fn(&HgPath) -> bool + Sync>> = vec![];
+// For debugging and printing
+let mut patterns = vec![];
+let mut all_warnings = vec![];
+
+let (subincludes, ignore_patterns) =
+filter_subincludes(ignore_patterns, root_dir)?;
+
+if !subincludes.is_empty() {
+// Build prefix-based matcher functions for subincludes
+let mut submatchers = FastHashMap::default();
+let mut prefixes = vec![];
+
+for SubInclude { prefix, root, path } in subincludes.into_iter() {
+let (match_fn, warnings) = get_ignore_function(&[path], root)?;
+all_warnings.extend(warnings);
+prefixes.push(prefix.to_owned());
+

D7925: rust-matchers: add `IgnoreMatcher`

2020-03-05 Thread Raphaël Gomès
Alphare updated this revision to Diff 20495.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7925?vs=20163&id=20495

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7925/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7925

AFFECTED FILES
  rust/hg-core/src/matchers.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/matchers.rs b/rust/hg-core/src/matchers.rs
--- a/rust/hg-core/src/matchers.rs
+++ b/rust/hg-core/src/matchers.rs
@@ -10,14 +10,25 @@
 #[cfg(feature = "with-re2")]
 use crate::re2::Re2;
 use crate::{
-filepatterns::{build_single_regex, PatternResult},
-utils::hg_path::{HgPath, HgPathBuf},
-DirsMultiset, DirstateMapError, IgnorePattern, PatternError,
+dirstate::dirs_multiset::DirsChildrenMultiset,
+filepatterns::{
+build_single_regex, filter_subincludes, get_patterns_from_file,
+PatternFileWarning, PatternResult, SubInclude,
+},
+utils::{
+files::find_dirs,
+hg_path::{HgPath, HgPathBuf},
+Escaped,
+},
+DirsMultiset, DirstateMapError, FastHashMap, IgnorePattern, PatternError,
 PatternSyntax,
 };
+
 use std::collections::HashSet;
+use std::fmt::{Display, Error, Formatter};
 use std::iter::FromIterator;
 use std::ops::Deref;
+use std::path::Path;
 
 #[derive(Debug, PartialEq)]
 pub enum VisitChildrenSet<'a> {
@@ -223,6 +234,87 @@
 }
 }
 
+/// Matches files that are included in the ignore rules.
+#[cfg_attr(
+feature = "with-re2",
+doc = r##"
+```
+use hg::{
+matchers::{IncludeMatcher, Matcher},
+IgnorePattern,
+PatternSyntax,
+utils::hg_path::HgPath
+};
+use std::path::Path;
+///
+let ignore_patterns =
+vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
+let (matcher, _) = IncludeMatcher::new(ignore_patterns, "").unwrap();
+///
+assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
+assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
+assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
+assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
+```
+"##
+)]
+pub struct IncludeMatcher<'a> {
+patterns: Vec<u8>,
+match_fn: Box<dyn for<'r> Fn(&'r HgPath) -> bool + 'a + Sync>,
+/// Whether all the patterns match a prefix (i.e. recursively)
+prefix: bool,
+roots: HashSet<HgPathBuf>,
+dirs: HashSet<HgPathBuf>,
+parents: HashSet<HgPathBuf>,
+}
+
+impl<'a> Matcher for IncludeMatcher<'a> {
+fn file_set(&self) -> Option<&HashSet<&HgPath>> {
+None
+}
+
+fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
+false
+}
+
+fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
+(self.match_fn)(filename.as_ref())
+}
+
+fn visit_children_set(
+&self,
+directory: impl AsRef<HgPath>,
+) -> VisitChildrenSet {
+let dir = directory.as_ref();
+if self.prefix && self.roots.contains(dir) {
+return VisitChildrenSet::Recursive;
+}
+if self.roots.contains(HgPath::new(b""))
+|| self.roots.contains(dir)
+|| self.dirs.contains(dir)
+|| find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
+{
+return VisitChildrenSet::This;
+}
+
+if self.parents.contains(directory.as_ref()) {
+let multiset = self.get_all_parents_children();
+if let Some(children) = multiset.get(dir) {
+return VisitChildrenSet::Set(children.to_owned());
+}
+}
+VisitChildrenSet::Empty
+}
+
+fn matches_everything(&self) -> bool {
+false
+}
+
+fn is_exact(&self) -> bool {
+false
+}
+}
+
 #[cfg(feature = "with-re2")]
 /// Returns a function that matches an `HgPath` against the given regex
 /// pattern.
@@ -361,6 +453,175 @@
 })
 }
 
+/// Returns a function that checks whether a given file (in the general sense)
+/// should be matched.
+fn build_match<'a, 'b>(
+ignore_patterns: &'a [IgnorePattern],
+root_dir: impl AsRef<Path>,
+) -> PatternResult<(
+Vec<u8>,
+Box<dyn Fn(&HgPath) -> bool + 'b + Sync>,
+Vec<PatternFileWarning>,
+)> {
+let mut match_funcs: Vec<Box<dyn Fn(&HgPath) -> bool + Sync>> = vec![];
+// For debugging and printing
+let mut patterns = vec![];
+let mut all_warnings = vec![];
+
+let (subincludes, ignore_patterns) =
+filter_subincludes(ignore_patterns, root_dir)?;
+
+if !subincludes.is_empty() {
+// Build prefix-based matcher functions for subincludes
+let mut submatchers = FastHashMap::default();
+let mut prefixes = vec![];
+
+for SubInclude { prefix, root, path } in subincludes.into_iter() {
+let (match_fn, warnings) = get_ignore_function(&[path], root)?;
+all_warnings.extend(warnings);
+prefixes.push(prefix.to_owned());
+submatchers.insert(prefix.to_owned(), match_fn);
+}
+
+let match_subinclude = move |filename: &HgPath| {
+for prefix in prefixes.iter() {
+if let Some(rel) = 

D7925: rust-matchers: add `IgnoreMatcher`

2020-02-11 Thread Raphaël Gomès
Alphare updated this revision to Diff 20163.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7925?vs=20043&id=20163

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7925/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7925

AFFECTED FILES
  rust/hg-core/src/matchers.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/matchers.rs b/rust/hg-core/src/matchers.rs
--- a/rust/hg-core/src/matchers.rs
+++ b/rust/hg-core/src/matchers.rs
@@ -10,14 +10,25 @@
 #[cfg(feature = "with-re2")]
 use crate::re2::Re2;
 use crate::{
-filepatterns::{build_single_regex, PatternResult},
-utils::hg_path::{HgPath, HgPathBuf},
-DirsMultiset, DirstateMapError, IgnorePattern, PatternError,
+dirstate::dirs_multiset::DirsChildrenMultiset,
+filepatterns::{
+build_single_regex, filter_subincludes, get_patterns_from_file,
+PatternFileWarning, PatternResult, SubInclude,
+},
+utils::{
+files::find_dirs,
+hg_path::{HgPath, HgPathBuf},
+Escaped,
+},
+DirsMultiset, DirstateMapError, FastHashMap, IgnorePattern, PatternError,
 PatternSyntax,
 };
+
 use std::collections::HashSet;
+use std::fmt::{Display, Error, Formatter};
 use std::iter::FromIterator;
 use std::ops::Deref;
+use std::path::Path;
 
 #[derive(Debug, PartialEq)]
 pub enum VisitChildrenSet<'a> {
@@ -223,6 +234,88 @@
 }
 }
 
+/// Matches files that are included in the ignore rules.
+///
+#[cfg_attr(
+feature = "with-re2",
+doc = r##"
+```
+use hg::{
+matchers::{IncludeMatcher, Matcher},
+IgnorePattern,
+PatternSyntax,
+utils::hg_path::HgPath
+};
+use std::path::Path;
+///
+let ignore_patterns =
+vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
+let (matcher, _) = IncludeMatcher::new(ignore_patterns, "").unwrap();
+///
+assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
+assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
+assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
+assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
+```
+"##
+)]
+pub struct IncludeMatcher<'a> {
+patterns: Vec<u8>,
+match_fn: Box<dyn for<'r> Fn(&'r HgPath) -> bool + 'a + Sync>,
+/// Whether all the patterns match a prefix (i.e. recursively)
+prefix: bool,
+roots: HashSet<HgPathBuf>,
+dirs: HashSet<HgPathBuf>,
+parents: HashSet<HgPathBuf>,
+}
+
+impl<'a> Matcher for IncludeMatcher<'a> {
+fn file_set(&self) -> Option<&HashSet<&HgPath>> {
+None
+}
+
+fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
+false
+}
+
+fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
+(self.match_fn)(filename.as_ref())
+}
+
+fn visit_children_set(
+&self,
+directory: impl AsRef<HgPath>,
+) -> VisitChildrenSet {
+let dir = directory.as_ref();
+if self.prefix && self.roots.contains(dir) {
+return VisitChildrenSet::Recursive;
+}
+if self.roots.contains(HgPath::new(b""))
+|| self.roots.contains(dir)
+|| self.dirs.contains(dir)
+|| find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
+{
+return VisitChildrenSet::This;
+}
+
+if self.parents.contains(directory.as_ref()) {
+let multiset = self.get_all_parents_children();
+if let Some(children) = multiset.get(dir) {
+return VisitChildrenSet::Set(children.to_owned());
+}
+}
+VisitChildrenSet::Empty
+}
+
+fn matches_everything(&self) -> bool {
+false
+}
+
+fn is_exact(&self) -> bool {
+false
+}
+}
+
 #[cfg(feature = "with-re2")]
 /// Returns a function that matches an `HgPath` against the given regex
 /// pattern.
@@ -361,6 +454,175 @@
 })
 }
 
+/// Returns a function that checks whether a given file (in the general sense)
+/// should be matched.
+fn build_match<'a, 'b>(
+ignore_patterns: &'a [IgnorePattern],
+root_dir: impl AsRef<Path>,
+) -> PatternResult<(
+Vec<u8>,
+Box<dyn Fn(&HgPath) -> bool + 'b + Sync>,
+Vec<PatternFileWarning>,
+)> {
+let mut match_funcs: Vec<Box<dyn Fn(&HgPath) -> bool + Sync>> = vec![];
+// For debugging and printing
+let mut patterns = vec![];
+let mut all_warnings = vec![];
+
+let (subincludes, ignore_patterns) =
+filter_subincludes(ignore_patterns, root_dir)?;
+
+if !subincludes.is_empty() {
+// Build prefix-based matcher functions for subincludes
+let mut submatchers = FastHashMap::default();
+let mut prefixes = vec![];
+
+for SubInclude { prefix, root, path } in subincludes.into_iter() {
+let (match_fn, warnings) = get_ignore_function(&[path], root)?;
+all_warnings.extend(warnings);
+prefixes.push(prefix.to_owned());
+submatchers.insert(prefix.to_owned(), match_fn);
+}
+
+let match_subinclude = move |filename: &HgPath| {
+for prefix in prefixes.iter() {
+if let Some(rel) = 

D7925: rust-matchers: add `IgnoreMatcher`

2020-02-10 Thread Raphaël Gomès
Alphare updated this revision to Diff 20043.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7925?vs=19941&id=20043

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7925/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7925

AFFECTED FILES
  rust/hg-core/src/matchers.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/matchers.rs b/rust/hg-core/src/matchers.rs
--- a/rust/hg-core/src/matchers.rs
+++ b/rust/hg-core/src/matchers.rs
@@ -10,14 +10,25 @@
 #[cfg(feature = "with-re2")]
 use crate::re2::Re2;
 use crate::{
-filepatterns::{build_single_regex, PatternResult},
-utils::hg_path::{HgPath, HgPathBuf},
-DirsMultiset, DirstateMapError, IgnorePattern, PatternError,
+dirstate::dirs_multiset::DirsChildrenMultiset,
+filepatterns::{
+build_single_regex, filter_subincludes, get_patterns_from_file,
+PatternFileWarning, PatternResult, SubInclude,
+},
+utils::{
+files::find_dirs,
+hg_path::{HgPath, HgPathBuf},
+Escaped,
+},
+DirsMultiset, DirstateMapError, FastHashMap, IgnorePattern, PatternError,
 PatternSyntax,
 };
+
 use std::collections::HashSet;
+use std::fmt::{Display, Error, Formatter};
 use std::iter::FromIterator;
 use std::ops::Deref;
+use std::path::Path;
 
 #[derive(Debug, PartialEq)]
 pub enum VisitChildrenSet<'a> {
@@ -223,6 +234,88 @@
 }
 }
 
+/// Matches files that are included in the ignore rules.
+///
+#[cfg_attr(
+feature = "with-re2",
+doc = r##"
+```
+use hg::{
+matchers::{IncludeMatcher, Matcher},
+IgnorePattern,
+PatternSyntax,
+utils::hg_path::HgPath
+};
+use std::path::Path;
+///
+let ignore_patterns =
+vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
+let (matcher, _) = IncludeMatcher::new(ignore_patterns, "").unwrap();
+///
+assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
+assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
+assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
+assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
+```
+"##
+)]
+pub struct IncludeMatcher<'a> {
+patterns: Vec<u8>,
+match_fn: Box<dyn for<'r> Fn(&'r HgPath) -> bool + 'a + Sync>,
+/// Whether all the patterns match a prefix (i.e. recursively)
+prefix: bool,
+roots: HashSet<HgPathBuf>,
+dirs: HashSet<HgPathBuf>,
+parents: HashSet<HgPathBuf>,
+}
+
+impl<'a> Matcher for IncludeMatcher<'a> {
+fn file_set(&self) -> Option<&HashSet<&HgPath>> {
+None
+}
+
+fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
+false
+}
+
+fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
+(self.match_fn)(filename.as_ref())
+}
+
+fn visit_children_set(
+&self,
+directory: impl AsRef<HgPath>,
+) -> VisitChildrenSet {
+let dir = directory.as_ref();
+if self.prefix && self.roots.contains(dir) {
+return VisitChildrenSet::Recursive;
+}
+if self.roots.contains(HgPath::new(b""))
+|| self.roots.contains(dir)
+|| self.dirs.contains(dir)
+|| find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
+{
+return VisitChildrenSet::This;
+}
+
+if self.parents.contains(directory.as_ref()) {
+let multiset = self.get_all_parents_children();
+if let Some(children) = multiset.get(dir) {
+return VisitChildrenSet::Set(children.to_owned());
+}
+}
+VisitChildrenSet::Empty
+}
+
+fn matches_everything(&self) -> bool {
+false
+}
+
+fn is_exact(&self) -> bool {
+false
+}
+}
+
 const MAX_RE_SIZE: usize = 2;
 
 #[cfg(feature = "with-re2")]
@@ -396,6 +489,175 @@
 })
 }
 
+/// Returns a function that checks whether a given file (in the general sense)
+/// should be matched.
+fn build_match<'a, 'b>(
+ignore_patterns: &'a [IgnorePattern],
+root_dir: impl AsRef<Path>,
+) -> PatternResult<(
+Vec<u8>,
+Box<dyn Fn(&HgPath) -> bool + 'b + Sync>,
+Vec<PatternFileWarning>,
+)> {
+let mut match_funcs: Vec<Box<dyn Fn(&HgPath) -> bool + Sync>> = vec![];
+// For debugging and printing
+let mut patterns = vec![];
+let mut all_warnings = vec![];
+
+let (subincludes, ignore_patterns) =
+filter_subincludes(ignore_patterns, root_dir)?;
+
+if !subincludes.is_empty() {
+// Build prefix-based matcher functions for subincludes
+let mut submatchers = FastHashMap::default();
+let mut prefixes = vec![];
+
+for SubInclude { prefix, root, path } in subincludes.into_iter() {
+let (match_fn, warnings) = get_ignore_function(&[path], root)?;
+all_warnings.extend(warnings);
+prefixes.push(prefix.to_owned());
+submatchers.insert(prefix.to_owned(), match_fn);
+}
+
+let match_subinclude = move |filename: &HgPath| {
+for prefix in prefixes.iter() {
+if let Some(rel) = filename.relative_to(prefix) {
+if 

D7925: rust-matchers: add `IgnoreMatcher`

2020-02-06 Thread Raphaël Gomès
Alphare updated this revision to Diff 19941.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7925?vs=19420&id=19941

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7925/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7925

AFFECTED FILES
  rust/hg-core/src/matchers.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/matchers.rs b/rust/hg-core/src/matchers.rs
--- a/rust/hg-core/src/matchers.rs
+++ b/rust/hg-core/src/matchers.rs
@@ -10,14 +10,25 @@
 #[cfg(feature = "with-re2")]
 use crate::re2::Re2;
 use crate::{
-filepatterns::{build_single_regex, PatternResult},
-utils::hg_path::{HgPath, HgPathBuf},
-DirsMultiset, DirstateMapError, IgnorePattern, PatternError,
+dirstate::dirs_multiset::DirsChildrenMultiset,
+filepatterns::{
+build_single_regex, filter_subincludes, get_patterns_from_file,
+PatternFileWarning, PatternResult, SubInclude,
+},
+utils::{
+files::find_dirs,
+hg_path::{HgPath, HgPathBuf},
+Escaped,
+},
+DirsMultiset, DirstateMapError, FastHashMap, IgnorePattern, PatternError,
 PatternSyntax,
 };
+
 use std::collections::HashSet;
+use std::fmt::{Display, Error, Formatter};
 use std::iter::FromIterator;
 use std::ops::Deref;
+use std::path::Path;
 
 #[derive(Debug, PartialEq)]
 pub enum VisitChildrenSet<'a> {
@@ -223,6 +234,88 @@
 }
 }
 
+/// Matches files that are included in the ignore rules.
+///
+#[cfg_attr(
+feature = "with-re2",
+doc = r##"
+```
+use hg::{
+matchers::{IncludeMatcher, Matcher},
+IgnorePattern,
+PatternSyntax,
+utils::hg_path::HgPath
+};
+use std::path::Path;
+///
+let ignore_patterns =
+vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
+let (matcher, _) = IncludeMatcher::new(ignore_patterns, "").unwrap();
+///
+assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
+assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
+assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
+assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
+```
+"##
+)]
+pub struct IncludeMatcher<'a> {
+patterns: Vec<u8>,
+match_fn: Box<dyn for<'r> Fn(&'r HgPath) -> bool + 'a + Sync>,
+/// Whether all the patterns match a prefix (i.e. recursively)
+prefix: bool,
+roots: HashSet<HgPathBuf>,
+dirs: HashSet<HgPathBuf>,
+parents: HashSet<HgPathBuf>,
+}
+
+impl<'a> Matcher for IncludeMatcher<'a> {
+fn file_set(&self) -> Option<&HashSet<&HgPath>> {
+None
+}
+
+fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
+false
+}
+
+fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
+(self.match_fn)(filename.as_ref())
+}
+
+fn visit_children_set(
+&self,
+directory: impl AsRef<HgPath>,
+) -> VisitChildrenSet {
+let dir = directory.as_ref();
+if self.prefix && self.roots.contains(dir) {
+return VisitChildrenSet::Recursive;
+}
+if self.roots.contains(HgPath::new(b""))
+|| self.roots.contains(dir)
+|| self.dirs.contains(dir)
+|| find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
+{
+return VisitChildrenSet::This;
+}
+
+if self.parents.contains(directory.as_ref()) {
+let multiset = self.get_all_parents_children();
+if let Some(children) = multiset.get(dir) {
+return VisitChildrenSet::Set(children.to_owned());
+}
+}
+VisitChildrenSet::Empty
+}
+
+fn matches_everything(&self) -> bool {
+false
+}
+
+fn is_exact(&self) -> bool {
+false
+}
+}
+
 const MAX_RE_SIZE: usize = 2;
 
 #[cfg(feature = "with-re2")]
@@ -396,6 +489,175 @@
 })
 }
 
+/// Returns a function that checks whether a given file (in the general sense)
+/// should be matched.
+fn build_match<'a, 'b>(
+ignore_patterns: &'a [IgnorePattern],
+root_dir: impl AsRef<Path>,
+) -> PatternResult<(
+Vec<u8>,
+Box<dyn Fn(&HgPath) -> bool + 'b + Sync>,
+Vec<PatternFileWarning>,
+)> {
+let mut match_funcs: Vec<Box<dyn Fn(&HgPath) -> bool + Sync>> = vec![];
+// For debugging and printing
+let mut patterns = vec![];
+let mut all_warnings = vec![];
+
+let (subincludes, ignore_patterns) =
+filter_subincludes(ignore_patterns, root_dir)?;
+
+if !subincludes.is_empty() {
+// Build prefix-based matcher functions for subincludes
+let mut submatchers = FastHashMap::default();
+let mut prefixes = vec![];
+
+for SubInclude { prefix, root, path } in subincludes.into_iter() {
+let (match_fn, warnings) = get_ignore_function(&[path], root)?;
+all_warnings.extend(warnings);
+prefixes.push(prefix.to_owned());
+submatchers.insert(prefix.to_owned(), match_fn);
+}
+
+let match_subinclude = move |filename: &HgPath| {
+for prefix in prefixes.iter() {
+if let Some(rel) = filename.relative_to(prefix) {
+if 

D7925: rust-matchers: add `IgnoreMatcher`

2020-01-17 Thread Raphaël Gomès
Alphare updated this revision to Diff 19420.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7925?vs=19404&id=19420

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7925/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7925

AFFECTED FILES
  rust/hg-core/src/matchers.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/matchers.rs b/rust/hg-core/src/matchers.rs
--- a/rust/hg-core/src/matchers.rs
+++ b/rust/hg-core/src/matchers.rs
@@ -10,14 +10,25 @@
 #[cfg(feature = "with-re2")]
 use crate::re2::Re2;
 use crate::{
-filepatterns::{build_single_regex, PatternResult},
-utils::hg_path::{HgPath, HgPathBuf},
-DirsMultiset, DirstateMapError, IgnorePattern, PatternError,
+dirstate::dirs_multiset::DirsChildrenMultiset,
+filepatterns::{
+build_single_regex, filter_subincludes, get_patterns_from_file,
+PatternFileWarning, PatternResult, SubInclude,
+},
+utils::{
+files::find_dirs,
+hg_path::{HgPath, HgPathBuf},
+Escaped,
+},
+DirsMultiset, DirstateMapError, FastHashMap, IgnorePattern, PatternError,
 PatternSyntax,
 };
+
 use std::collections::HashSet;
+use std::fmt::{Display, Error, Formatter};
 use std::iter::FromIterator;
 use std::ops::Deref;
+use std::path::Path;
 
 #[derive(Debug, PartialEq)]
 pub enum VisitChildrenSet<'a> {
@@ -223,6 +234,88 @@
 }
 }
 
+/// Matches files that are included in the ignore rules.
+///
+#[cfg_attr(
+feature = "with-re2",
+doc = r##"
+```
+use hg::{
+matchers::{IncludeMatcher, Matcher},
+IgnorePattern,
+PatternSyntax,
+utils::hg_path::HgPath
+};
+use std::path::Path;
+///
+let ignore_patterns =
+vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
+let (matcher, _) = IncludeMatcher::new(ignore_patterns, "").unwrap();
+///
+assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
+assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
+assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
+assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
+```
+"##
+)]
+pub struct IncludeMatcher<'a> {
+patterns: Vec<u8>,
+match_fn: Box<dyn for<'r> Fn(&'r HgPath) -> bool + 'a + Sync>,
+/// Whether all the patterns match a prefix (i.e. recursively)
+prefix: bool,
+roots: HashSet<HgPathBuf>,
+dirs: HashSet<HgPathBuf>,
+parents: HashSet<HgPathBuf>,
+}
+
+impl<'a> Matcher for IncludeMatcher<'a> {
+fn file_set(&self) -> Option<&HashSet<&HgPath>> {
+None
+}
+
+fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
+false
+}
+
+fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
+(self.match_fn)(filename.as_ref())
+}
+
+fn visit_children_set(
+&self,
+directory: impl AsRef<HgPath>,
+) -> VisitChildrenSet {
+let dir = directory.as_ref();
+if self.prefix && self.roots.contains(dir) {
+return VisitChildrenSet::Recursive;
+}
+if self.roots.contains(HgPath::new(b""))
+|| self.roots.contains(dir)
+|| self.dirs.contains(dir)
+|| find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
+{
+return VisitChildrenSet::This;
+}
+
+if self.parents.contains(directory.as_ref()) {
+let multiset = self.get_all_parents_children();
+if let Some(children) = multiset.get(dir) {
+return VisitChildrenSet::Set(children.to_owned());
+}
+}
+VisitChildrenSet::Empty
+}
+
+fn matches_everything(&self) -> bool {
+false
+}
+
+fn is_exact(&self) -> bool {
+false
+}
+}
+
 const MAX_RE_SIZE: usize = 2;
 
 #[cfg(feature = "with-re2")]
@@ -395,6 +488,175 @@
 })
 }
 
+/// Returns a function that checks whether a given file (in the general sense)
+/// should be matched.
+fn build_match<'a, 'b>(
+ignore_patterns: &'a [IgnorePattern],
+root_dir: impl AsRef<Path>,
+) -> PatternResult<(
+Vec<u8>,
+Box<dyn Fn(&HgPath) -> bool + 'b + Sync>,
+Vec<PatternFileWarning>,
+)> {
+let mut match_funcs: Vec<Box<dyn Fn(&HgPath) -> bool + Sync>> = vec![];
+// For debugging and printing
+let mut patterns = vec![];
+let mut all_warnings = vec![];
+
+let (subincludes, ignore_patterns) =
+filter_subincludes(ignore_patterns, root_dir)?;
+
+if !subincludes.is_empty() {
+// Build prefix-based matcher functions for subincludes
+let mut submatchers = FastHashMap::default();
+let mut prefixes = vec![];
+
+for SubInclude { prefix, root, path } in subincludes.into_iter() {
+let (match_fn, warnings) = get_ignore_function(&[path], root)?;
+all_warnings.extend(warnings);
+prefixes.push(prefix.to_owned());
+submatchers.insert(prefix.to_owned(), match_fn);
+}
+
+let match_subinclude = move |filename: &HgPath| {
+for prefix in prefixes.iter() {
+if let Some(rel) = filename.relative_to(prefix) {
+if 

D7925: rust-matchers: add `IgnoreMatcher`

2020-01-17 Thread Raphaël Gomès
Alphare created this revision.
Herald added subscribers: mercurial-devel, kevincox, durin42.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  This is a big change but all of the pieces call each other, so it makes sense
  to have this all in one patch.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D7925

AFFECTED FILES
  rust/hg-core/src/matchers.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/matchers.rs b/rust/hg-core/src/matchers.rs
--- a/rust/hg-core/src/matchers.rs
+++ b/rust/hg-core/src/matchers.rs
@@ -10,14 +10,25 @@
 #[cfg(feature = "with-re2")]
 use crate::re2::Re2;
 use crate::{
-filepatterns::{build_single_regex, PatternResult},
-utils::hg_path::{HgPath, HgPathBuf},
-DirsMultiset, DirstateMapError, IgnorePattern, PatternError,
+dirstate::dirs_multiset::DirsChildrenMultiset,
+filepatterns::{
+build_single_regex, filter_subincludes, get_patterns_from_file,
+PatternFileWarning, PatternResult, SubInclude,
+},
+utils::{
+files::find_dirs,
+hg_path::{HgPath, HgPathBuf},
+Escaped,
+},
+DirsMultiset, DirstateMapError, FastHashMap, IgnorePattern, PatternError,
 PatternSyntax,
 };
+
 use std::collections::HashSet;
+use std::fmt::{Display, Error, Formatter};
 use std::iter::FromIterator;
 use std::ops::Deref;
+use std::path::Path;
 
 #[derive(Debug, PartialEq)]
 pub enum VisitChildrenSet<'a> {
@@ -223,6 +234,88 @@
 }
 }
 
+/// Matches files that are included in the ignore rules.
+///
+#[cfg_attr(
+feature = "with-re2",
+doc = r##"
+```
+use hg::{
+matchers::{IncludeMatcher, Matcher},
+IgnorePattern,
+PatternSyntax,
+utils::hg_path::HgPath
+};
+use std::path::Path;
+///
+let ignore_patterns =
+vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
+let (matcher, _) = IncludeMatcher::new(ignore_patterns, "").unwrap();
+///
+assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
+assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
+assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
+assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
+```
+"##
+)]
+pub struct IncludeMatcher<'a> {
+patterns: Vec<u8>,
+match_fn: Box<dyn for<'r> Fn(&'r HgPath) -> bool + 'a + Sync>,
+/// Whether all the patterns match a prefix (i.e. recursively)
+prefix: bool,
+roots: HashSet<HgPathBuf>,
+dirs: HashSet<HgPathBuf>,
+parents: HashSet<HgPathBuf>,
+}
+
+impl<'a> Matcher for IncludeMatcher<'a> {
+fn file_set(&self) -> Option<&HashSet<&HgPath>> {
+None
+}
+
+fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
+false
+}
+
+fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
+(self.match_fn)(filename.as_ref())
+}
+
+fn visit_children_set(
+&self,
+directory: impl AsRef<HgPath>,
+) -> VisitChildrenSet {
+let dir = directory.as_ref();
+if self.prefix && self.roots.contains(dir) {
+return VisitChildrenSet::Recursive;
+}
+if self.roots.contains(HgPath::new(b""))
+|| self.roots.contains(dir)
+|| self.dirs.contains(dir)
+|| find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
+{
+return VisitChildrenSet::This;
+}
+
+if self.parents.contains(directory.as_ref()) {
+let multiset = self.get_all_parents_children();
+if let Some(children) = multiset.get(dir) {
+return VisitChildrenSet::Set(children.to_owned());
+}
+}
+VisitChildrenSet::Empty
+}
+
+fn matches_everything(&self) -> bool {
+false
+}
+
+fn is_exact(&self) -> bool {
+false
+}
+}
+
 const MAX_RE_SIZE: usize = 2;
 
 #[cfg(feature = "with-re2")]
@@ -387,6 +480,171 @@
 Ok((HashSet::from_iter(roots), HashSet::from_iter(dirs), parents))
 }
 
+/// Returns a function that checks whether a given file (in the general sense)
+/// should be matched.
+fn build_match<'a, 'b>(
+ignore_patterns: &'a [IgnorePattern],
+root_dir: impl AsRef<Path>,
+) -> PatternResult<(
+Vec<u8>,
+Box<dyn Fn(&HgPath) -> bool + 'b + Sync>,
+Vec<PatternFileWarning>,
+)> {
+let mut match_funcs: Vec<Box<dyn Fn(&HgPath) -> bool + Sync>> = vec![];
+// For debugging and printing
+let mut patterns = vec![];
+let mut all_warnings = vec![];
+
+let (subincludes, ignore_patterns) =
+filter_subincludes(ignore_patterns, root_dir)?;
+
+if !subincludes.is_empty() {
+// Build prefix-based matcher functions for subincludes
+let mut submatchers = FastHashMap::default();
+let mut prefixes = vec![];
+
+for SubInclude { prefix, root, path } in subincludes.into_iter() {
+let (match_fn, warnings) = get_ignore_function(&[path], root)?;
+all_warnings.extend(warnings);
+prefixes.push(prefix.to_owned());
+submatchers.insert(prefix.to_owned(), match_fn);
+}
+
+let match_subinclude = move |filename: &HgPath| {
+