D6260: rust-discovery: takefullsample() core implementation

2019-05-16 Thread gracinet (Georges Racinet)
This revision was automatically updated to reflect the committed changes.
gracinet marked an inline comment as done.
Closed by commit rHG6a8aec3f03df: rust-discovery: takefullsample() core 
implementation (authored by gracinet, committed by ).

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D6260?vs=15022=15146

REVISION DETAIL
  https://phab.mercurial-scm.org/D6260

AFFECTED FILES
  rust/hg-core/src/discovery.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/discovery.rs b/rust/hg-core/src/discovery.rs
--- a/rust/hg-core/src/discovery.rs
+++ b/rust/hg-core/src/discovery.rs
@@ -26,6 +26,7 @@
 graph: G, // plays the role of self._repo
 common: MissingAncestors,
 undecided: Option>,
+children_cache: Option>>,
 missing: HashSet,
 rng: Rng,
 }
@@ -49,13 +50,16 @@
 /// - `sample`: a sample to update
 /// - `parentfn`: a callable to resolve parents for a revision
 /// - `quicksamplesize`: optional target size of the sample
-fn update_sample(
+fn update_sample(
 revs: Option<>,
 heads: impl IntoIterator,
 sample:  HashSet,
-parentsfn: impl Fn(Revision) -> Result<[Revision; 2], GraphError>,
+parentsfn: impl Fn(Revision) -> Result,
 quicksamplesize: Option,
-) -> Result<(), GraphError> {
+) -> Result<(), GraphError>
+where
+I: Iterator,
+{
 let mut distances: HashMap = HashMap::new();
 let mut visit: VecDeque = heads.into_iter().collect();
 let mut factor: u32 = 1;
@@ -83,10 +87,7 @@
 }
 }
 }
-for  in (current)? {
-if p == NULL_REVISION {
-continue;
-}
+for p in parentsfn(current)? {
 if let Some(revs) = revs {
 if !revs.contains() {
 continue;
@@ -99,6 +100,39 @@
 Ok(())
 }
 
+struct ParentsIterator {
+parents: [Revision; 2],
+cur: usize,
+}
+
+impl ParentsIterator {
+fn graph_parents(
+graph:  Graph,
+r: Revision,
+) -> Result {
+Ok(ParentsIterator {
+parents: graph.parents(r)?,
+cur: 0,
+})
+}
+}
+
+impl Iterator for ParentsIterator {
+type Item = Revision;
+
+fn next( self) -> Option {
+if self.cur > 1 {
+return None;
+}
+let rev = self.parents[self.cur];
+self.cur += 1;
+if rev == NULL_REVISION {
+return self.next();
+}
+Some(rev)
+}
+}
+
 impl PartialDiscovery {
 /// Create a PartialDiscovery object, with the intent
 /// of comparing our `::` revset to the contents of another
@@ -123,6 +157,7 @@
 ) -> Self {
 PartialDiscovery {
 undecided: None,
+children_cache: None,
 target_heads: Some(target_heads),
 graph: graph.clone(),
 common: MissingAncestors::new(graph, vec![]),
@@ -228,6 +263,22 @@
 Ok(())
 }
 
+fn ensure_children_cache( self) -> Result<(), GraphError> {
+if self.children_cache.is_some() {
+return Ok(());
+}
+self.ensure_undecided()?;
+
+let mut children: HashMap> = HashMap::new();
+for  in self.undecided.as_ref().unwrap() {
+for p in ParentsIterator::graph_parents(, rev)? {
+children.entry(p).or_insert_with(|| Vec::new()).push(rev);
+}
+}
+self.children_cache = Some(children);
+Ok(())
+}
+
 /// Provide statistics about the current state of the discovery process
 pub fn stats() -> DiscoveryStats {
 DiscoveryStats {
@@ -255,11 +306,104 @@
 None,
 headrevs,
  sample,
-|r| self.graph.parents(r),
+|r| ParentsIterator::graph_parents(, r),
 Some(size),
 )?;
 Ok(sample.into_iter().collect())
 }
+
+/// Extract a sample from `self.undecided`, going from its heads and roots.
+///
+/// The `size` parameter is used to avoid useless computations if
+/// it turns out to be bigger than the whole set of undecided Revisions.
+///
+/// The sample is taken by using `update_sample` from the heads, then
+/// from the roots, working on the reverse DAG,
+/// expressed by `self.children_cache`.
+///
+/// No effort is being made to complete or limit the sample to `size`
+fn bidirectional_sample(
+ self,
+size: usize,
+) -> Result, GraphError> {
+self.ensure_undecided()?;
+{
+// we don't want to compute children_cache before this
+// but doing it after extracting self.undecided takes a mutable
+// ref to self while a shareable one is still active.
+let undecided = self.undecided.as_ref().unwrap();
+if undecided.len() <= size {
+return Ok(undecided.clone());
+}
+}
+
+self.ensure_children_cache()?;
+let revs = 

D6260: rust-discovery: takefullsample() core implementation

2019-05-06 Thread gracinet (Georges Racinet)
gracinet marked an inline comment as done.
gracinet added a comment.


  I think it's maybe ready to land in that form. In the future, I'd like to put 
this `ParentsIterator` in a more generic place, and IMHO, it should become part 
of an `AbstractGraph` trait, that could live in a `graph` module.
  
  Ideally, we should even be able to generically reverse these graphs, so that 
the children iteration that's been done in this discovery process would just be 
the same, as seen from `update_sample`, but it's maybe too much asking.

INLINE COMMENTS

> kevincox wrote in discovery.rs:137
> Can't you just call `self.next()` in both cases?

Yes that's what I actually did first, and then I tried this variant to see if 
it's faster. It may be a bit, but nothing worth doing in a first inclusion.

> kevincox wrote in discovery.rs:280
> Generally I would just do this.
> 
>   for  in self.undecided.as_ref().unwrap() {

yes, same as in parent commit. Indeed it's a nicer way to iterate on references 
to `Copy` objects

> kevincox wrote in discovery.rs:281
> Same.

for this one, I can actually use `ParentsIterator`, now, spares me the 
`NULL_REVISION` check

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D6260

To: gracinet, #hg-reviewers, kevincox
Cc: durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D6260: rust-discovery: takefullsample() core implementation

2019-05-06 Thread gracinet (Georges Racinet)
gracinet updated this revision to Diff 15022.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D6260?vs=14797=15022

REVISION DETAIL
  https://phab.mercurial-scm.org/D6260

AFFECTED FILES
  rust/hg-core/src/discovery.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/discovery.rs b/rust/hg-core/src/discovery.rs
--- a/rust/hg-core/src/discovery.rs
+++ b/rust/hg-core/src/discovery.rs
@@ -26,6 +26,7 @@
 graph: G, // plays the role of self._repo
 common: MissingAncestors,
 undecided: Option>,
+children_cache: Option>>,
 missing: HashSet,
 rng: Rng,
 }
@@ -49,13 +50,16 @@
 /// - `sample`: a sample to update
 /// - `parentfn`: a callable to resolve parents for a revision
 /// - `quicksamplesize`: optional target size of the sample
-fn update_sample(
+fn update_sample(
 revs: Option<>,
 heads: impl IntoIterator,
 sample:  HashSet,
-parentsfn: impl Fn(Revision) -> Result<[Revision; 2], GraphError>,
+parentsfn: impl Fn(Revision) -> Result,
 quicksamplesize: Option,
-) -> Result<(), GraphError> {
+) -> Result<(), GraphError>
+where
+I: Iterator,
+{
 let mut distances: HashMap = HashMap::new();
 let mut visit: VecDeque = heads.into_iter().collect();
 let mut factor: u32 = 1;
@@ -83,10 +87,7 @@
 }
 }
 }
-for  in (current)? {
-if p == NULL_REVISION {
-continue;
-}
+for p in parentsfn(current)? {
 if let Some(revs) = revs {
 if !revs.contains() {
 continue;
@@ -99,6 +100,39 @@
 Ok(())
 }
 
+struct ParentsIterator {
+parents: [Revision; 2],
+cur: usize,
+}
+
+impl ParentsIterator {
+fn graph_parents(
+graph:  Graph,
+r: Revision,
+) -> Result {
+Ok(ParentsIterator {
+parents: graph.parents(r)?,
+cur: 0,
+})
+}
+}
+
+impl Iterator for ParentsIterator {
+type Item = Revision;
+
+fn next( self) -> Option {
+if self.cur > 1 {
+return None;
+}
+let rev = self.parents[self.cur];
+self.cur += 1;
+if rev == NULL_REVISION {
+return self.next();
+}
+Some(rev)
+}
+}
+
 impl PartialDiscovery {
 /// Create a PartialDiscovery object, with the intent
 /// of comparing our `::` revset to the contents of another
@@ -123,6 +157,7 @@
 ) -> Self {
 PartialDiscovery {
 undecided: None,
+children_cache: None,
 target_heads: Some(target_heads),
 graph: graph.clone(),
 common: MissingAncestors::new(graph, vec![]),
@@ -228,6 +263,22 @@
 Ok(())
 }
 
+fn ensure_children_cache( self) -> Result<(), GraphError> {
+if self.children_cache.is_some() {
+return Ok(());
+}
+self.ensure_undecided()?;
+
+let mut children: HashMap> = HashMap::new();
+for  in self.undecided.as_ref().unwrap() {
+for p in ParentsIterator::graph_parents(, rev)? {
+children.entry(p).or_insert_with(|| Vec::new()).push(rev);
+}
+}
+self.children_cache = Some(children);
+Ok(())
+}
+
 /// Provide statistics about the current state of the discovery process
 pub fn stats() -> DiscoveryStats {
 DiscoveryStats {
@@ -255,11 +306,104 @@
 None,
 headrevs,
  sample,
-|r| self.graph.parents(r),
+|r| ParentsIterator::graph_parents(, r),
 Some(size),
 )?;
 Ok(sample.into_iter().collect())
 }
+
+/// Extract a sample from `self.undecided`, going from its heads and roots.
+///
+/// The `size` parameter is used to avoid useless computations if
+/// it turns out to be bigger than the whole set of undecided Revisions.
+///
+/// The sample is taken by using `update_sample` from the heads, then
+/// from the roots, working on the reverse DAG,
+/// expressed by `self.children_cache`.
+///
+/// No effort is being made to complete or limit the sample to `size`
+fn bidirectional_sample(
+ self,
+size: usize,
+) -> Result, GraphError> {
+self.ensure_undecided()?;
+{
+// we don't want to compute children_cache before this
+// but doing it after extracting self.undecided takes a mutable
+// ref to self while a shareable one is still active.
+let undecided = self.undecided.as_ref().unwrap();
+if undecided.len() <= size {
+return Ok(undecided.clone());
+}
+}
+
+self.ensure_children_cache()?;
+let revs = self.undecided.as_ref().unwrap();
+let mut sample: HashSet = revs.clone();
+
+// it's possible that leveraging the children cache would be more
+// efficient here
+

D6260: rust-discovery: takefullsample() core implementation

2019-04-17 Thread kevincox (Kevin Cox)
kevincox accepted this revision.
kevincox added inline comments.

INLINE COMMENTS

> discovery.rs:108
> +cur: usize,
> +}
> +

You might also consider an enum

  enum Parents {
Zero,
One(Revision),
Two(Revision, Revision),
  }

This also makes the iterator implementation a little clearer.

> discovery.rs:137
> +return self.next();
> +}
> +}

Can't you just call `self.next()` in both cases?

> discovery.rs:280
> +let mut children: HashMap> = HashMap::new();
> +for rev in self.undecided.as_ref().unwrap().iter().cloned() {
> +for p in self.graph.parents(rev)?.iter().cloned() {

Generally I would just do this.

  for  in self.undecided.as_ref().unwrap() {

> discovery.rs:281
> +for rev in self.undecided.as_ref().unwrap().iter().cloned() {
> +for p in self.graph.parents(rev)?.iter().cloned() {
> +if p != NULL_REVISION {

Same.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D6260

To: gracinet, #hg-reviewers, kevincox
Cc: durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D6260: rust-discovery: takefullsample() core implementation

2019-04-17 Thread gracinet (Georges Racinet)
gracinet added a comment.


  As I've tried to make clear in the changeset description, this is still work 
in progress.
  
  That being said, I wanted to put in on display because:
  
  - together with its descendants, it leads to a drop-in replacement for the 
Python `partialdiscovery` object
  - it's a good support for feedback on the question of more abstract DAG 
representations
  - the children cache leads to the same design problem as the `undecided` 
member attributed had (see https://phab.mercurial-scm.org/D6231). I expect to 
get more across similar issues while translating Python code
  
  Thanks !

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D6260

To: gracinet, #hg-reviewers
Cc: durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D6260: rust-discovery: takefullsample() core implementation

2019-04-17 Thread gracinet (Georges Racinet)
gracinet created this revision.
Herald added subscribers: mercurial-devel, kevincox, durin42.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  take_full_sample() browses the undecided set in both directions: from
  its roots as well as from its heads.
  
  Following what's done on the Python side, we alter update_sample()
  signature to take a closure returning an iterator: either ParentsIterator
  or an iterator over the children found in `children_cache`. These constructs
  should probably be split off in a separate module.
  
  This is a first concrete example where a more abstract graph notion (probably
   a trait) would be useful, as this is nothing but an operation on the reversed
  DAG.
  
  A similar motivation in the context of the discovery
  process would be to replace the call to dagops::range in
  `add_missing_revisions()` with a simple iteration over descendents, again an
  operation on the reversed graph.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D6260

AFFECTED FILES
  rust/hg-core/src/discovery.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/discovery.rs b/rust/hg-core/src/discovery.rs
--- a/rust/hg-core/src/discovery.rs
+++ b/rust/hg-core/src/discovery.rs
@@ -26,6 +26,7 @@
 graph: G, // plays the role of self._repo
 common: MissingAncestors,
 undecided: Option>,
+children_cache: Option>>,
 missing: HashSet,
 rng: Rng,
 }
@@ -49,13 +50,16 @@
 /// - `sample`: a sample to update
 /// - `parentfn`: a callable to resolve parents for a revision
 /// - `quicksamplesize`: optional target size of the sample
-fn update_sample(
+fn update_sample(
 revs: Option<>,
 heads: impl IntoIterator,
 sample:  HashSet,
-parentsfn: impl Fn(Revision) -> Result<[Revision; 2], GraphError>,
+parentsfn: impl Fn(Revision) -> Result,
 quicksamplesize: Option,
-) -> Result<(), GraphError> {
+) -> Result<(), GraphError>
+where
+I: Iterator,
+{
 let mut distances: HashMap = HashMap::new();
 let mut visit: VecDeque = heads.into_iter().collect();
 let mut factor: u32 = 1;
@@ -84,10 +88,7 @@
 }
 }
 seen.insert(current);
-for p in parentsfn(current)?.iter().cloned() {
-if p == NULL_REVISION {
-continue;
-}
+for p in parentsfn(current)? {
 if let Some(revs) = revs {
 if !revs.contains() {
 continue;
@@ -100,6 +101,45 @@
 Ok(())
 }
 
+// TODO reread this and decide Vec/Box, and where to put it
+struct ParentsIterator {
+parents: [Revision; 2],
+cur: usize,
+}
+
+impl ParentsIterator {
+fn graph_parents(
+graph:  Graph,
+r: Revision,
+) -> Result {
+Ok(ParentsIterator {
+parents: graph.parents(r)?,
+cur: 0,
+})
+}
+}
+
+impl Iterator for ParentsIterator {
+type Item = Revision;
+
+fn next( self) -> Option {
+if self.cur > 1 {
+return None;
+}
+let rev = self.parents[self.cur];
+self.cur += 1;
+if rev == NULL_REVISION {
+if self.cur == 2 {
+return None;
+} else {
+// exceptional branch
+return self.next();
+}
+}
+Some(rev)
+}
+}
+
 impl PartialDiscovery {
 /// Create a PartialDiscovery object, with the intent
 /// of comparing our `::` revset to the contents of another
@@ -124,6 +164,7 @@
 ) -> Self {
 PartialDiscovery {
 undecided: None,
+children_cache: None,
 target_heads: Some(target_heads),
 graph: graph.clone(),
 common: MissingAncestors::new(graph, vec![]),
@@ -229,6 +270,24 @@
 Ok(())
 }
 
+fn ensure_children_cache( self) -> Result<(), GraphError> {
+if self.children_cache.is_some() {
+return Ok(());
+}
+self.ensure_undecided()?;
+
+let mut children: HashMap> = HashMap::new();
+for rev in self.undecided.as_ref().unwrap().iter().cloned() {
+for p in self.graph.parents(rev)?.iter().cloned() {
+if p != NULL_REVISION {
+children.entry(p).or_insert_with(|| Vec::new()).push(rev);
+}
+}
+}
+self.children_cache = Some(children);
+Ok(())
+}
+
 /// Provide statistics about the current state of the discovery process
 pub fn stats() -> DiscoveryStats {
 DiscoveryStats {
@@ -256,11 +315,82 @@
 None,
 headrevs,
  sample,
-|r| self.graph.parents(r),
+|r| ParentsIterator::graph_parents(, r),
 Some(size),
 )?;
 Ok(sample.into_iter().collect())
 }
+
+fn bidirectional_sample(
+ self,
+size: usize,
+) -> Result, GraphError> {
+self.ensure_undecided()?;
+{
+