D6260: rust-discovery: takefullsample() core implementation
This revision was automatically updated to reflect the committed changes. gracinet marked an inline comment as done. Closed by commit rHG6a8aec3f03df: rust-discovery: takefullsample() core implementation (authored by gracinet, committed by ). REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D6260?vs=15022=15146 REVISION DETAIL https://phab.mercurial-scm.org/D6260 AFFECTED FILES rust/hg-core/src/discovery.rs CHANGE DETAILS diff --git a/rust/hg-core/src/discovery.rs b/rust/hg-core/src/discovery.rs --- a/rust/hg-core/src/discovery.rs +++ b/rust/hg-core/src/discovery.rs @@ -26,6 +26,7 @@ graph: G, // plays the role of self._repo common: MissingAncestors, undecided: Option>, +children_cache: Option>>, missing: HashSet, rng: Rng, } @@ -49,13 +50,16 @@ /// - `sample`: a sample to update /// - `parentfn`: a callable to resolve parents for a revision /// - `quicksamplesize`: optional target size of the sample -fn update_sample( +fn update_sample( revs: Option<>, heads: impl IntoIterator, sample: HashSet, -parentsfn: impl Fn(Revision) -> Result<[Revision; 2], GraphError>, +parentsfn: impl Fn(Revision) -> Result, quicksamplesize: Option, -) -> Result<(), GraphError> { +) -> Result<(), GraphError> +where +I: Iterator, +{ let mut distances: HashMap = HashMap::new(); let mut visit: VecDeque = heads.into_iter().collect(); let mut factor: u32 = 1; @@ -83,10 +87,7 @@ } } } -for in (current)? { -if p == NULL_REVISION { -continue; -} +for p in parentsfn(current)? { if let Some(revs) = revs { if !revs.contains() { continue; @@ -99,6 +100,39 @@ Ok(()) } +struct ParentsIterator { +parents: [Revision; 2], +cur: usize, +} + +impl ParentsIterator { +fn graph_parents( +graph: Graph, +r: Revision, +) -> Result { +Ok(ParentsIterator { +parents: graph.parents(r)?, +cur: 0, +}) +} +} + +impl Iterator for ParentsIterator { +type Item = Revision; + +fn next( self) -> Option { +if self.cur > 1 { +return None; +} +let rev = self.parents[self.cur]; +self.cur += 1; +if rev == NULL_REVISION { +return self.next(); +} +Some(rev) +} +} + impl PartialDiscovery { /// Create a PartialDiscovery object, with the intent /// of comparing our `::` revset to the contents of another @@ -123,6 +157,7 @@ ) -> Self { PartialDiscovery { undecided: None, +children_cache: None, target_heads: Some(target_heads), graph: graph.clone(), common: MissingAncestors::new(graph, vec![]), @@ -228,6 +263,22 @@ Ok(()) } +fn ensure_children_cache( self) -> Result<(), GraphError> { +if self.children_cache.is_some() { +return Ok(()); +} +self.ensure_undecided()?; + +let mut children: HashMap> = HashMap::new(); +for in self.undecided.as_ref().unwrap() { +for p in ParentsIterator::graph_parents(, rev)? { +children.entry(p).or_insert_with(|| Vec::new()).push(rev); +} +} +self.children_cache = Some(children); +Ok(()) +} + /// Provide statistics about the current state of the discovery process pub fn stats() -> DiscoveryStats { DiscoveryStats { @@ -255,11 +306,104 @@ None, headrevs, sample, -|r| self.graph.parents(r), +|r| ParentsIterator::graph_parents(, r), Some(size), )?; Ok(sample.into_iter().collect()) } + +/// Extract a sample from `self.undecided`, going from its heads and roots. +/// +/// The `size` parameter is used to avoid useless computations if +/// it turns out to be bigger than the whole set of undecided Revisions. +/// +/// The sample is taken by using `update_sample` from the heads, then +/// from the roots, working on the reverse DAG, +/// expressed by `self.children_cache`. +/// +/// No effort is being made to complete or limit the sample to `size` +fn bidirectional_sample( + self, +size: usize, +) -> Result, GraphError> { +self.ensure_undecided()?; +{ +// we don't want to compute children_cache before this +// but doing it after extracting self.undecided takes a mutable +// ref to self while a shareable one is still active. +let undecided = self.undecided.as_ref().unwrap(); +if undecided.len() <= size { +return Ok(undecided.clone()); +} +} + +self.ensure_children_cache()?; +let revs =
D6260: rust-discovery: takefullsample() core implementation
gracinet marked an inline comment as done. gracinet added a comment. I think it's maybe ready to land in that form. In the future, I'd like to put this `ParentsIterator` in a more generic place, and IMHO, it should become part of an `AbstractGraph` trait, that could live in a `graph` module. Ideally, we should even be able to generically reverse these graphs, so that the children iteration that's been done in this discovery process would just be the same, as seen from `update_sample`, but it's maybe too much asking. INLINE COMMENTS > kevincox wrote in discovery.rs:137 > Can't you just call `self.next()` in both cases? Yes that's what I actually did first, and then I tried this variant to see if it's faster. It may be a bit, but nothing worth doing in a first inclusion. > kevincox wrote in discovery.rs:280 > Generally I would just do this. > > for in self.undecided.as_ref().unwrap() { yes, same as in parent commit. Indeed it's a nicer way to iterate on references to `Copy` objects > kevincox wrote in discovery.rs:281 > Same. for this one, I can actually use `ParentsIterator`, now, spares me the `NULL_REVISION` check REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D6260 To: gracinet, #hg-reviewers, kevincox Cc: durin42, kevincox, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D6260: rust-discovery: takefullsample() core implementation
gracinet updated this revision to Diff 15022. REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D6260?vs=14797=15022 REVISION DETAIL https://phab.mercurial-scm.org/D6260 AFFECTED FILES rust/hg-core/src/discovery.rs CHANGE DETAILS diff --git a/rust/hg-core/src/discovery.rs b/rust/hg-core/src/discovery.rs --- a/rust/hg-core/src/discovery.rs +++ b/rust/hg-core/src/discovery.rs @@ -26,6 +26,7 @@ graph: G, // plays the role of self._repo common: MissingAncestors, undecided: Option>, +children_cache: Option>>, missing: HashSet, rng: Rng, } @@ -49,13 +50,16 @@ /// - `sample`: a sample to update /// - `parentfn`: a callable to resolve parents for a revision /// - `quicksamplesize`: optional target size of the sample -fn update_sample( +fn update_sample( revs: Option<>, heads: impl IntoIterator, sample: HashSet, -parentsfn: impl Fn(Revision) -> Result<[Revision; 2], GraphError>, +parentsfn: impl Fn(Revision) -> Result, quicksamplesize: Option, -) -> Result<(), GraphError> { +) -> Result<(), GraphError> +where +I: Iterator, +{ let mut distances: HashMap = HashMap::new(); let mut visit: VecDeque = heads.into_iter().collect(); let mut factor: u32 = 1; @@ -83,10 +87,7 @@ } } } -for in (current)? { -if p == NULL_REVISION { -continue; -} +for p in parentsfn(current)? { if let Some(revs) = revs { if !revs.contains() { continue; @@ -99,6 +100,39 @@ Ok(()) } +struct ParentsIterator { +parents: [Revision; 2], +cur: usize, +} + +impl ParentsIterator { +fn graph_parents( +graph: Graph, +r: Revision, +) -> Result { +Ok(ParentsIterator { +parents: graph.parents(r)?, +cur: 0, +}) +} +} + +impl Iterator for ParentsIterator { +type Item = Revision; + +fn next( self) -> Option { +if self.cur > 1 { +return None; +} +let rev = self.parents[self.cur]; +self.cur += 1; +if rev == NULL_REVISION { +return self.next(); +} +Some(rev) +} +} + impl PartialDiscovery { /// Create a PartialDiscovery object, with the intent /// of comparing our `::` revset to the contents of another @@ -123,6 +157,7 @@ ) -> Self { PartialDiscovery { undecided: None, +children_cache: None, target_heads: Some(target_heads), graph: graph.clone(), common: MissingAncestors::new(graph, vec![]), @@ -228,6 +263,22 @@ Ok(()) } +fn ensure_children_cache( self) -> Result<(), GraphError> { +if self.children_cache.is_some() { +return Ok(()); +} +self.ensure_undecided()?; + +let mut children: HashMap> = HashMap::new(); +for in self.undecided.as_ref().unwrap() { +for p in ParentsIterator::graph_parents(, rev)? { +children.entry(p).or_insert_with(|| Vec::new()).push(rev); +} +} +self.children_cache = Some(children); +Ok(()) +} + /// Provide statistics about the current state of the discovery process pub fn stats() -> DiscoveryStats { DiscoveryStats { @@ -255,11 +306,104 @@ None, headrevs, sample, -|r| self.graph.parents(r), +|r| ParentsIterator::graph_parents(, r), Some(size), )?; Ok(sample.into_iter().collect()) } + +/// Extract a sample from `self.undecided`, going from its heads and roots. +/// +/// The `size` parameter is used to avoid useless computations if +/// it turns out to be bigger than the whole set of undecided Revisions. +/// +/// The sample is taken by using `update_sample` from the heads, then +/// from the roots, working on the reverse DAG, +/// expressed by `self.children_cache`. +/// +/// No effort is being made to complete or limit the sample to `size` +fn bidirectional_sample( + self, +size: usize, +) -> Result, GraphError> { +self.ensure_undecided()?; +{ +// we don't want to compute children_cache before this +// but doing it after extracting self.undecided takes a mutable +// ref to self while a shareable one is still active. +let undecided = self.undecided.as_ref().unwrap(); +if undecided.len() <= size { +return Ok(undecided.clone()); +} +} + +self.ensure_children_cache()?; +let revs = self.undecided.as_ref().unwrap(); +let mut sample: HashSet = revs.clone(); + +// it's possible that leveraging the children cache would be more +// efficient here +
D6260: rust-discovery: takefullsample() core implementation
kevincox accepted this revision. kevincox added inline comments. INLINE COMMENTS > discovery.rs:108 > +cur: usize, > +} > + You might also consider an enum enum Parents { Zero, One(Revision), Two(Revision, Revision), } This also makes the iterator implementation a little clearer. > discovery.rs:137 > +return self.next(); > +} > +} Can't you just call `self.next()` in both cases? > discovery.rs:280 > +let mut children: HashMap> = HashMap::new(); > +for rev in self.undecided.as_ref().unwrap().iter().cloned() { > +for p in self.graph.parents(rev)?.iter().cloned() { Generally I would just do this. for in self.undecided.as_ref().unwrap() { > discovery.rs:281 > +for rev in self.undecided.as_ref().unwrap().iter().cloned() { > +for p in self.graph.parents(rev)?.iter().cloned() { > +if p != NULL_REVISION { Same. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D6260 To: gracinet, #hg-reviewers, kevincox Cc: durin42, kevincox, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D6260: rust-discovery: takefullsample() core implementation
gracinet added a comment. As I've tried to make clear in the changeset description, this is still work in progress. That being said, I wanted to put in on display because: - together with its descendants, it leads to a drop-in replacement for the Python `partialdiscovery` object - it's a good support for feedback on the question of more abstract DAG representations - the children cache leads to the same design problem as the `undecided` member attributed had (see https://phab.mercurial-scm.org/D6231). I expect to get more across similar issues while translating Python code Thanks ! REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D6260 To: gracinet, #hg-reviewers Cc: durin42, kevincox, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D6260: rust-discovery: takefullsample() core implementation
gracinet created this revision. Herald added subscribers: mercurial-devel, kevincox, durin42. Herald added a reviewer: hg-reviewers. REVISION SUMMARY take_full_sample() browses the undecided set in both directions: from its roots as well as from its heads. Following what's done on the Python side, we alter update_sample() signature to take a closure returning an iterator: either ParentsIterator or an iterator over the children found in `children_cache`. These constructs should probably be split off in a separate module. This is a first concrete example where a more abstract graph notion (probably a trait) would be useful, as this is nothing but an operation on the reversed DAG. A similar motivation in the context of the discovery process would be to replace the call to dagops::range in `add_missing_revisions()` with a simple iteration over descendents, again an operation on the reversed graph. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D6260 AFFECTED FILES rust/hg-core/src/discovery.rs CHANGE DETAILS diff --git a/rust/hg-core/src/discovery.rs b/rust/hg-core/src/discovery.rs --- a/rust/hg-core/src/discovery.rs +++ b/rust/hg-core/src/discovery.rs @@ -26,6 +26,7 @@ graph: G, // plays the role of self._repo common: MissingAncestors, undecided: Option>, +children_cache: Option>>, missing: HashSet, rng: Rng, } @@ -49,13 +50,16 @@ /// - `sample`: a sample to update /// - `parentfn`: a callable to resolve parents for a revision /// - `quicksamplesize`: optional target size of the sample -fn update_sample( +fn update_sample( revs: Option<>, heads: impl IntoIterator, sample: HashSet, -parentsfn: impl Fn(Revision) -> Result<[Revision; 2], GraphError>, +parentsfn: impl Fn(Revision) -> Result, quicksamplesize: Option, -) -> Result<(), GraphError> { +) -> Result<(), GraphError> +where +I: Iterator, +{ let mut distances: HashMap = HashMap::new(); let mut visit: VecDeque = heads.into_iter().collect(); let mut factor: u32 = 1; @@ -84,10 +88,7 @@ } } seen.insert(current); -for p in parentsfn(current)?.iter().cloned() { -if p == NULL_REVISION { -continue; -} +for p in parentsfn(current)? { if let Some(revs) = revs { if !revs.contains() { continue; @@ -100,6 +101,45 @@ Ok(()) } +// TODO reread this and decide Vec/Box, and where to put it +struct ParentsIterator { +parents: [Revision; 2], +cur: usize, +} + +impl ParentsIterator { +fn graph_parents( +graph: Graph, +r: Revision, +) -> Result { +Ok(ParentsIterator { +parents: graph.parents(r)?, +cur: 0, +}) +} +} + +impl Iterator for ParentsIterator { +type Item = Revision; + +fn next( self) -> Option { +if self.cur > 1 { +return None; +} +let rev = self.parents[self.cur]; +self.cur += 1; +if rev == NULL_REVISION { +if self.cur == 2 { +return None; +} else { +// exceptional branch +return self.next(); +} +} +Some(rev) +} +} + impl PartialDiscovery { /// Create a PartialDiscovery object, with the intent /// of comparing our `::` revset to the contents of another @@ -124,6 +164,7 @@ ) -> Self { PartialDiscovery { undecided: None, +children_cache: None, target_heads: Some(target_heads), graph: graph.clone(), common: MissingAncestors::new(graph, vec![]), @@ -229,6 +270,24 @@ Ok(()) } +fn ensure_children_cache( self) -> Result<(), GraphError> { +if self.children_cache.is_some() { +return Ok(()); +} +self.ensure_undecided()?; + +let mut children: HashMap> = HashMap::new(); +for rev in self.undecided.as_ref().unwrap().iter().cloned() { +for p in self.graph.parents(rev)?.iter().cloned() { +if p != NULL_REVISION { +children.entry(p).or_insert_with(|| Vec::new()).push(rev); +} +} +} +self.children_cache = Some(children); +Ok(()) +} + /// Provide statistics about the current state of the discovery process pub fn stats() -> DiscoveryStats { DiscoveryStats { @@ -256,11 +315,82 @@ None, headrevs, sample, -|r| self.graph.parents(r), +|r| ParentsIterator::graph_parents(, r), Some(size), )?; Ok(sample.into_iter().collect()) } + +fn bidirectional_sample( + self, +size: usize, +) -> Result, GraphError> { +self.ensure_undecided()?; +{ +