D5551: rust-cpython: using MissingAncestors from Python code
gracinet created this revision. Herald added a reviewer: indygreg. Herald added a subscriber: mercurial-devel. Herald added a reviewer: hg-reviewers. REVISION SUMMARY As precedently done with LazyAncestors on cpython.rs, we test for the presence of the 'rustext' module. For now, on MissingAncestors instances, the 'bases' attribute is a method returning a tuple, so we adapt that in the only current caller. We don't have a hg perf command for MissingAncestors, and it's not very clear how to generate relevant `bases` attributes and `revs` argument to create one. Nevertheless, our preliminary observations within discovery indicate a x5 speedup for the missingancestors methods themselves with the Rust version on the pypy and mozilla-central repos. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5551 AFFECTED FILES mercurial/revlog.py mercurial/setdiscovery.py CHANGE DETAILS diff --git a/mercurial/setdiscovery.py b/mercurial/setdiscovery.py --- a/mercurial/setdiscovery.py +++ b/mercurial/setdiscovery.py @@ -186,9 +186,15 @@ """the heads of the known common set""" # heads(common) == heads(common.bases) since common represents # common.bases and all its ancestors +bases = self._common.bases +if callable(bases): +# This happens for Rust exported object, and we don't know +# at the moment how to make an equivalent of property for them +bases = bases() + # The presence of nullrev will confuse heads(). So filter it out. return set(self._repo.revs('heads(%ld)', - self._common.bases - {nullrev})) + (r for r in bases if r != nullrev))) def findcommonheads(ui, local, remote, initialsamplesize=100, diff --git a/mercurial/revlog.py b/mercurial/revlog.py --- a/mercurial/revlog.py +++ b/mercurial/revlog.py @@ -875,6 +875,8 @@ if common is None: common = [nullrev] +if rustext is not None: +return rustext.ancestor.MissingAncestors(self.index, common) return ancestor.incrementalmissingancestors(self.parentrevs, common) def findmissingrevs(self, common=None, heads=None): To: gracinet, indygreg, #hg-reviewers Cc: mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5550: rust-cpython: bindings for MissingAncestors
gracinet created this revision. Herald added subscribers: mercurial-devel, kevincox, durin42. Herald added a reviewer: hg-reviewers. REVISION SUMMARY The exposition is rather straightforward, except for the remove_ancestors_from() method, which forces us to an inefficient conversion between Python sets and Rust HashSets. Two alternatives are proposed in comments: - changing the inner API to "emit" the revision numbers to discard this would be a substantial change, and it would be better only in the cases where there are more to retain than to discard - mutating the Python set directly: this would force us to define an abstract `RevisionSet` trait, and implement it both for plain `HashSet` and for a struct enclosing a Python set with the GIL marker `Python<'p>`, also a non trivial effort. The main (and seemingly only) caller of this method being `mercurial.setdiscovery`, which is currently undergoing serious refactoring, it's not clear whether these improvements would be worth the effort right now, so we're leaving it as-is. Also, in `get_bases()` (will also be used by `setdiscovery`), we'd prefer to build a Python set directly, but we resort to returning a tuple, waiting to hear back from our PR onto rust-cpython about that REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5550 AFFECTED FILES rust/hg-cpython/src/ancestors.rs tests/test-rust-ancestor.py CHANGE DETAILS diff --git a/tests/test-rust-ancestor.py b/tests/test-rust-ancestor.py --- a/tests/test-rust-ancestor.py +++ b/tests/test-rust-ancestor.py @@ -11,7 +11,8 @@ # this would fail already without appropriate ancestor.__package__ from mercurial.rustext.ancestor import ( AncestorsIterator, -LazyAncestors +LazyAncestors, +MissingAncestors, ) try: @@ -105,6 +106,22 @@ # let's check bool for an empty one self.assertFalse(LazyAncestors(idx, [0], 0, False)) +def testmissingancestors(self): +idx = self.parseindex() +missanc = MissingAncestors(idx, [1]) +self.assertTrue(missanc.hasbases()) +self.assertEqual(missanc.missingancestors([3]), [2, 3]) +missanc.addbases({2}) +self.assertEqual(set(missanc.bases()), {1, 2}) +self.assertEqual(missanc.missingancestors([3]), [3]) + +def testmissingancestorsremove(self): +idx = self.parseindex() +missanc = MissingAncestors(idx, [1]) +revs = {0, 1, 2, 3} +missanc.removeancestorsfrom(revs) +self.assertEqual(revs, {2, 3}) + def testrefcount(self): idx = self.parseindex() start_count = sys.getrefcount(idx) diff --git a/rust/hg-cpython/src/ancestors.rs b/rust/hg-cpython/src/ancestors.rs --- a/rust/hg-cpython/src/ancestors.rs +++ b/rust/hg-cpython/src/ancestors.rs @@ -15,22 +15,39 @@ //! The only difference is that it is instantiated with a C `parsers.index` //! instance instead of a parents function. //! +//! - [`MissingAncestors`] is the Rust implementation of +//! `mercurial.ancestor.incrementalmissingancestors`. +//! +//! API differences: +//!+ it is instantiated with a C `parsers.index` +//! instance instead of a parents function. +//!+ `MissingAncestors.bases` is a method returning a tuple instead of +//! a set-valued attribute. We could return a Python set easily if our +//! [PySet PR](https://github.com/dgrunwald/rust-cpython/pull/165) +//! is accepted. +//! //! - [`AncestorsIterator`] is the Rust counterpart of the //! `ancestor._lazyancestorsiter` Python generator. //! From Python, instances of this should be mainly obtained by calling //! `iter()` on a [`LazyAncestors`] instance. //! //! [`LazyAncestors`]: struct.LazyAncestors.html +//! [`MissingAncestors`]: struct.MissingAncestors.html //! [`AncestorsIterator`]: struct.AncestorsIterator.html use cindex::Index; use cpython::{ -ObjectProtocol, PyClone, PyDict, PyModule, PyObject, PyResult, Python, +ObjectProtocol, PyClone, PyDict, PyList, PyModule, PyObject, +PyResult, PyTuple, Python, PythonObject, ToPyObject, }; use exceptions::GraphError; use hg::Revision; -use hg::{AncestorsIterator as CoreIterator, LazyAncestors as CoreLazy}; +use hg::{ +AncestorsIterator as CoreIterator, LazyAncestors as CoreLazy, +MissingAncestors as CoreMissing, +}; use std::cell::RefCell; use std::iter::FromIterator; +use std::collections::HashSet; /// Utility function to convert a Python iterable into various collections /// @@ -119,7 +136,85 @@ }); -/// Create the module, with `__package__` given from parent +py_class!(pub class MissingAncestors |py| { +data inner: RefCell>>; + +def __new__(_cls, index: PyObject, bases: PyObject) -> PyResult { +let bases_vec: Vec = rev_pyiter_collect(py, )?; +let inner = CoreMissing::new(Index::new(py, index)?, bases_vec); +MissingAncestors::create_instance(py,
D5549: rust-cpython: generalised conversion function
gracinet created this revision. Herald added subscribers: mercurial-devel, kevincox, durin42. Herald added a reviewer: hg-reviewers. REVISION SUMMARY Because `hg::ancestors::MissingAncestors` has methods taking some `HashSet` besides `impl IntoIterator` as parameters we'll need the more generic `rev_pyiter_collect()` function to also build these REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5549 AFFECTED FILES rust/hg-cpython/src/ancestors.rs CHANGE DETAILS diff --git a/rust/hg-cpython/src/ancestors.rs b/rust/hg-cpython/src/ancestors.rs --- a/rust/hg-cpython/src/ancestors.rs +++ b/rust/hg-cpython/src/ancestors.rs @@ -30,13 +30,18 @@ use hg::Revision; use hg::{AncestorsIterator as CoreIterator, LazyAncestors as CoreLazy}; use std::cell::RefCell; +use std::iter::FromIterator; -/// Utility function to convert a Python iterable into a Vec +/// Utility function to convert a Python iterable into various collections /// -/// We need this to feed to `AncestorIterators` constructors because -/// a `PyErr` can arise at each step of iteration, whereas our inner objects +/// We need this in particular to feed to various methods of inner objects +/// with `impl IntoIterator` arguments, because +/// a `PyErr` can arise at each step of iteration, whereas these methods /// expect iterables over `Revision`, not over some `Result` -fn reviter_to_revvec(py: Python, revs: PyObject) -> PyResult> { +fn rev_pyiter_collect(py: Python, revs: ) -> PyResult +where +C: FromIterator, +{ revs.iter(py)? .map(|r| r.and_then(|o| o.extract::(py))) .collect() @@ -64,7 +69,7 @@ def __new__(_cls, index: PyObject, initrevs: PyObject, stoprev: Revision, inclusive: bool) -> PyResult { -let initvec = reviter_to_revvec(py, initrevs)?; +let initvec: Vec = rev_pyiter_collect(py, )?; let ait = CoreIterator::new( Index::new(py, index)?, initvec, @@ -103,7 +108,7 @@ def __new__(_cls, index: PyObject, initrevs: PyObject, stoprev: Revision, inclusive: bool) -> PyResult { -let initvec = reviter_to_revvec(py, initrevs)?; +let initvec: Vec = rev_pyiter_collect(py, )?; let lazy = CoreLazy::new(Index::new(py, index)?, initvec, stoprev, inclusive) To: gracinet, #hg-reviewers Cc: durin42, kevincox, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5548: rust-cpython: style consistency leftovers
gracinet created this revision. Herald added subscribers: mercurial-devel, kevincox, durin42. Herald added a reviewer: hg-reviewers. REVISION SUMMARY In particular, during review of `LazyAncestors` bindings, most `match` statements for error conversion have been replaced by higher level methods of `Result` and most personal insecurity comments have been removed. This makes it more systematic. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5548 AFFECTED FILES rust/hg-cpython/src/ancestors.rs CHANGE DETAILS diff --git a/rust/hg-cpython/src/ancestors.rs b/rust/hg-cpython/src/ancestors.rs --- a/rust/hg-cpython/src/ancestors.rs +++ b/rust/hg-cpython/src/ancestors.rs @@ -43,7 +43,6 @@ } py_class!(pub class AncestorsIterator |py| { -// TODO RW lock ? data inner: RefCell>>; def __next__() -> PyResult> { @@ -55,7 +54,8 @@ } def __contains__(, rev: Revision) -> PyResult { -self.inner(py).borrow_mut().contains(rev).map_err(|e| GraphError::pynew(py, e)) +self.inner(py).borrow_mut().contains(rev) +.map_err(|e| GraphError::pynew(py, e)) } def __iter__() -> PyResult { @@ -65,14 +65,13 @@ def __new__(_cls, index: PyObject, initrevs: PyObject, stoprev: Revision, inclusive: bool) -> PyResult { let initvec = reviter_to_revvec(py, initrevs)?; -let ait = match CoreIterator::new(Index::new(py, index)?, - initvec, stoprev, - inclusive) { -Ok(ait) => ait, -Err(e) => { -return Err(GraphError::pynew(py, e)); -} -}; +let ait = CoreIterator::new( +Index::new(py, index)?, +initvec, +stoprev, +inclusive, +) +.map_err(|e| GraphError::pynew(py, e))?; AncestorsIterator::from_inner(py, ait) } To: gracinet, #hg-reviewers Cc: durin42, kevincox, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5546: rust-cpython: rustdoc improvements
gracinet created this revision. Herald added subscribers: mercurial-devel, kevincox, durin42. Herald added a reviewer: hg-reviewers. REVISION SUMMARY By default, `cargo doc` builds the documentation for public constructs only, so we make public those that can. Since `cindex` is not safe, we keep it private. Unfortunately, the macro syntax of rust-cpython doesn't allow us to document the classes directly, so we resort to do that at the module level. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5546 AFFECTED FILES rust/hg-cpython/src/ancestors.rs rust/hg-cpython/src/exceptions.rs rust/hg-cpython/src/lib.rs CHANGE DETAILS diff --git a/rust/hg-cpython/src/lib.rs b/rust/hg-cpython/src/lib.rs --- a/rust/hg-cpython/src/lib.rs +++ b/rust/hg-cpython/src/lib.rs @@ -12,7 +12,8 @@ //! it behaves as the `cext` package. //! //! Example: -//! ``` +//! +//! ```text //! >>> from mercurial.rustext import ancestor //! >>> ancestor.__doc__ //! 'Generic DAG ancestor algorithms - Rust implementation' @@ -23,9 +24,9 @@ extern crate hg; extern crate libc; -mod ancestors; +pub mod ancestors; mod cindex; -mod exceptions; +pub mod exceptions; py_module_initializer!(rustext, initrustext, PyInit_rustext, |py, m| { m.add( diff --git a/rust/hg-cpython/src/exceptions.rs b/rust/hg-cpython/src/exceptions.rs --- a/rust/hg-cpython/src/exceptions.rs +++ b/rust/hg-cpython/src/exceptions.rs @@ -1,3 +1,15 @@ +// ancestors.rs +// +// Copyright 2018 Georges Racinet +// +// This software may be used and distributed according to the terms of the +// GNU General Public License version 2 or any later version. + +//! Bindings for Rust errors +//! +//! [`GraphError`] exposes `hg::GraphError` as a subclass of `ValueError` +//! +//! [`GraphError`]: struct.GraphError.html use cpython::exc::ValueError; use cpython::{PyErr, Python}; use hg; diff --git a/rust/hg-cpython/src/ancestors.rs b/rust/hg-cpython/src/ancestors.rs --- a/rust/hg-cpython/src/ancestors.rs +++ b/rust/hg-cpython/src/ancestors.rs @@ -5,8 +5,23 @@ // This software may be used and distributed according to the terms of the // GNU General Public License version 2 or any later version. -//! Bindings for the hg::ancestors module provided by the +//! Bindings for the `hg::ancestors` module provided by the //! `hg-core` crate. From Python, this will be seen as `rustext.ancestor` +//! and can be used as replacement for the the pure `ancestor` Python module. +//! +//! # Classes visible from Python: +//! - [`LazyAncestors`] is the Rust implementation of +//! `mercurial.ancestor.lazyancestors`. +//! The only difference is that it is instantiated with a C `parsers.index` +//! instance instead of a parents function. +//! +//! - [`AncestorsIterator`] is the Rust counterpart of the +//! `ancestor._lazyancestorsiter` Python generator. +//! From Python, instances of this should be mainly obtained by calling +//! `iter()` on a [`LazyAncestors`] instance. +//! +//! [`LazyAncestors`]: struct.LazyAncestors.html +//! [`AncestorsIterator`]: struct.AncestorsIterator.html use cindex::Index; use cpython::{ ObjectProtocol, PyClone, PyDict, PyModule, PyObject, PyResult, Python, @@ -19,16 +34,16 @@ /// Utility function to convert a Python iterable into a Vec /// -/// We need this to feed to AncestorIterators constructors because -/// a PyErr can arise at each step of iteration, whereas our inner objects -/// expect iterables over Revision, not over some Result +/// We need this to feed to `AncestorIterators` constructors because +/// a `PyErr` can arise at each step of iteration, whereas our inner objects +/// expect iterables over `Revision`, not over some `Result` fn reviter_to_revvec(py: Python, revs: PyObject) -> PyResult> { revs.iter(py)? .map(|r| r.and_then(|o| o.extract::(py))) .collect() } -py_class!(class AncestorsIterator |py| { +py_class!(pub class AncestorsIterator |py| { // TODO RW lock ? data inner: RefCell>>; @@ -70,7 +85,7 @@ } } -py_class!(class LazyAncestors |py| { +py_class!(pub class LazyAncestors |py| { data inner: RefCell>>; def __contains__(, rev: Revision) -> PyResult { @@ -101,7 +116,7 @@ }); -/// Create the module, with __package__ given from parent +/// Create the module, with `__package__` given from parent pub fn init_module(py: Python, package: ) -> PyResult { let dotted_name = !("{}.ancestor", package); let m = PyModule::new(py, dotted_name)?; To: gracinet, #hg-reviewers Cc: durin42, kevincox, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5547: rust-cpython: consistency in use of hg-core constructs
gracinet created this revision. Herald added subscribers: mercurial-devel, kevincox, durin42. Herald added a reviewer: hg-reviewers. REVISION SUMMARY While not everybody likes the `CoreLazy` and `CoreIterator` aliases, it's better not to mix them with direct references. Note: it's quite possible in the future that these would stop being exposed at the top of the `hg` crate REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5547 AFFECTED FILES rust/hg-cpython/src/ancestors.rs CHANGE DETAILS diff --git a/rust/hg-cpython/src/ancestors.rs b/rust/hg-cpython/src/ancestors.rs --- a/rust/hg-cpython/src/ancestors.rs +++ b/rust/hg-cpython/src/ancestors.rs @@ -27,7 +27,6 @@ ObjectProtocol, PyClone, PyDict, PyModule, PyObject, PyResult, Python, }; use exceptions::GraphError; -use hg; use hg::Revision; use hg::{AncestorsIterator as CoreIterator, LazyAncestors as CoreLazy}; use std::cell::RefCell; @@ -66,9 +65,9 @@ def __new__(_cls, index: PyObject, initrevs: PyObject, stoprev: Revision, inclusive: bool) -> PyResult { let initvec = reviter_to_revvec(py, initrevs)?; -let ait = match hg::AncestorsIterator::new(Index::new(py, index)?, - initvec, stoprev, - inclusive) { +let ait = match CoreIterator::new(Index::new(py, index)?, + initvec, stoprev, + inclusive) { Ok(ait) => ait, Err(e) => { return Err(GraphError::pynew(py, e)); To: gracinet, #hg-reviewers Cc: durin42, kevincox, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5442: rust-cpython: using the new bindings from Python
This revision was automatically updated to reflect the committed changes. Closed by commit rHG536c83535cbd: rust-cpython: using the new bindings from Python (authored by gracinet, committed by ). REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D5442?vs=13008=13015 REVISION DETAIL https://phab.mercurial-scm.org/D5442 AFFECTED FILES mercurial/revlog.py CHANGE DETAILS diff --git a/mercurial/revlog.py b/mercurial/revlog.py --- a/mercurial/revlog.py +++ b/mercurial/revlog.py @@ -97,6 +97,11 @@ REVIDX_RAWTEXT_CHANGING_FLAGS parsers = policy.importmod(r'parsers') +try: +from . import rustext +rustext.__name__ # force actual import (see hgdemandimport) +except ImportError: +rustext = None # Aliased for performance. _zlibdecompress = zlib.decompress @@ -779,12 +784,17 @@ for r in revs: checkrev(r) # and we're sure ancestors aren't filtered as well -if util.safehasattr(parsers, 'rustlazyancestors'): -return ancestor.rustlazyancestors( -self.index, revs, -stoprev=stoprev, inclusive=inclusive) -return ancestor.lazyancestors(self._uncheckedparentrevs, revs, - stoprev=stoprev, inclusive=inclusive) + +if rustext is not None: +lazyancestors = rustext.ancestor.LazyAncestors +arg = self.index +elif util.safehasattr(parsers, 'rustlazyancestors'): +lazyancestors = ancestor.rustlazyancestors +arg = self.index +else: +lazyancestors = ancestor.lazyancestors +arg = self._uncheckedparentrevs +return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive) def descendants(self, revs): return dagop.descendantrevs(revs, self.revs, self.parentrevs) To: gracinet, indygreg, #hg-reviewers Cc: yuja, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5441: rust-cpython: binding for LazyAncestors
This revision was automatically updated to reflect the committed changes. Closed by commit rHGb31a41f24864: rust-cpython: binding for LazyAncestors (authored by gracinet, committed by ). REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D5441?vs=13007=13014 REVISION DETAIL https://phab.mercurial-scm.org/D5441 AFFECTED FILES rust/hg-cpython/src/ancestors.rs tests/test-rust-ancestor.py CHANGE DETAILS diff --git a/tests/test-rust-ancestor.py b/tests/test-rust-ancestor.py --- a/tests/test-rust-ancestor.py +++ b/tests/test-rust-ancestor.py @@ -9,7 +9,10 @@ rustext = None else: # this would fail already without appropriate ancestor.__package__ -from mercurial.rustext.ancestor import AncestorsIterator +from mercurial.rustext.ancestor import ( +AncestorsIterator, +LazyAncestors +) try: from mercurial.cext import parsers as cparsers @@ -71,6 +74,37 @@ ait = AncestorsIterator(idx, [3], 0, False) self.assertEqual([r for r in ait], [2, 1, 0]) +def testlazyancestors(self): +idx = self.parseindex() +start_count = sys.getrefcount(idx) # should be 2 (see Python doc) +self.assertEqual({i: (r[5], r[6]) for i, r in enumerate(idx)}, + {0: (-1, -1), + 1: (0, -1), + 2: (1, -1), + 3: (2, -1)}) +lazy = LazyAncestors(idx, [3], 0, True) +# we have two more references to the index: +# - in its inner iterator for __contains__ and __bool__ +# - in the LazyAncestors instance itself (to spawn new iterators) +self.assertEqual(sys.getrefcount(idx), start_count + 2) + +self.assertTrue(2 in lazy) +self.assertTrue(bool(lazy)) +self.assertEqual(list(lazy), [3, 2, 1, 0]) +# a second time to validate that we spawn new iterators +self.assertEqual(list(lazy), [3, 2, 1, 0]) + +# now let's watch the refcounts closer +ait = iter(lazy) +self.assertEqual(sys.getrefcount(idx), start_count + 3) +del ait +self.assertEqual(sys.getrefcount(idx), start_count + 2) +del lazy +self.assertEqual(sys.getrefcount(idx), start_count) + +# let's check bool for an empty one +self.assertFalse(LazyAncestors(idx, [0], 0, False)) + def testrefcount(self): idx = self.parseindex() start_count = sys.getrefcount(idx) @@ -87,7 +121,7 @@ # and removing ref to the index after iterator init is no issue ait = AncestorsIterator(idx, [3], 0, True) del idx -self.assertEqual([r for r in ait], [3, 2, 1, 0]) +self.assertEqual(list(ait), [3, 2, 1, 0]) def testgrapherror(self): data = (data_non_inlined[:64 + 27] + diff --git a/rust/hg-cpython/src/ancestors.rs b/rust/hg-cpython/src/ancestors.rs --- a/rust/hg-cpython/src/ancestors.rs +++ b/rust/hg-cpython/src/ancestors.rs @@ -13,8 +13,8 @@ }; use exceptions::GraphError; use hg; -use hg::AncestorsIterator as CoreIterator; use hg::Revision; +use hg::{AncestorsIterator as CoreIterator, LazyAncestors as CoreLazy}; use std::cell::RefCell; /// Utility function to convert a Python iterable into a Vec @@ -70,6 +70,37 @@ } } +py_class!(class LazyAncestors |py| { +data inner: RefCell>>; + +def __contains__(, rev: Revision) -> PyResult { +self.inner(py) +.borrow_mut() +.contains(rev) +.map_err(|e| GraphError::pynew(py, e)) +} + +def __iter__() -> PyResult { +AncestorsIterator::from_inner(py, self.inner(py).borrow().iter()) +} + +def __bool__() -> PyResult { +Ok(!self.inner(py).borrow().is_empty()) +} + +def __new__(_cls, index: PyObject, initrevs: PyObject, stoprev: Revision, +inclusive: bool) -> PyResult { +let initvec = reviter_to_revvec(py, initrevs)?; + +let lazy = +CoreLazy::new(Index::new(py, index)?, initvec, stoprev, inclusive) +.map_err(|e| GraphError::pynew(py, e))?; + +Self::create_instance(py, RefCell::new(Box::new(lazy))) +} + +}); + /// Create the module, with __package__ given from parent pub fn init_module(py: Python, package: ) -> PyResult { let dotted_name = !("{}.ancestor", package); @@ -81,6 +112,7 @@ "Generic DAG ancestor algorithms - Rust implementation", )?; m.add_class::(py)?; +m.add_class::(py)?; let sys = PyModule::import(py, "sys")?; let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?; To: gracinet, #hg-reviewers, kevincox Cc: yuja, durin42, kevincox, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5443: ancestor: uniformity of calling lazyancestors classes
gracinet abandoned this revision. gracinet added a comment. Given this conversation, and its outcome as materialised with latest versions of https://phab.mercurial-scm.org/D5441 and https://phab.mercurial-scm.org/D5442, we can now simply abandon this one. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5443 To: gracinet, indygreg, #hg-reviewers Cc: yuja, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5442: rust-cpython: using the new bindings from Python
gracinet updated this revision to Diff 13008. REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D5442?vs=12879=13008 REVISION DETAIL https://phab.mercurial-scm.org/D5442 AFFECTED FILES mercurial/revlog.py CHANGE DETAILS diff --git a/mercurial/revlog.py b/mercurial/revlog.py --- a/mercurial/revlog.py +++ b/mercurial/revlog.py @@ -97,6 +97,11 @@ REVIDX_RAWTEXT_CHANGING_FLAGS parsers = policy.importmod(r'parsers') +try: +from . import rustext +rustext.__name__ # force actual import (see hgdemandimport) +except ImportError: +rustext = None # Aliased for performance. _zlibdecompress = zlib.decompress @@ -779,12 +784,17 @@ for r in revs: checkrev(r) # and we're sure ancestors aren't filtered as well -if util.safehasattr(parsers, 'rustlazyancestors'): -return ancestor.rustlazyancestors( -self.index, revs, -stoprev=stoprev, inclusive=inclusive) -return ancestor.lazyancestors(self._uncheckedparentrevs, revs, - stoprev=stoprev, inclusive=inclusive) + +if rustext is not None: +lazyancestors = rustext.ancestor.LazyAncestors +arg = self.index +elif util.safehasattr(parsers, 'rustlazyancestors'): +lazyancestors = ancestor.rustlazyancestors +arg = self.index +else: +lazyancestors = ancestor.lazyancestors +arg = self._uncheckedparentrevs +return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive) def descendants(self, revs): return dagop.descendantrevs(revs, self.revs, self.parentrevs) To: gracinet, indygreg, #hg-reviewers Cc: yuja, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5441: rust-cpython: binding for LazyAncestors
gracinet added a comment. @yuja this new version exports the LazyAncestors name to Python, expecting `revlog.ancestors` to do the right thing (as a factory then). I agree that the destiny of the older `rustlazyancestors` is to disappear, but I'd prefer to wait a bit before doing that @kevincox I'd agree in principle, but since we only have one caller, whose role is precisely to abstract those differencies, we decided that we actually don't need to maintain that convention (and rustext can ne considered a foreign module, so that this is not really a breaking of the naming convention for new classes within hg) REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5441 To: gracinet, #hg-reviewers, kevincox Cc: yuja, durin42, kevincox, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5441: rust-cpython: binding for LazyAncestors
gracinet updated this revision to Diff 13007. gracinet marked an inline comment as done. gracinet edited the summary of this revision. REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D5441?vs=12952=13007 REVISION DETAIL https://phab.mercurial-scm.org/D5441 AFFECTED FILES rust/hg-cpython/src/ancestors.rs tests/test-rust-ancestor.py CHANGE DETAILS diff --git a/tests/test-rust-ancestor.py b/tests/test-rust-ancestor.py --- a/tests/test-rust-ancestor.py +++ b/tests/test-rust-ancestor.py @@ -9,7 +9,10 @@ rustext = None else: # this would fail already without appropriate ancestor.__package__ -from mercurial.rustext.ancestor import AncestorsIterator +from mercurial.rustext.ancestor import ( +AncestorsIterator, +LazyAncestors +) try: from mercurial.cext import parsers as cparsers @@ -71,6 +74,37 @@ ait = AncestorsIterator(idx, [3], 0, False) self.assertEqual([r for r in ait], [2, 1, 0]) +def testlazyancestors(self): +idx = self.parseindex() +start_count = sys.getrefcount(idx) # should be 2 (see Python doc) +self.assertEqual({i: (r[5], r[6]) for i, r in enumerate(idx)}, + {0: (-1, -1), + 1: (0, -1), + 2: (1, -1), + 3: (2, -1)}) +lazy = LazyAncestors(idx, [3], 0, True) +# we have two more references to the index: +# - in its inner iterator for __contains__ and __bool__ +# - in the LazyAncestors instance itself (to spawn new iterators) +self.assertEqual(sys.getrefcount(idx), start_count + 2) + +self.assertTrue(2 in lazy) +self.assertTrue(bool(lazy)) +self.assertEqual(list(lazy), [3, 2, 1, 0]) +# a second time to validate that we spawn new iterators +self.assertEqual(list(lazy), [3, 2, 1, 0]) + +# now let's watch the refcounts closer +ait = iter(lazy) +self.assertEqual(sys.getrefcount(idx), start_count + 3) +del ait +self.assertEqual(sys.getrefcount(idx), start_count + 2) +del lazy +self.assertEqual(sys.getrefcount(idx), start_count) + +# let's check bool for an empty one +self.assertFalse(LazyAncestors(idx, [0], 0, False)) + def testrefcount(self): idx = self.parseindex() start_count = sys.getrefcount(idx) @@ -87,7 +121,7 @@ # and removing ref to the index after iterator init is no issue ait = AncestorsIterator(idx, [3], 0, True) del idx -self.assertEqual([r for r in ait], [3, 2, 1, 0]) +self.assertEqual(list(ait), [3, 2, 1, 0]) def testgrapherror(self): data = (data_non_inlined[:64 + 27] + diff --git a/rust/hg-cpython/src/ancestors.rs b/rust/hg-cpython/src/ancestors.rs --- a/rust/hg-cpython/src/ancestors.rs +++ b/rust/hg-cpython/src/ancestors.rs @@ -13,8 +13,8 @@ }; use exceptions::GraphError; use hg; -use hg::AncestorsIterator as CoreIterator; use hg::Revision; +use hg::{AncestorsIterator as CoreIterator, LazyAncestors as CoreLazy}; use std::cell::RefCell; /// Utility function to convert a Python iterable into a Vec @@ -70,6 +70,37 @@ } } +py_class!(class LazyAncestors |py| { +data inner: RefCell>>; + +def __contains__(, rev: Revision) -> PyResult { +self.inner(py) +.borrow_mut() +.contains(rev) +.map_err(|e| GraphError::pynew(py, e)) +} + +def __iter__() -> PyResult { +AncestorsIterator::from_inner(py, self.inner(py).borrow().iter()) +} + +def __bool__() -> PyResult { +Ok(!self.inner(py).borrow().is_empty()) +} + +def __new__(_cls, index: PyObject, initrevs: PyObject, stoprev: Revision, +inclusive: bool) -> PyResult { +let initvec = reviter_to_revvec(py, initrevs)?; + +let lazy = +CoreLazy::new(Index::new(py, index)?, initvec, stoprev, inclusive) +.map_err(|e| GraphError::pynew(py, e))?; + +Self::create_instance(py, RefCell::new(Box::new(lazy))) +} + +}); + /// Create the module, with __package__ given from parent pub fn init_module(py: Python, package: ) -> PyResult { let dotted_name = !("{}.ancestor", package); @@ -81,6 +112,7 @@ "Generic DAG ancestor algorithms - Rust implementation", )?; m.add_class::(py)?; +m.add_class::(py)?; let sys = PyModule::import(py, "sys")?; let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?; To: gracinet, #hg-reviewers, kevincox Cc: yuja, durin42, kevincox, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5442: rust-cpython: using the new bindings from Python
gracinet added a comment. @yuja about the lazy importer, and just to clarify: so you're saying it's not worth it to delay until rustext is actually used. I'm fine with that, will do it. I didn't amend that patch yet, but you might have received notifications due to evolve of the whole stack. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5442 To: gracinet, indygreg, #hg-reviewers Cc: yuja, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5441: rust-cpython: binding for LazyAncestors
gracinet marked 2 inline comments as done. gracinet added a comment. > Does the class name actually matter? Personally I don't care if > lazyancestors() function returns a LazyAncestors object. We'll anyway > need a wrapper function to make pure ancestors and rustext ancestors > compatible. Yes, that's in line with your other comments, whereas I was pursuing the goal of putting the whole `ancestor` module under responsibility of `policy.importmod` : in that case, it would have been necessary to have the same name. I think I still prefer to put all the dispatching in the `revlog` module (ie have it play the factory role) rather than put a factory function inside `ancestor`. Unless you disagree, I'm going to make a new revision going in that direction, ie - the class would be called `LazyAncestors` - the dispatching would stay in `revlog` - maybe for clarity, `ancestor.rustlazyancestors` should be renamed and in the next series (bindings for `MissingAncestors`), similar dispatching would occur in `incrementalmissingrevs` For now, `revlog` can choose according to whether `rustext` is (fully) importable (some other comment about that). Cheers, REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5441 To: gracinet, #hg-reviewers, kevincox Cc: yuja, durin42, kevincox, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5438: rust-cpython: implementing Graph using C parents function
gracinet added a comment. @yuja, thanks for the queueing ! > Index could be backed by e.g. Rc> to allow any objects to own > copies, but I don't feel like this is a good design. If we want one day to have Mercurial work in a multi-threaded way, I guess that something like `Arc>` would be about necessary for an index implemented in Rust. But anyway, we are thinking alike. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5438 To: gracinet, #hg-reviewers Cc: yuja, durin42, kevincox, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5438: rust-cpython: implementing Graph using C parents function
This revision was automatically updated to reflect the committed changes. Closed by commit rHG4c25038c112c: rust-cpython: implement Graph using C parents function (authored by gracinet, committed by ). CHANGED PRIOR TO COMMIT https://phab.mercurial-scm.org/D5438?vs=12949=12967#toc REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D5438?vs=12949=12967 REVISION DETAIL https://phab.mercurial-scm.org/D5438 AFFECTED FILES mercurial/cext/revlog.c rust/hg-cpython/src/cindex.rs rust/hg-cpython/src/lib.rs CHANGE DETAILS diff --git a/rust/hg-cpython/src/lib.rs b/rust/hg-cpython/src/lib.rs --- a/rust/hg-cpython/src/lib.rs +++ b/rust/hg-cpython/src/lib.rs @@ -21,8 +21,10 @@ #[macro_use] extern crate cpython; extern crate hg; +extern crate libc; mod ancestors; +mod cindex; mod exceptions; py_module_initializer!(rustext, initrustext, PyInit_rustext, |py, m| { diff --git a/rust/hg-cpython/src/cindex.rs b/rust/hg-cpython/src/cindex.rs new file mode 100644 --- /dev/null +++ b/rust/hg-cpython/src/cindex.rs @@ -0,0 +1,121 @@ +// cindex.rs +// +// Copyright 2018 Georges Racinet +// +// This software may be used and distributed according to the terms of the +// GNU General Public License version 2 or any later version. + +//! Bindings to use the Index defined by the parsers C extension +//! +//! Ideally, we should use an Index entirely implemented in Rust, +//! but this will take some time to get there. +#[cfg(feature = "python27")] +extern crate python27_sys as python_sys; +#[cfg(feature = "python3")] +extern crate python3_sys as python_sys; + +use self::python_sys::PyCapsule_Import; +use cpython::{PyErr, PyObject, PyResult, Python}; +use hg::{Graph, GraphError, Revision}; +use libc::c_int; +use std::ffi::CStr; +use std::mem::transmute; + +type IndexParentsFn = unsafe extern "C" fn( +index: *mut python_sys::PyObject, +rev: c_int, +ps: *mut [c_int; 2], +) -> c_int; + +/// A `Graph` backed up by objects and functions from revlog.c +/// +/// This implementation of the `Graph` trait, relies on (pointers to) +/// - the C index object (`index` member) +/// - the `index_get_parents()` function (`parents` member) +/// +/// # Safety +/// +/// The C index itself is mutable, and this Rust exposition is **not +/// protected by the GIL**, meaning that this construct isn't safe with respect +/// to Python threads. +/// +/// All callers of this `Index` must acquire the GIL and must not release it +/// while working. +/// +/// # TODO find a solution to make it GIL safe again. +/// +/// This is non trivial, and can wait until we have a clearer picture with +/// more Rust Mercurial constructs. +/// +/// One possibility would be to a `GILProtectedIndex` wrapper enclosing +/// a `Python<'p>` marker and have it be the one implementing the +/// `Graph` trait, but this would mean the `Graph` implementor would become +/// likely to change between subsequent method invocations of the `hg-core` +/// objects (a serious change of the `hg-core` API): +/// either exposing ways to mutate the `Graph`, or making it a non persistent +/// parameter in the relevant methods that need one. +/// +/// Another possibility would be to introduce an abstract lock handle into +/// the core API, that would be tied to `GILGuard` / `Python<'p>` +/// in the case of the `cpython` crate bindings yet could leave room for other +/// mechanisms in other contexts. + +pub struct Index { +index: PyObject, +parents: IndexParentsFn, +} + +impl Index { +pub fn new(py: Python, index: PyObject) -> PyResult { +Ok(Index { +index: index, +parents: decapsule_parents_fn(py)?, +}) +} +} + +impl Graph for Index { +/// wrap a call to the C extern parents function +fn parents(, rev: Revision) -> Result<[Revision; 2], GraphError> { +let mut res: [c_int; 2] = [0; 2]; +let code = unsafe { +(self.parents)( +self.index.as_ptr(), +rev as c_int, + res as *mut [c_int; 2], +) +}; +match code { +0 => Ok(res), +_ => Err(GraphError::ParentOutOfRange(rev)), +} +} +} + +/// Return the `index_get_parents` function of the parsers C Extension module. +/// +/// A pointer to the function is stored in the `parsers` module as a +/// standard [Python capsule](https://docs.python.org/2/c-api/capsule.html). +/// +/// This function retrieves the capsule and casts the function pointer +/// +/// Casting function pointers is one of the rare cases of +/// legitimate use cases of `mem::transmute()` (see +/// https://doc.rust-lang.org/std/mem/fn.transmute.html of +/// `mem::transmute()`. +/// It is inappropriate for architectures where +/// function and data pointer sizes differ (so-called "Harvard +/// architectures"), but these are nowadays mostly DSPs +/// and micr
D5440: rust: core implementation for lazyancestors
This revision was automatically updated to reflect the committed changes. Closed by commit rHGef54bd33b476: rust: core implementation for lazyancestors (authored by gracinet, committed by ). REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D5440?vs=12954=12968 REVISION DETAIL https://phab.mercurial-scm.org/D5440 AFFECTED FILES rust/hg-core/src/ancestors.rs rust/hg-core/src/lib.rs rust/hg-cpython/src/cindex.rs CHANGE DETAILS diff --git a/rust/hg-cpython/src/cindex.rs b/rust/hg-cpython/src/cindex.rs --- a/rust/hg-cpython/src/cindex.rs +++ b/rust/hg-cpython/src/cindex.rs @@ -15,7 +15,7 @@ extern crate python3_sys as python_sys; use self::python_sys::PyCapsule_Import; -use cpython::{PyErr, PyObject, PyResult, Python}; +use cpython::{PyClone, PyErr, PyObject, PyResult, Python}; use hg::{Graph, GraphError, Revision}; use libc::c_int; use std::ffi::CStr; @@ -59,7 +59,6 @@ /// the core API, that would be tied to `GILGuard` / `Python<'p>` /// in the case of the `cpython` crate bindings yet could leave room for other /// mechanisms in other contexts. - pub struct Index { index: PyObject, parents: IndexParentsFn, @@ -74,6 +73,16 @@ } } +impl Clone for Index { +fn clone() -> Self { +let guard = Python::acquire_gil(); +Index { +index: self.index.clone_ref(guard.python()), +parents: self.parents.clone(), +} +} +} + impl Graph for Index { /// wrap a call to the C extern parents function fn parents(, rev: Revision) -> Result<[Revision; 2], GraphError> { diff --git a/rust/hg-core/src/lib.rs b/rust/hg-core/src/lib.rs --- a/rust/hg-core/src/lib.rs +++ b/rust/hg-core/src/lib.rs @@ -3,7 +3,7 @@ // This software may be used and distributed according to the terms of the // GNU General Public License version 2 or any later version. mod ancestors; -pub use ancestors::{AncestorsIterator, MissingAncestors}; +pub use ancestors::{AncestorsIterator, LazyAncestors, MissingAncestors}; /// Mercurial revision numbers /// diff --git a/rust/hg-core/src/ancestors.rs b/rust/hg-core/src/ancestors.rs --- a/rust/hg-core/src/ancestors.rs +++ b/rust/hg-core/src/ancestors.rs @@ -25,6 +25,15 @@ stoprev: Revision, } +/// Lazy ancestors set, backed by AncestorsIterator +pub struct LazyAncestors { +graph: G, +containsiter: AncestorsIterator, +initrevs: Vec, +stoprev: Revision, +inclusive: bool, +} + pub struct MissingAncestors { graph: G, bases: HashSet, @@ -98,9 +107,31 @@ } Ok(false) } + +pub fn peek() -> Option { +self.visit.peek().map(|| r) +} + +/// Tell if the iterator is about an empty set +/// +/// The result does not depend whether the iterator has been consumed +/// or not. +/// This is mostly meant for iterators backing a lazy ancestors set +pub fn is_empty() -> bool { +if self.visit.len() > 0 { +return false; +} +if self.seen.len() > 1 { +return false; +} +// at this point, the seen set is at most a singleton. +// If not `self.inclusive`, it's still possible that it has only +// the null revision +self.seen.is_empty() || self.seen.contains(_REVISION) +} } -/// Main implementation. +/// Main implementation for the iterator /// /// The algorithm is the same as in `_lazyancestorsiter()` from `ancestors.py` /// with a few non crucial differences: @@ -137,6 +168,49 @@ } } +impl LazyAncestors { +pub fn new( +graph: G, +initrevs: impl IntoIterator, +stoprev: Revision, +inclusive: bool, +) -> Result { +let v: Vec = initrevs.into_iter().collect(); +Ok(LazyAncestors { +graph: graph.clone(), +containsiter: AncestorsIterator::new( +graph, +v.iter().cloned(), +stoprev, +inclusive, +)?, +initrevs: v, +stoprev: stoprev, +inclusive: inclusive, +}) +} + +pub fn contains( self, rev: Revision) -> Result { +self.containsiter.contains(rev) +} + +pub fn is_empty() -> bool { +self.containsiter.is_empty() +} + +pub fn iter() -> AncestorsIterator { +// the arguments being the same as for self.containsiter, we know +// for sure that AncestorsIterator constructor can't fail +AncestorsIterator::new( +self.graph.clone(), +self.initrevs.iter().cloned(), +self.stoprev, +self.inclusive, +) +.unwrap() +} +} + impl MissingAncestors { pub fn new(graph: G, bases: impl IntoIterator) -> Self { let mut bases: HashSet = bases.into_iter().collect(); @@ -407,7 +481,40 @@ assert!(!lazy.contains(NULL_REVISION).unwrap()); } +#[test] +fn test_peek() { +let
D5439: rust-cpython: binding for AncestorsIterator
This revision was automatically updated to reflect the committed changes. Closed by commit rHGd9f439fcdb4c: rust-cpython: binding for AncestorsIterator (authored by gracinet, committed by ). REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D5439?vs=12953=12966 REVISION DETAIL https://phab.mercurial-scm.org/D5439 AFFECTED FILES rust/hg-cpython/src/ancestors.rs tests/test-rust-ancestor.py CHANGE DETAILS diff --git a/tests/test-rust-ancestor.py b/tests/test-rust-ancestor.py --- a/tests/test-rust-ancestor.py +++ b/tests/test-rust-ancestor.py @@ -1,19 +1,46 @@ from __future__ import absolute_import +import sys import unittest try: from mercurial import rustext +rustext.__name__ # trigger immediate actual import except ImportError: rustext = None +else: +# this would fail already without appropriate ancestor.__package__ +from mercurial.rustext.ancestor import AncestorsIterator try: from mercurial.cext import parsers as cparsers except ImportError: cparsers = None +# picked from test-parse-index2, copied rather than imported +# so that it stays stable even if test-parse-index2 changes or disappears. +data_non_inlined = ( +b'\x00\x00\x00\x01\x00\x00\x00\x00\x00\x01D\x19' +b'\x00\x07e\x12\x00\x00\x00\x00\x00\x00\x00\x00\xff\xff\xff\xff' +b'\xff\xff\xff\xff\xd1\xf4\xbb\xb0\xbe\xfc\x13\xbd\x8c\xd3\x9d' +b'\x0f\xcd\xd9;\x8c\x07\x8cJ/\x00\x00\x00\x00\x00\x00\x00\x00\x00' +b'\x00\x00\x00\x00\x00\x00\x01D\x19\x00\x00\x00\x00\x00\xdf\x00' +b'\x00\x01q\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x00\xff' +b'\xff\xff\xff\xc1\x12\xb9\x04\x96\xa4Z1t\x91\xdfsJ\x90\xf0\x9bh' +b'\x07l&\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' +b'\x00\x01D\xf8\x00\x00\x00\x00\x01\x1b\x00\x00\x01\xb8\x00\x00' +b'\x00\x01\x00\x00\x00\x02\x00\x00\x00\x01\xff\xff\xff\xff\x02\n' +b'\x0e\xc6&\xa1\x92\xae6\x0b\x02i\xfe-\xe5\xbao\x05\xd1\xe7\x00' +b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01F' +b'\x13\x00\x00\x00\x00\x01\xec\x00\x00\x03\x06\x00\x00\x00\x01' +b'\x00\x00\x00\x03\x00\x00\x00\x02\xff\xff\xff\xff\x12\xcb\xeby1' +b'\xb6\r\x98B\xcb\x07\xbd`\x8f\x92\xd9\xc4\x84\xbdK\x00\x00\x00' +b'\x00\x00\x00\x00\x00\x00\x00\x00\x00' +) + + @unittest.skipIf(rustext is None or cparsers is None, - "rustext.ancestor or the C Extension parsers module " - "it relies on is not available") + "rustext or the C Extension parsers module " + "ancestor relies on is not available") class rustancestorstest(unittest.TestCase): """Test the correctness of binding to Rust code. @@ -27,11 +54,52 @@ Algorithmic correctness is asserted by the Rust unit tests. """ -def testmodule(self): -self.assertTrue('DAG' in rustext.ancestor.__doc__) +def parseindex(self): +return cparsers.parse_index2(data_non_inlined, False)[0] + +def testiteratorrevlist(self): +idx = self.parseindex() +# checking test assumption about the index binary data: +self.assertEqual({i: (r[5], r[6]) for i, r in enumerate(idx)}, + {0: (-1, -1), + 1: (0, -1), + 2: (1, -1), + 3: (2, -1)}) +ait = AncestorsIterator(idx, [3], 0, True) +self.assertEqual([r for r in ait], [3, 2, 1, 0]) + +ait = AncestorsIterator(idx, [3], 0, False) +self.assertEqual([r for r in ait], [2, 1, 0]) + +def testrefcount(self): +idx = self.parseindex() +start_count = sys.getrefcount(idx) + +# refcount increases upon iterator init... +ait = AncestorsIterator(idx, [3], 0, True) +self.assertEqual(sys.getrefcount(idx), start_count + 1) +self.assertEqual(next(ait), 3) + +# and decreases once the iterator is removed +del ait +self.assertEqual(sys.getrefcount(idx), start_count) + +# and removing ref to the index after iterator init is no issue +ait = AncestorsIterator(idx, [3], 0, True) +del idx +self.assertEqual([r for r in ait], [3, 2, 1, 0]) def testgrapherror(self): -self.assertTrue('GraphError' in dir(rustext)) +data = (data_non_inlined[:64 + 27] + +b'\xf2' + +data_non_inlined[64 + 28:]) +idx = cparsers.parse_index2(data, False)[0] +with self.assertRaises(rustext.GraphError) as arc: +AncestorsIterator(idx, [1], -1, False) +exc = arc.exception +self.assertIsInstance(exc, ValueError) +# rust-cpython issues appropriate str instances for Python 2 and 3 +self.assertEqual(exc.args, ('ParentOutOfRange', 1)) if __name__ == '__main__': diff --git a/rust/hg-cpython/src/ancestors.rs b/rust/hg-cpython/src/ancestors.rs --- a/rust/hg-cpython/src/ancestors.rs +++
D5441: rust-cpython: binding for LazyAncestors
gracinet added inline comments. INLINE COMMENTS > kevincox wrote in ancestors.rs:98 > It should be fine with tbe GIL. RefCell is basically a single-thread lock. In > this case it should be fine to have no renterance. Yes, I'll remove these comments, same with the ones for RW lock, it's pretty clear to me now that such would be relevant only for code meant to be executed in a `allow_threads` closure. > kevincox wrote in ancestors.rs:108 > .map_err(|e| GraphError::new(py, format!("{:?}", e))) > > Or even better is to implement `Into` so that you can use `?` Ah yes, forgot to upgrade that one, thanks. The current good spelling would be the one used in `__contains__`, where `GraphError::pynew` encloses the conversion details. I believe implementing `Into` is not readily possible with the rust-cpython way of creating Python exceptions : we can't do it without the `py` marker (it's actually listed as one of the advantages of PyO3). And since `Python<'p>` is not defined by our crate, we'd need to enclose in a wrapper in the `excepions` module: pub struct ExceptionConverter<'p>(pub Python<'p>, pub hg::GraphError); impl <'p> From> for PyErr { fn from(ec: ExceptionConverter<'p>) -> Self { GraphError::pynew(ec.0, ec.1) } } and then def __new__(_cls, index: PyObject, initrevs: PyObject, stoprev: Revision, inclusive: bool) -> PyResult { let initvec = reviter_to_revvec(py, initrevs)?; let lazy = CoreLazy::new( Index::new(py, index)?, initvec, stoprev, inclusive).map_err(|e| ExceptionConverter(py, e))?; Self::create_instance(py, RefCell::new(Box::new(lazy))) } This does compile (did not think hard about the namings), and I'd be surprised if it didn't the right thng, but that's a lot of effort for a result that's only a bit better. What do you think ? REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5441 To: gracinet, #hg-reviewers, kevincox Cc: yuja, durin42, kevincox, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5439: rust-cpython: binding for AncestorsIterator
gracinet added a comment. @yuja actually my first version of `CoreIterator` (and later `CoreLazy`) used the `hg::AncestorsIterator` spelling, but actually I believe we might end up after a while not exporting everything at the top of the `hg` crate, leaving us either to `use hg::ancestors`, giving us a long and puzzling `ancestors::AncestorsIterator` or directly as `hg:ancestors::AncestorsIterator`. I don't think it would be tasteful to `use hg::ancestors as core`. That's why I found it clearer to call them `CoreIterator` etc. But I can change it if you really dislike them. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5439 To: gracinet, #hg-reviewers, kevincox Cc: yuja, durin42, kevincox, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5440: rust: core implementation for lazyancestors
gracinet updated this revision to Diff 12954. REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D5440?vs=12951=12954 REVISION DETAIL https://phab.mercurial-scm.org/D5440 AFFECTED FILES rust/hg-core/src/ancestors.rs rust/hg-core/src/lib.rs rust/hg-cpython/src/cindex.rs CHANGE DETAILS diff --git a/rust/hg-cpython/src/cindex.rs b/rust/hg-cpython/src/cindex.rs --- a/rust/hg-cpython/src/cindex.rs +++ b/rust/hg-cpython/src/cindex.rs @@ -15,7 +15,7 @@ extern crate python3_sys as python_sys; use self::python_sys::PyCapsule_Import; -use cpython::{PyErr, PyObject, PyResult, Python}; +use cpython::{PyClone, PyErr, PyObject, PyResult, Python}; use hg::{Graph, GraphError, Revision}; use libc::c_int; use std::ffi::CStr; @@ -59,7 +59,6 @@ /// the core API, that would be tied to `GILGuard` / `Python<'p>` /// in the case of the `cpython` crate bindings yet could leave room for other /// mechanisms in other contexts. - pub struct Index { index: PyObject, parents: IndexParentsFn, @@ -74,6 +73,16 @@ } } +impl Clone for Index { +fn clone() -> Self { +let guard = Python::acquire_gil(); +Index { +index: self.index.clone_ref(guard.python()), +parents: self.parents.clone(), +} +} +} + impl Graph for Index { /// wrap a call to the C extern parents function fn parents(, rev: Revision) -> Result<[Revision; 2], GraphError> { diff --git a/rust/hg-core/src/lib.rs b/rust/hg-core/src/lib.rs --- a/rust/hg-core/src/lib.rs +++ b/rust/hg-core/src/lib.rs @@ -3,7 +3,7 @@ // This software may be used and distributed according to the terms of the // GNU General Public License version 2 or any later version. mod ancestors; -pub use ancestors::{AncestorsIterator, MissingAncestors}; +pub use ancestors::{AncestorsIterator, LazyAncestors, MissingAncestors}; /// Mercurial revision numbers /// diff --git a/rust/hg-core/src/ancestors.rs b/rust/hg-core/src/ancestors.rs --- a/rust/hg-core/src/ancestors.rs +++ b/rust/hg-core/src/ancestors.rs @@ -25,6 +25,15 @@ stoprev: Revision, } +/// Lazy ancestors set, backed by AncestorsIterator +pub struct LazyAncestors { +graph: G, +containsiter: AncestorsIterator, +initrevs: Vec, +stoprev: Revision, +inclusive: bool, +} + pub struct MissingAncestors { graph: G, bases: HashSet, @@ -98,9 +107,31 @@ } Ok(false) } + +pub fn peek() -> Option { +self.visit.peek().map(|| r) +} + +/// Tell if the iterator is about an empty set +/// +/// The result does not depend whether the iterator has been consumed +/// or not. +/// This is mostly meant for iterators backing a lazy ancestors set +pub fn is_empty() -> bool { +if self.visit.len() > 0 { +return false; +} +if self.seen.len() > 1 { +return false; +} +// at this point, the seen set is at most a singleton. +// If not `self.inclusive`, it's still possible that it has only +// the null revision +self.seen.is_empty() || self.seen.contains(_REVISION) +} } -/// Main implementation. +/// Main implementation for the iterator /// /// The algorithm is the same as in `_lazyancestorsiter()` from `ancestors.py` /// with a few non crucial differences: @@ -137,6 +168,49 @@ } } +impl LazyAncestors { +pub fn new( +graph: G, +initrevs: impl IntoIterator, +stoprev: Revision, +inclusive: bool, +) -> Result { +let v: Vec = initrevs.into_iter().collect(); +Ok(LazyAncestors { +graph: graph.clone(), +containsiter: AncestorsIterator::new( +graph, +v.iter().cloned(), +stoprev, +inclusive, +)?, +initrevs: v, +stoprev: stoprev, +inclusive: inclusive, +}) +} + +pub fn contains( self, rev: Revision) -> Result { +self.containsiter.contains(rev) +} + +pub fn is_empty() -> bool { +self.containsiter.is_empty() +} + +pub fn iter() -> AncestorsIterator { +// the arguments being the same as for self.containsiter, we know +// for sure that AncestorsIterator constructor can't fail +AncestorsIterator::new( +self.graph.clone(), +self.initrevs.iter().cloned(), +self.stoprev, +self.inclusive, +) +.unwrap() +} +} + impl MissingAncestors { pub fn new(graph: G, bases: impl IntoIterator) -> Self { let mut bases: HashSet = bases.into_iter().collect(); @@ -407,7 +481,40 @@ assert!(!lazy.contains(NULL_REVISION).unwrap()); } +#[test] +fn test_peek() { +let mut iter = +AncestorsIterator::new(Stub, vec![10], 0, true).unwrap(); +// peek() gives us the next value +
D5440: rust: core implementation for lazyancestors
gracinet updated this revision to Diff 12951. gracinet marked 4 inline comments as done. REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D5440?vs=12877=12951 REVISION DETAIL https://phab.mercurial-scm.org/D5440 AFFECTED FILES rust/hg-core/src/ancestors.rs rust/hg-core/src/lib.rs rust/hg-cpython/src/cindex.rs CHANGE DETAILS diff --git a/rust/hg-cpython/src/cindex.rs b/rust/hg-cpython/src/cindex.rs --- a/rust/hg-cpython/src/cindex.rs +++ b/rust/hg-cpython/src/cindex.rs @@ -15,7 +15,7 @@ extern crate python3_sys as python_sys; use self::python_sys::PyCapsule_Import; -use cpython::{PyErr, PyObject, PyResult, Python}; +use cpython::{PyClone, PyErr, PyObject, PyResult, Python}; use hg::{Graph, GraphError, Revision}; use libc::c_int; use std::ffi::CStr; @@ -59,7 +59,6 @@ /// the core API, that would be tied to `GILGuard` / `Python<'p>` /// in the case of the `cpython` crate bindings yet could leave room for other /// mechanisms in other contexts. - pub struct Index { index: PyObject, parents: IndexParentsFn, @@ -74,6 +73,16 @@ } } +impl Clone for Index { +fn clone() -> Self { +let guard = Python::acquire_gil(); +Index { +index: self.index.clone_ref(guard.python()), +parents: self.parents.clone(), +} +} +} + impl Graph for Index { /// wrap a call to the C extern parents function fn parents(, rev: Revision) -> Result<[Revision; 2], GraphError> { diff --git a/rust/hg-core/src/lib.rs b/rust/hg-core/src/lib.rs --- a/rust/hg-core/src/lib.rs +++ b/rust/hg-core/src/lib.rs @@ -3,7 +3,7 @@ // This software may be used and distributed according to the terms of the // GNU General Public License version 2 or any later version. mod ancestors; -pub use ancestors::{AncestorsIterator, MissingAncestors}; +pub use ancestors::{AncestorsIterator, LazyAncestors, MissingAncestors}; /// Mercurial revision numbers /// diff --git a/rust/hg-core/src/ancestors.rs b/rust/hg-core/src/ancestors.rs --- a/rust/hg-core/src/ancestors.rs +++ b/rust/hg-core/src/ancestors.rs @@ -25,6 +25,15 @@ stoprev: Revision, } +/// Lazy ancestors set, backed by AncestorsIterator +pub struct LazyAncestors { +graph: G, +containsiter: AncestorsIterator, +initrevs: Vec, +stoprev: Revision, +inclusive: bool, +} + pub struct MissingAncestors { graph: G, bases: HashSet, @@ -98,9 +107,31 @@ } Ok(false) } + +pub fn peek() -> Option { +self.visit.peek().map(|| r) +} + +/// Tell if the iterator is about an empty set +/// +/// The result does not depend whether the iterator has been consumed +/// or not. +/// This is mostly meant for iterators backing a lazy ancestors set +pub fn is_empty() -> bool { +if self.visit.len() > 0 { +return false; +} +if self.seen.len() > 1 { +return false; +} +// at this point, the seen set is at most a singleton. +// If not `self.inclusive`, it's still possible that it has only +// the null revision +self.seen.is_empty() || self.seen.contains(_REVISION) +} } -/// Main implementation. +/// Main implementation for the iterator /// /// The algorithm is the same as in `_lazyancestorsiter()` from `ancestors.py` /// with a few non crucial differences: @@ -137,6 +168,49 @@ } } +impl LazyAncestors { +pub fn new( +graph: G, +initrevs: impl IntoIterator, +stoprev: Revision, +inclusive: bool, +) -> Result { +let v: Vec = initrevs.into_iter().collect(); +Ok(LazyAncestors { +graph: graph.clone(), +containsiter: AncestorsIterator::new( +graph, +v.iter().cloned(), +stoprev, +inclusive, +)?, +initrevs: v, +stoprev: stoprev, +inclusive: inclusive, +}) +} + +pub fn contains( self, rev: Revision) -> Result { +self.containsiter.contains(rev) +} + +pub fn is_empty() -> bool { +self.containsiter.is_empty() +} + +pub fn iter() -> AncestorsIterator { +// the arguments being the same as for self.containsiter, we know +// for sure that AncestorsIterator constructor can't fail +AncestorsIterator::new( +self.graph.clone(), +self.initrevs.iter().cloned(), +self.stoprev, +self.inclusive, +) +.unwrap() +} +} + impl MissingAncestors { pub fn new(graph: G, bases: impl IntoIterator) -> Self { let mut bases: HashSet = bases.into_iter().collect(); @@ -407,7 +481,40 @@ assert!(!lazy.contains(NULL_REVISION).unwrap()); } +#[test] +fn test_peek() { +let mut iter = +AncestorsIterator::new(Stub, vec![10], 0, true).unwrap(); +// peek()
D5439: rust-cpython: binding for AncestorsIterator
gracinet updated this revision to Diff 12953. REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D5439?vs=12950=12953 REVISION DETAIL https://phab.mercurial-scm.org/D5439 AFFECTED FILES rust/hg-cpython/src/ancestors.rs tests/test-rust-ancestor.py CHANGE DETAILS diff --git a/tests/test-rust-ancestor.py b/tests/test-rust-ancestor.py --- a/tests/test-rust-ancestor.py +++ b/tests/test-rust-ancestor.py @@ -1,19 +1,46 @@ from __future__ import absolute_import +import sys import unittest try: from mercurial import rustext +rustext.__name__ # trigger immediate actual import except ImportError: rustext = None +else: +# this would fail already without appropriate ancestor.__package__ +from mercurial.rustext.ancestor import AncestorsIterator try: from mercurial.cext import parsers as cparsers except ImportError: cparsers = None +# picked from test-parse-index2, copied rather than imported +# so that it stays stable even if test-parse-index2 changes or disappears. +data_non_inlined = ( +b'\x00\x00\x00\x01\x00\x00\x00\x00\x00\x01D\x19' +b'\x00\x07e\x12\x00\x00\x00\x00\x00\x00\x00\x00\xff\xff\xff\xff' +b'\xff\xff\xff\xff\xd1\xf4\xbb\xb0\xbe\xfc\x13\xbd\x8c\xd3\x9d' +b'\x0f\xcd\xd9;\x8c\x07\x8cJ/\x00\x00\x00\x00\x00\x00\x00\x00\x00' +b'\x00\x00\x00\x00\x00\x00\x01D\x19\x00\x00\x00\x00\x00\xdf\x00' +b'\x00\x01q\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x00\xff' +b'\xff\xff\xff\xc1\x12\xb9\x04\x96\xa4Z1t\x91\xdfsJ\x90\xf0\x9bh' +b'\x07l&\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' +b'\x00\x01D\xf8\x00\x00\x00\x00\x01\x1b\x00\x00\x01\xb8\x00\x00' +b'\x00\x01\x00\x00\x00\x02\x00\x00\x00\x01\xff\xff\xff\xff\x02\n' +b'\x0e\xc6&\xa1\x92\xae6\x0b\x02i\xfe-\xe5\xbao\x05\xd1\xe7\x00' +b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01F' +b'\x13\x00\x00\x00\x00\x01\xec\x00\x00\x03\x06\x00\x00\x00\x01' +b'\x00\x00\x00\x03\x00\x00\x00\x02\xff\xff\xff\xff\x12\xcb\xeby1' +b'\xb6\r\x98B\xcb\x07\xbd`\x8f\x92\xd9\xc4\x84\xbdK\x00\x00\x00' +b'\x00\x00\x00\x00\x00\x00\x00\x00\x00' +) + + @unittest.skipIf(rustext is None or cparsers is None, - "rustext.ancestor or the C Extension parsers module " - "it relies on is not available") + "rustext or the C Extension parsers module " + "ancestor relies on is not available") class rustancestorstest(unittest.TestCase): """Test the correctness of binding to Rust code. @@ -27,11 +54,52 @@ Algorithmic correctness is asserted by the Rust unit tests. """ -def testmodule(self): -self.assertTrue('DAG' in rustext.ancestor.__doc__) +def parseindex(self): +return cparsers.parse_index2(data_non_inlined, False)[0] + +def testiteratorrevlist(self): +idx = self.parseindex() +# checking test assumption about the index binary data: +self.assertEqual({i: (r[5], r[6]) for i, r in enumerate(idx)}, + {0: (-1, -1), + 1: (0, -1), + 2: (1, -1), + 3: (2, -1)}) +ait = AncestorsIterator(idx, [3], 0, True) +self.assertEqual([r for r in ait], [3, 2, 1, 0]) + +ait = AncestorsIterator(idx, [3], 0, False) +self.assertEqual([r for r in ait], [2, 1, 0]) + +def testrefcount(self): +idx = self.parseindex() +start_count = sys.getrefcount(idx) + +# refcount increases upon iterator init... +ait = AncestorsIterator(idx, [3], 0, True) +self.assertEqual(sys.getrefcount(idx), start_count + 1) +self.assertEqual(next(ait), 3) + +# and decreases once the iterator is removed +del ait +self.assertEqual(sys.getrefcount(idx), start_count) + +# and removing ref to the index after iterator init is no issue +ait = AncestorsIterator(idx, [3], 0, True) +del idx +self.assertEqual([r for r in ait], [3, 2, 1, 0]) def testgrapherror(self): -self.assertTrue('GraphError' in dir(rustext)) +data = (data_non_inlined[:64 + 27] + +b'\xf2' + +data_non_inlined[64 + 28:]) +idx = cparsers.parse_index2(data, False)[0] +with self.assertRaises(rustext.GraphError) as arc: +AncestorsIterator(idx, [1], -1, False) +exc = arc.exception +self.assertIsInstance(exc, ValueError) +# rust-cpython issues appropriate str instances for Python 2 and 3 +self.assertEqual(exc.args, ('ParentOutOfRange', 1)) if __name__ == '__main__': diff --git a/rust/hg-cpython/src/ancestors.rs b/rust/hg-cpython/src/ancestors.rs --- a/rust/hg-cpython/src/ancestors.rs +++ b/rust/hg-cpython/src/ancestors.rs @@ -7,7 +7,68 @@ //! Bindings for the hg::ancestors module provided by the //! `hg-core` crate. From Python, this will be seen
D5438: rust-cpython: implementing Graph using C parents function
gracinet updated this revision to Diff 12949. gracinet edited the summary of this revision. REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D5438?vs=12875=12949 REVISION DETAIL https://phab.mercurial-scm.org/D5438 AFFECTED FILES mercurial/cext/revlog.c rust/hg-cpython/src/cindex.rs rust/hg-cpython/src/lib.rs CHANGE DETAILS diff --git a/rust/hg-cpython/src/lib.rs b/rust/hg-cpython/src/lib.rs --- a/rust/hg-cpython/src/lib.rs +++ b/rust/hg-cpython/src/lib.rs @@ -21,8 +21,10 @@ #[macro_use] extern crate cpython; extern crate hg; +extern crate libc; mod ancestors; +mod cindex; mod exceptions; py_module_initializer!(rustext, initrustext, PyInit_rustext, |py, m| { diff --git a/rust/hg-cpython/src/cindex.rs b/rust/hg-cpython/src/cindex.rs new file mode 100644 --- /dev/null +++ b/rust/hg-cpython/src/cindex.rs @@ -0,0 +1,121 @@ +// cindex.rs +// +// Copyright 2018 Georges Racinet +// +// This software may be used and distributed according to the terms of the +// GNU General Public License version 2 or any later version. + +//! Bindings to use the Index defined by the parsers C extension +//! +//! Ideally, we should use an Index entirely implemented in Rust, +//! but this will take some time to get there. +#[cfg(feature = "python27")] +extern crate python27_sys as python_sys; +#[cfg(feature = "python3")] +extern crate python3_sys as python_sys; + +use self::python_sys::PyCapsule_Import; +use cpython::{PyErr, PyObject, PyResult, Python}; +use hg::{Graph, GraphError, Revision}; +use libc::c_int; +use std::ffi::CStr; +use std::mem::transmute; + +type IndexParentsFn = unsafe extern "C" fn( +index: *mut python_sys::PyObject, +rev: c_int, +ps: *mut [c_int; 2], +) -> c_int; + +/// A `Graph` backed up by objects and functions from revlog.c +/// +/// This implementation of the `Graph` trait, relies on (pointers to) +/// - the C index object (`index` member) +/// - the `index_get_parents()` function (`parents` member) +/// +/// # Safety +/// +/// The C index itself is mutable, and this Rust exposition is **not +/// protected by the GIL**, meaning that this construct isn't safe with respect +/// to Python threads. +/// +/// All callers of this `Index` must acquire the GIL and must not release it +/// while working. +/// +/// # TODO find a solution to make it GIL safe again. +/// +/// This is non trivial, and can wait until we have a clearer picture with +/// more Rust Mercurial constructs. +/// +/// One possibility would be to a `GILProtectedIndex` wrapper enclosing +/// a `Python<'p>` marker and have it be the one implementing the +/// `Graph` trait, but this would mean the `Graph` implementor would become +/// likely to change between subsequent method invocations of the `hg-core` +/// objects (a serious change of the `hg-core` API): +/// either exposing ways to mutate the `Graph`, or making it a non persistent +/// parameter in the relevant methods that need one. +/// +/// Another possibility would be to introduce an abstract lock handle into +/// the core API, that would be tied to `GILGuard` / `Python<'p>` +/// in the case of the `cpython` crate bindings yet could leave room for other +/// mechanisms in other contexts. + +pub struct Index { +index: PyObject, +parents: IndexParentsFn, +} + +impl Index { +pub fn new(py: Python, index: PyObject) -> PyResult { +Ok(Index { +index: index, +parents: decapsule_parents_fn(py)?, +}) +} +} + +impl Graph for Index { +/// wrap a call to the C extern parents function +fn parents(, rev: Revision) -> Result<[Revision; 2], GraphError> { +let mut res: [c_int; 2] = [0; 2]; +let code = unsafe { +(self.parents)( +self.index.as_ptr(), +rev as c_int, + res as *mut [c_int; 2], +) +}; +match code { +0 => Ok(res), +_ => Err(GraphError::ParentOutOfRange(rev)), +} +} +} + +/// Return the `index_get_parents` function of the parsers C Extension module. +/// +/// A pointer to the function is stored in the `parsers` module as a +/// standard [Python capsule](https://docs.python.org/2/c-api/capsule.html). +/// +/// This function retrieves the capsule and casts the function pointer +/// +/// Casting function pointers is one of the rare cases of +/// legitimate use cases of `mem::transmute()` (see +/// https://doc.rust-lang.org/std/mem/fn.transmute.html of +/// `mem::transmute()`. +/// It is inappropriate for architectures where +/// function and data pointer sizes differ (so-called "Harvard +/// architectures"), but these are nowadays mostly DSPs +/// and microcontrollers, hence out of our scope. +fn decapsule_parents_fn(py: Python) -> PyResult { +unsafe { +let caps_name = CStr::from_bytes_with_nul_unchecked( +b"mer
D5441: rust-cpython: binding for LazyAncestors
gracinet updated this revision to Diff 12952. REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D5441?vs=12878=12952 REVISION DETAIL https://phab.mercurial-scm.org/D5441 AFFECTED FILES rust/hg-cpython/src/ancestors.rs tests/test-rust-ancestor.py CHANGE DETAILS diff --git a/tests/test-rust-ancestor.py b/tests/test-rust-ancestor.py --- a/tests/test-rust-ancestor.py +++ b/tests/test-rust-ancestor.py @@ -9,7 +9,10 @@ rustext = None else: # this would fail already without appropriate ancestor.__package__ -from mercurial.rustext.ancestor import AncestorsIterator +from mercurial.rustext.ancestor import ( +AncestorsIterator, +lazyancestors +) try: from mercurial.cext import parsers as cparsers @@ -71,6 +74,37 @@ ait = AncestorsIterator(idx, [3], 0, False) self.assertEqual([r for r in ait], [2, 1, 0]) +def testlazyancestors(self): +idx = self.parseindex() +start_count = sys.getrefcount(idx) # should be 2 (see Python doc) +self.assertEqual({i: (r[5], r[6]) for i, r in enumerate(idx)}, + {0: (-1, -1), + 1: (0, -1), + 2: (1, -1), + 3: (2, -1)}) +lazy = lazyancestors(idx, [3], 0, True) +# we have two more references to the index: +# - in its inner iterator for __contains__ and __bool__ +# - in the lazyancestors instance itself (to spawn new iterators) +self.assertEqual(sys.getrefcount(idx), start_count + 2) + +self.assertTrue(2 in lazy) +self.assertTrue(bool(lazy)) +self.assertEqual([r for r in lazy], [3, 2, 1, 0]) +# a second time to validate that we spawn new iterators +self.assertEqual([r for r in lazy], [3, 2, 1, 0]) + +# now let's watch the refcounts closer +ait = iter(lazy) +self.assertEqual(sys.getrefcount(idx), start_count + 3) +del ait +self.assertEqual(sys.getrefcount(idx), start_count + 2) +del lazy +self.assertEqual(sys.getrefcount(idx), start_count) + +# let's check bool for an empty one +self.assertFalse(lazyancestors(idx, [0], 0, False)) + def testrefcount(self): idx = self.parseindex() start_count = sys.getrefcount(idx) diff --git a/rust/hg-cpython/src/ancestors.rs b/rust/hg-cpython/src/ancestors.rs --- a/rust/hg-cpython/src/ancestors.rs +++ b/rust/hg-cpython/src/ancestors.rs @@ -12,8 +12,8 @@ }; use exceptions::GraphError; use hg; -use hg::AncestorsIterator as CoreIterator; use hg::Revision; +use hg::{AncestorsIterator as CoreIterator, LazyAncestors as CoreLazy}; use std::cell::RefCell; /// Utility function to convert a Python iterable into a Vec @@ -27,7 +27,7 @@ .collect() } -py_class!(class AncestorsIterator |py| { +py_class!(pub class AncestorsIterator |py| { // TODO RW lock ? data inner: RefCell>>; @@ -69,6 +69,52 @@ } } +/// The purpose of this module is to hide identifiers from other Rust users +/// +/// Some of the identifiers we are defining are meant for consumption +/// from Python with other naming conventions. For instance, `py_class!` +/// does not let us make a distinction between the Python and the Rust name, +/// and that name ends up in particular in the `__class__` attribute, so that +/// renaming it inside its Python module is not enough. +/// +/// For Rust consumers, we will reexport these definitions, following the +/// appropriate naming convention. +mod concealed_detail { +use super::*; + +py_class!(pub class lazyancestors |py| { +data inner: RefCell>>; + +def __contains__(, rev: Revision) -> PyResult { +self.inner(py).borrow_mut().contains(rev).map_err(|e| GraphError::pynew(py, e)) +} + +def __iter__() -> PyResult { +AncestorsIterator::from_inner(py, + self.inner(py).borrow().iter()) +} + +def __bool__() -> PyResult { +Ok(!self.inner(py).borrow().is_empty()) +} + +def __new__(_cls, index: PyObject, initrevs: PyObject, +stoprev: Revision, inclusive: bool) -> PyResult { +let initvec = reviter_to_revvec(py, initrevs)?; +let lazy = match CoreLazy::new( +Index::new(py, index)?, initvec, stoprev, inclusive) { +Ok(lazy) => lazy, +Err(e) => { +return Err(GraphError::new(py, format!("{:?}", e))); +} +}; +Self::create_instance(py, RefCell::new(Box::new(lazy))) +} +}); +} + +pub use self::concealed_detail::lazyancestors as LazyAncestors; + /// Create the module, with __package__ given from parent pub fn init_module(py: Python, package: ) -> PyResult { let dotted_name = !("{}.ancestor", package); @@ -80,6 +126,7 @@
D5439: rust-cpython: binding for AncestorsIterator
gracinet updated this revision to Diff 12950. REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D5439?vs=12876=12950 REVISION DETAIL https://phab.mercurial-scm.org/D5439 AFFECTED FILES rust/hg-cpython/src/ancestors.rs rust/hg-cpython/src/lib.rs tests/test-rust-ancestor.py CHANGE DETAILS diff --git a/tests/test-rust-ancestor.py b/tests/test-rust-ancestor.py --- a/tests/test-rust-ancestor.py +++ b/tests/test-rust-ancestor.py @@ -1,19 +1,46 @@ from __future__ import absolute_import +import sys import unittest try: from mercurial import rustext +rustext.__name__ # trigger immediate actual import except ImportError: rustext = None +else: +# this would fail already without appropriate ancestor.__package__ +from mercurial.rustext.ancestor import AncestorsIterator try: from mercurial.cext import parsers as cparsers except ImportError: cparsers = None +# picked from test-parse-index2, copied rather than imported +# so that it stays stable even if test-parse-index2 changes or disappears. +data_non_inlined = ( +b'\x00\x00\x00\x01\x00\x00\x00\x00\x00\x01D\x19' +b'\x00\x07e\x12\x00\x00\x00\x00\x00\x00\x00\x00\xff\xff\xff\xff' +b'\xff\xff\xff\xff\xd1\xf4\xbb\xb0\xbe\xfc\x13\xbd\x8c\xd3\x9d' +b'\x0f\xcd\xd9;\x8c\x07\x8cJ/\x00\x00\x00\x00\x00\x00\x00\x00\x00' +b'\x00\x00\x00\x00\x00\x00\x01D\x19\x00\x00\x00\x00\x00\xdf\x00' +b'\x00\x01q\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x00\xff' +b'\xff\xff\xff\xc1\x12\xb9\x04\x96\xa4Z1t\x91\xdfsJ\x90\xf0\x9bh' +b'\x07l&\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' +b'\x00\x01D\xf8\x00\x00\x00\x00\x01\x1b\x00\x00\x01\xb8\x00\x00' +b'\x00\x01\x00\x00\x00\x02\x00\x00\x00\x01\xff\xff\xff\xff\x02\n' +b'\x0e\xc6&\xa1\x92\xae6\x0b\x02i\xfe-\xe5\xbao\x05\xd1\xe7\x00' +b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01F' +b'\x13\x00\x00\x00\x00\x01\xec\x00\x00\x03\x06\x00\x00\x00\x01' +b'\x00\x00\x00\x03\x00\x00\x00\x02\xff\xff\xff\xff\x12\xcb\xeby1' +b'\xb6\r\x98B\xcb\x07\xbd`\x8f\x92\xd9\xc4\x84\xbdK\x00\x00\x00' +b'\x00\x00\x00\x00\x00\x00\x00\x00\x00' +) + + @unittest.skipIf(rustext is None or cparsers is None, - "rustext.ancestor or the C Extension parsers module " - "it relies on is not available") + "rustext or the C Extension parsers module " + "ancestor relies on is not available") class rustancestorstest(unittest.TestCase): """Test the correctness of binding to Rust code. @@ -27,11 +54,52 @@ Algorithmic correctness is asserted by the Rust unit tests. """ -def testmodule(self): -self.assertTrue('DAG' in rustext.ancestor.__doc__) +def parseindex(self): +return cparsers.parse_index2(data_non_inlined, False)[0] + +def testiteratorrevlist(self): +idx = self.parseindex() +# checking test assumption about the index binary data: +self.assertEqual({i: (r[5], r[6]) for i, r in enumerate(idx)}, + {0: (-1, -1), + 1: (0, -1), + 2: (1, -1), + 3: (2, -1)}) +ait = AncestorsIterator(idx, [3], 0, True) +self.assertEqual([r for r in ait], [3, 2, 1, 0]) + +ait = AncestorsIterator(idx, [3], 0, False) +self.assertEqual([r for r in ait], [2, 1, 0]) + +def testrefcount(self): +idx = self.parseindex() +start_count = sys.getrefcount(idx) + +# refcount increases upon iterator init... +ait = AncestorsIterator(idx, [3], 0, True) +self.assertEqual(sys.getrefcount(idx), start_count + 1) +self.assertEqual(next(ait), 3) + +# and decreases once the iterator is removed +del ait +self.assertEqual(sys.getrefcount(idx), start_count) + +# and removing ref to the index after iterator init is no issue +ait = AncestorsIterator(idx, [3], 0, True) +del idx +self.assertEqual([r for r in ait], [3, 2, 1, 0]) def testgrapherror(self): -self.assertTrue('GraphError' in dir(rustext)) +data = (data_non_inlined[:64 + 27] + +b'\xf2' + +data_non_inlined[64 + 28:]) +idx = cparsers.parse_index2(data, False)[0] +with self.assertRaises(rustext.GraphError) as arc: +AncestorsIterator(idx, [1], -1, False) +exc = arc.exception +self.assertIsInstance(exc, ValueError) +# rust-cpython issues appropriate str instances for Python 2 and 3 +self.assertEqual(exc.args, ('ParentOutOfRange', 1)) if __name__ == '__main__': diff --git a/rust/hg-cpython/src/lib.rs b/rust/hg-cpython/src/lib.rs --- a/rust/hg-cpython/src/lib.rs +++ b/rust/hg-cpython/src/lib.rs @@ -23,9 +23,9 @@ extern crate hg; extern crate libc; -mod ancestors; +pub mod ancestors; mod cindex; -mod exceptions;
D5440: rust: core implementation for lazyancestors
gracinet added a comment. @yuja, yes a `Graph` not implementing `Clone` is already a good thing, as it avoids to implement `Clone` in `hg-direct-ffi` prematurely. I think the whole `hg::LazyAncestors` can end up being useful from core Rust code, too, that's why I prefer that to a `hg-cpython` only implementation. INLINE COMMENTS > kevincox wrote in ancestors.rs:124 > I think this variable makes the code harder to read. I would just repeat the > calls to `.len()`. agreed, and made a clearer version of the last lines as an OR statement using `self.seen.is_empty()` REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5440 To: gracinet, #hg-reviewers, kevincox Cc: yuja, durin42, kevincox, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5443: ancestor: uniformity of calling lazyancestors classes
gracinet added a comment. @yuja I'm not sure by what you consider exactly to be internals here. If that's the `[5]` and [6]`, maybe a `parents(revision)` method on the index would be better ? The obvious drawback would be to write more C code. Given that my ultimate goal here is to delegate the whole `ancestors` module to `policy.importmod`, where would we have the factory function you're thinking of ? In some sense, that's the role `revlog.py` fulfills isn't it ? Maybe all access to ancestors logic should keep on going through it ? REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5443 To: gracinet, indygreg, #hg-reviewers Cc: yuja, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5446: rust-cpython: build and support for Python3
gracinet added a comment. @yuja great, thanks. Didn't know about this suffix, apparently it can be obtained through the `sysconfig` module (thats what `$PYTHON-config` does) ~ $ python2 -c "import sysconfig; print(sysconfig.get_config_var('SO'))" .so ~ $ python3 -c "import sysconfig; print(sysconfig.get_config_var('SO'))" -c:1: DeprecationWarning: SO is deprecated, use EXT_SUFFIX .cpython-37m-x86_64-linux-gnu.so ~ $ python3 -c "import sysconfig; print(sysconfig.get_config_var('EXT_SUFFIX'))" .cpython-37m-x86_64-linux-gnu.so I suppose that's also the thing to use to build for Windows or other operating systems. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5446 To: gracinet, #hg-reviewers Cc: yuja, mjpieters, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5446: rust-cpython: build and support for Python3
This revision was automatically updated to reflect the committed changes. Closed by commit rHG4277e20cfec4: rust-cpython: build and support for Python3 (authored by gracinet, committed by ). REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D5446?vs=12889=12890 REVISION DETAIL https://phab.mercurial-scm.org/D5446 AFFECTED FILES mercurial/__init__.py rust/Cargo.lock rust/hg-cpython/Cargo.toml setup.py CHANGE DETAILS diff --git a/setup.py b/setup.py --- a/setup.py +++ b/setup.py @@ -922,11 +922,13 @@ rusttargetdir = os.path.join('rust', 'target', 'release') -def __init__(self, mpath, sources, rustlibname, subcrate, **kw): +def __init__(self, mpath, sources, rustlibname, subcrate, + py3_features=None, **kw): Extension.__init__(self, mpath, sources, **kw) if hgrustext is None: return srcdir = self.rustsrcdir = os.path.join('rust', subcrate) +self.py3_features = py3_features # adding Rust source and control files to depends so that the extension # gets rebuilt if they've changed @@ -957,6 +959,9 @@ env['HOME'] = pwd.getpwuid(os.getuid()).pw_dir cargocmd = ['cargo', 'build', '-vv', '--release'] +if sys.version_info[0] == 3 and self.py3_features is not None: +cargocmd.extend(('--features', self.py3_features, + '--no-default-features')) try: subprocess.check_call(cargocmd, env=env, cwd=self.rustsrcdir) except OSError as exc: @@ -1047,7 +1052,8 @@ if hgrustext == 'cpython': extmodules.append( -RustStandaloneExtension('mercurial.rustext', 'hg-cpython', 'librusthg') +RustStandaloneExtension('mercurial.rustext', 'hg-cpython', 'librusthg', +py3_features='python3') ) diff --git a/rust/hg-cpython/Cargo.toml b/rust/hg-cpython/Cargo.toml --- a/rust/hg-cpython/Cargo.toml +++ b/rust/hg-cpython/Cargo.toml @@ -8,9 +8,14 @@ crate-type = ["cdylib"] [features] -default = ["python27", "python27-sys"] +default = ["python27"] -python27 = ["cpython/python27-sys", "cpython/extension-module-2-7"] +python27 = ["cpython/python27-sys", +"cpython/extension-module-2-7", +"python27-sys", +] + +python3 = ["python3-sys", "cpython/python3-sys", "cpython/extension-module"] [dependencies] hg-core = { path = "../hg-core" } diff --git a/rust/Cargo.lock b/rust/Cargo.lock --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -19,6 +19,7 @@ "libc 0.2.45 (registry+https://github.com/rust-lang/crates.io-index)", "num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", "python27-sys 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "python3-sys 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] diff --git a/mercurial/__init__.py b/mercurial/__init__.py --- a/mercurial/__init__.py +++ b/mercurial/__init__.py @@ -40,6 +40,10 @@ # zstd is already dual-version clean, don't try and mangle it if fullname.startswith('mercurial.zstd'): return None +# rustext is built for the right python version, +# don't try and mangle it +if fullname.startswith('mercurial.rustext'): +return None # pywatchman is already dual-version clean, don't try and mangle it if fullname.startswith('hgext.fsmonitor.pywatchman'): return None To: gracinet, #hg-reviewers Cc: yuja, mjpieters, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5439: rust-cpython: binding for AncestorsIterator
gracinet added a comment. @yuja you're right! My first attempt was tainted with assigning to a `Vec`, whereas upon type inference with the actual return type `PyResult`, the compiler does the right thing so that was the escape plan for early returns in closures :-) amazing. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5439 To: gracinet, #hg-reviewers, kevincox Cc: yuja, durin42, kevincox, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5446: rust-cpython: build and support for Python3
gracinet created this revision. Herald added subscribers: mercurial-devel, mjpieters. Herald added a reviewer: hg-reviewers. REVISION SUMMARY Defined Cargo features for Python3, making them overall simpler to use, hooked them in build and made mercurial.rustext importable. This is tested with Python 3.6.7. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5446 AFFECTED FILES mercurial/__init__.py rust/Cargo.lock rust/hg-cpython/Cargo.toml setup.py CHANGE DETAILS diff --git a/setup.py b/setup.py --- a/setup.py +++ b/setup.py @@ -919,11 +919,13 @@ rusttargetdir = os.path.join('rust', 'target', 'release') -def __init__(self, mpath, sources, rustlibname, subcrate, **kw): +def __init__(self, mpath, sources, rustlibname, subcrate, + py3_features=None, **kw): Extension.__init__(self, mpath, sources, **kw) if hgrustext is None: return srcdir = self.rustsrcdir = os.path.join('rust', subcrate) +self.py3_features = py3_features # adding Rust source and control files to depends so that the extension # gets rebuilt if they've changed @@ -954,6 +956,9 @@ env['HOME'] = pwd.getpwuid(os.getuid()).pw_dir cargocmd = ['cargo', 'build', '-vv', '--release'] +if sys.version_info[0] == 3 and self.py3_features is not None: +cargocmd.extend(('--features', self.py3_features, + '--no-default-features')) try: subprocess.check_call(cargocmd, env=env, cwd=self.rustsrcdir) except OSError as exc: @@ -1044,7 +1049,8 @@ if hgrustext == 'cpython': extmodules.append( -RustStandaloneExtension('mercurial.rustext', 'hg-cpython', 'librusthg') +RustStandaloneExtension('mercurial.rustext', 'hg-cpython', 'librusthg', +py3_features='python3') ) diff --git a/rust/hg-cpython/Cargo.toml b/rust/hg-cpython/Cargo.toml --- a/rust/hg-cpython/Cargo.toml +++ b/rust/hg-cpython/Cargo.toml @@ -8,9 +8,14 @@ crate-type = ["cdylib"] [features] -default = ["python27", "python27-sys"] +default = ["python27"] -python27 = ["cpython/python27-sys", "cpython/extension-module-2-7"] +python27 = ["cpython/python27-sys", +"cpython/extension-module-2-7", +"python27-sys", +] + +python3 = ["python3-sys", "cpython/python3-sys", "cpython/extension-module"] [dependencies] hg-core = { path = "../hg-core" } diff --git a/rust/Cargo.lock b/rust/Cargo.lock --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -19,6 +19,7 @@ "libc 0.2.45 (registry+https://github.com/rust-lang/crates.io-index)", "num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", "python27-sys 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "python3-sys 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] diff --git a/mercurial/__init__.py b/mercurial/__init__.py --- a/mercurial/__init__.py +++ b/mercurial/__init__.py @@ -40,6 +40,10 @@ # zstd is already dual-version clean, don't try and mangle it if fullname.startswith('mercurial.zstd'): return None +# rustext is built for the right python version, +# don't try and mangle it +if fullname.startswith('mercurial.rustext'): +return None # pywatchman is already dual-version clean, don't try and mangle it if fullname.startswith('hgext.fsmonitor.pywatchman'): return None To: gracinet, #hg-reviewers Cc: mjpieters, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5439: rust-cpython: binding for AncestorsIterator
gracinet added a comment. @yuja thanks for spotting the Python3 incompatibility. As you can guess, I didn't compile with Python 3, and actually I hadn't even defined the features to that extent. I will submit separately a change that takes care of the build with Python 3 before updating that one (but yes, got `test-rust-ancestor.py` to pass). OTOH I'll have to keep the loop so that I can get the early return upon error (otherwise we'd get a Vec> @kevincox thanks for the shorter version, wasn't used to these sugars when I wrote this long ago :-) REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5439 To: gracinet, #hg-reviewers, kevincox Cc: yuja, durin42, kevincox, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5438: rust-cpython: implementing Graph using C parents function
gracinet added a comment. I gave the solution to reacquire the GIL explicitely from Index a quick try, without even releasing it from the callers (which is ok according to https://docs.python.org/2.7/c-api/init.html#c.PyGILState_Ensure), and it is more than a 20% penalty. I'm measuring with `hg perfancestors` on mozilla-central, here are the medians of wall time pure Python: 0.295 Rust cpython (unmodified): 0.101 Rust direct-ffi: 0.092 Rust cpython reacquiring the GIL: 0.124 REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5438 To: gracinet, #hg-reviewers Cc: yuja, durin42, kevincox, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5438: rust-cpython: implementing Graph using C parents function
gracinet added a comment. @yuja yes sorry, forgot to adapt to the new signature. That'll be fixed in next version, and in `revlog.c`, the capsule pointer declaration is now at the top. About protecting parents() with the GIL `Python<'p>`, I'll have to think more about it, but the solution that comes to mind is as you suggest a two-step wrapping: the long lived `Index` would wrap the actual index object and the function pointer retrieved from the capsule, but would not implement the `Graph` trait directly. A shorter term `GILProtectedIndex` would then implement the `Graph` trait by wrapping the `Index` together with the `Python<'p>` marker coming from a call to the Python interface (methods of the `py_class!` of this series), hence forbidding to release the GIL from any code that calls `Graph::parents()`. I think the current usage is safe, though, because our Python interface methods don't release the GIL, but it's true the `Index` isn't inherentrly safe. A simpler possibility would be to go the other way: release the GIL from the Python interface methods, and reacquire it at each call of `Graph::parents`. I don't know what the overhead would be, especially with Mercurial being currently monothreaded (unless I'm mistaken), meaning that we don't have benefits to reap. Maybe I'll try that one, since it's so simple, and if I measure negligible overhead, I'll go for it. As a side note, the same problem could arise with the direct-ffi bindings: the safety promise is held by the caller, this time from C code. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5438 To: gracinet, #hg-reviewers Cc: yuja, durin42, kevincox, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5434: rust-cpython: started cpython crate bindings
This revision was automatically updated to reflect the committed changes. Closed by commit rHGa3ba080b3118: rust-cpython: start cpython crate bindings (authored by gracinet, committed by ). CHANGED PRIOR TO COMMIT https://phab.mercurial-scm.org/D5434?vs=12871=12883#toc REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D5434?vs=12871=12883 REVISION DETAIL https://phab.mercurial-scm.org/D5434 AFFECTED FILES rust/Cargo.lock rust/Cargo.toml rust/hg-cpython/Cargo.toml rust/hg-cpython/rustfmt.toml rust/hg-cpython/src/ancestors.rs rust/hg-cpython/src/exceptions.rs rust/hg-cpython/src/lib.rs CHANGE DETAILS diff --git a/rust/hg-cpython/src/lib.rs b/rust/hg-cpython/src/lib.rs new file mode 100644 --- /dev/null +++ b/rust/hg-cpython/src/lib.rs @@ -0,0 +1,40 @@ +// lib.rs +// +// Copyright 2018 Georges Racinet +// +// This software may be used and distributed according to the terms of the +// GNU General Public License version 2 or any later version. + +//! Python bindings of `hg-core` objects using the `cpython` crate. +//! Once compiled, the resulting single shared library object can be placed in +//! the `mercurial` package directly as `rustext.so` or `rustext.dll`. +//! It holds several modules, so that from the point of view of Python, +//! it behaves as the `cext` package. +//! +//! Example: +//! ``` +//! >>> from mercurial.rustext import ancestor +//! >>> ancestor.__doc__ +//! 'Generic DAG ancestor algorithms - Rust implementation' +//! ``` + +#[macro_use] +extern crate cpython; +extern crate hg; + +mod ancestors; +mod exceptions; + +py_module_initializer!(rustext, initrustext, PyInit_rustext, |py, m| { +m.add( +py, +"__doc__", +"Mercurial core concepts - Rust implementation", +)?; + +let dotted_name: String = m.get(py, "__name__")?.extract(py)?; +m.add(py, "__package__", "mercurial")?; +m.add(py, "ancestor", ancestors::init_module(py, _name)?)?; +m.add(py, "GraphError", py.get_type::())?; +Ok(()) +}); diff --git a/rust/hg-cpython/src/exceptions.rs b/rust/hg-cpython/src/exceptions.rs new file mode 100644 --- /dev/null +++ b/rust/hg-cpython/src/exceptions.rs @@ -0,0 +1,15 @@ +use cpython::exc::ValueError; +use cpython::{PyErr, Python}; +use hg; + +py_exception!(rustext, GraphError, ValueError); + +impl GraphError { +pub fn pynew(py: Python, inner: hg::GraphError) -> PyErr { +match inner { +hg::GraphError::ParentOutOfRange(r) => { +GraphError::new(py, ("ParentOutOfRange", r)) +} +} +} +} diff --git a/rust/hg-cpython/src/ancestors.rs b/rust/hg-cpython/src/ancestors.rs new file mode 100644 --- /dev/null +++ b/rust/hg-cpython/src/ancestors.rs @@ -0,0 +1,30 @@ +// ancestors.rs +// +// Copyright 2018 Georges Racinet +// +// This software may be used and distributed according to the terms of the +// GNU General Public License version 2 or any later version. + +//! Bindings for the hg::ancestors module provided by the +//! `hg-core` crate. From Python, this will be seen as `rustext.ancestor` +use cpython::{PyDict, PyModule, PyResult, Python}; + +/// Create the module, with __package__ given from parent +pub fn init_module(py: Python, package: ) -> PyResult { +let dotted_name = !("{}.ancestor", package); +let m = PyModule::new(py, dotted_name)?; +m.add(py, "__package__", package)?; +m.add( +py, +"__doc__", +"Generic DAG ancestor algorithms - Rust implementation", +)?; + +let sys = PyModule::import(py, "sys")?; +let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?; +sys_modules.set_item(py, dotted_name, )?; +// Example C code (see pyexpat.c and import.c) will "give away the +// reference", but we won't because it will be consumed once the +// Rust PyObject is dropped. +Ok(m) +} diff --git a/rust/hg-cpython/rustfmt.toml b/rust/hg-cpython/rustfmt.toml new file mode 100644 --- /dev/null +++ b/rust/hg-cpython/rustfmt.toml @@ -0,0 +1,3 @@ +max_width = 79 +wrap_comments = true +error_on_line_overflow = true diff --git a/rust/hg-cpython/Cargo.toml b/rust/hg-cpython/Cargo.toml new file mode 100644 --- /dev/null +++ b/rust/hg-cpython/Cargo.toml @@ -0,0 +1,29 @@ +[package] +name = "hg-cpython" +version = "0.1.0" +authors = ["Georges Racinet "] + +[lib] +name='rusthg' +crate-type = ["cdylib"] + +[features] +default = ["python27", "python27-sys"] + +python27 = ["cpython/python27-sys", "cpython/extension-module-2-7"] + +[dependencies] +hg-core = { path = "../hg-core" } +libc = '*' + +[dependencies.cpython] +version = "*" +default-features = false + +[dependencies.python27-sys] +version = "0.2.1
D5433: rust-cpython: excluded hgcli from workspace
This revision was automatically updated to reflect the committed changes. Closed by commit rHG6e815adf91de: rust-cpython: exclude hgcli from workspace (authored by gracinet, committed by ). CHANGED PRIOR TO COMMIT https://phab.mercurial-scm.org/D5433?vs=12870=12882#toc REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D5433?vs=12870=12882 REVISION DETAIL https://phab.mercurial-scm.org/D5433 AFFECTED FILES rust/Cargo.lock rust/Cargo.toml rust/hgcli/Cargo.lock CHANGE DETAILS diff --git a/rust/Cargo.lock b/rust/hgcli/Cargo.lock copy from rust/Cargo.lock copy to rust/hgcli/Cargo.lock --- a/rust/Cargo.lock +++ b/rust/hgcli/Cargo.lock @@ -11,33 +11,21 @@ version = "0.1.0" source = "git+https://github.com/indygreg/rust-cpython.git?rev=c90d65cf84abfffce7ef54476bbfed56017a2f52#c90d65cf84abfffce7ef54476bbfed56017a2f52; dependencies = [ - "libc 0.2.35 (registry+https://github.com/rust-lang/crates.io-index)", - "num-traits 0.1.41 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.45 (registry+https://github.com/rust-lang/crates.io-index)", + "num-traits 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)", "python27-sys 0.1.2 (git+https://github.com/indygreg/rust-cpython.git?rev=c90d65cf84abfffce7ef54476bbfed56017a2f52)", ] [[package]] -name = "hg-core" -version = "0.1.0" - -[[package]] name = "hgcli" version = "0.1.0" dependencies = [ "cpython 0.1.0 (git+https://github.com/indygreg/rust-cpython.git?rev=c90d65cf84abfffce7ef54476bbfed56017a2f52)", - "libc 0.2.35 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.45 (registry+https://github.com/rust-lang/crates.io-index)", "python27-sys 0.1.2 (git+https://github.com/indygreg/rust-cpython.git?rev=c90d65cf84abfffce7ef54476bbfed56017a2f52)", ] [[package]] -name = "hgdirectffi" -version = "0.1.0" -dependencies = [ - "hg-core 0.1.0", - "libc 0.2.35 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] name = "kernel32-sys" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index; @@ -48,28 +36,36 @@ [[package]] name = "libc" -version = "0.2.35" +version = "0.2.45" source = "registry+https://github.com/rust-lang/crates.io-index; [[package]] name = "memchr" version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index; dependencies = [ - "libc 0.2.35 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.45 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "num-traits" -version = "0.1.41" +version = "0.1.43" +source = "registry+https://github.com/rust-lang/crates.io-index; +dependencies = [ + "num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "num-traits" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index; [[package]] name = "python27-sys" version = "0.1.2" source = "git+https://github.com/indygreg/rust-cpython.git?rev=c90d65cf84abfffce7ef54476bbfed56017a2f52#c90d65cf84abfffce7ef54476bbfed56017a2f52; dependencies = [ - "libc 0.2.35 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.45 (registry+https://github.com/rust-lang/crates.io-index)", "regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -96,7 +92,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index; dependencies = [ "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.35 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.45 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -126,9 +122,10 @@ "checksum aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ca972c2ea5f742bfce5687b9aef75506a764f61d37f8f649047846a9686ddb66" "checksum cpython 0.1.0 (git+https://github.com/indygreg/rust-cpython.git?rev=c90d65cf84abfffce7ef54476bbfed56017a2f52)" = "" "checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" -"checksum libc 0.2.35 (registry+https://github.com/rust-lang/crates.io-index)" = "96264e9b293e95d25bfcbbf8a88ffd1aedc85b754eba8b7d78012f638ba220eb" +"checksum libc 0.2.45 (registry+https://github.com/rust-lang/crates.io-index)" = "2d2857ec59fadc0773853c664d2d18e7198e83883e7060b63c924cb077bd5c74" "checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20" -"checksum num-traits 0.1.41 (registry+https://github.com/rust-lang/crates.io-index)" = "cacfcab5eb48250ee7d0c7896b51a2c5eec99c1feea5f32025635f5ae4b00070" +"checksum num-traits 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)" = "92e5113e9fd4cc14ded8e499429f396a20f98c772a47cc8622a736e1ec843c31" +"checksum num-traits 0.2.6
D5436: rust-cpython: build via HGWITHRUSTEXT=cpython
This revision was automatically updated to reflect the committed changes. Closed by commit rHG9e755c16f05d: rust-cpython: build via HGWITHRUSTEXT=cpython (authored by gracinet, committed by ). CHANGED PRIOR TO COMMIT https://phab.mercurial-scm.org/D5436?vs=12873=12885#toc REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D5436?vs=12873=12885 REVISION DETAIL https://phab.mercurial-scm.org/D5436 AFFECTED FILES Makefile setup.py CHANGE DETAILS diff --git a/setup.py b/setup.py --- a/setup.py +++ b/setup.py @@ -132,7 +132,11 @@ ispypy = "PyPy" in sys.version -iswithrustextensions = 'HGWITHRUSTEXT' in os.environ +hgrustext = os.environ.get('HGWITHRUSTEXT') +# TODO record it for proper rebuild upon changes +# (see mercurial/__modulepolicy__.py) +if hgrustext != 'cpython' and hgrustext is not None: +hgrustext = 'direct-ffi' import ctypes import errno @@ -458,11 +462,18 @@ return build_ext.initialize_options(self) def build_extensions(self): +ruststandalones = [e for e in self.extensions + if isinstance(e, RustStandaloneExtension)] +self.extensions = [e for e in self.extensions + if e not in ruststandalones] # Filter out zstd if disabled via argument. if not self.zstd: self.extensions = [e for e in self.extensions if e.name != 'mercurial.zstd'] +for rustext in ruststandalones: +rustext.build('' if self.inplace else self.build_lib) + return build_ext.build_extensions(self) def build_extension(self, ext): @@ -903,20 +914,16 @@ """Exception class for Rust compilation errors.""" class RustExtension(Extension): -"""A C Extension, conditionnally enhanced with Rust code. - -if iswithrustextensions is False, does nothing else than plain Extension +"""Base classes for concrete Rust Extension classes. """ rusttargetdir = os.path.join('rust', 'target', 'release') def __init__(self, mpath, sources, rustlibname, subcrate, **kw): Extension.__init__(self, mpath, sources, **kw) -if not iswithrustextensions: +if hgrustext is None: return srcdir = self.rustsrcdir = os.path.join('rust', subcrate) -self.libraries.append(rustlibname) -self.extra_compile_args.append('-DWITH_RUST') # adding Rust source and control files to depends so that the extension # gets rebuilt if they've changed @@ -930,7 +937,7 @@ if os.path.splitext(fname)[1] == '.rs') def rustbuild(self): -if not iswithrustextensions: +if hgrustext is None: return env = os.environ.copy() if 'HGTEST_RESTOREENV' in env: @@ -962,6 +969,40 @@ "Cargo failed. Working directory: %r, " "command: %r, environment: %r" % (self.rustsrcdir, cmd, env)) +class RustEnhancedExtension(RustExtension): +"""A C Extension, conditionally enhanced with Rust code. + +If the HGRUSTEXT environment variable is set to something else +than 'cpython', the Rust sources get compiled and linked within the +C target shared library object. +""" + +def __init__(self, mpath, sources, rustlibname, subcrate, **kw): +RustExtension.__init__(self, mpath, sources, rustlibname, subcrate, + **kw) +if hgrustext != 'direct-ffi': +return +self.extra_compile_args.append('-DWITH_RUST') +self.libraries.append(rustlibname) +self.library_dirs.append(self.rusttargetdir) + +class RustStandaloneExtension(RustExtension): + +def __init__(self, pydottedname, rustcrate, dylibname, **kw): +RustExtension.__init__(self, pydottedname, [], dylibname, rustcrate, + **kw) +self.dylibname = dylibname + +def build(self, target_dir): +self.rustbuild() +target = [target_dir] +target.extend(self.name.split('.')) +ext = '.so' # TODO Unix only +target[-1] += ext +shutil.copy2(os.path.join(self.rusttargetdir, self.dylibname + ext), + os.path.join(*target)) + + extmodules = [ Extension('mercurial.cext.base85', ['mercurial/cext/base85.c'], include_dirs=common_include_dirs, @@ -974,19 +1015,20 @@ 'mercurial/cext/mpatch.c'], include_dirs=common_include_dirs, depends=common_depends), -RustExtension('mercurial.cext.parsers', ['mercurial/cext/charencode.c', - 'mercurial/cext/dirs.c', - 'mercurial/cext/manifest.c', - 'mercurial/cext/parsers.c', - 'mercurial/cext/pathencode.c', -
D5437: rust-cpython: testing the bindings from Python
This revision was automatically updated to reflect the committed changes. Closed by commit rHG57e3dfeb3a5d: rust-cpython: testing the bindings from Python (authored by gracinet, committed by ). REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D5437?vs=12874=12886 REVISION DETAIL https://phab.mercurial-scm.org/D5437 AFFECTED FILES tests/test-rust-ancestor.py CHANGE DETAILS diff --git a/tests/test-rust-ancestor.py b/tests/test-rust-ancestor.py new file mode 100644 --- /dev/null +++ b/tests/test-rust-ancestor.py @@ -0,0 +1,39 @@ +from __future__ import absolute_import +import unittest + +try: +from mercurial import rustext +except ImportError: +rustext = None + +try: +from mercurial.cext import parsers as cparsers +except ImportError: +cparsers = None + +@unittest.skipIf(rustext is None or cparsers is None, + "rustext.ancestor or the C Extension parsers module " + "it relies on is not available") +class rustancestorstest(unittest.TestCase): +"""Test the correctness of binding to Rust code. + +This test is merely for the binding to Rust itself: extraction of +Python variable, giving back the results etc. + +It is not meant to test the algorithmic correctness of the operations +on ancestors it provides. Hence the very simple embedded index data is +good enough. + +Algorithmic correctness is asserted by the Rust unit tests. +""" + +def testmodule(self): +self.assertTrue('DAG' in rustext.ancestor.__doc__) + +def testgrapherror(self): +self.assertTrue('GraphError' in dir(rustext)) + + +if __name__ == '__main__': +import silenttestrunner +silenttestrunner.main(__name__) To: gracinet, #hg-reviewers Cc: mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5435: rust: better treatment of cargo/rustc errors
This revision was automatically updated to reflect the committed changes. Closed by commit rHG5955cf85ed74: rust: better treatment of cargo/rustc errors (authored by gracinet, committed by ). CHANGED PRIOR TO COMMIT https://phab.mercurial-scm.org/D5435?vs=12872=12884#toc REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D5435?vs=12872=12884 REVISION DETAIL https://phab.mercurial-scm.org/D5435 AFFECTED FILES setup.py CHANGE DETAILS diff --git a/setup.py b/setup.py --- a/setup.py +++ b/setup.py @@ -135,6 +135,7 @@ iswithrustextensions = 'HGWITHRUSTEXT' in os.environ import ctypes +import errno import stat, subprocess, time import re import shutil @@ -898,6 +899,9 @@ 'mercurial/thirdparty/xdiff/xutils.h', ] +class RustCompilationError(CCompilerError): +"""Exception class for Rust compilation errors.""" + class RustExtension(Extension): """A C Extension, conditionnally enhanced with Rust code. @@ -942,9 +946,21 @@ import pwd env['HOME'] = pwd.getpwuid(os.getuid()).pw_dir -subprocess.check_call(['cargo', 'build', '-vv', '--release'], - env=env, cwd=self.rustsrcdir) -self.library_dirs.append(self.rusttargetdir) +cargocmd = ['cargo', 'build', '-vv', '--release'] +try: +subprocess.check_call(cargocmd, env=env, cwd=self.rustsrcdir) +except OSError as exc: +if exc.errno == errno.ENOENT: +raise RustCompilationError("Cargo not found") +elif exc.errno == errno.EACCES: +raise RustCompilationError( +"Cargo found, but permisssion to execute it is denied") +else: +raise +except subprocess.CalledProcessError: +raise RustCompilationError( +"Cargo failed. Working directory: %r, " +"command: %r, environment: %r" % (self.rustsrcdir, cmd, env)) extmodules = [ Extension('mercurial.cext.base85', ['mercurial/cext/base85.c'], To: gracinet, #hg-reviewers Cc: yuja, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5443: ancestor: uniformity of calling lazyancestors classes
gracinet added a comment. obviously, this one could be adapted for application before the rust-cpython bindings, and extended for the `incrementalmissingancestors` as well REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5443 To: gracinet, indygreg, #hg-reviewers Cc: mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5438: rust-cpython: implementing Graph using C parents function
gracinet added a comment. Here, I'd be tempted to submit a `py_capsule_fn` macro to rust-cpython, but I guess it can wait. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5438 To: gracinet, #hg-reviewers Cc: durin42, kevincox, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5370: rust: core implementation of missingancestors (no bindings)
gracinet abandoned this revision. gracinet marked an inline comment as done. gracinet added a comment. This Differential has been superseded by https://phab.mercurial-scm.org/D5414 through https://phab.mercurial-scm.org/D5417 REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5370 To: gracinet, #hg-reviewers Cc: yuja, durin42, kevincox, mjpieters, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5443: ancestor: uniformity of calling lazyancestors classes
gracinet created this revision. Herald added a reviewer: indygreg. Herald added a subscriber: mercurial-devel. Herald added a reviewer: hg-reviewers. REVISION SUMMARY Up to now, the pure Python lazyancestors had been taking the parents function in its constructor, whereas Rust-backed variants would have needed a revlog index. With this change, instantiation of all lazyancestors work uniformely with an index. This requires only minor wrapping in test-ancestor.py. It could be argued that this introduces a new duplication, and that therefore, parentsfunc(index) should be provided in some utility module and used both from revlog and ancestor modules. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5443 AFFECTED FILES mercurial/ancestor.py mercurial/revlog.py tests/test-ancestor.py CHANGE DETAILS diff --git a/tests/test-ancestor.py b/tests/test-ancestor.py --- a/tests/test-ancestor.py +++ b/tests/test-ancestor.py @@ -240,10 +240,26 @@ else: print("Ok") +class fakeindex(object): +'''A pseudo index backed by a dict or list graph + +It's only meant for parents lookup, and +all that matters is that self.graph[rev] is the pair of rev's parents. +''' +def __init__(self, graph): +self.graph = graph +def __getitem__(self, rev): +res = [None] * 5 +try: +res.extend(self.graph[rev]) +except KeyError: +raise IndexError(rev) +return res + def genlazyancestors(revs, stoprev=0, inclusive=False): print(("%% lazy ancestor set for %s, stoprev = %s, inclusive = %s" % (revs, stoprev, inclusive))) -return ancestor.lazyancestors(graph.get, revs, stoprev=stoprev, +return ancestor.lazyancestors(fakeindex(graph), revs, stoprev=stoprev, inclusive=inclusive) def printlazyancestors(s, l): diff --git a/mercurial/revlog.py b/mercurial/revlog.py --- a/mercurial/revlog.py +++ b/mercurial/revlog.py @@ -675,9 +675,6 @@ return entry[5], entry[6] -# fast parentrevs(rev) where rev isn't filtered -_uncheckedparentrevs = parentrevs - def node(self, rev): try: return self.index[rev][7] @@ -797,14 +794,12 @@ rustext = None if rustext is not None: lazyancestors = rustext.ancestor.lazyancestors -arg = self.index elif util.safehasattr(parsers, 'rustlazyancestors'): lazyancestors = ancestor.rustlazyancestors -arg = self.index else: lazyancestors = ancestor.lazyancestors -arg = self._uncheckedparentrevs -return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive) +return lazyancestors(self.index, revs, + stoprev=stoprev, inclusive=inclusive) def descendants(self, revs): return dagop.descendantrevs(revs, self.revs, self.parentrevs) diff --git a/mercurial/ancestor.py b/mercurial/ancestor.py --- a/mercurial/ancestor.py +++ b/mercurial/ancestor.py @@ -9,8 +9,12 @@ import heapq -from .node import nullrev +from .node import ( +nullrev, +wdirrev, +) from . import ( +error, policy, pycompat, ) @@ -306,8 +310,20 @@ heappush(visit, -p2) see(p2) +def parentsfunc(index): +def parentrevs(rev): +try: +entry = index[rev] +except IndexError: +if rev == wdirrev: +raise error.WdirUnsupported +raise + +return entry[5], entry[6] +return parentrevs + class lazyancestors(object): -def __init__(self, pfunc, revs, stoprev=0, inclusive=False): +def __init__(self, index, revs, stoprev=0, inclusive=False): """Create a new object generating ancestors for the given revs. Does not generate revs lower than stoprev. @@ -319,7 +335,7 @@ than stoprev will not be generated. Result does not include the null revision.""" -self._parentrevs = pfunc +self._parentrevs = parentsfunc(index) self._initrevs = revs = [r for r in revs if r >= stoprev] self._stoprev = stoprev self._inclusive = inclusive @@ -330,6 +346,7 @@ self._stoprev, self._inclusive) + def __nonzero__(self): """False if the set is empty, True otherwise.""" try: To: gracinet, indygreg, #hg-reviewers Cc: mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5442: rust-cpython: using the new bindings from Python
gracinet created this revision. Herald added a reviewer: indygreg. Herald added a subscriber: mercurial-devel. Herald added a reviewer: hg-reviewers. REVISION SUMMARY The Python callers detect if we have cpython or direct-ffi bindings and fallback to the Python implementation if none is present. This intermediate state allows to compare the three possibilities. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5442 AFFECTED FILES mercurial/revlog.py CHANGE DETAILS diff --git a/mercurial/revlog.py b/mercurial/revlog.py --- a/mercurial/revlog.py +++ b/mercurial/revlog.py @@ -97,6 +97,10 @@ REVIDX_RAWTEXT_CHANGING_FLAGS parsers = policy.importmod(r'parsers') +try: +from . import rustext +except ImportError: +rustext = None # Aliased for performance. _zlibdecompress = zlib.decompress @@ -779,12 +783,28 @@ for r in revs: checkrev(r) # and we're sure ancestors aren't filtered as well -if util.safehasattr(parsers, 'rustlazyancestors'): -return ancestor.rustlazyancestors( -self.index, revs, -stoprev=stoprev, inclusive=inclusive) -return ancestor.lazyancestors(self._uncheckedparentrevs, revs, - stoprev=stoprev, inclusive=inclusive) + +# rustext can be an unloaded module (see hgdemandimport) +# so we need to trigger an actual import to check for its presence, +# which is done by looking up any attribute. +# If in the future we reach the point where the ancestor module +# can be imported by policy, we will be able to get rid of this +global rustext +if rustext is not None: +try: +rustext.__name__ +except ImportError: +rustext = None +if rustext is not None: +lazyancestors = rustext.ancestor.lazyancestors +arg = self.index +elif util.safehasattr(parsers, 'rustlazyancestors'): +lazyancestors = ancestor.rustlazyancestors +arg = self.index +else: +lazyancestors = ancestor.lazyancestors +arg = self._uncheckedparentrevs +return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive) def descendants(self, revs): return dagop.descendantrevs(revs, self.revs, self.parentrevs) To: gracinet, indygreg, #hg-reviewers Cc: mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5440: rust: core implementation for lazyancestors
gracinet created this revision. Herald added subscribers: mercurial-devel, kevincox, durin42. Herald added a reviewer: hg-reviewers. REVISION SUMMARY Once exposed through appropriate bindings, this should be able to replace ancestor.lazyancestors entirely. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5440 AFFECTED FILES rust/hg-core/src/ancestors.rs rust/hg-core/src/lib.rs rust/hg-cpython/src/cindex.rs rust/hg-direct-ffi/src/ancestors.rs CHANGE DETAILS diff --git a/rust/hg-direct-ffi/src/ancestors.rs b/rust/hg-direct-ffi/src/ancestors.rs --- a/rust/hg-direct-ffi/src/ancestors.rs +++ b/rust/hg-direct-ffi/src/ancestors.rs @@ -30,6 +30,12 @@ /// This implementation of the Graph trait, relies on (pointers to) /// - the C index object (`index` member) /// - the `index_get_parents()` function (`parents` member) +/// +/// In case of `.clone()` (would be from a full binding for `LazyAncestors` +/// that doesn't exist at the time of this writing), reference increasing +/// and decreasing would be the responsibility of the caller, same as it +/// is for `AncestorsIterator` +#[derive(Clone, Debug)] pub struct Index { index: IndexPtr, } diff --git a/rust/hg-cpython/src/cindex.rs b/rust/hg-cpython/src/cindex.rs --- a/rust/hg-cpython/src/cindex.rs +++ b/rust/hg-cpython/src/cindex.rs @@ -15,7 +15,7 @@ extern crate python3_sys as python_sys; use self::python_sys::PyCapsule_Import; -use cpython::{PyErr, PyObject, PyResult, Python}; +use cpython::{PyClone, PyErr, PyObject, PyResult, Python}; use hg::{Graph, GraphError, Revision}; use libc::{c_int, ssize_t}; use std::ffi::CStr; @@ -47,6 +47,16 @@ } } +impl Clone for Index { +fn clone() -> Self { +let guard = Python::acquire_gil(); +Index { +index: self.index.clone_ref(guard.python()), +parents: self.parents.clone(), +} +} +} + impl Graph for Index { /// wrap a call to the C extern parents function fn parents(, rev: Revision) -> Result<[Revision; 2], GraphError> { diff --git a/rust/hg-core/src/lib.rs b/rust/hg-core/src/lib.rs --- a/rust/hg-core/src/lib.rs +++ b/rust/hg-core/src/lib.rs @@ -2,8 +2,10 @@ // // This software may be used and distributed according to the terms of the // GNU General Public License version 2 or any later version. +use std::clone::Clone; + mod ancestors; -pub use ancestors::{AncestorsIterator, MissingAncestors}; +pub use ancestors::{AncestorsIterator, LazyAncestors, MissingAncestors}; /// Mercurial revision numbers /// @@ -14,7 +16,7 @@ pub const NULL_REVISION: Revision = -1; /// The simplest expression of what we need of Mercurial DAGs. -pub trait Graph { +pub trait Graph: Clone { /// Return the two parents of the given `Revision`. /// /// Each of the parents can be independently `NULL_REVISION` diff --git a/rust/hg-core/src/ancestors.rs b/rust/hg-core/src/ancestors.rs --- a/rust/hg-core/src/ancestors.rs +++ b/rust/hg-core/src/ancestors.rs @@ -25,6 +25,15 @@ stoprev: Revision, } +/// Lazy ancestors set, backed by AncestorsIterator +pub struct LazyAncestors { +graph: G, +containsiter: AncestorsIterator, +initrevs: Vec, +stoprev: Revision, +inclusive: bool, +} + pub struct MissingAncestors { graph: G, bases: HashSet, @@ -98,9 +107,34 @@ } Ok(false) } + +pub fn peek() -> Option { +self.visit.peek().map(|| r) +} + +/// Tell if the iterator is about an empty set +/// +/// The result does not depend whether the iterator has been consumed +/// or not. +/// This is mostly meant for iterators backing a lazy ancestors set +pub fn empty() -> bool { +if self.visit.len() > 0 { +return false; +} +let seen = self.seen.len(); +if seen > 1 { +return false; +} +// at this point, the seen set is a singleton. If not self.inclusive, +// then its only element can be the null revision +if seen == 0 || self.seen.contains(_REVISION) { +return true; +} +false +} } -/// Main implementation. +/// Main implementation for the iterator /// /// The algorithm is the same as in `_lazyancestorsiter()` from `ancestors.py` /// with a few non crucial differences: @@ -137,6 +171,51 @@ } } +impl LazyAncestors { +pub fn new( +graph: G, +initrevs: impl IntoIterator, +stoprev: Revision, +inclusive: bool, +) -> Result { +let v: Vec = initrevs.into_iter().collect(); +Ok(LazyAncestors { +graph: graph.clone(), +containsiter: AncestorsIterator::new( +graph, +v.iter().cloned(), +stoprev, +inclusive, +)?, +initrevs: v, +stoprev: stoprev, +inclusive: inclusive, +}) +} + +#[inline] +pub fn
D5441: rust-cpython: binding for LazyAncestors
gracinet created this revision. Herald added subscribers: mercurial-devel, kevincox, durin42. Herald added a reviewer: hg-reviewers. REVISION SUMMARY For consistency with Python implementation, we're exposing it with the lower case spelling, so that if one day the whole ancestor module has a Rust implementation, it would be readily compatible, ready for HGMODULEPOLICY. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5441 AFFECTED FILES rust/hg-cpython/src/ancestors.rs tests/test-rust-ancestor.py CHANGE DETAILS diff --git a/tests/test-rust-ancestor.py b/tests/test-rust-ancestor.py --- a/tests/test-rust-ancestor.py +++ b/tests/test-rust-ancestor.py @@ -9,7 +9,10 @@ rustext = None else: # this would fail already without appropriate ancestor.__package__ -from mercurial.rustext.ancestor import AncestorsIterator +from mercurial.rustext.ancestor import ( +AncestorsIterator, +lazyancestors +) try: from mercurial.cext import parsers as cparsers @@ -71,6 +74,37 @@ ait = AncestorsIterator(idx, [3], 0, False) self.assertEqual([r for r in ait], [2, 1, 0]) +def testlazyancestors(self): +idx = self.parseindex() +start_count = sys.getrefcount(idx) # should be 2 (see Python doc) +self.assertEqual({i: (r[5], r[6]) for i, r in enumerate(idx)}, + {0: (-1, -1), + 1: (0, -1), + 2: (1, -1), + 3: (2, -1)}) +lazy = lazyancestors(idx, [3], 0, True) +# we have two more references to the index: +# - in its inner iterator for __contains__ and __bool__ +# - in the lazyancestors instance itself (to spawn new iterators) +self.assertEqual(sys.getrefcount(idx), start_count + 2) + +self.assertTrue(2 in lazy) +self.assertTrue(bool(lazy)) +self.assertEqual([r for r in lazy], [3, 2, 1, 0]) +# a second time to validate that we spawn new iterators +self.assertEqual([r for r in lazy], [3, 2, 1, 0]) + +# now let's watch the refcounts closer +ait = iter(lazy) +self.assertEqual(sys.getrefcount(idx), start_count + 3) +del ait +self.assertEqual(sys.getrefcount(idx), start_count + 2) +del lazy +self.assertEqual(sys.getrefcount(idx), start_count) + +# let's check bool for an empty one +self.assertFalse(lazyancestors(idx, [0], 0, False)) + def testrefcount(self): idx = self.parseindex() start_count = sys.getrefcount(idx) diff --git a/rust/hg-cpython/src/ancestors.rs b/rust/hg-cpython/src/ancestors.rs --- a/rust/hg-cpython/src/ancestors.rs +++ b/rust/hg-cpython/src/ancestors.rs @@ -13,8 +13,8 @@ }; use exceptions::GraphError; use hg; -use hg::AncestorsIterator as CoreIterator; use hg::Revision; +use hg::{AncestorsIterator as CoreIterator, LazyAncestors as CoreLazy}; use std::cell::RefCell; /// Utility function to convert a Python iterable into a Vec @@ -85,6 +85,43 @@ } } +py_class!(class lazyancestors |py| { +// TODO RW lock ? +data inner: RefCell>>; + +def __contains__(, rev: Revision) -> PyResult { +self.inner(py).borrow_mut().contains(rev).map_err(|e| GraphError::pynew(py, e)) +} + +def __iter__() -> PyResult { +// TODO borrow() can panic, replace with try_borrow() +// or consider it's ok thanks to the GIL +AncestorsIterator::from_inner(py, + self.inner(py).borrow().iter()) +} + +def __bool__() -> PyResult { +// TODO borrow() panic, see __iter__() +Ok(!self.inner(py).borrow().is_empty()) +} + +def __new__(_cls, index: PyObject, initrevs: PyObject, stoprev: Revision, +inclusive: bool) -> PyResult { +let initvec = reviter_to_revvec(py, initrevs)?; + +let lazy = match CoreLazy::new( +Index::new(py, index)?, initvec, stoprev, inclusive) { +Ok(lazy) => lazy, +Err(e) => { +return Err(GraphError::new(py, format!("{:?}", e))); +} +}; + +Self::create_instance(py, RefCell::new(Box::new(lazy))) +} + +}); + /// Create the module, with __package__ given from parent pub fn init_module(py: Python, package: ) -> PyResult { let dotted_name = !("{}.ancestor", package); @@ -96,6 +133,7 @@ "Generic DAG ancestor algorithms - Rust implementation", )?; m.add_class::(py)?; +m.add_class::(py)?; let sys = PyModule::import(py, "sys")?; let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?; To: gracinet, #hg-reviewers Cc: durin42, kevincox, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5438: rust-cpython: implementing Graph using C parents function
gracinet created this revision. Herald added subscribers: mercurial-devel, kevincox, durin42. Herald added a reviewer: hg-reviewers. REVISION SUMMARY A pointer to the parents function is stored on the parsers C extension module as a capsule object. This is the recommended way to export a C API for consumption from other extensions. See also: https://docs.python.org/2.7/c-api/capsule.html In our case, we use it in cindex.rs, retrieving function pointer from the capsule and storing it within the CIndex struct, alongside with a pointer to the index. From there, the implementation is very close to the one from hg-direct-ffi. The naming convention for the capsule is inspired from the one in datetime: >>> import datetime >>> datetime.datetime_CAPI REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5438 AFFECTED FILES mercurial/cext/revlog.c rust/hg-cpython/src/cindex.rs rust/hg-cpython/src/lib.rs CHANGE DETAILS diff --git a/rust/hg-cpython/src/lib.rs b/rust/hg-cpython/src/lib.rs --- a/rust/hg-cpython/src/lib.rs +++ b/rust/hg-cpython/src/lib.rs @@ -21,6 +21,7 @@ #[macro_use] extern crate cpython; extern crate hg; +extern crate libc; mod ancestors; mod exceptions; diff --git a/rust/hg-cpython/src/cindex.rs b/rust/hg-cpython/src/cindex.rs new file mode 100644 --- /dev/null +++ b/rust/hg-cpython/src/cindex.rs @@ -0,0 +1,95 @@ +// cindex.rs +// +// Copyright 2018 Georges Racinet +// +// This software may be used and distributed according to the terms of the +// GNU General Public License version 2 or any later version. + +//! Bindings to use the Index defined by the parsers C extension +//! +//! Ideally, we should use an Index entirely implemented in Rust, +//! but this will take some time to get there. +#[cfg(feature = "python27")] +extern crate python27_sys as python_sys; +#[cfg(feature = "python3")] +extern crate python3_sys as python_sys; + +use self::python_sys::PyCapsule_Import; +use cpython::{PyErr, PyObject, PyResult, Python}; +use hg::{Graph, GraphError, Revision}; +use libc::{c_int, ssize_t}; +use std::ffi::CStr; +use std::mem::transmute; + +type IndexParentsFn = unsafe extern "C" fn( +index: *mut python_sys::PyObject, +rev: ssize_t, +ps: *mut [c_int; 2], +max_rev: c_int, +) -> c_int; + +/// A Graph backed up by objects and functions from revlog.c +/// +/// This implementation of the Graph trait, relies on (pointers to) +/// - the C index object (`index` member) +/// - the `index_get_parents()` function (`parents` member) +pub struct Index { +index: PyObject, +parents: IndexParentsFn, +} + +impl Index { +pub fn new(py: Python, index: PyObject) -> PyResult { +Ok(Index { +index: index, +parents: decapsule_parents_fn(py)?, +}) +} +} + +impl Graph for Index { +/// wrap a call to the C extern parents function +fn parents(, rev: Revision) -> Result<[Revision; 2], GraphError> { +let mut res: [c_int; 2] = [0; 2]; +let code = unsafe { +(self.parents)( +self.index.as_ptr(), +rev as ssize_t, + res as *mut [c_int; 2], +rev, +) +}; +match code { +0 => Ok(res), +_ => Err(GraphError::ParentOutOfRange(rev)), +} +} +} + +/// Return the `index_get_parents` function of the parsers C Extension module. +/// +/// A pointer to the function is stored in the `parsers` module as a +/// standard [Python capsule](https://docs.python.org/2/c-api/capsule.html). +/// +/// This function retrieves the capsule and casts the function pointer +/// +/// Casting function pointers is one of the rare cases of +/// legitimate use cases of `mem::transmute()` (see +/// https://doc.rust-lang.org/std/mem/fn.transmute.html of +/// `mem::transmute()`. +/// It is inappropriate for architectures where +/// function and data pointer sizes differ (so-called "Harvard +/// architectures"), but these are nowadays mostly DSPs +/// and microcontrollers, hence out of our scope. +fn decapsule_parents_fn(py: Python) -> PyResult { +unsafe { +let caps_name = CStr::from_bytes_with_nul_unchecked( +b"mercurial.cext.parsers.index_get_parents_CAPI\0", +); +let from_caps = PyCapsule_Import(caps_name.as_ptr(), 0); +if from_caps.is_null() { +return Err(PyErr::fetch(py)); +} +Ok(transmute(from_caps)) +} +} diff --git a/mercurial/cext/revlog.c b/mercurial/cext/revlog.c --- a/mercurial/cext/revlog.c +++ b/mercurial/cext/revlog.c @@ -2878,6 +2878,12 @@ if (nullentry) PyObject_GC_UnTrack(nullentry); + void *caps = PyCapsule_New( + HgRevlogIndex_GetParents, + "mercurial.cext.parsers.index_get_parents_CAPI", NULL); +
D5434: rust-cpython: started cpython crate bindings
gracinet created this revision. Herald added subscribers: mercurial-devel, kevincox, durin42. Herald added a reviewer: hg-reviewers. REVISION SUMMARY This changeset introduces the hg-cpython crate, that compiles as a shared library holding a whole Python package (mercurial.rustext), with only the empty 'ancestor' submodule for now. Such bindings will be easier and safer to develop and maintain that those of `hg-direct-ffi`. They don't involve C code, only unsafe Rust that's mostly isolated within the cpython crate. The long-term goal would be to import the provided modules, such as rustext.ancestor with mercurial.policy.importmod, same as we already do with cext modules. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5434 AFFECTED FILES rust/Cargo.lock rust/Cargo.toml rust/hg-cpython/Cargo.toml rust/hg-cpython/rustfmt.toml rust/hg-cpython/src/ancestors.rs rust/hg-cpython/src/exceptions.rs rust/hg-cpython/src/lib.rs CHANGE DETAILS diff --git a/rust/hg-cpython/src/lib.rs b/rust/hg-cpython/src/lib.rs new file mode 100644 --- /dev/null +++ b/rust/hg-cpython/src/lib.rs @@ -0,0 +1,40 @@ +// lib.rs +// +// Copyright 2018 Georges Racinet +// +// This software may be used and distributed according to the terms of the +// GNU General Public License version 2 or any later version. + +//! Python bindings of `hg-core` objects using the `cpython` crate. +//! Once compiled, the resulting single shared library object can be placed in +//! the `mercurial` package directly as `rustext.so` or `rustext.dll`. +//! It holds several modules, so that from the point of view of Python, +//! it behaves as the `cext` package. +//! +//! Example: +//! ``` +//! >>> from mercurial.rustext import ancestor +//! >>> ancestor.__doc__ +//! 'Generic DAG ancestor algorithms - Rust implementation' +//! ``` + +#[macro_use] +extern crate cpython; +extern crate hg; + +mod ancestors; +mod exceptions; + +py_module_initializer!(rustext, initrustext, PyInit_rustext, |py, m| { +m.add( +py, +"__doc__", +"Mercurial core concepts - Rust implementation", +)?; + +let dotted_name: String = m.get(py, "__name__")?.extract(py)?; +m.add(py, "__package__", "mercurial")?; +m.add(py, "ancestor", ancestors::init_module(py, _name)?)?; +m.add(py, "GraphError", py.get_type::())?; +Ok(()) +}); diff --git a/rust/hg-cpython/src/exceptions.rs b/rust/hg-cpython/src/exceptions.rs new file mode 100644 --- /dev/null +++ b/rust/hg-cpython/src/exceptions.rs @@ -0,0 +1,15 @@ +use cpython::exc::ValueError; +use cpython::{PyErr, Python}; +use hg; + +py_exception!(rustext, GraphError, ValueError); + +impl GraphError { +pub fn pynew(py: Python, inner: hg::GraphError) -> PyErr { +match inner { +hg::GraphError::ParentOutOfRange(r) => { +GraphError::new(py, ("ParentOutOfRange", r)) +} +} +} +} diff --git a/rust/hg-cpython/src/ancestors.rs b/rust/hg-cpython/src/ancestors.rs new file mode 100644 --- /dev/null +++ b/rust/hg-cpython/src/ancestors.rs @@ -0,0 +1,30 @@ +// ancestors.rs +// +// Copyright 2018 Georges Racinet +// +// This software may be used and distributed according to the terms of the +// GNU General Public License version 2 or any later version. + +//! Bindings for the hg::ancestors module provided by the +//! `hg-core` crate. From Python, this will be seen as `rustext.ancestor` +use cpython::{PyDict, PyModule, PyResult, Python}; + +/// Create the module, with __package__ given from parent +pub fn init_module(py: Python, package: ) -> PyResult { +let dotted_name = !("{}.ancestor", package); +let m = PyModule::new(py, dotted_name)?; +m.add(py, "__package__", package)?; +m.add( +py, +"__doc__", +"Generic DAG ancestor algorithms - Rust implementation", +)?; + +let sys = PyModule::import(py, "sys")?; +let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?; +sys_modules.set_item(py, dotted_name, )?; +// Example C code (see pyexpat.c and import.c) will "give away the +// reference", but we won't because it will be consumed once the +// Rust PyObject is dropped. +Ok(m) +} diff --git a/rust/hg-cpython/rustfmt.toml b/rust/hg-cpython/rustfmt.toml new file mode 100644 --- /dev/null +++ b/rust/hg-cpython/rustfmt.toml @@ -0,0 +1,3 @@ +max_width = 79 +wrap_comments = true +error_on_line_overflow = true diff --git a/rust/hg-cpython/Cargo.toml b/rust/hg-cpython/Cargo.toml new file mode 100644 --- /dev/null +++ b/rust/hg-cpython/Cargo.toml @@ -0,0 +1,31 @@ +[package] +name = "hg-cpython" +version = "0.1.0" +authors = ["Georges Racinet "] + +[lib] +name='rusthg' +crate-type = ["cdylib"] + +[fea
D5437: rust-cpython: testing the bindings from Python
gracinet created this revision. Herald added a subscriber: mercurial-devel. Herald added a reviewer: hg-reviewers. REVISION SUMMARY This is easier and more convincing than doing the same tests from a Rust tests module. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5437 AFFECTED FILES tests/test-rust-ancestor.py CHANGE DETAILS diff --git a/tests/test-rust-ancestor.py b/tests/test-rust-ancestor.py new file mode 100644 --- /dev/null +++ b/tests/test-rust-ancestor.py @@ -0,0 +1,39 @@ +from __future__ import absolute_import +import unittest + +try: +from mercurial import rustext +except ImportError: +rustext = None + +try: +from mercurial.cext import parsers as cparsers +except ImportError: +cparsers = None + +@unittest.skipIf(rustext is None or cparsers is None, + "rustext.ancestor or the C Extension parsers module " + "it relies on is not available") +class rustancestorstest(unittest.TestCase): +"""Test the correctness of binding to Rust code. + +This test is merely for the binding to Rust itself: extraction of +Python variable, giving back the results etc. + +It is not meant to test the algorithmic correctness of the operations +on ancestors it provides. Hence the very simple embedded index data is +good enough. + +Algorithmic correctness is asserted by the Rust unit tests. +""" + +def testmodule(self): +self.assertTrue('DAG' in rustext.ancestor.__doc__) + +def testgrapherror(self): +self.assertTrue('GraphError' in dir(rustext)) + + +if __name__ == '__main__': +import silenttestrunner +silenttestrunner.main(__name__) To: gracinet, #hg-reviewers Cc: mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5439: rust-cpython: binding for AncestorsIterator
gracinet created this revision. Herald added subscribers: mercurial-devel, kevincox, durin42. Herald added a reviewer: hg-reviewers. REVISION SUMMARY It's now reachable from Python as rustext.ancestor.AncestorsIterator Tests are provided in the previously introduced Python testcase: this is much more convenient that writing lengthy Rust code to call into Python. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5439 AFFECTED FILES rust/hg-cpython/src/ancestors.rs rust/hg-cpython/src/lib.rs tests/test-rust-ancestor.py CHANGE DETAILS diff --git a/tests/test-rust-ancestor.py b/tests/test-rust-ancestor.py --- a/tests/test-rust-ancestor.py +++ b/tests/test-rust-ancestor.py @@ -1,19 +1,46 @@ from __future__ import absolute_import +import sys import unittest try: from mercurial import rustext +rustext.__name__ # trigger immediate actual import except ImportError: rustext = None +else: +# this would fail already without appropriate ancestor.__package__ +from mercurial.rustext.ancestor import AncestorsIterator try: from mercurial.cext import parsers as cparsers except ImportError: cparsers = None +# picked from test-parse-index2, copied rather than imported +# so that it stays stable even if test-parse-index2 changes or disappears. +data_non_inlined = ( +b'\x00\x00\x00\x01\x00\x00\x00\x00\x00\x01D\x19' +b'\x00\x07e\x12\x00\x00\x00\x00\x00\x00\x00\x00\xff\xff\xff\xff' +b'\xff\xff\xff\xff\xd1\xf4\xbb\xb0\xbe\xfc\x13\xbd\x8c\xd3\x9d' +b'\x0f\xcd\xd9;\x8c\x07\x8cJ/\x00\x00\x00\x00\x00\x00\x00\x00\x00' +b'\x00\x00\x00\x00\x00\x00\x01D\x19\x00\x00\x00\x00\x00\xdf\x00' +b'\x00\x01q\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x00\xff' +b'\xff\xff\xff\xc1\x12\xb9\x04\x96\xa4Z1t\x91\xdfsJ\x90\xf0\x9bh' +b'\x07l&\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' +b'\x00\x01D\xf8\x00\x00\x00\x00\x01\x1b\x00\x00\x01\xb8\x00\x00' +b'\x00\x01\x00\x00\x00\x02\x00\x00\x00\x01\xff\xff\xff\xff\x02\n' +b'\x0e\xc6&\xa1\x92\xae6\x0b\x02i\xfe-\xe5\xbao\x05\xd1\xe7\x00' +b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01F' +b'\x13\x00\x00\x00\x00\x01\xec\x00\x00\x03\x06\x00\x00\x00\x01' +b'\x00\x00\x00\x03\x00\x00\x00\x02\xff\xff\xff\xff\x12\xcb\xeby1' +b'\xb6\r\x98B\xcb\x07\xbd`\x8f\x92\xd9\xc4\x84\xbdK\x00\x00\x00' +b'\x00\x00\x00\x00\x00\x00\x00\x00\x00' +) + + @unittest.skipIf(rustext is None or cparsers is None, - "rustext.ancestor or the C Extension parsers module " - "it relies on is not available") + "rustext or the C Extension parsers module " + "ancestor relies on is not available") class rustancestorstest(unittest.TestCase): """Test the correctness of binding to Rust code. @@ -27,12 +54,51 @@ Algorithmic correctness is asserted by the Rust unit tests. """ -def testmodule(self): -self.assertTrue('DAG' in rustext.ancestor.__doc__) +def parseindex(self): +return cparsers.parse_index2(data_non_inlined, False)[0] + +def testiteratorrevlist(self): +idx = self.parseindex() +# checking test assumption about the index binary data: +self.assertEqual({i: (r[5], r[6]) for i, r in enumerate(idx)}, + {0: (-1, -1), + 1: (0, -1), + 2: (1, -1), + 3: (2, -1)}) +ait = AncestorsIterator(idx, [3], 0, True) +self.assertEqual([r for r in ait], [3, 2, 1, 0]) + +ait = AncestorsIterator(idx, [3], 0, False) +self.assertEqual([r for r in ait], [2, 1, 0]) + +def testrefcount(self): +idx = self.parseindex() +start_count = sys.getrefcount(idx) + +# refcount increases upon iterator init... +ait = AncestorsIterator(idx, [3], 0, True) +self.assertEqual(sys.getrefcount(idx), start_count + 1) +self.assertEqual(next(ait), 3) + +# and decreases once the iterator is removed +del ait +self.assertEqual(sys.getrefcount(idx), start_count) + +# and removing ref to the index after iterator init is no issue +ait = AncestorsIterator(idx, [3], 0, True) +del idx +self.assertEqual([r for r in ait], [3, 2, 1, 0]) def testgrapherror(self): -self.assertTrue('GraphError' in dir(rustext)) - +data = (data_non_inlined[:64 + 27] + +b'\xf2' + +data_non_inlined[64 + 28:]) +idx = cparsers.parse_index2(data, False)[0] +with self.assertRaises(rustext.GraphError) as arc: +AncestorsIterator(idx, [1], -1, False) +exc = arc.exception +self.assertIsInstance(exc, ValueError) +self.assertEqual(exc.args, (b'ParentOutOfRange', 1)) if __name__ == '__main__': import silenttestrunner diff --git a/rust/hg-cpython/src/lib.rs
D5436: rust-cpython: build via HGWITHRUSTEXT=cpython
gracinet created this revision. Herald added a subscriber: mercurial-devel. Herald added a reviewer: hg-reviewers. REVISION SUMMARY The existing behaviour, building the direct ffi bindings if HGIWTHRUSTEXT is just set is unchanged, but if HGWITHRUSTEXT is cpython, then the cpython bindings (aka mercurial/rustext.so) are built. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5436 AFFECTED FILES Makefile setup.py CHANGE DETAILS diff --git a/setup.py b/setup.py --- a/setup.py +++ b/setup.py @@ -132,7 +132,11 @@ ispypy = "PyPy" in sys.version -iswithrustextensions = 'HGWITHRUSTEXT' in os.environ +hgrustext = os.environ.get('HGWITHRUSTEXT') +# TODO record it for proper rebuild upon changes +# (see mercurial/__modulepolicy__.py) +if hgrustext != 'cpython' and hgrustext is not None: +hgrustext = 'direct-ffi' import ctypes import stat, subprocess, time @@ -457,11 +461,18 @@ return build_ext.initialize_options(self) def build_extensions(self): +ruststandalones = [e for e in self.extensions + if isinstance(e, RustStandaloneExtension)] +self.extensions = [e for e in self.extensions + if e not in ruststandalones] # Filter out zstd if disabled via argument. if not self.zstd: self.extensions = [e for e in self.extensions if e.name != 'mercurial.zstd'] +for rustext in ruststandalones: +rustext.build('' if self.inplace else self.build_lib) + return build_ext.build_extensions(self) def build_extension(self, ext): @@ -902,20 +913,16 @@ """Exception class for Rust compilation errors.""" class RustExtension(Extension): -"""A C Extension, conditionnally enhanced with Rust code. - -if iswithrustextensions is False, does nothing else than plain Extension +"""Base classes for concrete Rust Extension classes. """ rusttargetdir = os.path.join('rust', 'target', 'release') def __init__(self, mpath, sources, rustlibname, subcrate, **kw): Extension.__init__(self, mpath, sources, **kw) -if not iswithrustextensions: +if hgrustext is None: return srcdir = self.rustsrcdir = os.path.join('rust', subcrate) -self.libraries.append(rustlibname) -self.extra_compile_args.append('-DWITH_RUST') # adding Rust source and control files to depends so that the extension # gets rebuilt if they've changed @@ -929,7 +936,7 @@ if os.path.splitext(fname)[1] == '.rs') def rustbuild(self): -if not iswithrustextensions: +if hgrustext is None: return env = os.environ.copy() if 'HGTEST_RESTOREENV' in env: @@ -961,6 +968,40 @@ "Cargo failed. Working directory: %r, " "command: %r, environment: %r" % (self.rustsrcdir, cmd, env)) +class RustEnhancedExtension(RustExtension): +"""A C Extension, conditionally enhanced with Rust code. + +If the HGRUSTEXT environment variable is set to something else +than 'cpython', the Rust sources get compiled and linked within the +C target shared library object. +""" + +def __init__(self, mpath, sources, rustlibname, subcrate, **kw): +RustExtension.__init__(self, mpath, sources, rustlibname, subcrate, + **kw) +if hgrustext != 'direct-ffi': +return +self.extra_compile_args.append('-DWITH_RUST') +self.libraries.append(rustlibname) +self.library_dirs.append(self.rusttargetdir) + +class RustStandaloneExtension(RustExtension): + +def __init__(self, pydottedname, rustcrate, dylibname, **kw): +RustExtension.__init__(self, pydottedname, [], dylibname, rustcrate, + **kw) +self.dylibname = dylibname + +def build(self, target_dir): +self.rustbuild() +target = [target_dir] +target.extend(self.name.split('.')) +ext = '.so' # TODO Unix only +target[-1] += ext +shutil.copy2(os.path.join(self.rusttargetdir, self.dylibname + ext), + os.path.join(*target)) + + extmodules = [ Extension('mercurial.cext.base85', ['mercurial/cext/base85.c'], include_dirs=common_include_dirs, @@ -973,19 +1014,20 @@ 'mercurial/cext/mpatch.c'], include_dirs=common_include_dirs, depends=common_depends), -RustExtension('mercurial.cext.parsers', ['mercurial/cext/charencode.c', - 'mercurial/cext/dirs.c', - 'mercurial/cext/manifest.c', - 'mercurial/cext/parsers.c', - 'mercurial/cext/pathencode.c', -
D5435: rust: better treatment of cargo/rustc errors
gracinet created this revision. Herald added a subscriber: mercurial-devel. Herald added a reviewer: hg-reviewers. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5435 AFFECTED FILES setup.py CHANGE DETAILS diff --git a/setup.py b/setup.py --- a/setup.py +++ b/setup.py @@ -898,6 +898,9 @@ 'mercurial/thirdparty/xdiff/xutils.h', ] +class RustCompilationError(CCompilerError): +"""Exception class for Rust compilation errors.""" + class RustExtension(Extension): """A C Extension, conditionnally enhanced with Rust code. @@ -942,9 +945,21 @@ import pwd env['HOME'] = pwd.getpwuid(os.getuid()).pw_dir -subprocess.check_call(['cargo', 'build', '-vv', '--release'], - env=env, cwd=self.rustsrcdir) -self.library_dirs.append(self.rusttargetdir) +cargocmd = ['cargo', 'build', '-vv', '--release'] +try: +subprocess.check_call(cargocmd, env=env, cwd=self.rustsrcdir) +except OSError as exc: +if exc.errno == os.errno.ENOENT: +raise RustCompilationError("Cargo not found") +elif exc.errno == os.errno.EACCES: +raise RustCompilationError( +"Cargo found, but permisssion to execute it is denied") +else: +raise +except subprocess.CalledProcessError: +raise RustCompilationError( +"Cargo failed. Working directory: %r, " +"command: %r, environment: %r" % (self.rustsrcdir, cmd, env)) extmodules = [ Extension('mercurial.cext.base85', ['mercurial/cext/base85.c'], To: gracinet, #hg-reviewers Cc: mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5433: rust-cpython: excluded hgcli from workspace
gracinet created this revision. Herald added a subscriber: mercurial-devel. Herald added a reviewer: hg-reviewers. REVISION SUMMARY hgcli uses a specific rust-cpython commit by indygreg, of which a PR has been derived which is not merged nor released yet. But we can't use several versions of the sys-python2.7 crate in a single workspace: it makes for a build error. Since hgcli does not at the time being need anything from hg-core, whereas the upcoming hg-cpython will. So for now we're moving hgcli aside, hoping we could base all of them on the same version of rust-cpython again in the future. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5433 AFFECTED FILES rust/Cargo.lock rust/Cargo.toml rust/hgcli/Cargo.lock CHANGE DETAILS diff --git a/rust/hgcli/Cargo.lock b/rust/hgcli/Cargo.lock new file mode 100644 --- /dev/null +++ b/rust/hgcli/Cargo.lock @@ -0,0 +1,136 @@ +[[package]] +name = "aho-corasick" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index; +dependencies = [ + "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "cpython" +version = "0.1.0" +source = "git+https://github.com/indygreg/rust-cpython.git?rev=c90d65cf84abfffce7ef54476bbfed56017a2f52#c90d65cf84abfffce7ef54476bbfed56017a2f52; +dependencies = [ + "libc 0.2.45 (registry+https://github.com/rust-lang/crates.io-index)", + "num-traits 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)", + "python27-sys 0.1.2 (git+https://github.com/indygreg/rust-cpython.git?rev=c90d65cf84abfffce7ef54476bbfed56017a2f52)", +] + +[[package]] +name = "hgcli" +version = "0.1.0" +dependencies = [ + "cpython 0.1.0 (git+https://github.com/indygreg/rust-cpython.git?rev=c90d65cf84abfffce7ef54476bbfed56017a2f52)", + "libc 0.2.45 (registry+https://github.com/rust-lang/crates.io-index)", + "python27-sys 0.1.2 (git+https://github.com/indygreg/rust-cpython.git?rev=c90d65cf84abfffce7ef54476bbfed56017a2f52)", +] + +[[package]] +name = "kernel32-sys" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index; +dependencies = [ + "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "libc" +version = "0.2.45" +source = "registry+https://github.com/rust-lang/crates.io-index; + +[[package]] +name = "memchr" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index; +dependencies = [ + "libc 0.2.45 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "num-traits" +version = "0.1.43" +source = "registry+https://github.com/rust-lang/crates.io-index; +dependencies = [ + "num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "num-traits" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index; + +[[package]] +name = "python27-sys" +version = "0.1.2" +source = "git+https://github.com/indygreg/rust-cpython.git?rev=c90d65cf84abfffce7ef54476bbfed56017a2f52#c90d65cf84abfffce7ef54476bbfed56017a2f52; +dependencies = [ + "libc 0.2.45 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "regex" +version = "0.1.80" +source = "registry+https://github.com/rust-lang/crates.io-index; +dependencies = [ + "aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-syntax 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", + "thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)", + "utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "regex-syntax" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index; + +[[package]] +name = "thread-id" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index; +dependencies = [ + "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.45 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "thread_local" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index; +dependencies = [ + "thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "utf8-ranges" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index; + +[[package]] +name = "winapi" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index; + +[[package]] +name = "winapi-build" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index; + +[metadata] +"checksum aho-corasick 0.5.3
D5417: rust: translated random test of missingancestors
gracinet added a comment. @yuja: I'll look into it, maybe that's a case for benches (I've not played with them yet). REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5417 To: gracinet, #hg-reviewers Cc: yuja, durin42, kevincox, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5416: rust: translation of missingancestors
This revision was automatically updated to reflect the committed changes. gracinet marked an inline comment as done. Closed by commit rHG5817c3b186a7: rust: translation of missingancestors (authored by gracinet, committed by ). REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D5416?vs=12855=12869 REVISION DETAIL https://phab.mercurial-scm.org/D5416 AFFECTED FILES rust/hg-core/src/ancestors.rs rust/hg-core/src/lib.rs tests/test-ancestor.py tests/test-ancestor.py.out CHANGE DETAILS diff --git a/tests/test-ancestor.py.out b/tests/test-ancestor.py.out --- a/tests/test-ancestor.py.out +++ b/tests/test-ancestor.py.out @@ -1,3 +1,17 @@ +% removeancestorsfrom(), example 1 +remaining (sorted): [5, 6, 8, 9] +% removeancestorsfrom(), example 2 +remaining (sorted): [11, 12, 13, 14] +% removeancestorsfrom(), example 3 +remaining (sorted): [3, 5] +% missingancestors(), example 1 +return [3, 7, 11] +% missingancestors(), example 2 +return [5, 10] +% missingancestors(), example 3 +return [3, 6, 9, 11] +% removeancestorsfrom(), bigger graph +Ok % lazy ancestor set for [], stoprev = 0, inclusive = False membership: [] iteration: [] diff --git a/tests/test-ancestor.py b/tests/test-ancestor.py --- a/tests/test-ancestor.py +++ b/tests/test-ancestor.py @@ -182,6 +182,64 @@ 5: [4, -1], 6: [4, -1], 7: [4, -1], 8: [-1, -1], 9: [6, 7], 10: [5, -1], 11: [3, 7], 12: [9, -1], 13: [8, -1]} +def test_missingancestors_explicit(): +"""A few explicit cases, easier to check for catching errors in refactors. + +The bigger graph at the end has been produced by the random generator +above, and we have some evidence that the other tests don't cover it. +""" +for i, (bases, revs) in enumerate((({1, 2, 3, 4, 7}, set(xrange(10))), + ({10}, set({11, 12, 13, 14})), + ({7}, set({1, 2, 3, 4, 5})), + )): +print("%% removeancestorsfrom(), example %d" % (i + 1)) +missanc = ancestor.incrementalmissingancestors(graph.get, bases) +missanc.removeancestorsfrom(revs) +print("remaining (sorted): %s" % sorted(list(revs))) + +for i, (bases, revs) in enumerate((({10}, {11}), + ({11}, {10}), + ({7}, {9, 11}), + )): +print("%% missingancestors(), example %d" % (i + 1)) +missanc = ancestor.incrementalmissingancestors(graph.get, bases) +print("return %s" % missanc.missingancestors(revs)) + +print("% removeancestorsfrom(), bigger graph") +vecgraph = [ +[-1, -1], [0, -1], [1, 0], [2, 1], [3, -1], [4, -1], [5, 1], +[2, -1], [7, -1], [8, -1], [9, -1], [10, 1], [3, -1], [12, -1], +[13, -1], [14, -1], [4, -1], [16, -1], [17, -1], [18, -1], +[19, 11], [20, -1], [21, -1], [22, -1], [23, -1], [2, -1], +[3, -1], [26, 24], [27, -1], [28, -1], [12, -1], [1, -1], [1, 9], +[32, -1], [33, -1], [34, 31], [35, -1], [36, 26], [37, -1], +[38, -1], [39, -1], [40, -1], [41, -1], [42, 26], [0, -1], +[44, -1], [45, 4], [40, -1], [47, -1], [36, 0], [49, -1], +[-1, -1], [51, -1], [52, -1], [53, -1], [14, -1], +[55, -1], [15, -1], [23, -1], [58, -1], [59, -1], [2, -1], +[61, 59], [62, -1], [63, -1], [-1, -1], [65, -1], +[66, -1], [67, -1], [68, -1], [37, 28], [69, 25], +[71, -1], [72, -1], [50, 2], [74, -1], [12, -1], +[18, -1], [77, -1], [78, -1], [79, -1], [43, 33], +[81, -1], [82, -1], [83, -1], [84, 45], [85, -1], +[86, -1], [-1, -1], [88, -1], [-1, -1], [76, 83], [44, -1], +[92, -1], [93, -1], [9, -1], [95, 67], [96, -1], [97, -1], +[-1, -1]] +problem_rev = 28 +problem_base = 70 +# problem_rev is a parent of problem_base, but a faulty implementation +# could forget to remove it. +bases = {60, 26, 70, 3, 96, 19, 98, 49, 97, 47, 1, 6} +if problem_rev not in vecgraph[problem_base] or problem_base not in bases: +print("Conditions have changed") +missanc = ancestor.incrementalmissingancestors(vecgraph.__getitem__, bases) +revs = {4, 12, 41, 28, 68, 38, 1, 30, 56, 44} +missanc.removeancestorsfrom(revs) +if 28 in revs: +print("Failed!") +else: +print("Ok") + def genlazyancestors(revs, stoprev=0, inclusive=False): print(("%% lazy ancestor set for %s, stoprev = %s, inclusive = %s" % (revs, stoprev, inclusive))) @@ -276,6 +334,7 @@ seed = long(time.time() * 1000) rng = random.Random(seed) +test_missingancestors_explicit() test_missingancestors(seed, rng) test_lazyancestors() test_gca() diff --git a/rust/hg-core/src/lib.rs b/rust/hg-core/src/lib.rs --- a/rust/hg-core/src/lib.rs +++ b/rust/hg-core/src/lib.rs @@ -3,7 +3,7 @@ // This software may
D5416: rust: translation of missingancestors
gracinet marked 2 inline comments as done. gracinet added a comment. Thanks for the useful tips! INLINE COMMENTS > kevincox wrote in ancestors.rs:154 > Why not just write: > > self.bases.iter().any(|b| != NULL_REVISION) > > It is much clearer what you mean and I suspect the performance is very > similar. See the below example. If you want the performance then I prefer `c` > which is even a bit faster looking. > > https://rust.godbolt.org/z/-JJ61P I like the `any()` variant. Performance for that method isn't really critical anyway. > kevincox wrote in ancestors.rs:161 > Would it be possible to return a more abstract type such as > `Interator`? I would also prefer to just enclose this > completely as IIUC we are somewhat lazily computing the bases so this > function has some unspecified preconditions. I agree it's not really pretty, even with Rust mutability rules having our back, but I'm not sure: the only caller candidate besides unit tests (Python `setdiscovery` module) is happy to read from the `bases` attribute directly, and hence cope with the fact that it is a moving target. One advantage of returning `` is that we'll get a straightforward conversion to Python sets for free as soon as the bindings support them (see also https://github.com/dgrunwald/rust-cpython/pull/165) It's quite possible that we'll see some refactoring of `setdiscovery` and these missing ancestors in the future. I believe it'll be a good time to revise this if that becomes an issue. > kevincox wrote in ancestors.rs:448 > I'm surprised that the `.iter().cloned()` is required with the argument being > an `impl IntoIterator` I believe that is because `IntoIterator` is implemented only for references to arrays, (with `Item` being references, of course). A bit frustrating (after all `vec!` was simpler, but it doesn't matter much in the context of this test). REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5416 To: gracinet, #hg-reviewers Cc: durin42, kevincox, mjpieters, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5417: rust: translated random test of missingancestors
gracinet updated this revision to Diff 12856. REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D5417?vs=12833=12856 REVISION DETAIL https://phab.mercurial-scm.org/D5417 AFFECTED FILES rust/Cargo.lock rust/hg-core/Cargo.toml rust/hg-core/src/ancestors.rs CHANGE DETAILS diff --git a/rust/hg-core/src/ancestors.rs b/rust/hg-core/src/ancestors.rs --- a/rust/hg-core/src/ancestors.rs +++ b/rust/hg-core/src/ancestors.rs @@ -318,6 +318,11 @@ mod tests { use super::*; +extern crate rand; +use self::rand::distributions::{Distribution, LogNormal, Uniform}; +use self::rand::{thread_rng, Rng, RngCore}; +use std::cmp::min; +use std::fmt::Debug; use std::iter::FromIterator; #[derive(Clone, Debug)] @@ -643,4 +648,249 @@ assert!(!revs.contains(_rev)); } +fn build_random_graph( +nodes_opt: Option, +rootprob_opt: Option, +mergeprob_opt: Option, +prevprob_opt: Option, +) -> VecGraph { +let nodes = nodes_opt.unwrap_or(100); +let rootprob = rootprob_opt.unwrap_or(0.05); +let mergeprob = mergeprob_opt.unwrap_or(0.2); +let prevprob = prevprob_opt.unwrap_or(0.7); + +let mut rng = thread_rng(); +let mut vg: VecGraph = Vec::with_capacity(nodes); +for i in 0..nodes { +if i == 0 || rng.gen_bool(rootprob) { +vg.push([NULL_REVISION, NULL_REVISION]) +} else if i == 1 { +vg.push([0, NULL_REVISION]) +} else if rng.gen_bool(mergeprob) { +let p1 = { +if i == 2 || rng.gen_bool(prevprob) { +(i - 1) as Revision +} else { +rng.gen_range(0, i - 1) as Revision +} +}; +// p2 is a random revision lower than i and different from p1 +let mut p2 = rng.gen_range(0, i - 1) as Revision; +if p2 >= p1 { +p2 = p2 + 1; +} +vg.push([p1, p2]); +} else if rng.gen_bool(prevprob) { +vg.push([(i - 1) as Revision, NULL_REVISION]) +} else { +vg.push([rng.gen_range(0, i - 1) as Revision, NULL_REVISION]) +} +} +vg +} + +/// Compute the ancestors set of all revisions of a VecGraph +fn ancestors_sets(vg: ) -> Vec> { +let mut ancs: Vec> = Vec::new(); +for i in 0..vg.len() { +let mut ancs_i = HashSet::new(); +ancs_i.insert(i as Revision); +for p in vg[i].iter().cloned() { +if p != NULL_REVISION { +ancs_i.extend([p as usize]); +} +} +ancs.push(ancs_i); +} +ancs +} + +#[derive(Clone, Debug)] +enum MissingAncestorsAction { +InitialBases(HashSet), +AddBases(HashSet), +RemoveAncestorsFrom(HashSet), +MissingAncestors(HashSet), +} + +/// An instrumented naive yet obviously correct implementation +/// +/// It also records all its actions for easy reproduction for replay +/// of problematic cases +struct NaiveMissingAncestors<'a> { +ancestors_sets: &'a Vec>, +graph: &'a VecGraph, // used for error reporting only +bases: HashSet, +history: Vec, +} + +impl<'a> NaiveMissingAncestors<'a> { +fn new( +graph: &'a VecGraph, +ancestors_sets: &'a Vec>, +bases: , +) -> Self { +Self { +ancestors_sets: ancestors_sets, +bases: bases.clone(), +graph: graph, +history: vec![MissingAncestorsAction::InitialBases( +bases.clone(), +)], +} +} + +fn add_bases( self, new_bases: HashSet) { +self.bases.extend(_bases); +self.history +.push(MissingAncestorsAction::AddBases(new_bases)) +} + +fn remove_ancestors_from( self, revs: HashSet) { +revs.remove(_REVISION); +self.history +.push(MissingAncestorsAction::RemoveAncestorsFrom( +revs.clone(), +)); +for base in self.bases.iter().cloned() { +if base != NULL_REVISION { +for rev in _sets[base as usize] { +revs.remove(); +} +} +} +} + +fn missing_ancestors( + self, +revs: impl IntoIterator, +) -> Vec { +let revs_as_set: HashSet = revs.into_iter().collect(); + +let mut missing: HashSet = HashSet::new(); +for rev in revs.iter().cloned() { +if rev != NULL_REVISION { +
D5416: rust: translation of missingancestors
gracinet updated this revision to Diff 12855. gracinet marked an inline comment as done. REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D5416?vs=12832=12855 REVISION DETAIL https://phab.mercurial-scm.org/D5416 AFFECTED FILES rust/hg-core/src/ancestors.rs rust/hg-core/src/lib.rs tests/test-ancestor.py tests/test-ancestor.py.out CHANGE DETAILS diff --git a/tests/test-ancestor.py.out b/tests/test-ancestor.py.out --- a/tests/test-ancestor.py.out +++ b/tests/test-ancestor.py.out @@ -1,3 +1,17 @@ +% removeancestorsfrom(), example 1 +remaining (sorted): [5, 6, 8, 9] +% removeancestorsfrom(), example 2 +remaining (sorted): [11, 12, 13, 14] +% removeancestorsfrom(), example 3 +remaining (sorted): [3, 5] +% missingancestors(), example 1 +return [3, 7, 11] +% missingancestors(), example 2 +return [5, 10] +% missingancestors(), example 3 +return [3, 6, 9, 11] +% removeancestorsfrom(), bigger graph +Ok % lazy ancestor set for [], stoprev = 0, inclusive = False membership: [] iteration: [] diff --git a/tests/test-ancestor.py b/tests/test-ancestor.py --- a/tests/test-ancestor.py +++ b/tests/test-ancestor.py @@ -182,6 +182,64 @@ 5: [4, -1], 6: [4, -1], 7: [4, -1], 8: [-1, -1], 9: [6, 7], 10: [5, -1], 11: [3, 7], 12: [9, -1], 13: [8, -1]} +def test_missingancestors_explicit(): +"""A few explicit cases, easier to check for catching errors in refactors. + +The bigger graph at the end has been produced by the random generator +above, and we have some evidence that the other tests don't cover it. +""" +for i, (bases, revs) in enumerate((({1, 2, 3, 4, 7}, set(xrange(10))), + ({10}, set({11, 12, 13, 14})), + ({7}, set({1, 2, 3, 4, 5})), + )): +print("%% removeancestorsfrom(), example %d" % (i + 1)) +missanc = ancestor.incrementalmissingancestors(graph.get, bases) +missanc.removeancestorsfrom(revs) +print("remaining (sorted): %s" % sorted(list(revs))) + +for i, (bases, revs) in enumerate((({10}, {11}), + ({11}, {10}), + ({7}, {9, 11}), + )): +print("%% missingancestors(), example %d" % (i + 1)) +missanc = ancestor.incrementalmissingancestors(graph.get, bases) +print("return %s" % missanc.missingancestors(revs)) + +print("% removeancestorsfrom(), bigger graph") +vecgraph = [ +[-1, -1], [0, -1], [1, 0], [2, 1], [3, -1], [4, -1], [5, 1], +[2, -1], [7, -1], [8, -1], [9, -1], [10, 1], [3, -1], [12, -1], +[13, -1], [14, -1], [4, -1], [16, -1], [17, -1], [18, -1], +[19, 11], [20, -1], [21, -1], [22, -1], [23, -1], [2, -1], +[3, -1], [26, 24], [27, -1], [28, -1], [12, -1], [1, -1], [1, 9], +[32, -1], [33, -1], [34, 31], [35, -1], [36, 26], [37, -1], +[38, -1], [39, -1], [40, -1], [41, -1], [42, 26], [0, -1], +[44, -1], [45, 4], [40, -1], [47, -1], [36, 0], [49, -1], +[-1, -1], [51, -1], [52, -1], [53, -1], [14, -1], +[55, -1], [15, -1], [23, -1], [58, -1], [59, -1], [2, -1], +[61, 59], [62, -1], [63, -1], [-1, -1], [65, -1], +[66, -1], [67, -1], [68, -1], [37, 28], [69, 25], +[71, -1], [72, -1], [50, 2], [74, -1], [12, -1], +[18, -1], [77, -1], [78, -1], [79, -1], [43, 33], +[81, -1], [82, -1], [83, -1], [84, 45], [85, -1], +[86, -1], [-1, -1], [88, -1], [-1, -1], [76, 83], [44, -1], +[92, -1], [93, -1], [9, -1], [95, 67], [96, -1], [97, -1], +[-1, -1]] +problem_rev = 28 +problem_base = 70 +# problem_rev is a parent of problem_base, but a faulty implementation +# could forget to remove it. +bases = {60, 26, 70, 3, 96, 19, 98, 49, 97, 47, 1, 6} +if problem_rev not in vecgraph[problem_base] or problem_base not in bases: +print("Conditions have changed") +missanc = ancestor.incrementalmissingancestors(vecgraph.__getitem__, bases) +revs = {4, 12, 41, 28, 68, 38, 1, 30, 56, 44} +missanc.removeancestorsfrom(revs) +if 28 in revs: +print("Failed!") +else: +print("Ok") + def genlazyancestors(revs, stoprev=0, inclusive=False): print(("%% lazy ancestor set for %s, stoprev = %s, inclusive = %s" % (revs, stoprev, inclusive))) @@ -276,6 +334,7 @@ seed = long(time.time() * 1000) rng = random.Random(seed) +test_missingancestors_explicit() test_missingancestors(seed, rng) test_lazyancestors() test_gca() diff --git a/rust/hg-core/src/lib.rs b/rust/hg-core/src/lib.rs --- a/rust/hg-core/src/lib.rs +++ b/rust/hg-core/src/lib.rs @@ -3,7 +3,7 @@ // This software may be used and distributed according to the terms of the // GNU General Public License version 2 or any later version. mod ancestors; -pub use
D5418: perfdiscovery: benching findcommonheads()
This revision was automatically updated to reflect the committed changes. Closed by commit rHG716ce792886c: perfdiscovery: benching findcommonheads() (authored by gracinet, committed by ). REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D5418?vs=12837=12838 REVISION DETAIL https://phab.mercurial-scm.org/D5418 AFFECTED FILES contrib/perf.py tests/test-contrib-perf.t CHANGE DETAILS diff --git a/tests/test-contrib-perf.t b/tests/test-contrib-perf.t --- a/tests/test-contrib-perf.t +++ b/tests/test-contrib-perf.t @@ -79,6 +79,9 @@ (no help text available) perfdirstatewrite (no help text available) + perfdiscovery + benchmark discovery between local repo and the peer at given + path perffncacheencode (no help text available) perffncacheload @@ -206,6 +209,7 @@ $ hg perfvolatilesets $ hg perfwalk $ hg perfparents + $ hg perfdiscovery -q . test actual output -- diff --git a/contrib/perf.py b/contrib/perf.py --- a/contrib/perf.py +++ b/contrib/perf.py @@ -37,6 +37,7 @@ copies, error, extensions, +hg, mdiff, merge, revlog, @@ -67,6 +68,11 @@ from mercurial import scmutil # since 1.9 (or 8b252e826c68) except ImportError: pass +try: +from mercurial import setdiscovery # since 1.9 (or cb98fed52495) +except ImportError: +pass + def identity(a): return a @@ -581,6 +587,21 @@ timer(d) fm.end() +@command(b'perfdiscovery', formatteropts, b'PATH') +def perfdiscovery(ui, repo, path, **opts): +"""benchmark discovery between local repo and the peer at given path +""" +repos = [repo, None] +timer, fm = gettimer(ui, opts) +path = ui.expandpath(path) + +def s(): +repos[1] = hg.peer(ui, opts, path) +def d(): +setdiscovery.findcommonheads(ui, *repos) +timer(d, setup=s) +fm.end() + @command(b'perfbookmarks', formatteropts + [ (b'', b'clear-revlogs', False, b'refresh changelog and manifest'), To: gracinet, #hg-reviewers, pulkit Cc: pulkit, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5418: perfdiscovery: benching findcommonheads()
gracinet updated this revision to Diff 12837. REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D5418?vs=12836=12837 REVISION DETAIL https://phab.mercurial-scm.org/D5418 AFFECTED FILES contrib/perf.py tests/test-contrib-perf.t CHANGE DETAILS diff --git a/tests/test-contrib-perf.t b/tests/test-contrib-perf.t --- a/tests/test-contrib-perf.t +++ b/tests/test-contrib-perf.t @@ -79,6 +79,9 @@ (no help text available) perfdirstatewrite (no help text available) + perfdiscovery + benchmark discovery between local repo and the peer at given + path perffncacheencode (no help text available) perffncacheload @@ -206,6 +209,7 @@ $ hg perfvolatilesets $ hg perfwalk $ hg perfparents + $ hg perfdiscovery -q . test actual output -- diff --git a/contrib/perf.py b/contrib/perf.py --- a/contrib/perf.py +++ b/contrib/perf.py @@ -37,6 +37,7 @@ copies, error, extensions, +hg, mdiff, merge, revlog, @@ -67,6 +68,11 @@ from mercurial import scmutil # since 1.9 (or 8b252e826c68) except ImportError: pass +try: +from mercurial import setdiscovery # since 1.9 (or cb98fed52495) +except ImportError: +pass + def identity(a): return a @@ -581,6 +587,21 @@ timer(d) fm.end() +@command(b'perfdiscovery', formatteropts, b'PATH') +def perfdiscovery(ui, repo, path, **opts): +"""benchmark discovery between local repo and the peer at given path +""" +repos = [repo, None] +timer, fm = gettimer(ui, opts) +path = ui.expandpath(path) + +def s(): +repos[1] = hg.peer(ui, opts, path) +def d(): +setdiscovery.findcommonheads(ui, *repos) +timer(d, setup=s) +fm.end() + @command(b'perfbookmarks', formatteropts + [ (b'', b'clear-revlogs', False, b'refresh changelog and manifest'), To: gracinet, #hg-reviewers Cc: pulkit, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5418: perfdiscovery: benching findcommonheads()
gracinet added a comment. @pulkit indeed, I didn't think of that, seems like a good idea, thanks. I wouldn't go as far as putting `path=default` by default, though, so that users don't start hammering providers unwillingly. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5418 To: gracinet, #hg-reviewers Cc: pulkit, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5418: perfdiscovery: benching findcommonheads()
gracinet created this revision. Herald added a subscriber: mercurial-devel. Herald added a reviewer: hg-reviewers. REVISION SUMMARY This works between the local repo and any peer given by its path, and should be useful for further work on discovery REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5418 AFFECTED FILES contrib/perf.py tests/test-contrib-perf.t CHANGE DETAILS diff --git a/tests/test-contrib-perf.t b/tests/test-contrib-perf.t --- a/tests/test-contrib-perf.t +++ b/tests/test-contrib-perf.t @@ -79,6 +79,9 @@ (no help text available) perfdirstatewrite (no help text available) + perfdiscovery + benchmark discovery between local repo and the peer at given + path perffncacheencode (no help text available) perffncacheload @@ -206,6 +209,7 @@ $ hg perfvolatilesets $ hg perfwalk $ hg perfparents + $ hg perfdiscovery -q . test actual output -- diff --git a/contrib/perf.py b/contrib/perf.py --- a/contrib/perf.py +++ b/contrib/perf.py @@ -37,6 +37,7 @@ copies, error, extensions, +hg, mdiff, merge, revlog, @@ -67,6 +68,11 @@ from mercurial import scmutil # since 1.9 (or 8b252e826c68) except ImportError: pass +try: +from mercurial import setdiscovery # since 1.9 (or cb98fed52495) +except ImportError: +pass + def identity(a): return a @@ -581,6 +587,20 @@ timer(d) fm.end() +@command(b'perfdiscovery', formatteropts, b'PATH') +def perfdiscovery(ui, repo, path, **opts): +"""benchmark discovery between local repo and the peer at given path +""" +repos = [repo, None] +timer, fm = gettimer(ui, opts) + +def s(): +repos[1] = hg.peer(ui, opts, path) +def d(): +setdiscovery.findcommonheads(ui, *repos) +timer(d, setup=s) +fm.end() + @command(b'perfbookmarks', formatteropts + [ (b'', b'clear-revlogs', False, b'refresh changelog and manifest'), To: gracinet, #hg-reviewers Cc: mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5415: rust: changed Graph.parents to return [Revision; 2]
This revision was automatically updated to reflect the committed changes. gracinet marked 2 inline comments as done. Closed by commit rHGa6ba978d9ffb: rust: changed Graph.parents to return [Revision; 2] (authored by gracinet, committed by ). REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D5415?vs=12831=12835 REVISION DETAIL https://phab.mercurial-scm.org/D5415 AFFECTED FILES rust/hg-core/src/ancestors.rs rust/hg-core/src/lib.rs rust/hg-direct-ffi/src/ancestors.rs CHANGE DETAILS diff --git a/rust/hg-direct-ffi/src/ancestors.rs b/rust/hg-direct-ffi/src/ancestors.rs --- a/rust/hg-direct-ffi/src/ancestors.rs +++ b/rust/hg-direct-ffi/src/ancestors.rs @@ -44,12 +44,12 @@ impl Graph for Index { /// wrap a call to the C extern parents function -fn parents(, rev: Revision) -> Result<(Revision, Revision), GraphError> { +fn parents(, rev: Revision) -> Result<[Revision; 2], GraphError> { let mut res: [c_int; 2] = [0; 2]; let code = unsafe { HgRevlogIndex_GetParents(self.index, rev, res as *mut [c_int; 2]) }; match code { -0 => Ok((res[0], res[1])), +0 => Ok(res), _ => Err(GraphError::ParentOutOfRange(rev)), } } @@ -176,10 +176,10 @@ struct Stub; impl Graph for Stub { -fn parents(, r: Revision) -> Result<(Revision, Revision), GraphError> { +fn parents(, r: Revision) -> Result<[Revision; 2], GraphError> { match r { 25 => Err(GraphError::ParentOutOfRange(25)), -_ => Ok((1, 2)), +_ => Ok([1, 2]), } } } diff --git a/rust/hg-core/src/lib.rs b/rust/hg-core/src/lib.rs --- a/rust/hg-core/src/lib.rs +++ b/rust/hg-core/src/lib.rs @@ -15,7 +15,7 @@ /// The simplest expression of what we need of Mercurial DAGs. pub trait Graph { -fn parents(, Revision) -> Result<(Revision, Revision), GraphError>; +fn parents(, Revision) -> Result<[Revision; 2], GraphError>; } #[derive(Clone, Debug, PartialEq)] diff --git a/rust/hg-core/src/ancestors.rs b/rust/hg-core/src/ancestors.rs --- a/rust/hg-core/src/ancestors.rs +++ b/rust/hg-core/src/ancestors.rs @@ -57,9 +57,9 @@ }; this.seen.insert(NULL_REVISION); for rev in filtered_initrevs { -let parents = this.graph.parents(rev)?; -this.conditionally_push_rev(parents.0); -this.conditionally_push_rev(parents.1); +for parent in this.graph.parents(rev)?.iter().cloned() { +this.conditionally_push_rev(parent); +} } Ok(this) } @@ -115,7 +115,7 @@ } Some(c) => *c, }; -let (p1, p2) = match self.graph.parents(current) { +let [p1, p2] = match self.graph.parents(current) { Ok(ps) => ps, Err(e) => return Some(Err(e)), }; @@ -141,25 +141,22 @@ /// This is the same as the dict from test-ancestors.py impl Graph for Stub { -fn parents( -, -rev: Revision, -) -> Result<(Revision, Revision), GraphError> { +fn parents(, rev: Revision) -> Result<[Revision; 2], GraphError> { match rev { -0 => Ok((-1, -1)), -1 => Ok((0, -1)), -2 => Ok((1, -1)), -3 => Ok((1, -1)), -4 => Ok((2, -1)), -5 => Ok((4, -1)), -6 => Ok((4, -1)), -7 => Ok((4, -1)), -8 => Ok((-1, -1)), -9 => Ok((6, 7)), -10 => Ok((5, -1)), -11 => Ok((3, 7)), -12 => Ok((9, -1)), -13 => Ok((8, -1)), +0 => Ok([-1, -1]), +1 => Ok([0, -1]), +2 => Ok([1, -1]), +3 => Ok([1, -1]), +4 => Ok([2, -1]), +5 => Ok([4, -1]), +6 => Ok([4, -1]), +7 => Ok([4, -1]), +8 => Ok([-1, -1]), +9 => Ok([6, 7]), +10 => Ok([5, -1]), +11 => Ok([3, 7]), +12 => Ok([9, -1]), +13 => Ok([8, -1]), r => Err(GraphError::ParentOutOfRange(r)), } } @@ -231,12 +228,9 @@ struct Corrupted; impl Graph for Corrupted { -fn parents( -, -rev: Revision, -) -> Result<(Revision, Revision), GraphError> { +fn parents(, rev: Revision) -> Result<[Revision; 2], GraphError> { match rev { -1 => Ok((0, -1)), +1 => Ok([0, -1]), r => Err(GraphError::ParentOutOfRange(r)), } } To: gracinet, #hg-reviewers Cc: yuja, durin42, kevincox, mercurial-devel ___ Mercurial-devel mailing list
D5414: rust: improved docstring
This revision was automatically updated to reflect the committed changes. Closed by commit rHGceb695c3c154: rust: improved docstring (authored by gracinet, committed by ). REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D5414?vs=12827=12834 REVISION DETAIL https://phab.mercurial-scm.org/D5414 AFFECTED FILES rust/hg-core/src/ancestors.rs CHANGE DETAILS diff --git a/rust/hg-core/src/ancestors.rs b/rust/hg-core/src/ancestors.rs --- a/rust/hg-core/src/ancestors.rs +++ b/rust/hg-core/src/ancestors.rs @@ -101,9 +101,9 @@ /// /// - there's no filtering of invalid parent revisions. Actually, it should be /// consistent and more efficient to filter them from the end caller. -/// - we don't have the optimization for adjacent revs -/// (case where p1 == rev-1), because it amounts to update the first element -/// of the heap without sifting, which Rust's BinaryHeap doesn't let us do. +/// - we don't have the optimization for adjacent revisions (i.e., the case +/// where `p1 == rev - 1`), because it amounts to update the first element of +/// the heap without sifting, which Rust's BinaryHeap doesn't let us do. /// - we save a few pushes by comparing with `stoprev` before pushing impl Iterator for AncestorsIterator { type Item = Result; To: gracinet, #hg-reviewers Cc: durin42, kevincox, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5415: rust: changed Graph.parents to return [Revision; 2]
gracinet marked 2 inline comments as done. gracinet added a subscriber: yuja. gracinet added a comment. Thanks ! About the fact to always returning two elements, I've been hesitating about that. This is what the C function we're wrapping provides anyway, so you could say it's for simplicity. I don't take any risks either wrt performance. But it's true it would be also simpler for callers not to worry about that at all. Another point is that there is no firm guarantee that a valid revision occurs before `NULL_REVISION`. I've found examples in `revlog.c` where the second parent is (seemlingly on purpose) not ignored if the first is `NULL_REVISION`, but I wouldn't want at this point to impose it from the `parents()` method right away. As @yuja said on https://phab.mercurial-scm.org/D5370, we could make a utility function to filter out `NULL_REVISION` later on. Or (why not) an iterator that does all needed sanitizing. INLINE COMMENTS > kevincox wrote in ancestors.rs:62 > I would do `let [p1, p2]` still as it shows the reader that you handled all > the nodes. Alternatively loop over them all. > > for parent in this.graph.parents(rev)? { > this.conditionally_push_rev(parent); > } I like the looping. > kevincox wrote in ancestors.rs:118 > let [p1, p2] = ... oh, great, didn't know it would work, should have tried ! REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5415 To: gracinet, #hg-reviewers Cc: yuja, durin42, kevincox, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5417: rust: translated random test of missingancestors
gracinet updated this revision to Diff 12833. REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D5417?vs=12830=12833 REVISION DETAIL https://phab.mercurial-scm.org/D5417 AFFECTED FILES rust/Cargo.lock rust/hg-core/Cargo.toml rust/hg-core/src/ancestors.rs CHANGE DETAILS diff --git a/rust/hg-core/src/ancestors.rs b/rust/hg-core/src/ancestors.rs --- a/rust/hg-core/src/ancestors.rs +++ b/rust/hg-core/src/ancestors.rs @@ -322,6 +322,11 @@ mod tests { use super::*; +extern crate rand; +use self::rand::distributions::{Distribution, LogNormal, Uniform}; +use self::rand::{thread_rng, Rng, RngCore}; +use std::cmp::min; +use std::fmt::Debug; use std::iter::FromIterator; #[derive(Clone, Debug)] @@ -620,14 +625,12 @@ ]; let problem_rev = 28 as Revision; let problem_base = 70 as Revision; -// making the problem evident: problem_rev is a parent of problem_base +// making the problem obvious: problem_rev is a parent of problem_base assert_eq!(graph.parents(problem_base).unwrap()[1], problem_rev); let mut missanc: MissingAncestors = MissingAncestors::new( graph, -[60, 26, 70, 3, 96, 19, 98, 49, 97, 47, 1, 6] -.iter() -.cloned(), +[60, 26, 70, 3, 96, 19, 98, 49, 97, 47, 1, 6].iter().cloned(), ); assert!(missanc.bases.contains(_base)); @@ -640,4 +643,249 @@ assert!(!revs.contains(_rev)); } +fn build_random_graph( +nodes_opt: Option, +rootprob_opt: Option, +mergeprob_opt: Option, +prevprob_opt: Option, +) -> VecGraph { +let nodes = nodes_opt.unwrap_or(100); +let rootprob = rootprob_opt.unwrap_or(0.05); +let mergeprob = mergeprob_opt.unwrap_or(0.2); +let prevprob = prevprob_opt.unwrap_or(0.7); + +let mut rng = thread_rng(); +let mut vg: VecGraph = Vec::with_capacity(nodes); +for i in 0..nodes { +if i == 0 || rng.gen_bool(rootprob) { +vg.push([NULL_REVISION, NULL_REVISION]) +} else if i == 1 { +vg.push([0, NULL_REVISION]) +} else if rng.gen_bool(mergeprob) { +let p1 = { +if i == 2 || rng.gen_bool(prevprob) { +(i - 1) as Revision +} else { +rng.gen_range(0, i - 1) as Revision +} +}; +// p2 is a random revision lower than i and different from p1 +let mut p2 = rng.gen_range(0, i - 1) as Revision; +if p2 >= p1 { +p2 = p2 + 1; +} +vg.push([p1, p2]); +} else if rng.gen_bool(prevprob) { +vg.push([(i - 1) as Revision, NULL_REVISION]) +} else { +vg.push([rng.gen_range(0, i - 1) as Revision, NULL_REVISION]) +} +} +vg +} + +/// Compute the ancestors set of all revisions of a VecGraph +fn ancestors_sets(vg: ) -> Vec> { +let mut ancs: Vec> = Vec::new(); +for i in 0..vg.len() { +let mut ancs_i = HashSet::new(); +ancs_i.insert(i as Revision); +for p in vg[i].iter().cloned() { +if p != NULL_REVISION { +ancs_i.extend([p as usize]); +} +} +ancs.push(ancs_i); +} +ancs +} + +#[derive(Clone, Debug)] +enum MissingAncestorsAction { +InitialBases(HashSet), +AddBases(HashSet), +RemoveAncestorsFrom(HashSet), +MissingAncestors(HashSet), +} + +/// An instrumented naive yet obviously correct implementation +/// +/// It also records all its actions for easy reproduction for replay +/// of problematic cases +struct NaiveMissingAncestors<'a> { +ancestors_sets: &'a Vec>, +graph: &'a VecGraph, // used for error reporting only +bases: HashSet, +history: Vec, +} + +impl<'a> NaiveMissingAncestors<'a> { +fn new( +graph: &'a VecGraph, +ancestors_sets: &'a Vec>, +bases: , +) -> Self { +Self { +ancestors_sets: ancestors_sets, +bases: bases.clone(), +graph: graph, +history: vec![MissingAncestorsAction::InitialBases( +bases.clone(), +)], +} +} + +fn add_bases( self, new_bases: HashSet) { +self.bases.extend(_bases); +self.history +.push(MissingAncestorsAction::AddBases(new_bases)) +} + +fn remove_ancestors_from( self, revs: HashSet) { +revs.remove(_REVISION); +self.history +
D5416: rust: translation of missingancestors
gracinet updated this revision to Diff 12832. REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D5416?vs=12829=12832 REVISION DETAIL https://phab.mercurial-scm.org/D5416 AFFECTED FILES rust/hg-core/src/ancestors.rs rust/hg-core/src/lib.rs tests/test-ancestor.py tests/test-ancestor.py.out CHANGE DETAILS diff --git a/tests/test-ancestor.py.out b/tests/test-ancestor.py.out --- a/tests/test-ancestor.py.out +++ b/tests/test-ancestor.py.out @@ -1,3 +1,17 @@ +% removeancestorsfrom(), example 1 +remaining (sorted): [5, 6, 8, 9] +% removeancestorsfrom(), example 2 +remaining (sorted): [11, 12, 13, 14] +% removeancestorsfrom(), example 3 +remaining (sorted): [3, 5] +% missingancestors(), example 1 +return [3, 7, 11] +% missingancestors(), example 2 +return [5, 10] +% missingancestors(), example 3 +return [3, 6, 9, 11] +% removeancestorsfrom(), bigger graph +Ok % lazy ancestor set for [], stoprev = 0, inclusive = False membership: [] iteration: [] diff --git a/tests/test-ancestor.py b/tests/test-ancestor.py --- a/tests/test-ancestor.py +++ b/tests/test-ancestor.py @@ -182,6 +182,64 @@ 5: [4, -1], 6: [4, -1], 7: [4, -1], 8: [-1, -1], 9: [6, 7], 10: [5, -1], 11: [3, 7], 12: [9, -1], 13: [8, -1]} +def test_missingancestors_explicit(): +"""A few explicit cases, easier to check for catching errors in refactors. + +The bigger graph at the end has been produced by the random generator +above, and we have some evidence that the other tests don't cover it. +""" +for i, (bases, revs) in enumerate((({1, 2, 3, 4, 7}, set(xrange(10))), + ({10}, set({11, 12, 13, 14})), + ({7}, set({1, 2, 3, 4, 5})), + )): +print("%% removeancestorsfrom(), example %d" % (i + 1)) +missanc = ancestor.incrementalmissingancestors(graph.get, bases) +missanc.removeancestorsfrom(revs) +print("remaining (sorted): %s" % sorted(list(revs))) + +for i, (bases, revs) in enumerate((({10}, {11}), + ({11}, {10}), + ({7}, {9, 11}), + )): +print("%% missingancestors(), example %d" % (i + 1)) +missanc = ancestor.incrementalmissingancestors(graph.get, bases) +print("return %s" % missanc.missingancestors(revs)) + +print("% removeancestorsfrom(), bigger graph") +vecgraph = [ +[-1, -1], [0, -1], [1, 0], [2, 1], [3, -1], [4, -1], [5, 1], +[2, -1], [7, -1], [8, -1], [9, -1], [10, 1], [3, -1], [12, -1], +[13, -1], [14, -1], [4, -1], [16, -1], [17, -1], [18, -1], +[19, 11], [20, -1], [21, -1], [22, -1], [23, -1], [2, -1], +[3, -1], [26, 24], [27, -1], [28, -1], [12, -1], [1, -1], [1, 9], +[32, -1], [33, -1], [34, 31], [35, -1], [36, 26], [37, -1], +[38, -1], [39, -1], [40, -1], [41, -1], [42, 26], [0, -1], +[44, -1], [45, 4], [40, -1], [47, -1], [36, 0], [49, -1], +[-1, -1], [51, -1], [52, -1], [53, -1], [14, -1], +[55, -1], [15, -1], [23, -1], [58, -1], [59, -1], [2, -1], +[61, 59], [62, -1], [63, -1], [-1, -1], [65, -1], +[66, -1], [67, -1], [68, -1], [37, 28], [69, 25], +[71, -1], [72, -1], [50, 2], [74, -1], [12, -1], +[18, -1], [77, -1], [78, -1], [79, -1], [43, 33], +[81, -1], [82, -1], [83, -1], [84, 45], [85, -1], +[86, -1], [-1, -1], [88, -1], [-1, -1], [76, 83], [44, -1], +[92, -1], [93, -1], [9, -1], [95, 67], [96, -1], [97, -1], +[-1, -1]] +problem_rev = 28 +problem_base = 70 +# problem_rev is a parent of problem_base, but a faulty implementation +# could forget to remove it. +bases = {60, 26, 70, 3, 96, 19, 98, 49, 97, 47, 1, 6} +if problem_rev not in vecgraph[problem_base] or problem_base not in bases: +print("Conditions have changed") +missanc = ancestor.incrementalmissingancestors(vecgraph.__getitem__, bases) +revs = {4, 12, 41, 28, 68, 38, 1, 30, 56, 44} +missanc.removeancestorsfrom(revs) +if 28 in revs: +print("Failed!") +else: +print("Ok") + def genlazyancestors(revs, stoprev=0, inclusive=False): print(("%% lazy ancestor set for %s, stoprev = %s, inclusive = %s" % (revs, stoprev, inclusive))) @@ -276,6 +334,7 @@ seed = long(time.time() * 1000) rng = random.Random(seed) +test_missingancestors_explicit() test_missingancestors(seed, rng) test_lazyancestors() test_gca() diff --git a/rust/hg-core/src/lib.rs b/rust/hg-core/src/lib.rs --- a/rust/hg-core/src/lib.rs +++ b/rust/hg-core/src/lib.rs @@ -3,7 +3,7 @@ // This software may be used and distributed according to the terms of the // GNU General Public License version 2 or any later version. mod ancestors; -pub use ancestors::AncestorsIterator; +pub use
D5415: rust: changed Graph.parents to return [Revision; 2]
gracinet updated this revision to Diff 12831. REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D5415?vs=12828=12831 REVISION DETAIL https://phab.mercurial-scm.org/D5415 AFFECTED FILES rust/hg-core/src/ancestors.rs rust/hg-core/src/lib.rs rust/hg-direct-ffi/src/ancestors.rs CHANGE DETAILS diff --git a/rust/hg-direct-ffi/src/ancestors.rs b/rust/hg-direct-ffi/src/ancestors.rs --- a/rust/hg-direct-ffi/src/ancestors.rs +++ b/rust/hg-direct-ffi/src/ancestors.rs @@ -44,12 +44,12 @@ impl Graph for Index { /// wrap a call to the C extern parents function -fn parents(, rev: Revision) -> Result<(Revision, Revision), GraphError> { +fn parents(, rev: Revision) -> Result<[Revision; 2], GraphError> { let mut res: [c_int; 2] = [0; 2]; let code = unsafe { HgRevlogIndex_GetParents(self.index, rev, res as *mut [c_int; 2]) }; match code { -0 => Ok((res[0], res[1])), +0 => Ok(res), _ => Err(GraphError::ParentOutOfRange(rev)), } } @@ -176,10 +176,10 @@ struct Stub; impl Graph for Stub { -fn parents(, r: Revision) -> Result<(Revision, Revision), GraphError> { +fn parents(, r: Revision) -> Result<[Revision; 2], GraphError> { match r { 25 => Err(GraphError::ParentOutOfRange(25)), -_ => Ok((1, 2)), +_ => Ok([1, 2]), } } } diff --git a/rust/hg-core/src/lib.rs b/rust/hg-core/src/lib.rs --- a/rust/hg-core/src/lib.rs +++ b/rust/hg-core/src/lib.rs @@ -15,7 +15,7 @@ /// The simplest expression of what we need of Mercurial DAGs. pub trait Graph { -fn parents(, Revision) -> Result<(Revision, Revision), GraphError>; +fn parents(, Revision) -> Result<[Revision; 2], GraphError>; } #[derive(Clone, Debug, PartialEq)] diff --git a/rust/hg-core/src/ancestors.rs b/rust/hg-core/src/ancestors.rs --- a/rust/hg-core/src/ancestors.rs +++ b/rust/hg-core/src/ancestors.rs @@ -57,9 +57,9 @@ }; this.seen.insert(NULL_REVISION); for rev in filtered_initrevs { -let parents = this.graph.parents(rev)?; -this.conditionally_push_rev(parents.0); -this.conditionally_push_rev(parents.1); +for parent in this.graph.parents(rev)?.iter().cloned() { +this.conditionally_push_rev(parent); +} } Ok(this) } @@ -115,7 +115,7 @@ } Some(c) => *c, }; -let (p1, p2) = match self.graph.parents(current) { +let [p1, p2] = match self.graph.parents(current) { Ok(ps) => ps, Err(e) => return Some(Err(e)), }; @@ -141,25 +141,22 @@ /// This is the same as the dict from test-ancestors.py impl Graph for Stub { -fn parents( -, -rev: Revision, -) -> Result<(Revision, Revision), GraphError> { +fn parents(, rev: Revision) -> Result<[Revision; 2], GraphError> { match rev { -0 => Ok((-1, -1)), -1 => Ok((0, -1)), -2 => Ok((1, -1)), -3 => Ok((1, -1)), -4 => Ok((2, -1)), -5 => Ok((4, -1)), -6 => Ok((4, -1)), -7 => Ok((4, -1)), -8 => Ok((-1, -1)), -9 => Ok((6, 7)), -10 => Ok((5, -1)), -11 => Ok((3, 7)), -12 => Ok((9, -1)), -13 => Ok((8, -1)), +0 => Ok([-1, -1]), +1 => Ok([0, -1]), +2 => Ok([1, -1]), +3 => Ok([1, -1]), +4 => Ok([2, -1]), +5 => Ok([4, -1]), +6 => Ok([4, -1]), +7 => Ok([4, -1]), +8 => Ok([-1, -1]), +9 => Ok([6, 7]), +10 => Ok([5, -1]), +11 => Ok([3, 7]), +12 => Ok([9, -1]), +13 => Ok([8, -1]), r => Err(GraphError::ParentOutOfRange(r)), } } @@ -231,12 +228,9 @@ struct Corrupted; impl Graph for Corrupted { -fn parents( -, -rev: Revision, -) -> Result<(Revision, Revision), GraphError> { +fn parents(, rev: Revision) -> Result<[Revision; 2], GraphError> { match rev { -1 => Ok((0, -1)), +1 => Ok([0, -1]), r => Err(GraphError::ParentOutOfRange(r)), } } To: gracinet, #hg-reviewers Cc: durin42, kevincox, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5417: rust: translated random test of missingancestors
gracinet created this revision. Herald added subscribers: mercurial-devel, kevincox, durin42. Herald added a reviewer: hg-reviewers. REVISION SUMMARY In case of a failed example, all needed information for reproduction is included in the panic message, so that we don't need to seed the random generator explicitely. This is how `test_remove_ancestors_from_case1()` has been generated. An alternative would have been to expose to Python MissingAncestors but that would have meant - pollution of the release build used from Python, whereas we do it in this changeset within the tests submodule - waiting on rust-cpython bindings to be ready or doing the cumbersome direct-ffi (more pollution with unsafe C code) REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5417 AFFECTED FILES rust/Cargo.lock rust/hg-core/Cargo.toml rust/hg-core/src/ancestors.rs CHANGE DETAILS diff --git a/rust/hg-core/src/ancestors.rs b/rust/hg-core/src/ancestors.rs --- a/rust/hg-core/src/ancestors.rs +++ b/rust/hg-core/src/ancestors.rs @@ -323,6 +323,11 @@ mod tests { use super::*; +extern crate rand; +use self::rand::distributions::{Distribution, LogNormal, Uniform}; +use self::rand::{thread_rng, Rng, RngCore}; +use std::cmp::min; +use std::fmt::Debug; use std::iter::FromIterator; #[derive(Clone, Debug)] @@ -621,14 +626,12 @@ ]; let problem_rev = 28 as Revision; let problem_base = 70 as Revision; -// making the problem evident: problem_rev is a parent of problem_base +// making the problem obvious: problem_rev is a parent of problem_base assert_eq!(graph.parents(problem_base).unwrap()[1], problem_rev); let mut missanc: MissingAncestors = MissingAncestors::new( graph, -[60, 26, 70, 3, 96, 19, 98, 49, 97, 47, 1, 6] -.iter() -.cloned(), +[60, 26, 70, 3, 96, 19, 98, 49, 97, 47, 1, 6].iter().cloned(), ); assert!(missanc.bases.contains(_base)); @@ -641,4 +644,249 @@ assert!(!revs.contains(_rev)); } +fn build_random_graph( +nodes_opt: Option, +rootprob_opt: Option, +mergeprob_opt: Option, +prevprob_opt: Option, +) -> VecGraph { +let nodes = nodes_opt.unwrap_or(100); +let rootprob = rootprob_opt.unwrap_or(0.05); +let mergeprob = mergeprob_opt.unwrap_or(0.2); +let prevprob = prevprob_opt.unwrap_or(0.7); + +let mut rng = thread_rng(); +let mut vg: VecGraph = Vec::with_capacity(nodes); +for i in 0..nodes { +if i == 0 || rng.gen_bool(rootprob) { +vg.push([NULL_REVISION, NULL_REVISION]) +} else if i == 1 { +vg.push([0, NULL_REVISION]) +} else if rng.gen_bool(mergeprob) { +let p1 = { +if i == 2 || rng.gen_bool(prevprob) { +(i - 1) as Revision +} else { +rng.gen_range(0, i - 1) as Revision +} +}; +// p2 is a random revision lower than i and different from p1 +let mut p2 = rng.gen_range(0, i - 1) as Revision; +if p2 >= p1 { +p2 = p2 + 1; +} +vg.push([p1, p2]); +} else if rng.gen_bool(prevprob) { +vg.push([(i - 1) as Revision, NULL_REVISION]) +} else { +vg.push([rng.gen_range(0, i - 1) as Revision, NULL_REVISION]) +} +} +vg +} + +/// Compute the ancestors set of all revisions of a VecGraph +fn ancestors_sets(vg: ) -> Vec> { +let mut ancs: Vec> = Vec::new(); +for i in 0..vg.len() { +let mut ancs_i = HashSet::new(); +ancs_i.insert(i as Revision); +for p in vg[i].iter().cloned() { +if p != NULL_REVISION { +ancs_i.extend([p as usize]); +} +} +ancs.push(ancs_i); +} +ancs +} + +#[derive(Clone, Debug)] +enum MissingAncestorsAction { +InitialBases(HashSet), +AddBases(HashSet), +RemoveAncestorsFrom(HashSet), +MissingAncestors(HashSet), +} + +/// An instrumented naive yet obviously correct implementation +/// +/// It also records all its actions for easy reproduction for replay +/// of problematic cases +struct NaiveMissingAncestors<'a> { +ancestors_sets: &'a Vec>, +graph: &'a VecGraph, // used for error reporting only +bases: HashSet, +history: Vec, +} + +impl<'a> NaiveMissingAncestors<'a> { +fn new( +graph: &'a VecGraph, +ancestors_sets: &'a Vec>, +bases: , +) -> Self { +Self { +
D5416: rust: translation of missingancestors
gracinet created this revision. Herald added subscribers: mercurial-devel, mjpieters, kevincox, durin42. Herald added a reviewer: hg-reviewers. REVISION SUMMARY This is as direct as possible a translation of the ancestor.missingancestors Python class in pure Rust. The goal for this changeset is to make it easy to compare with the Python version. We also add to Python tests the cases that helped us develop and debug this implementation. Some possible optimizations are marked along the way as TODO comments REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5416 AFFECTED FILES rust/hg-core/src/ancestors.rs rust/hg-core/src/lib.rs tests/test-ancestor.py tests/test-ancestor.py.out CHANGE DETAILS diff --git a/tests/test-ancestor.py.out b/tests/test-ancestor.py.out --- a/tests/test-ancestor.py.out +++ b/tests/test-ancestor.py.out @@ -1,3 +1,17 @@ +% removeancestorsfrom(), example 1 +remaining (sorted): [5, 6, 8, 9] +% removeancestorsfrom(), example 2 +remaining (sorted): [11, 12, 13, 14] +% removeancestorsfrom(), example 3 +remaining (sorted): [3, 5] +% missingancestors(), example 1 +return [3, 7, 11] +% missingancestors(), example 2 +return [5, 10] +% missingancestors(), example 3 +return [3, 6, 9, 11] +% removeancestorsfrom(), bigger graph +Ok % lazy ancestor set for [], stoprev = 0, inclusive = False membership: [] iteration: [] diff --git a/tests/test-ancestor.py b/tests/test-ancestor.py --- a/tests/test-ancestor.py +++ b/tests/test-ancestor.py @@ -182,6 +182,64 @@ 5: [4, -1], 6: [4, -1], 7: [4, -1], 8: [-1, -1], 9: [6, 7], 10: [5, -1], 11: [3, 7], 12: [9, -1], 13: [8, -1]} +def test_missingancestors_explicit(): +"""A few explicit cases, easier to check for catching errors in refactors. + +The bigger graph at the end has been produced by the random generator +above, and we have some evidence that the other tests don't cover it. +""" +for i, (bases, revs) in enumerate((({1, 2, 3, 4, 7}, set(xrange(10))), + ({10}, set({11, 12, 13, 14})), + ({7}, set({1, 2, 3, 4, 5})), + )): +print("%% removeancestorsfrom(), example %d" % (i + 1)) +missanc = ancestor.incrementalmissingancestors(graph.get, bases) +missanc.removeancestorsfrom(revs) +print("remaining (sorted): %s" % sorted(list(revs))) + +for i, (bases, revs) in enumerate((({10}, {11}), + ({11}, {10}), + ({7}, {9, 11}), + )): +print("%% missingancestors(), example %d" % (i + 1)) +missanc = ancestor.incrementalmissingancestors(graph.get, bases) +print("return %s" % missanc.missingancestors(revs)) + +print("% removeancestorsfrom(), bigger graph") +vecgraph = [ +[-1, -1], [0, -1], [1, 0], [2, 1], [3, -1], [4, -1], [5, 1], +[2, -1], [7, -1], [8, -1], [9, -1], [10, 1], [3, -1], [12, -1], +[13, -1], [14, -1], [4, -1], [16, -1], [17, -1], [18, -1], +[19, 11], [20, -1], [21, -1], [22, -1], [23, -1], [2, -1], +[3, -1], [26, 24], [27, -1], [28, -1], [12, -1], [1, -1], [1, 9], +[32, -1], [33, -1], [34, 31], [35, -1], [36, 26], [37, -1], +[38, -1], [39, -1], [40, -1], [41, -1], [42, 26], [0, -1], +[44, -1], [45, 4], [40, -1], [47, -1], [36, 0], [49, -1], +[-1, -1], [51, -1], [52, -1], [53, -1], [14, -1], +[55, -1], [15, -1], [23, -1], [58, -1], [59, -1], [2, -1], +[61, 59], [62, -1], [63, -1], [-1, -1], [65, -1], +[66, -1], [67, -1], [68, -1], [37, 28], [69, 25], +[71, -1], [72, -1], [50, 2], [74, -1], [12, -1], +[18, -1], [77, -1], [78, -1], [79, -1], [43, 33], +[81, -1], [82, -1], [83, -1], [84, 45], [85, -1], +[86, -1], [-1, -1], [88, -1], [-1, -1], [76, 83], [44, -1], +[92, -1], [93, -1], [9, -1], [95, 67], [96, -1], [97, -1], +[-1, -1]] +problem_rev = 28 +problem_base = 70 +# problem_rev is a parent of problem_base, but a faulty implementation +# could forget to remove it. +bases = {60, 26, 70, 3, 96, 19, 98, 49, 97, 47, 1, 6} +if problem_rev not in vecgraph[problem_base] or problem_base not in bases: +print("Conditions have changed") +missanc = ancestor.incrementalmissingancestors(vecgraph.__getitem__, bases) +revs = {4, 12, 41, 28, 68, 38, 1, 30, 56, 44} +missanc.removeancestorsfrom(revs) +if 28 in revs: +print("Failed!") +else: +print("Ok") + def genlazyancestors(revs, stoprev=0, inclusive=False): print(("%% lazy ancestor set for %s, stoprev = %s, inclusive = %s" % (revs, stoprev, inclusive))) @@ -276,6 +334,7 @@ seed = long(time.time() * 1000) rng = random.Random(seed) +test_missingancestors_explicit()
D5415: rust: changed Graph.parents to return [Revision; 2]
gracinet created this revision. Herald added subscribers: mercurial-devel, kevincox, durin42. Herald added a reviewer: hg-reviewers. REVISION SUMMARY This will allow for simple iteration on parent revisions, such as: for parent in graph.parents(rev)?.iter().cloned() This seems to be a zero overhead abstraction once built in release mode. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5415 AFFECTED FILES rust/hg-core/src/ancestors.rs rust/hg-core/src/lib.rs rust/hg-direct-ffi/src/ancestors.rs CHANGE DETAILS diff --git a/rust/hg-direct-ffi/src/ancestors.rs b/rust/hg-direct-ffi/src/ancestors.rs --- a/rust/hg-direct-ffi/src/ancestors.rs +++ b/rust/hg-direct-ffi/src/ancestors.rs @@ -44,12 +44,12 @@ impl Graph for Index { /// wrap a call to the C extern parents function -fn parents(, rev: Revision) -> Result<(Revision, Revision), GraphError> { +fn parents(, rev: Revision) -> Result<[Revision; 2], GraphError> { let mut res: [c_int; 2] = [0; 2]; let code = unsafe { HgRevlogIndex_GetParents(self.index, rev, res as *mut [c_int; 2]) }; match code { -0 => Ok((res[0], res[1])), +0 => Ok(res), _ => Err(GraphError::ParentOutOfRange(rev)), } } @@ -176,10 +176,10 @@ struct Stub; impl Graph for Stub { -fn parents(, r: Revision) -> Result<(Revision, Revision), GraphError> { +fn parents(, r: Revision) -> Result<[Revision; 2], GraphError> { match r { 25 => Err(GraphError::ParentOutOfRange(25)), -_ => Ok((1, 2)), +_ => Ok([1, 2]), } } } diff --git a/rust/hg-core/src/lib.rs b/rust/hg-core/src/lib.rs --- a/rust/hg-core/src/lib.rs +++ b/rust/hg-core/src/lib.rs @@ -15,7 +15,7 @@ /// The simplest expression of what we need of Mercurial DAGs. pub trait Graph { -fn parents(, Revision) -> Result<(Revision, Revision), GraphError>; +fn parents(, Revision) -> Result<[Revision; 2], GraphError>; } #[derive(Clone, Debug, PartialEq)] diff --git a/rust/hg-core/src/ancestors.rs b/rust/hg-core/src/ancestors.rs --- a/rust/hg-core/src/ancestors.rs +++ b/rust/hg-core/src/ancestors.rs @@ -58,8 +58,8 @@ this.seen.insert(NULL_REVISION); for rev in filtered_initrevs { let parents = this.graph.parents(rev)?; -this.conditionally_push_rev(parents.0); -this.conditionally_push_rev(parents.1); +this.conditionally_push_rev(parents[0]); +this.conditionally_push_rev(parents[1]); } Ok(this) } @@ -115,10 +115,11 @@ } Some(c) => *c, }; -let (p1, p2) = match self.graph.parents(current) { +let parents = match self.graph.parents(current) { Ok(ps) => ps, Err(e) => return Some(Err(e)), }; +let (p1, p2) = (parents[0], parents[1]); if p1 < self.stoprev || self.seen.contains() { self.visit.pop(); } else { @@ -141,25 +142,22 @@ /// This is the same as the dict from test-ancestors.py impl Graph for Stub { -fn parents( -, -rev: Revision, -) -> Result<(Revision, Revision), GraphError> { +fn parents(, rev: Revision) -> Result<[Revision; 2], GraphError> { match rev { -0 => Ok((-1, -1)), -1 => Ok((0, -1)), -2 => Ok((1, -1)), -3 => Ok((1, -1)), -4 => Ok((2, -1)), -5 => Ok((4, -1)), -6 => Ok((4, -1)), -7 => Ok((4, -1)), -8 => Ok((-1, -1)), -9 => Ok((6, 7)), -10 => Ok((5, -1)), -11 => Ok((3, 7)), -12 => Ok((9, -1)), -13 => Ok((8, -1)), +0 => Ok([-1, -1]), +1 => Ok([0, -1]), +2 => Ok([1, -1]), +3 => Ok([1, -1]), +4 => Ok([2, -1]), +5 => Ok([4, -1]), +6 => Ok([4, -1]), +7 => Ok([4, -1]), +8 => Ok([-1, -1]), +9 => Ok([6, 7]), +10 => Ok([5, -1]), +11 => Ok([3, 7]), +12 => Ok([9, -1]), +13 => Ok([8, -1]), r => Err(GraphError::ParentOutOfRange(r)), } } @@ -231,12 +229,9 @@ struct Corrupted; impl Graph for Corrupted { -fn parents( -, -rev: Revision, -) -> Result<(Revision, Revision), GraphError> { +fn parents(, rev: Revision) -> Result<[Revision; 2], GraphError> { match rev { -1 => Ok((0, -1)), +1 => Ok([0, -1]), r => Err(GraphError::ParentOutOfRange(r)), } } To: gracinet,
D5414: rust: improved docstring
gracinet created this revision. Herald added subscribers: mercurial-devel, kevincox, durin42. Herald added a reviewer: hg-reviewers. REVISION SUMMARY In the previous wording, rustfmt wanted to cut at the == which is not very readable in my taste. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5414 AFFECTED FILES rust/hg-core/src/ancestors.rs CHANGE DETAILS diff --git a/rust/hg-core/src/ancestors.rs b/rust/hg-core/src/ancestors.rs --- a/rust/hg-core/src/ancestors.rs +++ b/rust/hg-core/src/ancestors.rs @@ -101,9 +101,9 @@ /// /// - there's no filtering of invalid parent revisions. Actually, it should be /// consistent and more efficient to filter them from the end caller. -/// - we don't have the optimization for adjacent revs -/// (case where p1 == rev-1), because it amounts to update the first element -/// of the heap without sifting, which Rust's BinaryHeap doesn't let us do. +/// - we don't have the optimization for adjacent revisions (i.e., the case +/// where `p1 == rev - 1`), because it amounts to update the first element of +/// the heap without sifting, which Rust's BinaryHeap doesn't let us do. /// - we save a few pushes by comparing with `stoprev` before pushing impl Iterator for AncestorsIterator { type Item = Result; To: gracinet, #hg-reviewers Cc: durin42, kevincox, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5370: rust: core implementation of missingancestors (no bindings)
gracinet marked 11 inline comments as done. gracinet added a comment. @yuja trying to submit the v2 with phabsend instead of arcanist… hope it'll be linked properly INLINE COMMENTS > kevincox wrote in ancestors.rs:201 > Isn't this loop redundant with the retain call above? indeed, must have been a leftover > kevincox wrote in ancestors.rs:235 > I would prefer `let (p0, p1) = ...`. This makes it obvious exactly what data > you are getting. In the next version, I'm simply doing a for loop on `.iter().cloned()`, it's even better imho. > kevincox wrote in lib.rs:26 > You can add `.cloned()` to remove the references. I would be surprised if > this has poor code generation. > > As for HashSet vs bit array I would stick to HashSet for now. HashSet also > has the advantage of being sparse. I guess after the initial implementation > we could benchmark the two to see what is better. Thanks for the tip with `.cloned()` that's exactly what I was missing. Out of curiosity, I took a look at the generated optimized assembly code: zero cost abstraction, indeed. About HashSet, yes that's exactly what I'm thinking. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5370 To: gracinet, #hg-reviewers Cc: yuja, durin42, kevincox, mjpieters, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH] rust: adapted hg-core tests for iteration over Result
# HG changeset patch # User Georges Racinet # Date 1544544797 -3600 # Tue Dec 11 17:13:17 2018 +0100 # Node ID 7255a67d19ac54af80085ebfc9b62a684532aabe # Parent 76d8b20139a3b8b5835c7262216b97275845b582 # EXP-Topic rustancestors-tests rust: adapted hg-core tests for iteration over Result Now AncestorsIterator iters on Result diff -r 76d8b20139a3 -r 7255a67d19ac rust/hg-core/src/ancestors.rs --- a/rust/hg-core/src/ancestors.rs Tue Dec 11 22:23:39 2018 +0900 +++ b/rust/hg-core/src/ancestors.rs Tue Dec 11 17:13:17 2018 +0100 @@ -173,6 +173,7 @@ ) -> Vec { AncestorsIterator::new(graph, initrevs, stoprev, inclusive) .unwrap() +.map(|res| res.unwrap()) .collect() } @@ -218,12 +219,12 @@ fn test_contains() { let mut lazy = AncestorsIterator::new(Stub, vec![10, 1], 0, true).unwrap(); -assert!(lazy.contains(1)); -assert!(!lazy.contains(3)); +assert!(lazy.contains(1).unwrap()); +assert!(!lazy.contains(3).unwrap()); let mut lazy = AncestorsIterator::new(Stub, vec![0], 0, false).unwrap(); -assert!(!lazy.contains(NULL_REVISION)); +assert!(!lazy.contains(NULL_REVISION).unwrap()); } /// A corrupted Graph, supporting error handling tests @@ -255,7 +256,6 @@ // inclusive=false looks up initrev's parents right away let mut iter = AncestorsIterator::new(Corrupted, vec![1], 0, false).unwrap(); -assert_eq!(iter.next(), Some(0)); -assert_eq!(iter.next(), None); +assert_eq!(iter.next(), Some(Err(GraphError::ParentOutOfRange(0; } } ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH 4 of 6] revlog: add public CPython function to get parent revisions
On 12/11/18 2:10 PM, Yuya Nishihara wrote: > On Mon, 10 Dec 2018 18:54:56 +0100, Georges Racinet wrote: >> On 12/9/18 6:02 AM, Yuya Nishihara wrote: >>> On Wed, 05 Dec 2018 22:43:35 +0900, Yuya Nishihara wrote: >>>> # HG changeset patch >>>> # User Yuya Nishihara >>>> # Date 1543756237 -32400 >>>> # Sun Dec 02 22:10:37 2018 +0900 >>>> # Node ID 716a73bab79be30c20c75e13324c44205d5e2120 >>>> # Parent 3842abba948cd7f4bb3fad6805265a35fb94a083 >>>> revlog: add public CPython function to get parent revisions >>>> +/* >>>> + * Get parents of the given rev. >>>> + * >>>> + * If the specified rev is out of range, IndexError will be raised. If the >>>> + * revlog entry is corrupted, ValueError may be raised. >>>> + * >>>> + * Returns 0 on success or -1 on failure. >>>> + */ >>>> +int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps) >>> This is based on the idea that the Rust module will be statically linked >>> with >>> the cext objects. I thought that would be easier for our use case, >>> package-local >>> visibility, but I'm not certain. If that makes things complicated, maybe we >>> should choose dynamic linking and wrap the function with PyCapsule, as you >>> did >>> in the PoC code. >> Yes, it's true in the direct-ffi code, I passed the function pointer >> around because I was wary of a loop in dependencies (that's a bit silly >> since we can't avoid linking the Rust extension within the parsers >> module anyway), but also I didn't want to touch the existing C code too >> much for my first patch set. This version you're proposing feels simpler >> to me. >> >> Using a capsule in that context wouldn't be much complicated either, all >> we'd need in hg-direct-ffi would be to declare and call PyCapsule_Import >> (it's obviously designed to use very few CPython API concepts, only >> needs a char * for the name). The advantage it'd have then would be that >> the same capsule could be used for all types of FFI, and we could do the >> same for other functions we could need later on (maybe in other >> packages). Adding a new capsule seems less risky for people like me, who >> aren't as familiar with mercurial's C code as you are or, if you prefer >> to see it that way, will require less review. >> >> To be more explicit for other mailing-list subscribers, here's the >> change in revlog.c I'm doing in my PoC CPython code (this is inside the >> module init function) : >> >> @@ -2846,6 +2846,12 @@ >> if (nullentry) >> PyObject_GC_UnTrack(nullentry); >> >> + void *caps = PyCapsule_New( >> + index_get_parents, >> "mercurial.cext.parsers.index_get_parents_CAPI", >> + NULL); >> + if (caps != NULL) >> + PyModule_AddObject(mod, "index_get_parents_CAPI", caps); >> + >> #ifdef WITH_RUST >> rustlazyancestorsType.tp_new = PyType_GenericNew; >> if (PyType_Ready() < 0) >> >> So, to summarize, I think we should maybe promote capsules as the >> preferred way to interface Rust code with inner C code. It wouldn't be >> hard to document either. What do you think ? > My only concern about using PyCapsule is, we would have to get around some > "static mut" variables in Rust if the cost of the name resolution matters. > That's what I noticed while reading your rust/hg-cpython code. I understand that concern. Would a protection with std::sync::Once be enough to address it? The official Rust doc (https://doc.rust-lang.org/std/sync/struct.Once.html) claims that the static mut is then actually safe. > > To be clear, I'm not against using PyCapsule. It's documented as the "right" > way to interface C-level APIs across modules. I wrote this patch before > reading your upcoming series. That's the only reason I made the GetParents() > function public. About that upcoming series, I should be able to submit it real soon now: I don't consider https://github.com/dgrunwald/rust-cpython/issues/164 to be a blocker anymore. Regards, -- Georges Racinet Anybox SAS, http://anybox.fr Téléphone: +33 6 51 32 07 27 GPG: B59E 22AB B842 CAED 77F7 7A7F C34F A519 33AB 0A35, sur serveurs publics ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH 6 of 6] rust: propagate error of index_get_parents() properly
Thanks for taking on this ! I have only one minor remark below On 12/5/18 2:43 PM, Yuya Nishihara wrote: > # HG changeset patch > # User Yuya Nishihara > # Date 1540817453 -32400 > # Mon Oct 29 21:50:53 2018 +0900 > # Node ID 3afbe6d40d6859d313f45b66ad5ca3c0a48f2e06 > # Parent f5cdfa49994e3943ba7c4ce2d66708142f0c7058 > rust: propagate error of index_get_parents() properly > > Before, rustla_contains() would return 0 on error, and the exception would > be cleared or noticed somewhere else. We need to propagate the error from > AncestorsIterator up to the FFI surface. > > diff --git a/rust/hg-core/src/ancestors.rs b/rust/hg-core/src/ancestors.rs > --- a/rust/hg-core/src/ancestors.rs > +++ b/rust/hg-core/src/ancestors.rs > @@ -77,19 +77,20 @@ impl AncestorsIterator { > /// is in the ancestors it emits. > /// This is meant for iterators actually dedicated to that kind of > /// purpose > -pub fn contains( self, target: Revision) -> bool { > +pub fn contains( self, target: Revision) -> Result > { > if self.seen.contains() && target != NULL_REVISION { > -return true; > +return Ok(true); > } > -for rev in self { > +for item in self { > +let rev = item?; > if rev == target { > -return true; > +return Ok(true); > } > if rev < target { > -return false; > +return Ok(false); > } > } > -false > +Ok(false) > } > } > > @@ -117,19 +118,19 @@ impl AncestorsIterator { > /// concrete caller we target, so we shouldn't need a finer error treatment > /// for the time being. I think the comment right above about error treatment becomes obsolete with this patch. > impl Iterator for AncestorsIterator { > -type Item = Revision; > +type Item = Result; > > -fn next( self) -> Option { > +fn next( self) -> Option { > let current = match self.visit.peek() { > None => { > return None; > } > Some(c) => *c, > }; > -let (p1, p2) = self > -.graph > -.parents(current) > -.unwrap_or((NULL_REVISION, NULL_REVISION)); > +let (p1, p2) = match self.graph.parents(current) { > +Ok(ps) => ps, > +Err(e) => return Some(Err(e)), > +}; > if p1 < self.stoprev || self.seen.contains() { > self.visit.pop(); > } else { > @@ -138,7 +139,7 @@ impl Iterator for AncestorsIte > }; > > self.conditionally_push_rev(p2); > -Some(current) > +Some(Ok(current)) > } > } > > diff --git a/rust/hg-direct-ffi/src/ancestors.rs > b/rust/hg-direct-ffi/src/ancestors.rs > --- a/rust/hg-direct-ffi/src/ancestors.rs > +++ b/rust/hg-direct-ffi/src/ancestors.rs > @@ -139,7 +139,11 @@ pub extern "C" fn rustlazyancestors_next > #[inline] > fn raw_next(raw: *mut AncestorsIterator) -> c_long { > let as_ref = unsafe { *raw }; > -as_ref.next().unwrap_or(NULL_REVISION) as c_long > +let rev = match as_ref.next() { > +Some(Ok(rev)) => rev, > +Some(Err(_)) | None => NULL_REVISION, > +}; > +rev as c_long > } > > #[no_mangle] > @@ -157,10 +161,10 @@ fn raw_contains( > target: c_long, > ) -> c_int { > let as_ref = unsafe { *raw }; > -if as_ref.contains(target as Revision) { > - return 1; > +match as_ref.contains(target as Revision) { > +Ok(r) => r as c_int, > +Err(_) => -1, > } > -0 > } > > #[cfg(test)] > ___ > Mercurial-devel mailing list > Mercurial-devel@mercurial-scm.org > https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel -- Georges Racinet Anybox SAS, http://anybox.fr Téléphone: +33 6 51 32 07 27 GPG: B59E 22AB B842 CAED 77F7 7A7F C34F A519 33AB 0A35, sur serveurs publics ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH 5 of 6] rust: look up HgRevlogIndex_GetParents() from symbol table
h code { > 0 => Ok((res[0], res[1])), > _ => Err(GraphError::ParentOutOfRange(rev)), > @@ -59,7 +62,6 @@ impl Graph for Index { > #[no_mangle] > pub extern "C" fn rustlazyancestors_init( > index: IndexPtr, > -parents: IndexParentsFn, > initrevslen: ssize_t, > initrevs: *mut c_long, > stoprev: c_long, > @@ -68,7 +70,7 @@ pub extern "C" fn rustlazyancestors_init > assert!(initrevslen >= 0); > unsafe { > raw_init( > -Index::new(index, parents), > +Index::new(index), > initrevslen as usize, > initrevs, > stoprev, > ___ > Mercurial-devel mailing list > Mercurial-devel@mercurial-scm.org > https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel -- Georges Racinet Anybox SAS, http://anybox.fr Téléphone: +33 6 51 32 07 27 GPG: B59E 22AB B842 CAED 77F7 7A7F C34F A519 33AB 0A35, sur serveurs publics ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH 4 of 6] revlog: add public CPython function to get parent revisions
On 12/9/18 6:02 AM, Yuya Nishihara wrote: > On Wed, 05 Dec 2018 22:43:35 +0900, Yuya Nishihara wrote: >> # HG changeset patch >> # User Yuya Nishihara >> # Date 1543756237 -32400 >> # Sun Dec 02 22:10:37 2018 +0900 >> # Node ID 716a73bab79be30c20c75e13324c44205d5e2120 >> # Parent 3842abba948cd7f4bb3fad6805265a35fb94a083 >> revlog: add public CPython function to get parent revisions >> +/* >> + * Get parents of the given rev. >> + * >> + * If the specified rev is out of range, IndexError will be raised. If the >> + * revlog entry is corrupted, ValueError may be raised. >> + * >> + * Returns 0 on success or -1 on failure. >> + */ >> +int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps) > This is based on the idea that the Rust module will be statically linked with > the cext objects. I thought that would be easier for our use case, > package-local > visibility, but I'm not certain. If that makes things complicated, maybe we > should choose dynamic linking and wrap the function with PyCapsule, as you did > in the PoC code. Yes, it's true in the direct-ffi code, I passed the function pointer around because I was wary of a loop in dependencies (that's a bit silly since we can't avoid linking the Rust extension within the parsers module anyway), but also I didn't want to touch the existing C code too much for my first patch set. This version you're proposing feels simpler to me. Using a capsule in that context wouldn't be much complicated either, all we'd need in hg-direct-ffi would be to declare and call PyCapsule_Import (it's obviously designed to use very few CPython API concepts, only needs a char * for the name). The advantage it'd have then would be that the same capsule could be used for all types of FFI, and we could do the same for other functions we could need later on (maybe in other packages). Adding a new capsule seems less risky for people like me, who aren't as familiar with mercurial's C code as you are or, if you prefer to see it that way, will require less review. To be more explicit for other mailing-list subscribers, here's the change in revlog.c I'm doing in my PoC CPython code (this is inside the module init function) : @@ -2846,6 +2846,12 @@ if (nullentry) PyObject_GC_UnTrack(nullentry); + void *caps = PyCapsule_New( + index_get_parents, "mercurial.cext.parsers.index_get_parents_CAPI", + NULL); + if (caps != NULL) + PyModule_AddObject(mod, "index_get_parents_CAPI", caps); + #ifdef WITH_RUST rustlazyancestorsType.tp_new = PyType_GenericNew; if (PyType_Ready() < 0) So, to summarize, I think we should maybe promote capsules as the preferred way to interface Rust code with inner C code. It wouldn't be hard to document either. What do you think ? And yes, I should submit formally those rust-cpython bindings sooner than later. Regards, -- Georges Racinet Anybox SAS, http://anybox.fr Téléphone: +33 6 51 32 07 27 GPG: B59E 22AB B842 CAED 77F7 7A7F C34F A519 33AB 0A35, sur serveurs publics ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5370: rust: core implementation of missingancestors (no bindings)
gracinet added a comment. @yuja: do you mean one of those Differential Revisions of this system for each commit, sure I can do. With respect to rust-cpython bindings, I'm currently waiting for feedback on https://github.com/dgrunwald/rust-cpython/issues/164 Perhaps you'd have an idea ? Short summary: sporadic segfaults that I can for now reproduce only by running the whole test suite. That being said, I do have rust-cpython bindings for AncestorsIterator and MissingAncestors (plus the full lazyancestors class). Unless I'm mistaken, that means that together with the existing C implementations, there's a potential for a full native version of `mercurial/ancestor.py`. I can send them to the mailing-list or here. What would be the correct flag for that ? RFC ? I'm also working on a perfmissingancestors (started a few hours ago). For now it confirms measurements I'd observed earlier: about x5 performance boost. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5370 To: gracinet, #hg-reviewers Cc: yuja, durin42, kevincox, mjpieters, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5370: rust: core implementation of missingancestors (no bindings)
gracinet added a comment. I'll include your remark in the upcoming v2, thanks again ! INLINE COMMENTS > kevincox wrote in ancestors.rs:159 > Is always having an item in the set actually saving many corner cases? It > seems like you usually check for empty sets anyways. Not sure about this, so for now, I've been playing safe, ie, exactly as the Python version. > kevincox wrote in lib.rs:20 > I think the better solution here is to make `parents` return `[Revision; 2]` > (or `&[Revision]` and not return nulls. I agree, it was a pythonism to use a tuple, but it has impact on `AncestorsIterators`, as well as the implementation in `hg-direct-ffi`, and in more code I've not submitted yet, so I'd prefer to do that change independently in a follow-up. > kevincox wrote in lib.rs:26 > Would something like the following be simpler? > > fn parents_iter( > , > r: Revision, > ) -> Result, GraphError> { > let (p0, p1) = self.parents(r)?; > let iter = [p0, p1].into_iter() > .map(|r| if r == NULL_REVISION { None} else { Some(r) }); > Ok(iter) > } The reason I've preferred to implement it directly is that `into_iter()` iterates on references, which I found an unnecessary overhead,. But maybe that's a case of premature optimization ? In the same vein, I don't think `HashSet` is the best choice for a set of `i32`, unless it becomes really big, but I don't know where the threshold would be, compared to, say, a bit array. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D5370 To: gracinet, #hg-reviewers Cc: durin42, kevincox, mjpieters, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5365: rust: AncestorsIterator::next, renamed local variables
This revision was automatically updated to reflect the committed changes. Closed by commit rHG70976974c14a: rust: rename local variables in AncestorsIterator::next (authored by gracinet, committed by ). REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D5365?vs=12695=12709 REVISION DETAIL https://phab.mercurial-scm.org/D5365 AFFECTED FILES rust/hg-core/src/ancestors.rs CHANGE DETAILS diff --git a/rust/hg-core/src/ancestors.rs b/rust/hg-core/src/ancestors.rs --- a/rust/hg-core/src/ancestors.rs +++ b/rust/hg-core/src/ancestors.rs @@ -126,19 +126,18 @@ } Some(c) => *c, }; -let parents = self +let (p1, p2) = self .graph .parents(current) .unwrap_or((NULL_REVISION, NULL_REVISION)); -let p1 = parents.0; if p1 < self.stoprev || self.seen.contains() { self.visit.pop(); } else { *(self.visit.peek_mut().unwrap()) = p1; self.seen.insert(p1); }; -self.conditionally_push_rev(parents.1); +self.conditionally_push_rev(p2); Some(current) } } To: gracinet, #hg-reviewers Cc: durin42, kevincox, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5369: rust: make clean takes care of rust/target
This revision was automatically updated to reflect the committed changes. Closed by commit rHG9072a890e523: rust: make clean takes care of rust/target (authored by gracinet, committed by ). CHANGED PRIOR TO COMMIT https://phab.mercurial-scm.org/D5369?vs=12700=12710#toc REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D5369?vs=12700=12710 REVISION DETAIL https://phab.mercurial-scm.org/D5369 AFFECTED FILES Makefile CHANGE DETAILS diff --git a/Makefile b/Makefile --- a/Makefile +++ b/Makefile @@ -72,6 +72,7 @@ rm -rf build mercurial/locale $(MAKE) -C doc clean $(MAKE) -C contrib/chg distclean + rm -rf rust/target clean: cleanbutpackages rm -rf packages To: gracinet, #hg-reviewers Cc: yuja, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5370: rust: core implementation of missingancestors (no bindings)
gracinet created this revision. Herald added subscribers: mercurial-devel, mjpieters, kevincox, durin42. Herald added a reviewer: hg-reviewers. REVISION SUMMARY rust: iterator version of Graph.parents This is handy for callers that want to simply do: for p in graph.parents_iter(rev) although one could argue that actually parents() should return an array instead of a tuple, giving us a similar iterator for free (but on references instead of values, unless we also use the arrayvec crate could help). Notably, the current C-backed parents() internally uses an array for communication with C code, so that currently, we'd get less memory copy and less code using an array. rust: translation of missingancestors This is as direct as possible a translation of the ancestor.missingancestors Python class in pure Rust. The goal for this changeset is to make it easy to compare with the Python version. We also add to Python tests the cases that helped us develop and debug this implementation. Some possible optimizations are marked along the way as TODO comments rust: translated random test of missingancestors An alternative would have been to expose to Python MissingAncestors but that would have meant - pollution of the release build used from Python, whereas we do it in this changeset within the tests submodule - waiting on rust-cpython bindings to be ready or doing the cumbersome direct-ffi (more pollution with unsafe C code) REPOSITORY rHG Mercurial BRANCH default REVISION DETAIL https://phab.mercurial-scm.org/D5370 AFFECTED FILES rust/Cargo.lock rust/hg-core/Cargo.toml rust/hg-core/src/ancestors.rs rust/hg-core/src/lib.rs tests/test-ancestor.py tests/test-ancestor.py.out CHANGE DETAILS diff --git a/tests/test-ancestor.py.out b/tests/test-ancestor.py.out --- a/tests/test-ancestor.py.out +++ b/tests/test-ancestor.py.out @@ -1,3 +1,17 @@ +% removeancestorsfrom(), example 1 +remaining (sorted): [5, 6, 8, 9] +% removeancestorsfrom(), example 2 +remaining (sorted): [11, 12, 13, 14] +% removeancestorsfrom(), example 3 +remaining (sorted): [3, 5] +% missingancestors(), example 1 +return [3, 7, 11] +% missingancestors(), example 2 +return [5, 10] +% missingancestors(), example 3 +return [3, 6, 9, 11] +% removeancestorsfrom(), bigger graph +Ok % lazy ancestor set for [], stoprev = 0, inclusive = False membership: [] iteration: [] diff --git a/tests/test-ancestor.py b/tests/test-ancestor.py --- a/tests/test-ancestor.py +++ b/tests/test-ancestor.py @@ -182,6 +182,64 @@ 5: [4, -1], 6: [4, -1], 7: [4, -1], 8: [-1, -1], 9: [6, 7], 10: [5, -1], 11: [3, 7], 12: [9, -1], 13: [8, -1]} +def test_missingancestors_explicit(): +"""A few explicit cases, easier to check for catching errors in refactors. + +The bigger graph at the end has been produced by the random generator +above, and we have some evidence that the other tests don't cover it. +""" +for i, (bases, revs) in enumerate((({1, 2, 3, 4, 7}, set(xrange(10))), + ({10}, set({11, 12, 13, 14})), + ({7}, set({1, 2, 3, 4, 5})), + )): +print("%% removeancestorsfrom(), example %d" % (i + 1)) +missanc = ancestor.incrementalmissingancestors(graph.get, bases) +missanc.removeancestorsfrom(revs) +print("remaining (sorted): %s" % sorted(list(revs))) + +for i, (bases, revs) in enumerate((({10}, {11}), + ({11}, {10}), + ({7}, {9, 11}), + )): +print("%% missingancestors(), example %d" % (i + 1)) +missanc = ancestor.incrementalmissingancestors(graph.get, bases) +print("return %s" % missanc.missingancestors(revs)) + +print("% removeancestorsfrom(), bigger graph") +vecgraph = [ +[-1, -1], [0, -1], [1, 0], [2, 1], [3, -1], [4, -1], [5, 1], +[2, -1], [7, -1], [8, -1], [9, -1], [10, 1], [3, -1], [12, -1], +[13, -1], [14, -1], [4, -1], [16, -1], [17, -1], [18, -1], +[19, 11], [20, -1], [21, -1], [22, -1], [23, -1], [2, -1], +[3, -1], [26, 24], [27, -1], [28, -1], [12, -1], [1, -1], [1, 9], +[32, -1], [33, -1], [34, 31], [35, -1], [36, 26], [37, -1], +[38, -1], [39, -1], [40, -1], [41, -1], [42, 26], [0, -1], +[44, -1], [45, 4], [40, -1], [47, -1], [36, 0], [49, -1], +[-1, -1], [51, -1], [52, -1], [53, -1], [14, -1], +[55, -1], [15, -1], [23, -1], [58, -1], [59, -1], [2, -1], +[61, 59], [62, -1], [63, -1], [-1, -1], [65, -1], +[66, -1], [67, -1], [68, -1], [37, 28], [69, 25], +[71, -1], [72, -1], [50, 2], [74, -1], [12, -1], +[18, -1], [77, -1], [78, -1], [79, -1], [43, 33], +[81, -1], [82, -1], [83, -1], [84, 45], [85, -1], +[86, -1], [-1, -1],
D5369: rust: make clean takes care of rust/target
gracinet created this revision. Herald added a subscriber: mercurial-devel. Herald added a reviewer: hg-reviewers. REVISION SUMMARY This isn't the prettiest way of doing it, but it doesn't require looking up cargo, or wondering whether that should be part of setup.py clean. REPOSITORY rHG Mercurial BRANCH default REVISION DETAIL https://phab.mercurial-scm.org/D5369 AFFECTED FILES Makefile CHANGE DETAILS diff --git a/Makefile b/Makefile --- a/Makefile +++ b/Makefile @@ -72,6 +72,7 @@ rm -rf build mercurial/locale $(MAKE) -C doc clean $(MAKE) -C contrib/chg distclean + if test -d rust/target; then rm -rf rust/target; fi clean: cleanbutpackages rm -rf packages To: gracinet, #hg-reviewers Cc: mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5365: rust: AncestorsIterator::next, renamed local variables
gracinet created this revision. Herald added subscribers: mercurial-devel, kevincox, durin42. Herald added a reviewer: hg-reviewers. REVISION SUMMARY It was confusing to have p1 and parents.1 ; (p1, p2) is clearer. REPOSITORY rHG Mercurial BRANCH default REVISION DETAIL https://phab.mercurial-scm.org/D5365 AFFECTED FILES rust/hg-core/src/ancestors.rs CHANGE DETAILS diff --git a/rust/hg-core/src/ancestors.rs b/rust/hg-core/src/ancestors.rs --- a/rust/hg-core/src/ancestors.rs +++ b/rust/hg-core/src/ancestors.rs @@ -126,19 +126,18 @@ } Some(c) => *c, }; -let parents = self +let (p1, p2) = self .graph .parents(current) .unwrap_or((NULL_REVISION, NULL_REVISION)); -let p1 = parents.0; if p1 < self.stoprev || self.seen.contains() { self.visit.pop(); } else { *(self.visit.peek_mut().unwrap()) = p1; self.seen.insert(p1); }; -self.conditionally_push_rev(parents.1); +self.conditionally_push_rev(p2); Some(current) } } To: gracinet, #hg-reviewers Cc: durin42, kevincox, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5358: rust: peek_mut optim for lazy ancestors
This revision was automatically updated to reflect the committed changes. Closed by commit rHG04ebdb33e5d0: rust: peek_mut optim for lazy ancestors (authored by gracinet, committed by ). CHANGED PRIOR TO COMMIT https://phab.mercurial-scm.org/D5358?vs=12683=12694#toc REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D5358?vs=12683=12694 REVISION DETAIL https://phab.mercurial-scm.org/D5358 AFFECTED FILES rust/hg-core/src/ancestors.rs CHANGE DETAILS diff --git a/rust/hg-core/src/ancestors.rs b/rust/hg-core/src/ancestors.rs --- a/rust/hg-core/src/ancestors.rs +++ b/rust/hg-core/src/ancestors.rs @@ -57,7 +57,9 @@ }; this.seen.insert(NULL_REVISION); for rev in filtered_initrevs { -this.conditionally_push_parents(rev)?; +let parents = this.graph.parents(rev)?; +this.conditionally_push_rev(parents.0); +this.conditionally_push_rev(parents.1); } Ok(this) } @@ -70,17 +72,6 @@ } } -#[inline] -fn conditionally_push_parents( - self, -rev: Revision, -) -> Result<(), GraphError> { -let parents = self.graph.parents(rev)?; -self.conditionally_push_rev(parents.0); -self.conditionally_push_rev(parents.1); -Ok(()) -} - /// Consumes partially the iterator to tell if the given target /// revision /// is in the ancestors it emits. @@ -109,9 +100,9 @@ /// /// - there's no filtering of invalid parent revisions. Actually, it should be /// consistent and more efficient to filter them from the end caller. -/// - we don't use the equivalent of `heapq.heapreplace()`, but we should, for -/// the same reasons (using `peek_mut`) -/// - we don't have the optimization for adjacent revs (case where p1 == rev-1) +/// - we don't have the optimization for adjacent revs +/// (case where p1 == rev-1), because it amounts to update the first element +/// of the heap without sifting, which Rust's BinaryHeap doesn't let us do. /// - we save a few pushes by comparing with `stoprev` before pushing /// /// Error treatment: @@ -129,13 +120,25 @@ type Item = Revision; fn next( self) -> Option { -let current = match self.visit.pop() { +let current = match self.visit.peek() { None => { return None; } -Some(i) => i, +Some(c) => *c, }; -self.conditionally_push_parents(current).unwrap_or(()); +let parents = self +.graph +.parents(current) +.unwrap_or((NULL_REVISION, NULL_REVISION)); +let p1 = parents.0; +if p1 < self.stoprev || self.seen.contains() { +self.visit.pop(); +} else { +*(self.visit.peek_mut().unwrap()) = p1; +self.seen.insert(p1); +}; + +self.conditionally_push_rev(parents.1); Some(current) } } To: gracinet, #hg-reviewers Cc: yuja, durin42, kevincox, mercurial-devel ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
D5358: rust: peek_mut optim for lazy ancestors
gracinet created this revision. Herald added subscribers: mercurial-devel, kevincox, durin42. Herald added a reviewer: hg-reviewers. REVISION SUMMARY This is one of the two optimizations that are also present in the Python code: replacing pairs of pop/push on the BinaryHeap by single updates, hence having it under the hood maintain its consistency (sift) only once. On Mozilla central, the measured gain (see details below) is around 7%. Creating the PeekMut object by calling peek_mut() right away instead of peek() first is less efficient (gain is only 4%, stats not included). Our interpretation is that its creation has a cost which is vasted in the cases where it ends by droping the value (Peekmut::pop() just does self.heap.pop() anyway). On the other hand, the immutable peek() is very fast: it's just taking a reference in the underlying vector. The Python version still has another optimization: if parent(current) == current-1, then the heap doesn't need to maintain its consistency, since we already now that it's bigger than all the others in the heap. Rust's BinaryHeap doesn't allow us to mutate its biggest element with no housekeeping, but we tried it anyway, with a copy of the BinaryHeap implementation with a dedicaded added method: it's not worth the technical debt in our opinion (we measured only a further 1.6% improvement). One possible explanation would be that the sift is really fast anyway in that case, whereas it's not in the case of Python, because it's at least partly done in slow Python code. Still it's possible that replacing BinaryHeap by something more dedicated to discrete ordered types could be faster. Measurements on mozilla-central: Three runs of 'hg perfancestors' on the parent changeset: Moyenne des médianes: 0.100587 ! wall 0.100062 comb 0.10 user 0.10 sys 0.00 (best of 98) ! wall 0.135804 comb 0.13 user 0.13 sys 0.00 (max of 98) ! wall 0.102864 comb 0.102755 user 0.099286 sys 0.003469 (avg of 98) ! wall 0.101486 comb 0.11 user 0.11 sys 0.00 (median of 98) ! wall 0.096804 comb 0.09 user 0.09 sys 0.00 (best of 100) ! wall 0.132235 comb 0.13 user 0.12 sys 0.01 (max of 100) ! wall 0.100258 comb 0.100300 user 0.096000 sys 0.004300 (avg of 100) ! wall 0.098384 comb 0.10 user 0.10 sys 0.00 (median of 100) ! wall 0.099925 comb 0.10 user 0.10 sys 0.00 (best of 98) ! wall 0.133518 comb 0.14 user 0.13 sys 0.01 (max of 98) ! wall 0.102381 comb 0.102449 user 0.098265 sys 0.004184 (avg of 98) ! wall 0.101891 comb 0.09 user 0.09 sys 0.00 (median of 98) Mean of the medians: 0.100587 On the present changeset: ! wall 0.091344 comb 0.09 user 0.09 sys 0.00 (best of 100) ! wall 0.122728 comb 0.12 user 0.11 sys 0.01 (max of 100) ! wall 0.093268 comb 0.093300 user 0.089300 sys 0.004000 (avg of 100) ! wall 0.092567 comb 0.10 user 0.09 sys 0.01 (median of 100) ! wall 0.093294 comb 0.08 user 0.08 sys 0.00 (best of 100) ! wall 0.144887 comb 0.15 user 0.14 sys 0.01 (max of 100) ! wall 0.097708 comb 0.097700 user 0.093400 sys 0.004300 (avg of 100) ! wall 0.094980 comb 0.10 user 0.09 sys 0.01 (median of 100) ! wall 0.091262 comb 0.09 user 0.08 sys 0.01 (best of 100) ! wall 0.123772 comb 0.13 user 0.12 sys 0.01 (max of 100) ! wall 0.093188 comb 0.093200 user 0.089300 sys 0.003900 (avg of 100) ! wall 0.092364 comb 0.10 user 0.09 sys 0.01 (median of 100) Mean of the medians is 0.0933 REPOSITORY rHG Mercurial BRANCH default REVISION DETAIL https://phab.mercurial-scm.org/D5358 AFFECTED FILES rust/hg-core/src/ancestors.rs CHANGE DETAILS diff --git a/rust/hg-core/src/ancestors.rs b/rust/hg-core/src/ancestors.rs --- a/rust/hg-core/src/ancestors.rs +++ b/rust/hg-core/src/ancestors.rs @@ -57,7 +57,9 @@ }; this.seen.insert(NULL_REVISION); for rev in filtered_initrevs { -this.conditionally_push_parents(rev)?; +let parents = this.graph.parents(rev)?; +this.conditionally_push_rev(parents.0); +this.conditionally_push_rev(parents.1); } Ok(this) } @@ -70,17 +72,6 @@ } } -#[inline] -fn conditionally_push_parents( - self, -rev: Revision, -) -> Result<(), GraphError> { -let parents = self.graph.parents(rev)?; -self.conditionally_push_rev(parents.0); -self.conditionally_push_rev(parents.1); -Ok(()) -} - /// Consumes partially the iterator to tell if the given target /// revision /// is in the ancestors it emits. @@ -109,9 +100,9 @@ /// /// - there's no filtering of invalid parent revisions. Actually, it should be /// consistent and more efficient to filter
Re: [PATCH 3 of 3 STABLE] rust: propagate Python exception raised from index_get_parents_checked()
On 10/29/2018 02:03 PM, Yuya Nishihara wrote: > On Sun, 28 Oct 2018 22:11:03 +0900, Yuya Nishihara wrote: >> # HG changeset patch >> # User Yuya Nishihara >> # Date 1540730473 -32400 >> # Sun Oct 28 21:41:13 2018 +0900 >> # Branch stable >> # Node ID c40ceec4d993927b6788ad430dc6b4f3087d9aeb >> # Parent 28a5ec244ba88ce4a46a26a32c24fa36f7597245 >> rust: propagate Python exception raised from index_get_parents_checked() >> static int rustla_contains(rustlazyancestorsObject *self, PyObject *rev) { >> +int res; >> if (!(PyInt_Check(rev))) { >> return 0; as a reminder, this one, that you don't modify also takes care of the case of None, for which there is an explicit comment in the Python implementation. >> } >> -return rustlazyancestors_contains(self->iter, PyInt_AS_LONG(rev)); >> +res = rustlazyancestors_contains(self->iter, PyInt_AS_LONG(rev)); >> +if (PyErr_Occurred()) >> +return -1; >> +return res; > Not that a proper fix is to make AncestorsIterator functions return > Result<_, GraphError>. I can send the patch for stable if it's preferred. Returning Result<_, GraphError> as much as possible is my intent indeed, but it wouldn't apply to next(), because the Iterator trait does not allow for that, unless we change the signature to an Iterator with Item = Result. I didn't dare to do that earlier, as it seemed to be too heavy. That being said, I'm not sure how much of these errors (bad stoprev etc) we can get rid of from the start. The more serious case would be that of the corrupted index, though, in which a correct initialization can still give rise to errors later on. > ___ > Mercurial-devel mailing list > Mercurial-devel@mercurial-scm.org > https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel -- Georges Racinet Anybox SAS, http://anybox.fr Téléphone: +33 6 51 32 07 27 GPG: B59E 22AB B842 CAED 77F7 7A7F C34F A519 33AB 0A35, sur serveurs publics ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: Rust extensions: the next step
On 10/18/2018 12:22 PM, Gregory Szorc wrote: > One open item is full PyPy/cffi support. Ideally we’d only write the native > code interface once. But I think that means cffi everywhere and last I > looked, CPython into cffi was a bit slower compared to native extensions. I’m > willing to ignore cffi support for now (PyPy can use pure Python and rely on > JIT for faster execution). Maybe something like milksnake can help us here? > But I’m content with using the cpython crate to maintain a Rust-based > extension: that’s little different from what we do today and we shouldn’t let > perfect be the enemy of good. One nice thing with the cpython crate is that it's just using the CPython ABI. Therefore, there's nothing we can't do – only things that are less practical. It's not very intuitive, but it should be ok with a bit of practice. About cffi, if milksnake can automate it, that's an easy win to be added later (for now I still need to call in the C modules from the Rust code). In both cases, we need to tighten it with comprehensive integration tests. Cheers, -- Georges Racinet Anybox SAS, http://anybox.fr Téléphone: +33 6 51 32 07 27 GPG: B59E 22AB B842 CAED 77F7 7A7F C34F A519 33AB 0A35, sur serveurs publics ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: Rust extensions: the next step
On 10/18/2018 04:09 PM, Yuya Nishihara wrote: > On Thu, 18 Oct 2018 08:58:04 -0400, Josef 'Jeff' Sipek wrote: >> On Thu, Oct 18, 2018 at 12:22:16 +0200, Gregory Szorc wrote: >> ... >>> Something else we may want to consider is a single Python module exposing >>> the Rust code instead of N. Rust’s more aggressive cross function >>> compilation optimization could result in better performance if everything >>> is linked/exposed in a single shared library/module/extension. Maybe this >>> is what you are proposing? It is unclear if Rust code is linked into the >>> Python extension or loaded from a shared shared library. >> (Warning: I suck at python, aren't an expert on rust, but have more >> knowledge about ELF linking/loading/etc. than is healthy.) >> >> Isn't there also a distinction between code layout (separate crates) and the >> actual binary that cargo/rustc builds? IOW, the code could (and probably >> should) be nicely separated but rustc can combine all the crates' code into >> one big binary for loading into python. Since it would see all the code, it >> can do its fancy optimizations without impacting code readability. > IIUC, it is. Perhaps, the rustext is a single binary exporting multiple > submodules? Yes totally, it's exactly as Josef writes. To demonstrate, here's what I have : $ ls mercurial/*.so mercurial/rustext.so mercurial/zstd.so $ python Python 2.7.13 (default, Nov 24 2017, 17:33:09) [GCC 6.3.0 20170516] on linux2 Type "help", "copyright", "credits" or "license" for more information. >>> from mercurial import rustext >>> dir(rustext) ['GraphError', '__doc__', '__file__', '__name__', '__package__', 'ancestors'] >>> from mercurial.rustext import ancestors >>> ancestors is rustext.ancestors True >>> dir(ancestors) ['AncestorsIterator', '__doc__', '__name__', '__package__'] So, in short, it's a single shared library that can hold a bunch of modules. The submodules are themselves initialized from the Rust code. Here's the definition of 'rustext' itself. It follows the pattern expected by Josef. $ tail rust/hg-cpython/src/lib.rs mod ancestors; // corresponds to src/ancestors.rs mod exceptions; py_module_initializer!(rustext, initrustext, PyInit_rustext, |py, m| { m.add(py, "__doc__", "Mercurial core concepts - Rust implementation")?; m.add(py, "ancestors", ancestors::init_module(py)?)?; m.add(py, "GraphError", py.get_type::())?; Ok(()) }); (Mark confirmed to me during the sprint that adding submodules on the fly was doable). Indeed I hope the Rust compiler can do lots of optimizations in that single shared library object. > > I expect "rustext" (or its upper layer) to be a shim over Rust-based modules > and cexts. So if you do policy.importmod('parsers'), it will return > cext.parsers, whereas policy.importmod('ancestor') will return > rustext.ancestor, > though I have no idea if there will be cext/pure.ancestor. Yes, it's quite possible to add a new module policy this way. After all, from mercurial.policy, it behaves in the same way as the cext package does and the fact that we have a single shared library instead of several ones is an implementation detail, hidden by Python's import machinery. But this opens another, longer term, question: currently what I have in mercurial.rustext.ancestor has only a fragment of what mercurial.ancestor provides. Therefore to have mercurial.policy handle it, we'll need either to take such partial cases into account, or decide to translate the whole Python module in Rust. For the time being, I'm simply doing an import and catch the error to fallback to the Python version. Regards, -- Georges Racinet Anybox SAS, http://anybox.fr Téléphone: +33 6 51 32 07 27 GPG: B59E 22AB B842 CAED 77F7 7A7F C34F A519 33AB 0A35, sur serveurs publics ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Rust extensions: the next step
Hi all, first, many thanks for the Stockholm sprint, it was my first interaction with the Mercurial community, and it's been very welcoming to me. I've been pursuing some experiments I started then to convert the Rust bindings I've done in the patch series about ancestry iteration (now landed) to a proper Python extension, using the cpython crate and Python capsules. In short, it works. Early benchmarking shows that it's a few percent slower than the direct bindings through C code, which I think is acceptable compared to the other benefits (clearer integration, easier to generalise, no C code at all). The end result is a unique shared library importable as 'mercurial.rustext', which is itself made of several submodules, ie, one can do: from mercurial.rustext.ancestor import AncestorsIterator It will take me some more time, though, to get that experiment into a reviewable state (have to switch soon to other, unrelated, works) and we're too close to the freeze anyway, but if someone wants to see it, I can share it right away. Also, I could summarize some of these thoughts on the Oxidation wiki page. Greg, are you okay with that ? Regards, -- Georges Racinet Anybox SAS, http://anybox.fr Téléphone: +33 6 51 32 07 27 GPG: B59E 22AB B842 CAED 77F7 7A7F C34F A519 33AB 0A35, sur serveurs publics ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Re: [PATCH 2 of 2 V4] rust: rustlazyancestors.__contains__
On 10/14/2018 05:42 PM, Yuya Nishihara wrote: > On Sun, 14 Oct 2018 15:22:07 +0200, Georges Racinet wrote: >> # HG changeset patch >> # User Georges Racinet >> # Date 1539018701 -7200 >> # Mon Oct 08 19:11:41 2018 +0200 >> # Node ID 50d03c9079ffe3932955353be076ff24c4e87804 >> # Parent c04176c0f8b9aeaf196bd1eac00207d526f3d53b >> # EXP-Topic rustancestors-contains >> rust: rustlazyancestors.__contains__ >> +static int rustla_contains(rustlazyancestorsObject *self, PyObject *rev) { >> + if (!(PyInt_Check(rev))) { >> +return 0; >> + } >> + return rustlazyancestors_contains(self->iter, PyInt_AS_LONG(rev)); >> +} > Need tabs ;) yup, sorry, forgot to check that one. > >> --- /dev/nullThu Jan 01 00:00:00 1970 + >> +++ b/rust/hg-direct-ffi/rustfmt.tomlMon Oct 08 19:11:41 2018 +0200 >> @@ -0,0 +1,3 @@ >> +max_width = 79 >> +wrap_comments = true >> +error_on_line_overflow = true > Can you send a separate patch for this? Yes, it has nothing to do in this one > >> +int rustlazyancestors_contains(rustlazyancestorsObject *self, long rev); >> +#[no_mangle] >> +pub extern "C" fn rustlazyancestors_contains( >> +raw: *mut AncestorsIterator, >> +target: c_long, >> +) -> c_long { >> +raw_contains(raw, target) >> +} > s/-> c_long/-> c_int/ Should be done in V5. Good catch, again -- Georges Racinet Anybox SAS, http://anybox.fr Téléphone: +33 6 51 32 07 27 GPG: B59E 22AB B842 CAED 77F7 7A7F C34F A519 33AB 0A35, sur serveurs publics ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 1 of 2 V5] rust: rustlazyancestors.__contains__
# HG changeset patch # User Georges Racinet # Date 1539018701 -7200 # Mon Oct 08 19:11:41 2018 +0200 # Node ID a7cbd02e936e6724068219ee4c1c50dd4da2b22c # Parent 9cadb0f5f2279a60a79b1d0e50eccd60638495c0 # EXP-Topic rustancestors-contains rust: rustlazyancestors.__contains__ This changeset provides a Rust implementation of the iteration performed by lazyancestor.__contains__ It has the advantage over the Python iteration to use the 'seen' set encapsuled into the dedicated iterator (self._containsiter), rather than storing emitted items in another set (self._containsseen), and hence should reduce the memory footprint. Also, there's no need to convert intermediate emitted revisions back into Python integers. At this point, it would be tempting to implement the whole lazyancestor object in Rust, but that would lead to more C wrapping code (two objects) for little expected benefits. diff -r 9cadb0f5f227 -r a7cbd02e936e mercurial/ancestor.py --- a/mercurial/ancestor.py Thu Sep 27 16:55:44 2018 +0200 +++ b/mercurial/ancestor.py Mon Oct 08 19:11:41 2018 +0200 @@ -383,7 +383,7 @@ self._containsiter = None return False -class rustlazyancestors(lazyancestors): +class rustlazyancestors(object): def __init__(self, index, revs, stoprev=0, inclusive=False): self._index = index @@ -395,12 +395,26 @@ # constructor (from C code) doesn't understand anything else yet self._initrevs = initrevs = list(revs) -self._containsseen = set() self._containsiter = parsers.rustlazyancestors( index, initrevs, stoprev, inclusive) +def __nonzero__(self): +"""False if the set is empty, True otherwise. + +It's better to duplicate this essentially trivial method than +to subclass lazyancestors +""" +try: +next(iter(self)) +return True +except StopIteration: +return False + def __iter__(self): return parsers.rustlazyancestors(self._index, self._initrevs, self._stoprev, self._inclusive) + +def __contains__(self, target): +return target in self._containsiter diff -r 9cadb0f5f227 -r a7cbd02e936e mercurial/cext/revlog.c --- a/mercurial/cext/revlog.c Thu Sep 27 16:55:44 2018 +0200 +++ b/mercurial/cext/revlog.c Mon Oct 08 19:11:41 2018 +0200 @@ -2316,6 +2316,7 @@ int inclusive); void rustlazyancestors_drop(rustlazyancestorsObject *self); int rustlazyancestors_next(rustlazyancestorsObject *self); +int rustlazyancestors_contains(rustlazyancestorsObject *self, long rev); /* CPython instance methods */ static int rustla_init(rustlazyancestorsObject *self, @@ -2395,6 +2396,24 @@ return PyInt_FromLong(res); } +static int rustla_contains(rustlazyancestorsObject *self, PyObject *rev) { + if (!(PyInt_Check(rev))) { + return 0; + } + return rustlazyancestors_contains(self->iter, PyInt_AS_LONG(rev)); +} + +static PySequenceMethods rustla_sequence_methods = { + 0, /* sq_length */ + 0, /* sq_concat */ + 0, /* sq_repeat */ + 0, /* sq_item */ + 0, /* sq_slice */ + 0, /* sq_ass_item */ + 0, /* sq_ass_slice */ + (objobjproc)rustla_contains, /* sq_contains */ +}; + static PyTypeObject rustlazyancestorsType = { PyVarObject_HEAD_INIT(NULL, 0) /* header */ "parsers.rustlazyancestors", /* tp_name */ @@ -2407,7 +2426,7 @@ 0, /* tp_compare */ 0, /* tp_repr */ 0, /* tp_as_number */ - 0, /* tp_as_sequence */ + _sequence_methods, /* tp_as_sequence */ 0, /* tp_as_mapping */ 0, /* tp_hash */ 0, /* tp_call */ diff -r 9cadb0f5f227 -r a7cbd02e936e rust/hg-core/src/ancestors.rs --- a/rust/hg-core/src/ancestors.rs Thu Sep 27 16:55:44 2018 +0200 +++ b/rust/hg-core/src/ancestors.rs Mon Oct 08 19:11:41 2018 +0200 @@ -80,6 +80,26 @@ self.conditionally_push_rev(parents.1); Ok(()) } + +/// Consumes partially the iterator to tell if the given target +/// revision +/// is in the ancestors it emits. +/// This is meant for iterators actually dedicated to that kind of +/// purpose +pub fn contains( self, target: Revision) -> bool { +if self.seen.contains() && target != NULL_REVISION { +return true; +} +for rev in self { +if rev == target { +re
[PATCH 2 of 2 V5] rust: rustfmt config for hg-direct-ffi
# HG changeset patch # User Georges Racinet on ishtar.racinet.fr # Date 1539594972 -7200 # Mon Oct 15 11:16:12 2018 +0200 # Node ID 58a939a95d72e90a78f795609fe59a1adef88ddf # Parent a7cbd02e936e6724068219ee4c1c50dd4da2b22c # EXP-Topic rustancestors-contains rust: rustfmt config for hg-direct-ffi For now, we're duplicating it, but it would be probably a good idea to use a single one for the whole workspace (would have implications on the other crates as well) diff -r a7cbd02e936e -r 58a939a95d72 rust/hg-direct-ffi/rustfmt.toml --- /dev/null Thu Jan 01 00:00:00 1970 + +++ b/rust/hg-direct-ffi/rustfmt.toml Mon Oct 15 11:16:12 2018 +0200 @@ -0,0 +1,3 @@ +max_width = 79 +wrap_comments = true +error_on_line_overflow = true ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 2 of 2 V4] rust: rustlazyancestors.__contains__
# HG changeset patch # User Georges Racinet # Date 1539018701 -7200 # Mon Oct 08 19:11:41 2018 +0200 # Node ID 50d03c9079ffe3932955353be076ff24c4e87804 # Parent c04176c0f8b9aeaf196bd1eac00207d526f3d53b # EXP-Topic rustancestors-contains rust: rustlazyancestors.__contains__ This changeset provides a Rust implementation of the iteration performed by lazyancestor.__contains__ It has the advantage over the Python iteration to use the 'seen' set encapsuled into the dedicated iterator (self._containsiter), rather than storing emitted items in another set (self._containsseen), and hence should reduce the memory footprint. Also, there's no need to convert intermediate emitted revisions back into Python integers. At this point, it would be tempting to implement the whole lazyancestor object in Rust, but that would lead to more C wrapping code (two objects) for little expected benefits. diff -r c04176c0f8b9 -r 50d03c9079ff mercurial/ancestor.py --- a/mercurial/ancestor.py Thu Sep 27 16:55:44 2018 +0200 +++ b/mercurial/ancestor.py Mon Oct 08 19:11:41 2018 +0200 @@ -383,7 +383,7 @@ self._containsiter = None return False -class rustlazyancestors(lazyancestors): +class rustlazyancestors(object): def __init__(self, index, revs, stoprev=0, inclusive=False): self._index = index @@ -395,12 +395,26 @@ # constructor (from C code) doesn't understand anything else yet self._initrevs = initrevs = list(revs) -self._containsseen = set() self._containsiter = parsers.rustlazyancestors( index, initrevs, stoprev, inclusive) +def __nonzero__(self): +"""False if the set is empty, True otherwise. + +It's better to duplicate this essentially trivial method than +to subclass lazyancestors +""" +try: +next(iter(self)) +return True +except StopIteration: +return False + def __iter__(self): return parsers.rustlazyancestors(self._index, self._initrevs, self._stoprev, self._inclusive) + +def __contains__(self, target): +return target in self._containsiter diff -r c04176c0f8b9 -r 50d03c9079ff mercurial/cext/revlog.c --- a/mercurial/cext/revlog.c Thu Sep 27 16:55:44 2018 +0200 +++ b/mercurial/cext/revlog.c Mon Oct 08 19:11:41 2018 +0200 @@ -2316,6 +2316,7 @@ int inclusive); void rustlazyancestors_drop(rustlazyancestorsObject *self); int rustlazyancestors_next(rustlazyancestorsObject *self); +int rustlazyancestors_contains(rustlazyancestorsObject *self, long rev); /* CPython instance methods */ static int rustla_init(rustlazyancestorsObject *self, @@ -2395,6 +2396,24 @@ return PyInt_FromLong(res); } +static int rustla_contains(rustlazyancestorsObject *self, PyObject *rev) { + if (!(PyInt_Check(rev))) { +return 0; + } + return rustlazyancestors_contains(self->iter, PyInt_AS_LONG(rev)); +} + +static PySequenceMethods rustla_sequence_methods = { +0, /* sq_length */ + 0, /* sq_concat */ + 0, /* sq_repeat */ + 0, /* sq_item */ + 0, /* sq_slice */ + 0, /* sq_ass_item */ + 0, /* sq_ass_slice */ + (objobjproc)rustla_contains, /* sq_contains */ +}; + static PyTypeObject rustlazyancestorsType = { PyVarObject_HEAD_INIT(NULL, 0) /* header */ "parsers.rustlazyancestors", /* tp_name */ @@ -2407,7 +2426,7 @@ 0, /* tp_compare */ 0, /* tp_repr */ 0, /* tp_as_number */ - 0, /* tp_as_sequence */ + _sequence_methods, /* tp_as_sequence */ 0, /* tp_as_mapping */ 0, /* tp_hash */ 0, /* tp_call */ diff -r c04176c0f8b9 -r 50d03c9079ff rust/hg-core/src/ancestors.rs --- a/rust/hg-core/src/ancestors.rs Thu Sep 27 16:55:44 2018 +0200 +++ b/rust/hg-core/src/ancestors.rs Mon Oct 08 19:11:41 2018 +0200 @@ -80,6 +80,26 @@ self.conditionally_push_rev(parents.1); Ok(()) } + +/// Consumes partially the iterator to tell if the given target +/// revision +/// is in the ancestors it emits. +/// This is meant for iterators actually dedicated to that kind of +/// purpose +pub fn contains( self, target: Revision) -> bool { +if self.seen.contains() && target != NULL_REVISION { +return true; +} +for rev in self { +if rev == target { +return true; +} +
[PATCH 1 of 2 V4] rust: hooking into Python code
# HG changeset patch # User Georges Racinet # Date 1538060144 -7200 # Thu Sep 27 16:55:44 2018 +0200 # Node ID c04176c0f8b9aeaf196bd1eac00207d526f3d53b # Parent 3b275f5497771d8a71336273a77575dbf4882798 # EXP-Topic rustancestors-contains rust: hooking into Python code We introduce a new class called 'rustlazyancestors' in the ancestors module, which is used only if parsers.rustlazyancestors does exist. The implementation of __contains__ stays unchanged, but is now backed by the Rust iterator. It would probably be a good candidate for further development, though, as it is mostly looping, and duplicates the 'seen' set. The Rust code could be further optimized, however it already gives rise to performance improvements: median timing from hg perfancestors: - on pypy: before: 0.077566s after: 0.016676s -79% - on mozilla central: before: 0.190037s after: 0.082225s -58% - on a private repository (about one million revisions): before: 0.567085s after: 0.108816s -80% - on another private repository (about 400 000 revisions): before: 1.440918s after: 0.290116s -80% median timing for hg perfbranchmap base - on pypy: before: 1.383413s after: 0.507993s -63% - on mozilla central: before: 2.821940s after: 1.258902s -55% - on a private repository (about one million revisions): before: 77.065076s after: 16.158475s -80% - on another private repository (about 401 000 revisions): before: 7.835503s after: 3.545331s -54% diff -r 3b275f549777 -r c04176c0f8b9 mercurial/ancestor.py --- a/mercurial/ancestor.py Thu Sep 27 16:56:15 2018 +0200 +++ b/mercurial/ancestor.py Thu Sep 27 16:55:44 2018 +0200 @@ -11,9 +11,12 @@ from .node import nullrev from . import ( +policy, pycompat, ) +parsers = policy.importmod(r'parsers') + def commonancestorsheads(pfunc, *nodes): """Returns a set with the heads of all common ancestors of all nodes, heads(::nodes[0] and ::nodes[1] and ...) . @@ -379,3 +382,25 @@ # free up memory. self._containsiter = None return False + +class rustlazyancestors(lazyancestors): + +def __init__(self, index, revs, stoprev=0, inclusive=False): +self._index = index +self._stoprev = stoprev +self._inclusive = inclusive +# no need to prefilter out init revs that are smaller than stoprev, +# it's done by rustlazyancestors constructor. +# we need to convert to a list, because our ruslazyancestors +# constructor (from C code) doesn't understand anything else yet +self._initrevs = initrevs = list(revs) + +self._containsseen = set() +self._containsiter = parsers.rustlazyancestors( +index, initrevs, stoprev, inclusive) + +def __iter__(self): +return parsers.rustlazyancestors(self._index, + self._initrevs, + self._stoprev, + self._inclusive) diff -r 3b275f549777 -r c04176c0f8b9 mercurial/revlog.py --- a/mercurial/revlog.py Thu Sep 27 16:56:15 2018 +0200 +++ b/mercurial/revlog.py Thu Sep 27 16:55:44 2018 +0200 @@ -763,6 +763,10 @@ for r in revs: checkrev(r) # and we're sure ancestors aren't filtered as well +if util.safehasattr(parsers, 'rustlazyancestors'): +return ancestor.rustlazyancestors( +self.index, revs, +stoprev=stoprev, inclusive=inclusive) return ancestor.lazyancestors(self._uncheckedparentrevs, revs, stoprev=stoprev, inclusive=inclusive) ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
[PATCH 5 of 5 V3] rust: rustlazyancestors.__contains__
# HG changeset patch # User Georges Racinet # Date 1539018701 -7200 # Mon Oct 08 19:11:41 2018 +0200 # Node ID 5c3f0974b9afb074df9acd626813064eb6f2ffec # Parent 98f0f668f63b143eba344a74c8b22a0588f46935 # EXP-Topic rustancestors-contains rust: rustlazyancestors.__contains__ This changeset provides a Rust implementation of the iteration performed by lazyancestor.__contains__ It has the advantage over the Python iteration to use the 'seen' set encapsuled into the dedicated iterator (self._containsiter), rather than storing emitted items in another set (self._containsseen), and hence should reduce the memory footprint. Also, there's no need to convert intermediate emitted revisions back into Python integers. At this point, it would be tempting to implement the whole lazyancestor object in Rust, but that would lead to more C wrapping code (two objects) for little expected benefits. diff -r 98f0f668f63b -r 5c3f0974b9af mercurial/ancestor.py --- a/mercurial/ancestor.py Thu Sep 27 16:55:44 2018 +0200 +++ b/mercurial/ancestor.py Mon Oct 08 19:11:41 2018 +0200 @@ -383,7 +383,7 @@ self._containsiter = None return False -class rustlazyancestors(lazyancestors): +class rustlazyancestors(object): def __init__(self, index, revs, stoprev=0, inclusive=False): self._index = index @@ -395,12 +395,26 @@ # constructor (from C code) doesn't understand anything else yet self._initrevs = initrevs = list(revs) -self._containsseen = set() self._containsiter = parsers.rustlazyancestors( index, initrevs, stoprev, inclusive) +def __nonzero__(self): +"""False if the set is empty, True otherwise. + +It's better to duplicate this essentially trivial method than +to subclass lazyancestors +""" +try: +next(iter(self)) +return True +except StopIteration: +return False + def __iter__(self): return parsers.rustlazyancestors(self._index, self._initrevs, self._stoprev, self._inclusive) + +def __contains__(self, target): +return target in self._containsiter diff -r 98f0f668f63b -r 5c3f0974b9af mercurial/cext/revlog.c --- a/mercurial/cext/revlog.c Thu Sep 27 16:55:44 2018 +0200 +++ b/mercurial/cext/revlog.c Mon Oct 08 19:11:41 2018 +0200 @@ -2316,6 +2316,7 @@ int inclusive); void rustlazyancestors_drop(rustlazyancestorsObject *self); int rustlazyancestors_next(rustlazyancestorsObject *self); +int rustlazyancestors_contains(rustlazyancestorsObject *self, long rev); /* CPython instance methods */ static int rustla_init(rustlazyancestorsObject *self, @@ -2394,6 +2395,24 @@ return PyInt_FromLong(res); } +static int rustla_contains(rustlazyancestorsObject *self, PyObject *rev) { + if (!(PyInt_Check(rev))) { +return 0; + } + return rustlazyancestors_contains(self->iter, PyInt_AS_LONG(rev)); +} + +static PySequenceMethods rustla_sequence_methods = { +0, /* sq_length */ + 0, /* sq_concat */ + 0, /* sq_repeat */ + 0, /* sq_item */ + 0, /* sq_slice */ + 0, /* sq_ass_item */ + 0, /* sq_ass_slice */ + (objobjproc)rustla_contains, /* sq_contains */ +}; + static PyTypeObject rustlazyancestorsType = { PyVarObject_HEAD_INIT(NULL, 0) /* header */ "parsers.rustlazyancestors", /* tp_name */ @@ -2406,7 +2425,7 @@ 0, /* tp_compare */ 0, /* tp_repr */ 0, /* tp_as_number */ - 0, /* tp_as_sequence */ + _sequence_methods, /* tp_as_sequence */ 0, /* tp_as_mapping */ 0, /* tp_hash */ 0, /* tp_call */ diff -r 98f0f668f63b -r 5c3f0974b9af rust/hg-core/src/ancestors.rs --- a/rust/hg-core/src/ancestors.rs Thu Sep 27 16:55:44 2018 +0200 +++ b/rust/hg-core/src/ancestors.rs Mon Oct 08 19:11:41 2018 +0200 @@ -81,6 +81,26 @@ self.conditionally_push_rev(parents.1); Ok(()) } + +/// Consumes partially the iterator to tell if the given target +/// revision +/// is in the ancestors it emits. +/// This is meant for iterators actually dedicated to that kind of +/// purpose +pub fn contains( self, target: Revision) -> bool { +if self.seen.contains() && target != NULL_REVISION { +return true; +} +for rev in self { +if rev == target { +return true; +} +
[PATCH 3 of 5 V3] rust: exposing in parsers module
# HG changeset patch # User Georges Racinet # Date 1538060175 -7200 # Thu Sep 27 16:56:15 2018 +0200 # Node ID b1d2b4a4684a51ba9bfc3ea5bc6e177be65e4b69 # Parent 4b490e500a3551b03c41dc06f16aa506523719c6 # EXP-Topic rustancestors-contains rust: exposing in parsers module To build with the Rust code, set the HGWITHRUSTEXT environment variable. At this point, it's possible to instantiate and use a rustlazyancestors object from a Python interpreter. The changes in setup.py are obviously a quick hack, just good enough to test/bench without much refactoring. We'd be happy to improve on that with help from the community. Rust bindings crate gets compiled as a static library, which in turn gets linked within 'parsers.so' With respect to the plans at https://www.mercurial-scm.org/wiki/OxidationPlan this would probably qualify as "roll our own FFI". Also, it doesn't quite meet the target of getting rid of C code, since it brings actually more, yet: - the new C code does nothing else than parsing arguments and calling Rust functions. In particular, there's no complex allocation involved. - subsequent changes could rewrite more of revlog.c, this time resulting in an overall decrease of C code and unsafety. diff -r 4b490e500a35 -r b1d2b4a4684a mercurial/cext/revlog.c --- a/mercurial/cext/revlog.c Thu Sep 27 16:51:36 2018 +0200 +++ b/mercurial/cext/revlog.c Thu Sep 27 16:56:15 2018 +0200 @@ -2290,6 +2290,151 @@ return NULL; } +#ifdef WITH_RUST + +/* rustlazyancestors: iteration over ancestors implemented in Rust + * + * This class holds a reference to an index and to the Rust iterator. + */ +typedef struct rustlazyancestorsObjectStruct rustlazyancestorsObject; + +struct rustlazyancestorsObjectStruct { + PyObject_HEAD + /* Type-specific fields go here. */ + indexObject *index;/* Ref kept to avoid GC'ing the index */ + void *iter;/* Rust iterator */ +}; + +/* FFI exposed from Rust code */ +rustlazyancestorsObject *rustlazyancestors_init( + indexObject *index, + /* to pass index_get_parents() */ + int (*)(indexObject *, Py_ssize_t, int*, int), + /* intrevs vector */ + int initrevslen, long *initrevs, + long stoprev, + int inclusive); +void rustlazyancestors_drop(rustlazyancestorsObject *self); +int rustlazyancestors_next(rustlazyancestorsObject *self); + +/* CPython instance methods */ +static int rustla_init(rustlazyancestorsObject *self, + PyObject *args) { + PyObject *initrevsarg = NULL; + PyObject *inclusivearg = NULL; + long stoprev = 0; + long *initrevs = NULL; + int inclusive = 0; + Py_ssize_t i; + + indexObject *index; + if (!PyArg_ParseTuple(args, "O!O!lO!", + , , + _Type, , + , + _Type, )) + return -1; + + Py_INCREF(index); + self->index = index; + + if (inclusivearg == Py_True) + inclusive = 1; + + Py_ssize_t linit = PyList_GET_SIZE(initrevsarg); + + initrevs = (long*)calloc(linit, sizeof(long)); + + if (initrevs == NULL) { + PyErr_NoMemory(); + goto bail; + } + + for (i=0; iiter = rustlazyancestors_init(index, + index_get_parents, + linit, initrevs, + stoprev, inclusive); + if (self->iter == NULL) + /* if this is because of GraphError::ParentOutOfRange +* index_get_parents() has already set the proper ValueError */ + goto bail; + + free(initrevs); + return 0; + + bail: + free(initrevs); + return -1; +}; + +static void rustla_dealloc(rustlazyancestorsObject *self) +{ + Py_XDECREF(self->index); + if (self->iter != NULL) { /* can happen if rustla_init failed */ + rustlazyancestors_drop(self->iter); + } + PyObject_Del(self); +} + +static PyObject *rustla_next(rustlazyancestorsObject *self) { + int res = rustlazyancestors_next(self->iter); + if (res == -1) { + /* Setting an explicit exception seems unnecessary +* as examples from Python source code (Objects/rangeobjets.c and +* Modules/_io/stringio.c) seem to demonstrate. + */ + return NULL; + } + return PyInt_FromLong(res); +} + +static PyTypeObject rustlazyancestorsType = { + PyVarObject_HEAD_INIT(NULL, 0) /* header */ + "parsers.rustlazyancestors", /* tp_name */ + sizeof(rustlazyancestorsObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor)rustla_dealloc, /* tp_dealloc */ + 0,
[PATCH 4 of 5 V3] rust: hooking into Python code
# HG changeset patch # User Georges Racinet # Date 1538060144 -7200 # Thu Sep 27 16:55:44 2018 +0200 # Node ID 98f0f668f63b143eba344a74c8b22a0588f46935 # Parent b1d2b4a4684a51ba9bfc3ea5bc6e177be65e4b69 # EXP-Topic rustancestors-contains rust: hooking into Python code We introduce a new class called 'rustlazyancestors' in the ancestors module, which is used only if parsers.rustlazyancestors does exist. The implementation of __contains__ stays unchanged, but is now backed by the Rust iterator. It would probably be a good candidate for further development, though, as it is mostly looping, and duplicates the 'seen' set. The Rust code could be further optimized, however it already gives rise to performance improvements: median timing from hg perfancestors: - on pypy: before: 0.113749s after: 0.018628s -84% - on mozilla central: before: 0.329075s after: 0.083889s -75% - on a private repository (about one million revisions): before: 1.982365s after: 0.329907s -83% - on another private repository (about 400 000 revisions): before: 0.826686s after: 0.123760s -85% median timing for hg perfbranchmap base - on pypy: before: 1.808351s after: 0.480814s -73% - on mozilla central: before: 18.493269s after: 1.305514s -93% - on a private repository (about one million revisions): before: 9.153785s after: 3.662155s -60% - on another private repository (about 400 000 revisions): before: 98.034737s after: 18.109361s -81% diff -r b1d2b4a4684a -r 98f0f668f63b mercurial/ancestor.py --- a/mercurial/ancestor.py Thu Sep 27 16:56:15 2018 +0200 +++ b/mercurial/ancestor.py Thu Sep 27 16:55:44 2018 +0200 @@ -11,9 +11,12 @@ from .node import nullrev from . import ( +policy, pycompat, ) +parsers = policy.importmod(r'parsers') + def commonancestorsheads(pfunc, *nodes): """Returns a set with the heads of all common ancestors of all nodes, heads(::nodes[0] and ::nodes[1] and ...) . @@ -379,3 +382,25 @@ # free up memory. self._containsiter = None return False + +class rustlazyancestors(lazyancestors): + +def __init__(self, index, revs, stoprev=0, inclusive=False): +self._index = index +self._stoprev = stoprev +self._inclusive = inclusive +# no need to prefilter out init revs that are smaller than stoprev, +# it's done by rustlazyancestors constructor. +# we need to convert to a list, because our ruslazyancestors +# constructor (from C code) doesn't understand anything else yet +self._initrevs = initrevs = list(revs) + +self._containsseen = set() +self._containsiter = parsers.rustlazyancestors( +index, initrevs, stoprev, inclusive) + +def __iter__(self): +return parsers.rustlazyancestors(self._index, + self._initrevs, + self._stoprev, + self._inclusive) diff -r b1d2b4a4684a -r 98f0f668f63b mercurial/changelog.py --- a/mercurial/changelog.pyThu Sep 27 16:56:15 2018 +0200 +++ b/mercurial/changelog.pyThu Sep 27 16:55:44 2018 +0200 @@ -20,14 +20,18 @@ from . import ( encoding, error, +policy, pycompat, revlog, +util, ) from .utils import ( dateutil, stringutil, ) +parsers = policy.importmod(r'parsers') + _defaultextra = {'branch': 'default'} def _string_escape(text): @@ -343,6 +347,14 @@ if i not in self.filteredrevs: yield i +def ancestors(self, revs, *args, **kwargs): +if util.safehasattr(parsers, 'rustlazyancestors') and self.filteredrevs: +missing = self.filteredrevs.difference(revs) +if missing: +# raise the lookup error +self.rev(min(missing)) +return super(changelog, self).ancestors(revs, *args, **kwargs) + def reachableroots(self, minroot, heads, roots, includepath=False): return self.index.reachableroots2(minroot, heads, roots, includepath) diff -r b1d2b4a4684a -r 98f0f668f63b mercurial/revlog.py --- a/mercurial/revlog.py Thu Sep 27 16:56:15 2018 +0200 +++ b/mercurial/revlog.py Thu Sep 27 16:55:44 2018 +0200 @@ -747,6 +747,10 @@ See the documentation for ancestor.lazyancestors for more details.""" +if util.safehasattr(parsers, 'rustlazyancestors'): +return ancestor.rustlazyancestors( +self.index, revs, +stoprev=stoprev, inclusive=inclusive) return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev, inclusive=inclusive) ___ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel