D7116: rust-performance: introduce FastHashMap type alias for HashMap

2019-12-10 Thread Raphaël Gomès
Closed by commit rHG5ac243a92e37: rust-performance: introduce FastHashMap type 
alias for HashMap (authored by Alphare).
This revision was automatically updated to reflect the committed changes.
This revision was not accepted when it landed; it landed in state "Needs 
Review".

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7116?vs=18563=18577

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7116/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7116

AFFECTED FILES
  rust/Cargo.lock
  rust/hg-core/Cargo.toml
  rust/hg-core/src/dirstate.rs
  rust/hg-core/src/dirstate/dirs_multiset.rs
  rust/hg-core/src/dirstate/dirstate_map.rs
  rust/hg-core/src/dirstate/parsers.rs
  rust/hg-core/src/discovery.rs
  rust/hg-core/src/filepatterns.rs
  rust/hg-core/src/lib.rs
  rust/hg-cpython/src/parsers.rs

CHANGE DETAILS

diff --git a/rust/hg-cpython/src/parsers.rs b/rust/hg-cpython/src/parsers.rs
--- a/rust/hg-cpython/src/parsers.rs
+++ b/rust/hg-cpython/src/parsers.rs
@@ -15,9 +15,9 @@
 };
 use hg::{
 pack_dirstate, parse_dirstate, utils::hg_path::HgPathBuf,
-DirstatePackError, DirstateParents, DirstateParseError, PARENT_SIZE,
+DirstatePackError, DirstateParents, DirstateParseError, FastHashMap,
+PARENT_SIZE,
 };
-use std::collections::HashMap;
 use std::convert::TryInto;
 
 use crate::dirstate::{extract_dirstate, make_dirstate_tuple};
@@ -29,8 +29,8 @@
 copymap: PyDict,
 st: PyBytes,
 ) -> PyResult {
-let mut dirstate_map = HashMap::new();
-let mut copies = HashMap::new();
+let mut dirstate_map = FastHashMap::default();
+let mut copies = FastHashMap::default();
 
 match parse_dirstate( dirstate_map,  copies, st.data(py)) {
 Ok(parents) => {
@@ -85,7 +85,7 @@
 
 let mut dirstate_map = extract_dirstate(py, )?;
 
-let copies: Result, PyErr> = copymap
+let copies: Result, PyErr> = copymap
 .items(py)
 .iter()
 .map(|(key, value)| {
diff --git a/rust/hg-core/src/lib.rs b/rust/hg-core/src/lib.rs
--- a/rust/hg-core/src/lib.rs
+++ b/rust/hg-core/src/lib.rs
@@ -24,6 +24,8 @@
 pub use filepatterns::{
 build_single_regex, read_pattern_file, PatternSyntax, PatternTuple,
 };
+use std::collections::HashMap;
+use twox_hash::RandomXxHashBuilder64;
 
 /// Mercurial revision numbers
 ///
@@ -53,6 +55,11 @@
 
 pub type LineNumber = usize;
 
+/// Rust's default hasher is too slow because it tries to prevent collision
+/// attacks. We are not concerned about those: if an ill-minded person has
+/// write access to your repository, you have other issues.
+pub type FastHashMap = HashMap;
+
 #[derive(Clone, Debug, PartialEq)]
 pub enum GraphError {
 ParentOutOfRange(Revision),
diff --git a/rust/hg-core/src/filepatterns.rs b/rust/hg-core/src/filepatterns.rs
--- a/rust/hg-core/src/filepatterns.rs
+++ b/rust/hg-core/src/filepatterns.rs
@@ -7,10 +7,11 @@
 
 //! Handling of Mercurial-specific patterns.
 
-use crate::{utils::SliceExt, LineNumber, PatternError, PatternFileError};
+use crate::{
+utils::SliceExt, FastHashMap, LineNumber, PatternError, PatternFileError,
+};
 use lazy_static::lazy_static;
 use regex::bytes::{NoExpand, Regex};
-use std::collections::HashMap;
 use std::fs::File;
 use std::io::Read;
 use std::path::{Path, PathBuf};
@@ -214,8 +215,8 @@
 }
 
 lazy_static! {
-static ref SYNTAXES: HashMap<&'static [u8], &'static [u8]> = {
-let mut m = HashMap::new();
+static ref SYNTAXES: FastHashMap<&'static [u8], &'static [u8]> = {
+let mut m = FastHashMap::default();
 
 m.insert(b"re".as_ref(), b"relre:".as_ref());
 m.insert(b"regexp".as_ref(), b"relre:".as_ref());
diff --git a/rust/hg-core/src/discovery.rs b/rust/hg-core/src/discovery.rs
--- a/rust/hg-core/src/discovery.rs
+++ b/rust/hg-core/src/discovery.rs
@@ -11,12 +11,11 @@
 //! `mercurial.setdiscovery`
 
 use super::{Graph, GraphError, Revision, NULL_REVISION};
-use crate::ancestors::MissingAncestors;
-use crate::dagops;
+use crate::{ancestors::MissingAncestors, dagops, FastHashMap};
 use rand::seq::SliceRandom;
 use rand::{thread_rng, RngCore, SeedableRng};
 use std::cmp::{max, min};
-use std::collections::{HashMap, HashSet, VecDeque};
+use std::collections::{HashSet, VecDeque};
 
 type Rng = rand_pcg::Pcg32;
 
@@ -25,7 +24,7 @@
 graph: G, // plays the role of self._repo
 common: MissingAncestors,
 undecided: Option>,
-children_cache: Option>>,
+children_cache: Option>>,
 missing: HashSet,
 rng: Rng,
 respect_size: bool,
@@ -61,7 +60,7 @@
 where
 I: Iterator,
 {
-let mut distances: HashMap = HashMap::new();
+let mut distances: FastHashMap = FastHashMap::default();
 let mut visit: VecDeque = heads.into_iter().collect();
 let mut factor: u32 = 1;
 let mut seen: HashSet = HashSet::new();
@@ -328,7 +327,8 @@
 }
 self.ensure_undecided()?;
 
-let mut children: HashMap> = HashMap::new();
+let mut children: 

D7116: rust-performance: introduce FastHashMap type alias for HashMap

2019-12-10 Thread Raphaël Gomès
Alphare updated this revision to Diff 18563.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7116?vs=18039=18563

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7116/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7116

AFFECTED FILES
  rust/Cargo.lock
  rust/hg-core/Cargo.toml
  rust/hg-core/src/dirstate.rs
  rust/hg-core/src/dirstate/dirs_multiset.rs
  rust/hg-core/src/dirstate/dirstate_map.rs
  rust/hg-core/src/dirstate/parsers.rs
  rust/hg-core/src/discovery.rs
  rust/hg-core/src/filepatterns.rs
  rust/hg-core/src/lib.rs
  rust/hg-cpython/src/parsers.rs

CHANGE DETAILS

diff --git a/rust/hg-cpython/src/parsers.rs b/rust/hg-cpython/src/parsers.rs
--- a/rust/hg-cpython/src/parsers.rs
+++ b/rust/hg-cpython/src/parsers.rs
@@ -16,9 +16,9 @@
 };
 use hg::{
 pack_dirstate, parse_dirstate, utils::hg_path::HgPathBuf,
-DirstatePackError, DirstateParents, DirstateParseError, PARENT_SIZE,
+DirstatePackError, DirstateParents, DirstateParseError, FastHashMap,
+PARENT_SIZE,
 };
-use std::collections::HashMap;
 use std::convert::TryInto;
 
 use crate::dirstate::{extract_dirstate, make_dirstate_tuple};
@@ -30,8 +30,8 @@
 copymap: PyDict,
 st: PyBytes,
 ) -> PyResult {
-let mut dirstate_map = HashMap::new();
-let mut copies = HashMap::new();
+let mut dirstate_map = FastHashMap::default();
+let mut copies = FastHashMap::default();
 
 match parse_dirstate( dirstate_map,  copies, st.data(py)) {
 Ok(parents) => {
@@ -86,7 +86,7 @@
 
 let mut dirstate_map = extract_dirstate(py, )?;
 
-let copies: Result, PyErr> = copymap
+let copies: Result, PyErr> = copymap
 .items(py)
 .iter()
 .map(|(key, value)| {
diff --git a/rust/hg-core/src/lib.rs b/rust/hg-core/src/lib.rs
--- a/rust/hg-core/src/lib.rs
+++ b/rust/hg-core/src/lib.rs
@@ -24,6 +24,8 @@
 pub use filepatterns::{
 build_single_regex, read_pattern_file, PatternSyntax, PatternTuple,
 };
+use std::collections::HashMap;
+use twox_hash::RandomXxHashBuilder64;
 
 /// Mercurial revision numbers
 ///
@@ -53,6 +55,11 @@
 
 pub type LineNumber = usize;
 
+/// Rust's default hasher is too slow because it tries to prevent collision
+/// attacks. We are not concerned about those: if an ill-minded person has
+/// write access to your repository, you have other issues.
+pub type FastHashMap = HashMap;
+
 #[derive(Clone, Debug, PartialEq)]
 pub enum GraphError {
 ParentOutOfRange(Revision),
diff --git a/rust/hg-core/src/filepatterns.rs b/rust/hg-core/src/filepatterns.rs
--- a/rust/hg-core/src/filepatterns.rs
+++ b/rust/hg-core/src/filepatterns.rs
@@ -7,10 +7,11 @@
 
 //! Handling of Mercurial-specific patterns.
 
-use crate::{utils::SliceExt, LineNumber, PatternError, PatternFileError};
+use crate::{
+utils::SliceExt, FastHashMap, LineNumber, PatternError, PatternFileError,
+};
 use lazy_static::lazy_static;
 use regex::bytes::{NoExpand, Regex};
-use std::collections::HashMap;
 use std::fs::File;
 use std::io::Read;
 use std::path::{Path, PathBuf};
@@ -214,8 +215,8 @@
 }
 
 lazy_static! {
-static ref SYNTAXES: HashMap<&'static [u8], &'static [u8]> = {
-let mut m = HashMap::new();
+static ref SYNTAXES: FastHashMap<&'static [u8], &'static [u8]> = {
+let mut m = FastHashMap::default();
 
 m.insert(b"re".as_ref(), b"relre:".as_ref());
 m.insert(b"regexp".as_ref(), b"relre:".as_ref());
diff --git a/rust/hg-core/src/discovery.rs b/rust/hg-core/src/discovery.rs
--- a/rust/hg-core/src/discovery.rs
+++ b/rust/hg-core/src/discovery.rs
@@ -11,12 +11,11 @@
 //! `mercurial.setdiscovery`
 
 use super::{Graph, GraphError, Revision, NULL_REVISION};
-use crate::ancestors::MissingAncestors;
-use crate::dagops;
+use crate::{ancestors::MissingAncestors, dagops, FastHashMap};
 use rand::seq::SliceRandom;
 use rand::{thread_rng, RngCore, SeedableRng};
 use std::cmp::{max, min};
-use std::collections::{HashMap, HashSet, VecDeque};
+use std::collections::{HashSet, VecDeque};
 
 type Rng = rand_pcg::Pcg32;
 
@@ -25,7 +24,7 @@
 graph: G, // plays the role of self._repo
 common: MissingAncestors,
 undecided: Option>,
-children_cache: Option>>,
+children_cache: Option>>,
 missing: HashSet,
 rng: Rng,
 respect_size: bool,
@@ -61,7 +60,7 @@
 where
 I: Iterator,
 {
-let mut distances: HashMap = HashMap::new();
+let mut distances: FastHashMap = FastHashMap::default();
 let mut visit: VecDeque = heads.into_iter().collect();
 let mut factor: u32 = 1;
 let mut seen: HashSet = HashSet::new();
@@ -328,7 +327,8 @@
 }
 self.ensure_undecided()?;
 
-let mut children: HashMap> = HashMap::new();
+let mut children: FastHashMap> =
+FastHashMap::default();
 for  in self.undecided.as_ref().unwrap() {
 for p in ParentsIterator::graph_parents(, rev)? {
 

D7116: rust-performance: introduce FastHashMap type alias for HashMap

2019-12-04 Thread marmoute (Pierre-Yves David)
marmoute added a comment.


  Any update on this ? I'll need it for my work on copy tracing. Reading the 
review thread, there is no remaining blocker to land this.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7116/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7116

To: Alphare, #hg-reviewers, kevincox, durin42
Cc: marmoute, durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7116: rust-performance: introduce FastHashMap type alias for HashMap

2019-11-12 Thread Raphaël Gomès
Alphare updated this revision to Diff 18039.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7116?vs=17198=18039

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7116/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7116

AFFECTED FILES
  rust/Cargo.lock
  rust/hg-core/Cargo.toml
  rust/hg-core/src/dirstate.rs
  rust/hg-core/src/dirstate/dirs_multiset.rs
  rust/hg-core/src/dirstate/dirstate_map.rs
  rust/hg-core/src/dirstate/parsers.rs
  rust/hg-core/src/discovery.rs
  rust/hg-core/src/filepatterns.rs
  rust/hg-core/src/lib.rs
  rust/hg-cpython/src/parsers.rs

CHANGE DETAILS

diff --git a/rust/hg-cpython/src/parsers.rs b/rust/hg-cpython/src/parsers.rs
--- a/rust/hg-cpython/src/parsers.rs
+++ b/rust/hg-cpython/src/parsers.rs
@@ -16,9 +16,9 @@
 };
 use hg::{
 pack_dirstate, parse_dirstate, utils::hg_path::HgPathBuf,
-DirstatePackError, DirstateParents, DirstateParseError, PARENT_SIZE,
+DirstatePackError, DirstateParents, DirstateParseError, FastHashMap,
+PARENT_SIZE,
 };
-use std::collections::HashMap;
 use std::convert::TryInto;
 
 use crate::dirstate::{extract_dirstate, make_dirstate_tuple};
@@ -30,8 +30,8 @@
 copymap: PyDict,
 st: PyBytes,
 ) -> PyResult {
-let mut dirstate_map = HashMap::new();
-let mut copies = HashMap::new();
+let mut dirstate_map = FastHashMap::default();
+let mut copies = FastHashMap::default();
 
 match parse_dirstate( dirstate_map,  copies, st.data(py)) {
 Ok(parents) => {
@@ -86,7 +86,7 @@
 
 let mut dirstate_map = extract_dirstate(py, )?;
 
-let copies: Result, PyErr> = copymap
+let copies: Result, PyErr> = copymap
 .items(py)
 .iter()
 .map(|(key, value)| {
diff --git a/rust/hg-core/src/lib.rs b/rust/hg-core/src/lib.rs
--- a/rust/hg-core/src/lib.rs
+++ b/rust/hg-core/src/lib.rs
@@ -24,6 +24,8 @@
 pub use filepatterns::{
 build_single_regex, read_pattern_file, PatternSyntax, PatternTuple,
 };
+use std::collections::HashMap;
+use twox_hash::RandomXxHashBuilder64;
 
 /// Mercurial revision numbers
 ///
@@ -53,6 +55,11 @@
 
 pub type LineNumber = usize;
 
+/// Rust's default hasher is too slow because it tries to prevent collision
+/// attacks. We are not concerned about those: if an ill-minded person has
+/// write access to your repository, you have other issues.
+pub type FastHashMap = HashMap;
+
 #[derive(Clone, Debug, PartialEq)]
 pub enum GraphError {
 ParentOutOfRange(Revision),
diff --git a/rust/hg-core/src/filepatterns.rs b/rust/hg-core/src/filepatterns.rs
--- a/rust/hg-core/src/filepatterns.rs
+++ b/rust/hg-core/src/filepatterns.rs
@@ -7,10 +7,11 @@
 
 //! Handling of Mercurial-specific patterns.
 
-use crate::{utils::SliceExt, LineNumber, PatternError, PatternFileError};
+use crate::{
+utils::SliceExt, FastHashMap, LineNumber, PatternError, PatternFileError,
+};
 use lazy_static::lazy_static;
 use regex::bytes::{NoExpand, Regex};
-use std::collections::HashMap;
 use std::fs::File;
 use std::io::Read;
 use std::path::{Path, PathBuf};
@@ -214,8 +215,8 @@
 }
 
 lazy_static! {
-static ref SYNTAXES: HashMap<&'static [u8], &'static [u8]> = {
-let mut m = HashMap::new();
+static ref SYNTAXES: FastHashMap<&'static [u8], &'static [u8]> = {
+let mut m = FastHashMap::default();
 
 m.insert(b"re".as_ref(), b"relre:".as_ref());
 m.insert(b"regexp".as_ref(), b"relre:".as_ref());
diff --git a/rust/hg-core/src/discovery.rs b/rust/hg-core/src/discovery.rs
--- a/rust/hg-core/src/discovery.rs
+++ b/rust/hg-core/src/discovery.rs
@@ -11,12 +11,11 @@
 //! `mercurial.setdiscovery`
 
 use super::{Graph, GraphError, Revision, NULL_REVISION};
-use crate::ancestors::MissingAncestors;
-use crate::dagops;
+use crate::{ancestors::MissingAncestors, dagops, FastHashMap};
 use rand::seq::SliceRandom;
 use rand::{thread_rng, RngCore, SeedableRng};
 use std::cmp::{max, min};
-use std::collections::{HashMap, HashSet, VecDeque};
+use std::collections::{HashSet, VecDeque};
 
 type Rng = rand_pcg::Pcg32;
 
@@ -25,7 +24,7 @@
 graph: G, // plays the role of self._repo
 common: MissingAncestors,
 undecided: Option>,
-children_cache: Option>>,
+children_cache: Option>>,
 missing: HashSet,
 rng: Rng,
 respect_size: bool,
@@ -61,7 +60,7 @@
 where
 I: Iterator,
 {
-let mut distances: HashMap = HashMap::new();
+let mut distances: FastHashMap = FastHashMap::default();
 let mut visit: VecDeque = heads.into_iter().collect();
 let mut factor: u32 = 1;
 let mut seen: HashSet = HashSet::new();
@@ -328,7 +327,8 @@
 }
 self.ensure_undecided()?;
 
-let mut children: HashMap> = HashMap::new();
+let mut children: FastHashMap> =
+FastHashMap::default();
 for  in self.undecided.as_ref().unwrap() {
 for p in ParentsIterator::graph_parents(, rev)? {
 

D7116: rust-performance: introduce FastHashMap type alias for HashMap

2019-11-11 Thread durin42 (Augie Fackler)
This revision now requires changes to proceed.
durin42 added a comment.
durin42 requested changes to this revision.


  It sounds like I expect this to be rebased now that other patches landed. Let 
me know if that's wrong?

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7116/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7116

To: Alphare, #hg-reviewers, kevincox, durin42
Cc: durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7116: rust-performance: introduce FastHashMap type alias for HashMap

2019-11-08 Thread Raphaël Gomès
Alphare added a comment.


  I'll rebase this series once my other patches land, to make it easier to get 
all instances of `HashMap` in the codebase.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7116/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7116

To: Alphare, #hg-reviewers, kevincox
Cc: durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7116: rust-performance: introduce FastHashMap type alias for HashMap

2019-10-19 Thread kevincox (Kevin Cox)
kevincox added a comment.


  In D7116#104769 , @Alphare wrote:
  
  > The following comparison shows that for input > 20 bytes, fnv has worse 
overall performance than xx
  
  Sounds good. We can always do benchmarks in the future and swap it.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7116/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7116

To: Alphare, #hg-reviewers, kevincox
Cc: durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7116: rust-performance: introduce FastHashMap type alias for HashMap

2019-10-17 Thread Raphaël Gomès
Alphare added a comment.


  In D7116#104685 , @kevincox 
wrote:
  
  > I've seen very good results with https://github.com/servo/rust-fnv in the 
past so it is probably worth including that in the comparison and possibly 
using it. It is especially good for small keys which seems like a common case 
in hg.
  
  The following comparison shows that for input > 20 bytes, fnv has worse 
overall performance than xx: https://cglab.ca/~abeinges/blah/hash-rs/.
  I think that keys bigger than 20 bytes are pretty common: 
`mercurial/dirstate.py` is nested once and is already over 20 bytes. Maybe we 
could use fnv for cases other than the dirstate where we have mostly small 
keys, but that seems pretty cumbersome to me.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7116/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7116

To: Alphare, #hg-reviewers, kevincox
Cc: durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7116: rust-performance: introduce FastHashMap type alias for HashMap

2019-10-16 Thread kevincox (Kevin Cox)
kevincox added a comment.
kevincox accepted this revision.


  I've seen very good results with https://github.com/servo/rust-fnv in the 
past so it is probably worth including that in the comparison and possibly 
using it. It is especially good for small keys which seems like a common case 
in hg.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7116/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7116

To: Alphare, #hg-reviewers, kevincox
Cc: durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7116: rust-performance: introduce FastHashMap type alias for HashMap

2019-10-16 Thread kevincox (Kevin Cox)
kevincox added a comment.


  In D7116#104617 , @durin42 wrote:
  
  > OOC, have you compared this with the hashbrown crate for perf?
  
  std::collections::HashMap now uses hashbrown 
https://blog.rust-lang.org/2019/07/04/Rust-1.36.0.html

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7116/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7116

To: Alphare, #hg-reviewers
Cc: durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7116: rust-performance: introduce FastHashMap type alias for HashMap

2019-10-16 Thread durin42 (Augie Fackler)
durin42 added a comment.


  OOC, have you compared this with the hashbrown crate for perf?

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7116/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7116

To: Alphare, #hg-reviewers
Cc: durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7116: rust-performance: introduce FastHashMap type alias for HashMap

2019-10-16 Thread Raphaël Gomès
Alphare created this revision.
Herald added subscribers: mercurial-devel, kevincox, durin42.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  Rust's default hashing is slow, because it is meant for preventing collision
  attacks.
  For all of the current Rust code, we don't care about those attacks, because
  if an person with bad intentions has write access to your repo, you have other
  issues.
  
  I've chosen to use the TwoXHash crate because it was made by a reputable 
member
  of the Rust community and has very good benchmarks.
  
  For now it does not seem to improve performance by much for the current code,
  but it's something else to not worry about when benchmarking code: in a
  previous experiment with copytracing in Rust, it accounted for more than 10%
  of the time of the entire script.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D7116

AFFECTED FILES
  rust/Cargo.lock
  rust/hg-core/Cargo.toml
  rust/hg-core/src/dirstate.rs
  rust/hg-core/src/dirstate/dirs_multiset.rs
  rust/hg-core/src/dirstate/dirstate_map.rs
  rust/hg-core/src/dirstate/parsers.rs
  rust/hg-core/src/filepatterns.rs
  rust/hg-core/src/lib.rs
  rust/hg-cpython/src/parsers.rs

CHANGE DETAILS

diff --git a/rust/hg-cpython/src/parsers.rs b/rust/hg-cpython/src/parsers.rs
--- a/rust/hg-cpython/src/parsers.rs
+++ b/rust/hg-cpython/src/parsers.rs
@@ -16,9 +16,9 @@
 };
 use hg::{
 pack_dirstate, parse_dirstate, utils::hg_path::HgPathBuf,
-DirstatePackError, DirstateParents, DirstateParseError, PARENT_SIZE,
+DirstatePackError, DirstateParents, DirstateParseError, FastHashMap,
+PARENT_SIZE,
 };
-use std::collections::HashMap;
 use std::convert::TryInto;
 
 use crate::dirstate::{extract_dirstate, make_dirstate_tuple};
@@ -30,8 +30,8 @@
 copymap: PyDict,
 st: PyBytes,
 ) -> PyResult {
-let mut dirstate_map = HashMap::new();
-let mut copies = HashMap::new();
+let mut dirstate_map = FastHashMap::default();
+let mut copies = FastHashMap::default();
 
 match parse_dirstate( dirstate_map,  copies, st.data(py)) {
 Ok(parents) => {
@@ -86,7 +86,7 @@
 
 let mut dirstate_map = extract_dirstate(py, )?;
 
-let copies: Result, PyErr> = copymap
+let copies: Result, PyErr> = copymap
 .items(py)
 .iter()
 .map(|(key, value)| {
diff --git a/rust/hg-core/src/lib.rs b/rust/hg-core/src/lib.rs
--- a/rust/hg-core/src/lib.rs
+++ b/rust/hg-core/src/lib.rs
@@ -22,6 +22,8 @@
 pub use filepatterns::{
 build_single_regex, read_pattern_file, PatternSyntax, PatternTuple,
 };
+use std::collections::HashMap;
+use twox_hash::RandomXxHashBuilder64;
 
 /// Mercurial revision numbers
 ///
@@ -51,6 +53,11 @@
 
 pub type LineNumber = usize;
 
+/// Rust's default hasher is too slow because it tries to prevent collision
+/// attacks. We are not concerned about those: if an ill-minded person has
+/// write access to your repository, you have other issues.
+pub type FastHashMap = HashMap;
+
 #[derive(Clone, Debug, PartialEq)]
 pub enum GraphError {
 ParentOutOfRange(Revision),
diff --git a/rust/hg-core/src/filepatterns.rs b/rust/hg-core/src/filepatterns.rs
--- a/rust/hg-core/src/filepatterns.rs
+++ b/rust/hg-core/src/filepatterns.rs
@@ -7,10 +7,11 @@
 
 //! Handling of Mercurial-specific patterns.
 
-use crate::{utils::SliceExt, LineNumber, PatternError, PatternFileError};
+use crate::{
+utils::SliceExt, FastHashMap, LineNumber, PatternError, PatternFileError,
+};
 use lazy_static::lazy_static;
 use regex::bytes::{NoExpand, Regex};
-use std::collections::HashMap;
 use std::fs::File;
 use std::io::Read;
 use std::path::{Path, PathBuf};
@@ -214,8 +215,8 @@
 }
 
 lazy_static! {
-static ref SYNTAXES: HashMap<&'static [u8], &'static [u8]> = {
-let mut m = HashMap::new();
+static ref SYNTAXES: FastHashMap<&'static [u8], &'static [u8]> = {
+let mut m = FastHashMap::default();
 
 m.insert(b"re".as_ref(), b"relre:".as_ref());
 m.insert(b"regexp".as_ref(), b"relre:".as_ref());
diff --git a/rust/hg-core/src/dirstate/parsers.rs 
b/rust/hg-core/src/dirstate/parsers.rs
--- a/rust/hg-core/src/dirstate/parsers.rs
+++ b/rust/hg-core/src/dirstate/parsers.rs
@@ -157,13 +157,12 @@
 #[cfg(test)]
 mod tests {
 use super::*;
-use crate::utils::hg_path::HgPathBuf;
-use std::collections::HashMap;
+use crate::{utils::hg_path::HgPathBuf, FastHashMap};
 
 #[test]
 fn test_pack_dirstate_empty() {
-let mut state_map: StateMap = HashMap::new();
-let copymap = HashMap::new();
+let mut state_map: StateMap = FastHashMap::default();
+let copymap = FastHashMap::default();
 let parents = DirstateParents {
 p1: *b"12345678910111213141",
 p2: *b"",
@@ -194,7 +193,7 @@
 .collect();
 let mut state_map = expected_state_map.clone();
 
-let copymap = HashMap::new();
+let copymap =