D7871: rust-utils: add util for canonical path

2020-02-10 Thread Raphaël Gomès
Alphare added inline comments.

INLINE COMMENTS

> martinvonz wrote in files.rs:366
> Can the test method be annotated so it's only run on unix?

Yes, but I think we need to have a more general chat about this subject. A 
non-negligible number of `hg-core`'s features do not work under non-UNIX 
systems. Our module policy system does not have the fine-grained control needed 
to map for a more complexe set of `features` (in Cargo terms). I think the more 
reasonable option right now is to have a compilation failure. Maybe we should 
have a big gated `compile_error!` at the top of `lib.rs` to inform users that 
there is work to be done to be cross-platform?

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7871/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7871

To: Alphare, #hg-reviewers, kevincox, martinvonz
Cc: martinvonz, durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7871: rust-utils: add util for canonical path

2020-02-10 Thread Raphaël Gomès
Closed by commit rHGecf816b17825: rust-utils: add util for canonical path 
(authored by Alphare).
This revision was automatically updated to reflect the committed changes.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7871?vs=20057=20077

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7871/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7871

AFFECTED FILES
  rust/Cargo.lock
  rust/hg-core/Cargo.toml
  rust/hg-core/src/utils/files.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/utils/files.rs b/rust/hg-core/src/utils/files.rs
--- a/rust/hg-core/src/utils/files.rs
+++ b/rust/hg-core/src/utils/files.rs
@@ -9,13 +9,18 @@
 
 //! Functions for fiddling with files.
 
-use crate::utils::hg_path::{HgPath, HgPathBuf};
-
-use crate::utils::replace_slice;
+use crate::utils::{
+hg_path::{path_to_hg_path_buf, HgPath, HgPathBuf, HgPathError},
+path_auditor::PathAuditor,
+replace_slice,
+};
 use lazy_static::lazy_static;
+use same_file::is_same_file;
+use std::borrow::ToOwned;
 use std::fs::Metadata;
 use std::iter::FusedIterator;
-use std::path::Path;
+use std::ops::Deref;
+use std::path::{Path, PathBuf};
 
 pub fn get_path_from_bytes(bytes: &[u8]) ->  {
 let os_str;
@@ -189,9 +194,66 @@
 }
 }
 
+/// Returns the canonical path of `name`, given `cwd` and `root`
+pub fn canonical_path(
+root: impl AsRef,
+cwd: impl AsRef,
+name: impl AsRef,
+) -> Result {
+// TODO add missing normalization for other platforms
+let root = root.as_ref();
+let cwd = cwd.as_ref();
+let name = name.as_ref();
+
+let name = if !name.is_absolute() {
+root.join().join()
+} else {
+name.to_owned()
+};
+let mut auditor = PathAuditor::new();
+if name != root && name.starts_with() {
+let name = name.strip_prefix().unwrap();
+auditor.audit_path(path_to_hg_path_buf(name)?)?;
+return Ok(name.to_owned());
+} else if name == root {
+return Ok("".into());
+} else {
+// Determine whether `name' is in the hierarchy at or beneath `root',
+// by iterating name=name.parent() until it returns `None` (can't
+// check name == '/', because that doesn't work on windows).
+let mut name = name.deref();
+let original_name = name.to_owned();
+loop {
+let same = is_same_file(, ).unwrap_or(false);
+if same {
+if name == original_name {
+// `name` was actually the same as root (maybe a symlink)
+return Ok("".into());
+}
+// `name` is a symlink to root, so `original_name` is under 
+// root
+let rel_path = original_name.strip_prefix().unwrap();
+auditor.audit_path(path_to_hg_path_buf(_path)?)?;
+return Ok(rel_path.to_owned());
+}
+name = match name.parent() {
+None => break,
+Some(p) => p,
+};
+}
+// TODO hint to the user about using --cwd
+// Bubble up the responsibility to Python for now
+Err(HgPathError::NotUnderRoot {
+path: original_name.to_owned(),
+root: root.to_owned(),
+})
+}
+}
+
 #[cfg(test)]
 mod tests {
 use super::*;
+use pretty_assertions::assert_eq;
 
 #[test]
 fn find_dirs_some() {
@@ -235,4 +297,88 @@
 assert_eq!(dirs.next(), None);
 assert_eq!(dirs.next(), None);
 }
+
+#[test]
+fn test_canonical_path() {
+let root = Path::new("/repo");
+let cwd = Path::new("/dir");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Err(HgPathError::NotUnderRoot {
+path: PathBuf::from("/dir/filename"),
+root: root.to_path_buf()
+})
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Err(HgPathError::NotUnderRoot {
+path: PathBuf::from("/filename"),
+root: root.to_path_buf()
+})
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/");
+let name = Path::new("repo/filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Ok(PathBuf::from("filename"))
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/repo");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Ok(PathBuf::from("filename"))
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/repo/subdir");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, 

D7871: rust-utils: add util for canonical path

2020-02-10 Thread martinvonz (Martin von Zweigbergk)
This revision is now accepted and ready to land.
martinvonz added inline comments.
martinvonz accepted this revision.

INLINE COMMENTS

> files.rs:366
> +// TODO make portable
> +std::os::unix::fs::symlink(, _of_repo).unwrap();
> +

Can the test method be annotated so it's only run on unix?

REPOSITORY
  rHG Mercurial

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7871/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7871

To: Alphare, #hg-reviewers, kevincox, martinvonz
Cc: martinvonz, durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7871: rust-utils: add util for canonical path

2020-02-10 Thread Raphaël Gomès
Alphare updated this revision to Diff 20057.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7871?vs=20051=20057

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7871/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7871

AFFECTED FILES
  rust/Cargo.lock
  rust/hg-core/Cargo.toml
  rust/hg-core/src/utils/files.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/utils/files.rs b/rust/hg-core/src/utils/files.rs
--- a/rust/hg-core/src/utils/files.rs
+++ b/rust/hg-core/src/utils/files.rs
@@ -9,13 +9,18 @@
 
 //! Functions for fiddling with files.
 
-use crate::utils::hg_path::{HgPath, HgPathBuf};
-
-use crate::utils::replace_slice;
+use crate::utils::{
+hg_path::{path_to_hg_path_buf, HgPath, HgPathBuf, HgPathError},
+path_auditor::PathAuditor,
+replace_slice,
+};
 use lazy_static::lazy_static;
+use same_file::is_same_file;
+use std::borrow::ToOwned;
 use std::fs::Metadata;
 use std::iter::FusedIterator;
-use std::path::Path;
+use std::ops::Deref;
+use std::path::{Path, PathBuf};
 
 pub fn get_path_from_bytes(bytes: &[u8]) ->  {
 let os_str;
@@ -189,9 +194,66 @@
 }
 }
 
+/// Returns the canonical path of `name`, given `cwd` and `root`
+pub fn canonical_path(
+root: impl AsRef,
+cwd: impl AsRef,
+name: impl AsRef,
+) -> Result {
+// TODO add missing normalization for other platforms
+let root = root.as_ref();
+let cwd = cwd.as_ref();
+let name = name.as_ref();
+
+let name = if !name.is_absolute() {
+root.join().join()
+} else {
+name.to_owned()
+};
+let mut auditor = PathAuditor::new();
+if name != root && name.starts_with() {
+let name = name.strip_prefix().unwrap();
+auditor.audit_path(path_to_hg_path_buf(name)?)?;
+return Ok(name.to_owned());
+} else if name == root {
+return Ok("".into());
+} else {
+// Determine whether `name' is in the hierarchy at or beneath `root',
+// by iterating name=name.parent() until it returns `None` (can't
+// check name == '/', because that doesn't work on windows).
+let mut name = name.deref();
+let original_name = name.to_owned();
+loop {
+let same = is_same_file(, ).unwrap_or(false);
+if same {
+if name == original_name {
+// `name` was actually the same as root (maybe a symlink)
+return Ok("".into());
+}
+// `name` is a symlink to root, so `original_name` is under 
+// root
+let rel_path = original_name.strip_prefix().unwrap();
+auditor.audit_path(path_to_hg_path_buf(_path)?)?;
+return Ok(rel_path.to_owned());
+}
+name = match name.parent() {
+None => break,
+Some(p) => p,
+};
+}
+// TODO hint to the user about using --cwd
+// Bubble up the responsibility to Python for now
+Err(HgPathError::NotUnderRoot {
+path: original_name.to_owned(),
+root: root.to_owned(),
+})
+}
+}
+
 #[cfg(test)]
 mod tests {
 use super::*;
+use pretty_assertions::assert_eq;
 
 #[test]
 fn find_dirs_some() {
@@ -235,4 +297,88 @@
 assert_eq!(dirs.next(), None);
 assert_eq!(dirs.next(), None);
 }
+
+#[test]
+fn test_canonical_path() {
+let root = Path::new("/repo");
+let cwd = Path::new("/dir");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Err(HgPathError::NotUnderRoot {
+path: PathBuf::from("/dir/filename"),
+root: root.to_path_buf()
+})
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Err(HgPathError::NotUnderRoot {
+path: PathBuf::from("/filename"),
+root: root.to_path_buf()
+})
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/");
+let name = Path::new("repo/filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Ok(PathBuf::from("filename"))
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/repo");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Ok(PathBuf::from("filename"))
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/repo/subdir");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Ok(PathBuf::from("subdir/filename"))
+);
+}
+
+#[test]
+fn 

D7871: rust-utils: add util for canonical path

2020-02-10 Thread Raphaël Gomès
Alphare updated this revision to Diff 20051.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7871?vs=20037=20051

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7871/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7871

AFFECTED FILES
  rust/Cargo.lock
  rust/hg-core/Cargo.toml
  rust/hg-core/src/utils/files.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/utils/files.rs b/rust/hg-core/src/utils/files.rs
--- a/rust/hg-core/src/utils/files.rs
+++ b/rust/hg-core/src/utils/files.rs
@@ -9,13 +9,17 @@
 
 //! Functions for fiddling with files.
 
-use crate::utils::hg_path::{HgPath, HgPathBuf};
-
-use crate::utils::replace_slice;
+use crate::utils::{
+hg_path::{path_to_hg_path_buf, HgPath, HgPathBuf, HgPathError},
+path_auditor::PathAuditor,
+replace_slice,
+};
 use lazy_static::lazy_static;
+use same_file::is_same_file;
 use std::fs::Metadata;
 use std::iter::FusedIterator;
-use std::path::Path;
+use std::ops::Deref;
+use std::path::{Path, PathBuf};
 
 pub fn get_path_from_bytes(bytes: &[u8]) ->  {
 let os_str;
@@ -189,9 +193,63 @@
 }
 }
 
+/// Returns the canonical path of `name`, given `cwd` and `root`
+pub fn canonical_path(
+root: impl AsRef,
+cwd: impl AsRef,
+name: impl AsRef,
+) -> Result {
+// TODO add missing normalization for other platforms
+let root = root.as_ref();
+let cwd = cwd.as_ref();
+let name = name.as_ref();
+
+let name = if !name.is_absolute() {
+root.join().join()
+} else {
+name.to_owned()
+};
+let mut auditor = PathAuditor::new();
+if name != root && name.starts_with() {
+let name = name.strip_prefix().unwrap();
+auditor.audit_path(path_to_hg_path_buf(name)?)?;
+return Ok(name.to_owned());
+} else if name == root {
+return Ok("".into());
+} else {
+// Determine whether `name' is in the hierarchy at or beneath `root',
+// by iterating name=name.parent() until it returns `None` (can't
+// check name == '/', because that doesn't work on windows).
+let mut name = name.deref();
+let mut iterations = 0;
+loop {
+let same = is_same_file(, ).unwrap_or(false);
+if same {
+if iterations == 0 {
+// `name` was actually the same as root (maybe a symlink)
+return Ok("".into());
+}
+auditor.audit_path(path_to_hg_path_buf(name)?)?;
+return Ok(name.to_owned());
+}
+name = match name.parent() {
+None => break,
+Some(p) => {iterations += 1; p},
+};
+}
+// TODO hint to the user about using --cwd
+// Bubble up the responsibility to Python for now
+Err(HgPathError::NotUnderRoot {
+path: name.to_owned(),
+root: root.to_owned(),
+})
+}
+}
+
 #[cfg(test)]
 mod tests {
 use super::*;
+use pretty_assertions::assert_eq;
 
 #[test]
 fn find_dirs_some() {
@@ -235,4 +293,53 @@
 assert_eq!(dirs.next(), None);
 assert_eq!(dirs.next(), None);
 }
+
+#[test]
+fn test_canonical_path() {
+let root = Path::new("/repo");
+let cwd = Path::new("/dir");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Err(HgPathError::NotUnderRoot {
+path: PathBuf::from("/"),
+root: root.to_path_buf()
+})
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Err(HgPathError::NotUnderRoot {
+path: PathBuf::from("/"),
+root: root.to_path_buf()
+})
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/");
+let name = Path::new("repo/filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Ok(PathBuf::from("filename"))
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/repo");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Ok(PathBuf::from("filename"))
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/repo/subdir");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Ok(PathBuf::from("subdir/filename"))
+);
+}
 }
diff --git a/rust/hg-core/Cargo.toml b/rust/hg-core/Cargo.toml
--- a/rust/hg-core/Cargo.toml
+++ b/rust/hg-core/Cargo.toml
@@ -18,7 +18,8 @@
 rayon = "1.3.0"
 regex = "1.1.0"
 twox-hash = "1.5.0"
+same-file = "1.0.6"
 
 [dev-dependencies]
 tempfile = "3.1.0"
 

D7871: rust-utils: add util for canonical path

2020-02-10 Thread Raphaël Gomès
Alphare updated this revision to Diff 20037.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7871?vs=19937=20037

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7871/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7871

AFFECTED FILES
  rust/Cargo.lock
  rust/hg-core/Cargo.toml
  rust/hg-core/src/utils/files.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/utils/files.rs b/rust/hg-core/src/utils/files.rs
--- a/rust/hg-core/src/utils/files.rs
+++ b/rust/hg-core/src/utils/files.rs
@@ -9,13 +9,17 @@
 
 //! Functions for fiddling with files.
 
-use crate::utils::hg_path::{HgPath, HgPathBuf};
-
-use crate::utils::replace_slice;
+use crate::utils::{
+hg_path::{path_to_hg_path_buf, HgPath, HgPathBuf, HgPathError},
+path_auditor::PathAuditor,
+replace_slice,
+};
 use lazy_static::lazy_static;
+use same_file::is_same_file;
 use std::fs::Metadata;
 use std::iter::FusedIterator;
-use std::path::Path;
+use std::ops::Deref;
+use std::path::{Path, PathBuf};
 
 pub fn get_path_from_bytes(bytes: &[u8]) ->  {
 let os_str;
@@ -189,9 +193,62 @@
 }
 }
 
+/// Returns the canonical path of `name`, given `cwd` and `root`
+pub fn canonical_path(
+root: impl AsRef,
+cwd: impl AsRef,
+name: impl AsRef,
+) -> Result {
+// TODO add missing normalization for other platforms
+let root = root.as_ref();
+let cwd = cwd.as_ref();
+let name = name.as_ref();
+
+let name = if !name.is_absolute() {
+root.join().join()
+} else {
+name.to_owned()
+};
+let mut auditor = PathAuditor::new();
+if name != root && name.starts_with() {
+let name = name.strip_prefix().unwrap();
+auditor.audit_path(path_to_hg_path_buf(name)?)?;
+return Ok(name.to_owned());
+} else if name == root {
+return Ok("".into());
+} else {
+// Determine whether `name' is in the hierarchy at or beneath `root',
+// by iterating name=name.parent() until it returns `None` (can't
+// check name == '/', because that doesn't work on windows).
+let mut name = name.deref();
+loop {
+let same = is_same_file(, ).unwrap_or(false);
+if same {
+if name.components().next().is_none() {
+// `name` was actually the same as root (maybe a symlink)
+return Ok("".into());
+}
+auditor.audit_path(path_to_hg_path_buf(name)?)?;
+return Ok(name.to_owned());
+}
+name = match name.parent() {
+None => break,
+Some(p) => p,
+};
+}
+// TODO hint to the user about using --cwd
+// Bubble up the responsibility to Python for now
+Err(HgPathError::NotUnderRoot {
+path: name.to_owned(),
+root: root.to_owned(),
+})
+}
+}
+
 #[cfg(test)]
 mod tests {
 use super::*;
+use pretty_assertions::assert_eq;
 
 #[test]
 fn find_dirs_some() {
@@ -235,4 +292,53 @@
 assert_eq!(dirs.next(), None);
 assert_eq!(dirs.next(), None);
 }
+
+#[test]
+fn test_canonical_path() {
+let root = Path::new("/repo");
+let cwd = Path::new("/dir");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Err(HgPathError::NotUnderRoot {
+path: PathBuf::from("/"),
+root: root.to_path_buf()
+})
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Err(HgPathError::NotUnderRoot {
+path: PathBuf::from("/"),
+root: root.to_path_buf()
+})
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/");
+let name = Path::new("repo/filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Ok(PathBuf::from("filename"))
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/repo");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Ok(PathBuf::from("filename"))
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/repo/subdir");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Ok(PathBuf::from("subdir/filename"))
+);
+}
 }
diff --git a/rust/hg-core/Cargo.toml b/rust/hg-core/Cargo.toml
--- a/rust/hg-core/Cargo.toml
+++ b/rust/hg-core/Cargo.toml
@@ -18,7 +18,8 @@
 rayon = "1.3.0"
 regex = "1.1.0"
 twox-hash = "1.5.0"
+same-file = "1.0.6"
 
 [dev-dependencies]
 tempfile = "3.1.0"
 pretty_assertions = "0.6.1"
\ No 

D7871: rust-utils: add util for canonical path

2020-02-06 Thread martinvonz (Martin von Zweigbergk)
martinvonz added inline comments.

INLINE COMMENTS

> files.rs:231
> +}
> +auditor.audit_path(path_to_hg_path_buf(name)?)?;
> +return Ok(name.to_owned());

Related to the comment above, this seems to (generally) return an absolute 
parent directory instead of a repo-relative path. Try adding a `println!()` 
just inside the `if same` to make sure you have test coverage.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7871/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7871

To: Alphare, #hg-reviewers, kevincox, martinvonz
Cc: martinvonz, durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7871: rust-utils: add util for canonical path

2020-02-06 Thread martinvonz (Martin von Zweigbergk)
martinvonz added inline comments.

INLINE COMMENTS

> Alphare wrote in files.rs:220-222
> I could reword this sentence if you feel that it does not convey the same 
> meaning. It fits the Python implementation better, that's for sure.

To me, the current sentence very much makes it sound like it iterates until 
`name != name.parent()`, which doesn't seem correct. So, yes, please reword it. 
(Unless I'm just misunderstanding something.)

> Alphare wrote in files.rs:227-230
> For `b'/'` it would return `Some(std::Path::Components::RootDir)`. It only 
> returns `None` if there are no components.

Okay, but it seems that that doesn't address my concern. If I understand 
correctly (now that you've corrected me), we get here if `name == ""`. However, 
name is an absolute path (right?), so with `root = "/some/path/"` and `name = 
""`, why should we return `Ok()`? If `name` is indeed an absolute path, I would 
interpret "" as the root (file system root, not repo root), which is not inside 
"/some/path". What am I missing?

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7871/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7871

To: Alphare, #hg-reviewers, kevincox, martinvonz
Cc: martinvonz, durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7871: rust-utils: add util for canonical path

2020-02-06 Thread Raphaël Gomès
Alphare added inline comments.

INLINE COMMENTS

> martinvonz wrote in files.rs:207-216
> Might be worth having a fast path for when `name` is not absolute (including 
> "") and neither `name` nor `cwd` contains "../"so we don't do `let name = 
> root.join().join()` immediately followed by `let name = 
> name.strip_prefix().unwrap()`. Especially since that seems like the 
> common case. Or can that ever produce different results? Anyway, that can be 
> done later.

That might be worth investigating indeed. I will defer to a later time, though. 
:)

> martinvonz wrote in files.rs:220-222
> Is this accurate? It looks like we break when `name.parent() == None`.

I could reword this sentence if you feel that it does not convey the same 
meaning. It fits the Python implementation better, that's for sure.

> martinvonz wrote in files.rs:227-230
> `name.components().next()` will be `None` only if `name` is something like 
> "/", right? But then we shouldn't return "" (a repo-relative path), we should 
> error out.

For `b'/'` it would return `Some(std::Path::Components::RootDir)`. It only 
returns `None` if there are no components.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7871/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7871

To: Alphare, #hg-reviewers, kevincox, martinvonz
Cc: martinvonz, durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7871: rust-utils: add util for canonical path

2020-02-06 Thread martinvonz (Martin von Zweigbergk)
This revision now requires changes to proceed.
martinvonz added inline comments.
martinvonz requested changes to this revision.

INLINE COMMENTS

> files.rs:207-216
> +let name = if !name.is_absolute() {
> +root.join().join()
> +} else {
> +name.to_owned()
> +};
> +let mut auditor = PathAuditor::new();
> +if name != root && name.starts_with() {

Might be worth having a fast path for when `name` is not absolute (including 
"") and neither `name` nor `cwd` contains "../"so we don't do `let name = 
root.join().join()` immediately followed by `let name = 
name.strip_prefix().unwrap()`. Especially since that seems like the common 
case. Or can that ever produce different results? Anyway, that can be done 
later.

> files.rs:220-222
> +// Determine whether `name' is in the hierarchy at or beneath `root',
> +// by iterating name=name.parent() until that causes no change (can't
> +// check name == '/', because that doesn't work on windows).

Is this accurate? It looks like we break when `name.parent() == None`.

> files.rs:227-230
> +if name.components().next().is_none() {
> +// `name` was actually the same as root (maybe a symlink)
> +return Ok("".into());
> +}

`name.components().next()` will be `None` only if `name` is something like "/", 
right? But then we shouldn't return "" (a repo-relative path), we should error 
out.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7871/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7871

To: Alphare, #hg-reviewers, kevincox, martinvonz
Cc: martinvonz, durin42, kevincox, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7871: rust-utils: add util for canonical path

2020-02-06 Thread Raphaël Gomès
Alphare updated this revision to Diff 19937.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7871?vs=19359=19937

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7871/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7871

AFFECTED FILES
  rust/Cargo.lock
  rust/hg-core/Cargo.toml
  rust/hg-core/src/utils/files.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/utils/files.rs b/rust/hg-core/src/utils/files.rs
--- a/rust/hg-core/src/utils/files.rs
+++ b/rust/hg-core/src/utils/files.rs
@@ -9,13 +9,17 @@
 
 //! Functions for fiddling with files.
 
-use crate::utils::hg_path::{HgPath, HgPathBuf};
-
-use crate::utils::replace_slice;
+use crate::utils::{
+hg_path::{path_to_hg_path_buf, HgPath, HgPathBuf, HgPathError},
+path_auditor::PathAuditor,
+replace_slice,
+};
 use lazy_static::lazy_static;
+use same_file::is_same_file;
 use std::fs::Metadata;
 use std::iter::FusedIterator;
-use std::path::Path;
+use std::ops::Deref;
+use std::path::{Path, PathBuf};
 
 pub fn get_path_from_bytes(bytes: &[u8]) ->  {
 let os_str;
@@ -189,9 +193,62 @@
 }
 }
 
+/// Returns the canonical path of `name`, given `cwd` and `root`
+pub fn canonical_path(
+root: impl AsRef,
+cwd: impl AsRef,
+name: impl AsRef,
+) -> Result {
+// TODO add missing normalization for other platforms
+let root = root.as_ref();
+let cwd = cwd.as_ref();
+let name = name.as_ref();
+
+let name = if !name.is_absolute() {
+root.join().join()
+} else {
+name.to_owned()
+};
+let mut auditor = PathAuditor::new();
+if name != root && name.starts_with() {
+let name = name.strip_prefix().unwrap();
+auditor.audit_path(path_to_hg_path_buf(name)?)?;
+return Ok(name.to_owned());
+} else if name == root {
+return Ok("".into());
+} else {
+// Determine whether `name' is in the hierarchy at or beneath `root',
+// by iterating name=name.parent() until that causes no change (can't
+// check name == '/', because that doesn't work on windows).
+let mut name = name.deref();
+loop {
+let same = is_same_file(, ).unwrap_or(false);
+if same {
+if name.components().next().is_none() {
+// `name` was actually the same as root (maybe a symlink)
+return Ok("".into());
+}
+auditor.audit_path(path_to_hg_path_buf(name)?)?;
+return Ok(name.to_owned());
+}
+name = match name.parent() {
+None => break,
+Some(p) => p,
+};
+}
+// TODO hint to the user about using --cwd
+// Bubble up the responsibility to Python for now
+Err(HgPathError::NotUnderRoot {
+path: name.to_owned(),
+root: root.to_owned(),
+})
+}
+}
+
 #[cfg(test)]
 mod tests {
 use super::*;
+use pretty_assertions::assert_eq;
 
 #[test]
 fn find_dirs_some() {
@@ -235,4 +292,53 @@
 assert_eq!(dirs.next(), None);
 assert_eq!(dirs.next(), None);
 }
+
+#[test]
+fn test_canonical_path() {
+let root = Path::new("/repo");
+let cwd = Path::new("/dir");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Err(HgPathError::NotUnderRoot {
+path: PathBuf::from("/"),
+root: root.to_path_buf()
+})
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Err(HgPathError::NotUnderRoot {
+path: PathBuf::from("/"),
+root: root.to_path_buf()
+})
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/");
+let name = Path::new("repo/filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Ok(PathBuf::from("filename"))
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/repo");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Ok(PathBuf::from("filename"))
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/repo/subdir");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Ok(PathBuf::from("subdir/filename"))
+);
+}
 }
diff --git a/rust/hg-core/Cargo.toml b/rust/hg-core/Cargo.toml
--- a/rust/hg-core/Cargo.toml
+++ b/rust/hg-core/Cargo.toml
@@ -18,7 +18,8 @@
 rayon = "1.3.0"
 regex = "1.1.0"
 twox-hash = "1.5.0"
+same-file = "1.0.6"
 
 [dev-dependencies]
 tempfile = "3.1.0"
 pretty_assertions = "0.6.1"
\ No 

D7871: rust-utils: add util for canonical path

2020-01-16 Thread Raphaël Gomès
Alphare updated this revision to Diff 19359.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7871?vs=19354=19359

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7871/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7871

AFFECTED FILES
  rust/Cargo.lock
  rust/hg-core/Cargo.toml
  rust/hg-core/src/utils/files.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/utils/files.rs b/rust/hg-core/src/utils/files.rs
--- a/rust/hg-core/src/utils/files.rs
+++ b/rust/hg-core/src/utils/files.rs
@@ -9,13 +9,17 @@
 
 //! Functions for fiddling with files.
 
-use crate::utils::hg_path::{HgPath, HgPathBuf};
-
-use crate::utils::replace_slice;
+use crate::utils::{
+hg_path::{path_to_hg_path_buf, HgPath, HgPathBuf, HgPathError},
+path_auditor::PathAuditor,
+replace_slice,
+};
 use lazy_static::lazy_static;
+use same_file::is_same_file;
 use std::fs::Metadata;
 use std::iter::FusedIterator;
-use std::path::Path;
+use std::ops::Deref;
+use std::path::{Path, PathBuf};
 
 pub fn get_path_from_bytes(bytes: &[u8]) ->  {
 let os_str;
@@ -189,9 +193,62 @@
 }
 }
 
+/// Returns the canonical path of `name`, given `cwd` and `root`
+pub fn canonical_path(
+root: impl AsRef,
+cwd: impl AsRef,
+name: impl AsRef,
+) -> Result {
+// TODO add missing normalization for other platforms
+let root = root.as_ref();
+let cwd = cwd.as_ref();
+let name = name.as_ref();
+
+let name = if !name.is_absolute() {
+root.join().join()
+} else {
+name.to_owned()
+};
+let mut auditor = PathAuditor::new();
+if name != root && name.starts_with() {
+let name = name.strip_prefix().unwrap();
+auditor.audit_path(path_to_hg_path_buf(name)?)?;
+return Ok(name.to_owned());
+} else if name == root {
+return Ok("".into());
+} else {
+// Determine whether `name' is in the hierarchy at or beneath `root',
+// by iterating name=name.parent() until that causes no change (can't
+// check name == '/', because that doesn't work on windows).
+let mut name = name.deref();
+loop {
+let same = is_same_file(, ).unwrap_or(false);
+if same {
+if name.components().next().is_none() {
+// `name` was actually the same as root (maybe a symlink)
+return Ok("".into());
+}
+auditor.audit_path(path_to_hg_path_buf(name)?)?;
+return Ok(name.to_owned());
+}
+name = match name.parent() {
+None => break,
+Some(p) => p,
+};
+}
+// TODO hint to the user about using --cwd
+// Bubble up the responsibility to Python for now
+Err(HgPathError::NotUnderRoot {
+path: name.to_owned(),
+root: root.to_owned(),
+})
+}
+}
+
 #[cfg(test)]
 mod tests {
 use super::*;
+use pretty_assertions::assert_eq;
 
 #[test]
 fn find_dirs_some() {
@@ -235,4 +292,53 @@
 assert_eq!(dirs.next(), None);
 assert_eq!(dirs.next(), None);
 }
+
+#[test]
+fn test_canonical_path() {
+let root = Path::new("/repo");
+let cwd = Path::new("/dir");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Err(HgPathError::NotUnderRoot {
+path: PathBuf::from("/"),
+root: root.to_path_buf()
+})
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Err(HgPathError::NotUnderRoot {
+path: PathBuf::from("/"),
+root: root.to_path_buf()
+})
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/");
+let name = Path::new("repo/filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Ok(PathBuf::from("filename"))
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/repo");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Ok(PathBuf::from("filename"))
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/repo/subdir");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Ok(PathBuf::from("subdir/filename"))
+);
+}
 }
diff --git a/rust/hg-core/Cargo.toml b/rust/hg-core/Cargo.toml
--- a/rust/hg-core/Cargo.toml
+++ b/rust/hg-core/Cargo.toml
@@ -17,7 +17,8 @@
 rayon = "1.2.0"
 regex = "1.1.0"
 twox-hash = "1.5.0"
+same-file = "1.0.6"
 
 [dev-dependencies]
 tempfile = "3.1.0"
 pretty_assertions = "0.6.1"
\ No 

D7871: rust-utils: add util for canonical path

2020-01-16 Thread Raphaël Gomès
Alphare updated this revision to Diff 19354.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7871?vs=19349=19354

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7871/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7871

AFFECTED FILES
  rust/Cargo.lock
  rust/hg-core/Cargo.toml
  rust/hg-core/src/utils/files.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/utils/files.rs b/rust/hg-core/src/utils/files.rs
--- a/rust/hg-core/src/utils/files.rs
+++ b/rust/hg-core/src/utils/files.rs
@@ -9,13 +9,17 @@
 
 //! Functions for fiddling with files.
 
-use crate::utils::hg_path::{HgPath, HgPathBuf};
-
-use crate::utils::replace_slice;
+use crate::utils::{
+hg_path::{path_to_hg_path_buf, HgPath, HgPathBuf, HgPathError},
+path_auditor::PathAuditor,
+replace_slice,
+};
 use lazy_static::lazy_static;
+use same_file::is_same_file;
 use std::fs::Metadata;
 use std::iter::FusedIterator;
-use std::path::Path;
+use std::ops::Deref;
+use std::path::{Path, PathBuf};
 
 pub fn get_path_from_bytes(bytes: &[u8]) ->  {
 let os_str;
@@ -189,9 +193,62 @@
 }
 }
 
+/// Returns the canonical path of `name`, given `cwd` and `root`
+pub fn canonical_path(
+root: impl AsRef,
+cwd: impl AsRef,
+name: impl AsRef,
+) -> Result {
+// TODO add missing normalization for other platforms
+let root = root.as_ref();
+let cwd = cwd.as_ref();
+let name = name.as_ref();
+
+let name = if !name.is_absolute() {
+root.join().join()
+} else {
+name.to_owned()
+};
+let mut auditor = PathAuditor::new();
+if name != root && name.starts_with() {
+let name = name.strip_prefix().unwrap();
+auditor.audit_path(path_to_hg_path_buf(name)?)?;
+return Ok(name.to_owned());
+} else if name == root {
+return Ok("".into());
+} else {
+// Determine whether `name' is in the hierarchy at or beneath `root',
+// by iterating name=name.parent() until that causes no change (can't
+// check name == '/', because that doesn't work on windows).
+let mut name = name.deref();
+loop {
+let same = is_same_file(, ).unwrap_or(false);
+if same {
+if name.components().next().is_none() {
+// `name` was actually the same as root (maybe a symlink)
+return Ok("".into());
+}
+auditor.audit_path(path_to_hg_path_buf(name)?)?;
+return Ok(name.to_owned());
+}
+name = match name.parent() {
+None => break,
+Some(p) => p,
+};
+}
+// TODO hint to the user about using --cwd
+// Bubble up the responsibility to Python for now
+Err(HgPathError::NotUnderRoot {
+path: name.to_owned(),
+root: root.to_owned(),
+})
+}
+}
+
 #[cfg(test)]
 mod tests {
 use super::*;
+use pretty_assertions::assert_eq;
 
 #[test]
 fn find_dirs_some() {
@@ -235,4 +292,53 @@
 assert_eq!(dirs.next(), None);
 assert_eq!(dirs.next(), None);
 }
+
+#[test]
+fn test_canonical_path() {
+let root = Path::new("/repo");
+let cwd = Path::new("/dir");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Err(HgPathError::NotUnderRoot {
+path: PathBuf::from("/"),
+root: root.to_path_buf()
+})
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Err(HgPathError::NotUnderRoot {
+path: PathBuf::from("/"),
+root: root.to_path_buf()
+})
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/");
+let name = Path::new("repo/filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Ok(PathBuf::from("filename"))
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/repo");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Ok(PathBuf::from("filename"))
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/repo/subdir");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Ok(PathBuf::from("subdir/filename"))
+);
+}
 }
diff --git a/rust/hg-core/Cargo.toml b/rust/hg-core/Cargo.toml
--- a/rust/hg-core/Cargo.toml
+++ b/rust/hg-core/Cargo.toml
@@ -17,7 +17,8 @@
 rayon = "1.2.0"
 regex = "1.1.0"
 twox-hash = "1.5.0"
+same-file = "1.0.6"
 
 [dev-dependencies]
 tempfile = "3.1.0"
 pretty_assertions = "0.6.1"
\ No 

D7871: rust-utils: add util for canonical path

2020-01-16 Thread Raphaël Gomès
Alphare updated this revision to Diff 19349.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7871?vs=19321=19349

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7871/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7871

AFFECTED FILES
  rust/Cargo.lock
  rust/hg-core/Cargo.toml
  rust/hg-core/src/utils/files.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/utils/files.rs b/rust/hg-core/src/utils/files.rs
--- a/rust/hg-core/src/utils/files.rs
+++ b/rust/hg-core/src/utils/files.rs
@@ -9,13 +9,17 @@
 
 //! Functions for fiddling with files.
 
-use crate::utils::hg_path::{HgPath, HgPathBuf};
-
-use crate::utils::replace_slice;
+use crate::utils::{
+hg_path::{path_to_hg_path_buf, HgPath, HgPathBuf, HgPathError},
+path_auditor::PathAuditor,
+replace_slice,
+};
 use lazy_static::lazy_static;
+use same_file::is_same_file;
 use std::fs::Metadata;
 use std::iter::FusedIterator;
-use std::path::Path;
+use std::ops::Deref;
+use std::path::{Path, PathBuf};
 
 pub fn get_path_from_bytes(bytes: &[u8]) ->  {
 let os_str;
@@ -260,9 +264,62 @@
 }
 }
 
+/// Returns the canonical path of `name`, given `cwd` and `root`
+pub fn canonical_path(
+root: impl AsRef,
+cwd: impl AsRef,
+name: impl AsRef,
+) -> Result {
+// TODO add missing normalization for other platforms
+let root = root.as_ref();
+let cwd = cwd.as_ref();
+let name = name.as_ref();
+
+let name = if !name.is_absolute() {
+root.join().join()
+} else {
+name.to_owned()
+};
+let mut auditor = PathAuditor::new();
+if name != root && name.starts_with() {
+let name = name.strip_prefix().unwrap();
+auditor.audit_path(path_to_hg_path_buf(name)?)?;
+return Ok(name.to_owned());
+} else if name == root {
+return Ok("".into());
+} else {
+// Determine whether `name' is in the hierarchy at or beneath `root',
+// by iterating name=name.parent() until that causes no change (can't
+// check name == '/', because that doesn't work on windows).
+let mut name = name.deref();
+loop {
+let same = is_same_file(, ).unwrap_or(false);
+if same {
+if name.components().next().is_none() {
+// `name` was actually the same as root (maybe a symlink)
+return Ok("".into());
+}
+auditor.audit_path(path_to_hg_path_buf(name)?)?;
+return Ok(name.to_owned());
+}
+name = match name.parent() {
+None => break,
+Some(p) => p,
+};
+}
+// TODO hint to the user about using --cwd
+// Bubble up the responsibility to Python for now
+Err(HgPathError::NotUnderRoot {
+path: name.to_owned(),
+root: root.to_owned(),
+})
+}
+}
+
 #[cfg(test)]
 mod tests {
 use super::*;
+use pretty_assertions::assert_eq;
 
 #[test]
 fn find_dirs_some() {
@@ -403,4 +460,53 @@
 assert_eq!(dirs.next(), None);
 assert_eq!(dirs.next(), None);
 }
+
+#[test]
+fn test_canonical_path() {
+let root = Path::new("/repo");
+let cwd = Path::new("/dir");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Err(HgPathError::NotUnderRoot {
+path: PathBuf::from("/"),
+root: root.to_path_buf()
+})
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Err(HgPathError::NotUnderRoot {
+path: PathBuf::from("/"),
+root: root.to_path_buf()
+})
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/");
+let name = Path::new("repo/filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Ok(PathBuf::from("filename"))
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/repo");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Ok(PathBuf::from("filename"))
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/repo/subdir");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Ok(PathBuf::from("subdir/filename"))
+);
+}
 }
diff --git a/rust/hg-core/Cargo.toml b/rust/hg-core/Cargo.toml
--- a/rust/hg-core/Cargo.toml
+++ b/rust/hg-core/Cargo.toml
@@ -17,7 +17,8 @@
 rayon = "1.2.0"
 regex = "1.1.0"
 twox-hash = "1.5.0"
+same-file = "1.0.6"
 
 [dev-dependencies]
 tempfile = "3.1.0"
 pretty_assertions = "0.6.1"
\ No 

D7871: rust-utils: add util for canonical path

2020-01-15 Thread Raphaël Gomès
Alphare updated this revision to Diff 19321.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7871?vs=19263=19321

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7871/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7871

AFFECTED FILES
  rust/Cargo.lock
  rust/hg-core/Cargo.toml
  rust/hg-core/src/utils/files.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/utils/files.rs b/rust/hg-core/src/utils/files.rs
--- a/rust/hg-core/src/utils/files.rs
+++ b/rust/hg-core/src/utils/files.rs
@@ -9,13 +9,17 @@
 
 //! Functions for fiddling with files.
 
-use crate::utils::hg_path::{HgPath, HgPathBuf};
-
+use crate::utils::hg_path::{
+path_to_hg_path_buf, HgPath, HgPathBuf, HgPathError,
+};
+use crate::utils::path_auditor::PathAuditor;
 use crate::utils::replace_slice;
 use lazy_static::lazy_static;
 use std::fs::Metadata;
 use std::iter::FusedIterator;
-use std::path::Path;
+use std::ops::Deref;
+use same_file::is_same_file;
+use std::path::{Path, PathBuf};
 
 pub fn get_path_from_bytes(bytes: &[u8]) ->  {
 let os_str;
@@ -260,9 +264,62 @@
 }
 }
 
+/// Returns the canonical path of `name`, given `cwd` and `root`
+pub fn canonical_path(
+root: impl AsRef,
+cwd: impl AsRef,
+name: impl AsRef,
+) -> Result {
+// TODO add missing normalization for other platforms
+let root = root.as_ref();
+let cwd = cwd.as_ref();
+let name = name.as_ref();
+
+let name = if !name.is_absolute() {
+root.join().join()
+} else {
+name.to_owned()
+};
+let mut auditor = PathAuditor::new();
+if name != root && name.starts_with() {
+let name = name.strip_prefix().unwrap();
+auditor.audit_path(path_to_hg_path_buf(name)?)?;
+return Ok(name.to_owned());
+} else if name == root {
+return Ok("".into());
+} else {
+// Determine whether `name' is in the hierarchy at or beneath `root',
+// by iterating name=name.parent() until that causes no change (can't
+// check name == '/', because that doesn't work on windows).
+let mut name = name.deref();
+loop {
+let same = is_same_file(, ).unwrap_or(false);
+if same {
+if name.components().next().is_none() {
+// `name` was actually the same as root (maybe a symlink)
+return Ok("".into());
+}
+auditor.audit_path(path_to_hg_path_buf(name)?)?;
+return Ok(name.to_owned());
+}
+name = match name.parent() {
+None => break,
+Some(p) => p,
+};
+}
+// TODO hint to the user about using --cwd
+// Bubble up the responsibility to Python for now
+Err(HgPathError::NotUnderRoot {
+path: name.to_owned(),
+root: root.to_owned(),
+})
+}
+}
+
 #[cfg(test)]
 mod tests {
 use super::*;
+use pretty_assertions::assert_eq;
 
 #[test]
 fn find_dirs_some() {
@@ -403,4 +460,53 @@
 assert_eq!(dirs.next(), None);
 assert_eq!(dirs.next(), None);
 }
+
+#[test]
+fn test_canonical_path() {
+let root = Path::new("/repo");
+let cwd = Path::new("/dir");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Err(HgPathError::NotUnderRoot {
+path: PathBuf::from("/"),
+root: root.to_path_buf()
+})
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Err(HgPathError::NotUnderRoot {
+path: PathBuf::from("/"),
+root: root.to_path_buf()
+})
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/");
+let name = Path::new("repo/filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Ok(PathBuf::from("filename"))
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/repo");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Ok(PathBuf::from("filename"))
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/repo/subdir");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Ok(PathBuf::from("subdir/filename"))
+);
+}
 }
diff --git a/rust/hg-core/Cargo.toml b/rust/hg-core/Cargo.toml
--- a/rust/hg-core/Cargo.toml
+++ b/rust/hg-core/Cargo.toml
@@ -17,7 +17,8 @@
 rayon = "1.2.0"
 regex = "1.1.0"
 twox-hash = "1.5.0"
+same-file = "1.0.6"
 
 [dev-dependencies]
 tempfile = "3.1.0"
 pretty_assertions = "0.6.1"
\ No newline 

D7871: rust-utils: add util for canonical path

2020-01-14 Thread Raphaël Gomès
Alphare created this revision.
Herald added subscribers: mercurial-devel, kevincox, durin42.
Herald added a reviewer: hg-reviewers.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D7871

AFFECTED FILES
  rust/Cargo.lock
  rust/hg-core/Cargo.toml
  rust/hg-core/src/utils/files.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/utils/files.rs b/rust/hg-core/src/utils/files.rs
--- a/rust/hg-core/src/utils/files.rs
+++ b/rust/hg-core/src/utils/files.rs
@@ -9,13 +9,17 @@
 
 //! Functions for fiddling with files.
 
-use crate::utils::hg_path::{HgPath, HgPathBuf};
-
+use crate::utils::hg_path::{
+path_to_hg_path_buf, HgPath, HgPathBuf, HgPathError,
+};
+use crate::utils::path_auditor::PathAuditor;
 use crate::utils::replace_slice;
 use lazy_static::lazy_static;
 use std::fs::Metadata;
 use std::iter::FusedIterator;
-use std::path::Path;
+use std::ops::Deref;
+use same_file::is_same_file;
+use std::path::{Path, PathBuf};
 
 pub fn get_path_from_bytes(bytes: &[u8]) ->  {
 let os_str;
@@ -272,9 +276,62 @@
 }
 }
 
+/// Returns the canonical path of `name`, given `cwd` and `root`
+pub fn canonical_path(
+root: impl AsRef,
+cwd: impl AsRef,
+name: impl AsRef,
+) -> Result {
+// TODO add missing normalization for other platforms
+let root = root.as_ref();
+let cwd = cwd.as_ref();
+let name = name.as_ref();
+
+let name = if !name.is_absolute() {
+root.join().join()
+} else {
+name.to_owned()
+};
+let mut auditor = PathAuditor::new();
+if name != root && name.starts_with() {
+let name = name.strip_prefix().unwrap();
+auditor.audit_path(path_to_hg_path_buf(name)?)?;
+return Ok(name.to_owned());
+} else if name == root {
+return Ok("".into());
+} else {
+// Determine whether `name' is in the hierarchy at or beneath `root',
+// by iterating name=name.parent() until that causes no change (can't
+// check name == '/', because that doesn't work on windows).
+let mut name = name.deref();
+loop {
+let same = is_same_file(, ).unwrap_or(false);
+if same {
+if name.components().next().is_none() {
+// `name` was actually the same as root (maybe a symlink)
+return Ok("".into());
+}
+auditor.audit_path(path_to_hg_path_buf(name)?)?;
+return Ok(name.to_owned());
+}
+name = match name.parent() {
+None => break,
+Some(p) => p,
+};
+}
+// TODO hint to the user about using --cwd
+// Bubble up the responsibility to Python for now
+Err(HgPathError::NotUnderRoot {
+path: name.to_owned(),
+root: root.to_owned(),
+})
+}
+}
+
 #[cfg(test)]
 mod tests {
 use super::*;
+use pretty_assertions::assert_eq;
 
 #[test]
 fn find_dirs_some() {
@@ -415,4 +472,53 @@
 assert_eq!(dirs.next(), None);
 assert_eq!(dirs.next(), None);
 }
+
+#[test]
+fn test_canonical_path() {
+let root = Path::new("/repo");
+let cwd = Path::new("/dir");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Err(HgPathError::NotUnderRoot {
+path: PathBuf::from("/"),
+root: root.to_path_buf()
+})
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Err(HgPathError::NotUnderRoot {
+path: PathBuf::from("/"),
+root: root.to_path_buf()
+})
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/");
+let name = Path::new("repo/filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Ok(PathBuf::from("filename"))
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/repo");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Ok(PathBuf::from("filename"))
+);
+
+let root = Path::new("/repo");
+let cwd = Path::new("/repo/subdir");
+let name = Path::new("filename");
+assert_eq!(
+canonical_path(root, cwd, name),
+Ok(PathBuf::from("subdir/filename"))
+);
+}
 }
diff --git a/rust/hg-core/Cargo.toml b/rust/hg-core/Cargo.toml
--- a/rust/hg-core/Cargo.toml
+++ b/rust/hg-core/Cargo.toml
@@ -17,7 +17,8 @@
 rayon = "1.2.0"
 regex = "1.1.0"
 twox-hash = "1.5.0"
+same-file = "1.0.6"
 
 [dev-dependencies]
 tempfile = "3.1.0"
 pretty_assertions = "0.6.1"
\ No newline at end of file
diff --git a/rust/Cargo.lock b/rust/Cargo.lock
---