Script 'mail_helper' called by obssrc

Hello community,

here is the log from the commit of package xh for openSUSE:Factory checked in at 2025-05-14 17:01:45
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/xh (Old)
 and      /work/SRC/openSUSE:Factory/.xh.new.30101 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "xh"

Wed May 14 17:01:45 2025 rev:9 rq:1277327 version:0.24.1

Changes:
--------
--- /work/SRC/openSUSE:Factory/xh/xh.changes    2025-02-25 16:52:03.866141301 +0100
+++ /work/SRC/openSUSE:Factory/.xh.new.30101/xh.changes 2025-05-14 17:02:11.649716377 +0200
@@ -1,0 +2,8 @@
+Wed May 14 07:15:40 UTC 2025 - Michael Vetter <mvet...@suse.com>
+
+- Update to 0.24.1:
+  * Support RFC 5987 encoding for Content-Disposition filenames, see #416
+  * Fix crash on empty zstd response body, see #411
+  * Improve rustls errors for invalid certificates, see #413
+
+-------------------------------------------------------------------

Old:
----
  xh-0.24.0.tar.xz

New:
----
  xh-0.24.1.tar.xz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ xh.spec ++++++
--- /var/tmp/diff_new_pack.nZT7Pc/_old  2025-05-14 17:02:12.565754736 +0200
+++ /var/tmp/diff_new_pack.nZT7Pc/_new  2025-05-14 17:02:12.569754903 +0200
@@ -17,7 +17,7 @@
 Name: xh
-Version: 0.24.0
+Version: 0.24.1
 Release: 0
 Summary: Tool for sending HTTP requests
 License: MIT

++++++ _service ++++++
--- /var/tmp/diff_new_pack.nZT7Pc/_old  2025-05-14 17:02:12.605756410 +0200
+++ /var/tmp/diff_new_pack.nZT7Pc/_new  2025-05-14 17:02:12.605756410 +0200
@@ -3,7 +3,7 @@
 <param name="url">https://github.com/ducaale/xh</param>
 <param name="versionformat">@PARENT_TAG@</param>
 <param name="scm">git</param>
- <param name="revision">v0.24.0</param>
+ <param name="revision">v0.24.1</param>
 <param name="match-tag">*</param>
 <param name="versionrewrite-pattern">v(\d+\.\d+\.\d+)</param>
 <param name="versionrewrite-replacement">\1</param>

++++++ vendor.tar.xz ++++++
/work/SRC/openSUSE:Factory/xh/vendor.tar.xz /work/SRC/openSUSE:Factory/.xh.new.30101/vendor.tar.xz differ: char 15, line 1

++++++ xh-0.24.0.tar.xz -> xh-0.24.1.tar.xz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xh-0.24.0/CHANGELOG.md new/xh-0.24.1/CHANGELOG.md
--- old/xh-0.24.0/CHANGELOG.md  2025-02-18 09:15:49.000000000 +0100
+++ new/xh-0.24.1/CHANGELOG.md  2025-05-02 16:20:05.000000000 +0200
@@ -1,3 +1,13 @@
+## [0.24.1] - 2025-05-02
+### Features
+- Support RFC 5987 encoding for Content-Disposition filenames, see #416 (@zuisong)
+
+### Bug fixes
+- Fix crash on empty zstd response body, see #411 (@blyxxyz)
+
+### Other
+- Improve rustls errors for invalid certificates, see #413 (@blyxxyz)
+
 ## [0.24.0] - 2025-02-18
 ### Features
 - Add `--generate` option to generate the man page and shell completions at runtime,
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xh-0.24.0/Cargo.lock new/xh-0.24.1/Cargo.lock
--- old/xh-0.24.0/Cargo.lock    2025-02-18 09:15:49.000000000 +0100
+++ new/xh-0.24.1/Cargo.lock    2025-05-02 16:20:05.000000000 +0200
@@ -814,9 +814,9 @@
 
 [[package]]
 name = "humantime"
-version = "2.1.0"
+version = "2.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
+checksum = "9b112acc8b3adf4b107a8ec20977da0273a8c386765a3ec0229bd500a1443f9f"
 
 [[package]]
 name = "hyper"
@@ -1760,9 +1760,9 @@
 
 [[package]]
 name = "rustls"
-version = "0.23.23"
+version = "0.23.25"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "47796c98c480fce5406ef69d1c76378375492c3b0a0de587be0c1d9feb12f395"
+checksum = "822ee9188ac4ec04a2f0531e55d035fb2de73f18b41a63c70c2712503b6fb13c"
 dependencies = [
  "log",
  "once_cell",
@@ -1805,9 +1805,9 @@
 [[package]]
 name = 
"rustls-webpki" -version = "0.102.8" +version = "0.103.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9" +checksum = "0aa4eeac2588ffff23e9d7a7e9b3f971c5fb5b7ebc9452745e0c232c64f83b2f" dependencies = [ "ring", "rustls-pki-types", @@ -1842,6 +1842,15 @@ ] [[package]] +name = "sanitize-filename" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc984f4f9ceb736a7bb755c3e3bd17dc56370af2600c9780dcc48c66453da34d" +dependencies = [ + "regex", +] + +[[package]] name = "schannel" version = "0.1.27" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -2780,7 +2789,7 @@ [[package]] name = "xh" -version = "0.24.0" +version = "0.24.1" dependencies = [ "anyhow", "assert_cmd", @@ -2798,6 +2807,7 @@ "flate2", "form_urlencoded", "http-body-util", + "humantime", "hyper", "hyper-util", "indicatif", @@ -2812,6 +2822,7 @@ "once_cell", "os_display", "pem", + "percent-encoding", "predicates", "rand", "regex-lite", @@ -2821,6 +2832,7 @@ "rpassword", "rustls", "ruzstd", + "sanitize-filename", "serde", "serde-transcode", "serde_json", diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xh-0.24.0/Cargo.toml new/xh-0.24.1/Cargo.toml --- old/xh-0.24.0/Cargo.toml 2025-02-18 09:15:49.000000000 +0100 +++ new/xh-0.24.1/Cargo.toml 2025-05-02 16:20:05.000000000 +0200 @@ -1,6 +1,6 @@ [package] name = "xh" -version = "0.24.0" +version = "0.24.1" authors = ["ducaale <sharaf...@hotmail.com>"] edition = "2021" rust-version = "1.74.0" @@ -48,6 +48,7 @@ supports-hyperlinks = "3.0.0" termcolor = "1.1.2" time = "0.3.16" +humantime = "2.2.0" unicode-width = "0.1.9" url = "2.2.2" ruzstd = { version = "0.7", default-features = false, features = ["std"]} @@ -56,9 +57,11 @@ # Enable logging in transitive dependencies. # The rustls version number should be kept in sync with hyper/reqwest. 
-rustls = { version = "0.23.14", optional = true, default-features = false, features = ["logging"] } +rustls = { version = "0.23.25", optional = true, default-features = false, features = ["logging"] } tracing = { version = "0.1.41", default-features = false, features = ["log"] } reqwest_cookie_store = { version = "0.8.0", features = ["serde"] } +percent-encoding = "2.3.1" +sanitize-filename = "0.6.0" [dependencies.reqwest] version = "0.12.3" diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xh-0.24.0/doc/xh.1 new/xh-0.24.1/doc/xh.1 --- old/xh-0.24.0/doc/xh.1 2025-02-18 09:15:49.000000000 +0100 +++ new/xh-0.24.1/doc/xh.1 2025-05-02 16:20:05.000000000 +0200 @@ -1,4 +1,4 @@ -.TH XH 1 2025-02-18 0.24.0 "User Commands" +.TH XH 1 2025-05-02 0.24.1 "User Commands" .SH NAME xh \- Friendly and fast tool for sending HTTP requests diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xh-0.24.0/src/content_disposition.rs new/xh-0.24.1/src/content_disposition.rs --- old/xh-0.24.0/src/content_disposition.rs 1970-01-01 01:00:00.000000000 +0100 +++ new/xh-0.24.1/src/content_disposition.rs 2025-05-02 16:20:05.000000000 +0200 @@ -0,0 +1,186 @@ +use percent_encoding::percent_decode_str; + +/// Parse filename from Content-Disposition header +/// Prioritizes filename* parameter if present, otherwise uses filename parameter +pub fn parse_filename_from_content_disposition(content_disposition: &str) -> Option<String> { + let parts: Vec<&str> = content_disposition + .split(';') + .map(|part| part.trim()) + .collect(); + + // First try to find filename* parameter + for part in parts.iter() { + if let Some(value) = part.strip_prefix("filename*=") { + if let Some(filename) = parse_encoded_filename(value) { + return Some(filename); + } + } + } + + // If filename* is not found or parsing failed, try regular filename parameter + for part in parts { + if let Some(value) = part.strip_prefix("filename=") { + return parse_regular_filename(value); + } + } + + None +} + +/// Parse regular filename parameter +/// Handles both quoted and unquoted filenames +fn parse_regular_filename(filename: &str) -> Option<String> { + // Content-Disposition: attachment; filename="file with \"quotes\".txt" // This won't occur + // Content-Disposition: attachment; filename*=UTF-8''file%20with%20quotes.txt // This is the actual practice + // + // We don't need to handle escaped characters in Content-Disposition header parsing because: + // + // It's not a standard practice + // It rarely occurs in real-world scenarios + // When filenames contain special characters, they should use the filename* parameter + + // Remove quotes if present + let filename = if filename.starts_with('"') && filename.ends_with('"') && filename.len() >= 2 { + &filename[1..(filename.len() - 1)] + } else { + filename + }; + + if filename.is_empty() { + return None; + } + + Some(filename.to_string()) +} + +/// Parse RFC 5987 encoded filename (filename*) +/// Format: charset'language'encoded-value +fn parse_encoded_filename(content: &str) -> Option<String> { + // Remove "filename*=" prefix + + // According to RFC 5987, format should be: charset'language'encoded-value + let parts: Vec<&str> = content.splitn(3, '\'').collect(); + if parts.len() != 3 { + return None; + } + let charset = parts[0]; + let encoded_filename = parts[2]; + + // Percent-decode the encoded filename into bytes. 
+ let decoded_bytes = percent_decode_str(encoded_filename).collect::<Vec<u8>>(); + + if charset.eq_ignore_ascii_case("UTF-8") { + if let Ok(decoded_str) = String::from_utf8(decoded_bytes) { + return Some(decoded_str); + } + } else if charset.eq_ignore_ascii_case("ISO-8859-1") { + // RFC 5987 says to use ISO/IEC 8859-1:1998. + // But Firefox and Chromium decode %99 as ™ so they're actually using + // Windows-1252. This mixup is common on the web. + // This affects the 0x80-0x9F range. According to ISO 8859-1 those are + // control characters. According to Windows-1252 most of them are + // printable characters. + // They agree on all the other characters, and filenames shouldn't have + // control characters, so Windows-1252 makes sense. + if let Some(decoded_str) = encoding_rs::WINDOWS_1252 + .decode_without_bom_handling_and_without_replacement(&decoded_bytes) + { + return Some(decoded_str.into_owned()); + } + } else { + // Unknown charset. As a fallback, try interpreting as UTF-8. + // Firefox also does this. + // Chromium makes up its own filename. (Even if `filename=` is present.) + if let Ok(decoded_str) = String::from_utf8(decoded_bytes) { + return Some(decoded_str); + } + } + + None +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_simple_filename() { + let header = r#"attachment; filename="example.pdf""#; + assert_eq!( + parse_filename_from_content_disposition(header), + Some("example.pdf".to_string()) + ); + } + + #[test] + fn test_filename_without_quotes() { + let header = "attachment; filename=example.pdf"; + assert_eq!( + parse_filename_from_content_disposition(header), + Some("example.pdf".to_string()) + ); + } + + #[test] + fn test_encoded_filename() { + // UTF-8 encoded Chinese filename "测试.pdf" + let header = "attachment; filename*=UTF-8''%E6%B5%8B%E8%AF%95.pdf"; + assert_eq!( + parse_filename_from_content_disposition(header), + Some("测试.pdf".to_string()) + ); + } + + #[test] + fn test_both_filenames() { + // When both filename and filename* are present, filename* should be preferred + let header = + r#"attachment; filename="fallback.pdf"; filename*=UTF-8''%E6%B5%8B%E8%AF%95.pdf"#; + assert_eq!( + parse_filename_from_content_disposition(header), + Some("测试.pdf".to_string()) + ); + } + #[test] + fn test_decode_with_windows_1252() { + let header = "content-disposition: attachment; filename*=iso-8859-1'en'a%99b"; + assert_eq!( + parse_filename_from_content_disposition(header), + Some("a™b".to_string()) + ); + } + + #[test] + fn test_both_filenames_with_bad_format() { + // When both filename and filename* are present, filename* with bad format, filename should be used + let header = r#"attachment; filename="fallback.pdf"; filename*=UTF-8'bad_format.pdf"#; + assert_eq!( + parse_filename_from_content_disposition(header), + Some("fallback.pdf".to_string()) + ); + } + + #[test] + fn test_no_filename() { + let header = "attachment"; + assert_eq!(parse_filename_from_content_disposition(header), None); + } + + #[test] + fn test_iso_8859_1() { + let header = "attachment;filename*=iso-8859-1'en'%A3%20rates"; + assert_eq!( + parse_filename_from_content_disposition(header), + Some("£ rates".to_string()) + ); + } + + #[test] + fn test_bad_encoding_fallback_to_utf8() { + let header = "attachment;filename*=UTF-16''%E6%B5%8B%E8%AF%95.pdf"; + assert_eq!( + parse_filename_from_content_disposition(header), + Some("测试.pdf".to_string()) + ); + } +} diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xh-0.24.0/src/decoder.rs 
new/xh-0.24.1/src/decoder.rs --- old/xh-0.24.0/src/decoder.rs 2025-02-18 09:15:49.000000000 +0100 +++ new/xh-0.24.1/src/decoder.rs 2025-05-02 16:20:05.000000000 +0200 @@ -1,4 +1,6 @@ +use std::cell::Cell; use std::io::{self, Read}; +use std::rc::Rc; use std::str::FromStr; use brotli::Decompressor as BrotliDecoder; @@ -6,7 +8,7 @@ use reqwest::header::{HeaderMap, CONTENT_ENCODING, CONTENT_LENGTH, TRANSFER_ENCODING}; use ruzstd::{FrameDecoder, StreamingDecoder as ZstdDecoder}; -#[derive(Debug)] +#[derive(Debug, Clone, Copy)] pub enum CompressionType { Gzip, Deflate, @@ -18,7 +20,11 @@ type Err = anyhow::Error; fn from_str(value: &str) -> anyhow::Result<CompressionType> { match value { - "gzip" => Ok(CompressionType::Gzip), + // RFC 2616 section 3.5: + // For compatibility with previous implementations of HTTP, + // applications SHOULD consider "x-gzip" and "x-compress" to be + // equivalent to "gzip" and "compress" respectively. + "gzip" | "x-gzip" => Ok(CompressionType::Gzip), "deflate" => Ok(CompressionType::Deflate), "br" => Ok(CompressionType::Brotli), "zstd" => Ok(CompressionType::Zstd), @@ -52,89 +58,114 @@ compression_type } -struct InnerReader<R: Read> { - reader: R, - has_read_data: bool, - has_errored: bool, +/// A wrapper that checks whether an error is an I/O error or a decoding error. +/// +/// The main purpose of this is to suppress decoding errors that happen because +/// of an empty input. This is behavior we inherited from HTTPie. +/// +/// It's load-bearing in the case of HEAD requests, where responses don't have a +/// body but may declare a Content-Encoding. +/// +/// We also treat other empty response bodies like this, regardless of the request +/// method. This matches all the user agents I tried (reqwest, requests/HTTPie, curl, +/// wget, Firefox, Chromium) but I don't know if it's prescribed by any RFC. +/// +/// As a side benefit we make I/O errors more focused by stripping decoding errors. +/// +/// The reader is structured like this: +/// +/// OuterReader ───────┐ +/// compression codec ├── [Status] +/// [InnerReader] ──────┘ +/// underlying I/O +/// +/// The shared Status object is used to communicate. +struct OuterReader<'a> { + decoder: Box<dyn Read + 'a>, + status: Option<Rc<Status>>, +} + +struct Status { + has_read_data: Cell<bool>, + read_error: Cell<Option<io::Error>>, + error_msg: &'static str, } -impl<R: Read> InnerReader<R> { - fn new(reader: R) -> Self { - InnerReader { - reader, - has_read_data: false, - has_errored: false, +impl Read for OuterReader<'_> { + fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { + match self.decoder.read(buf) { + Ok(n) => Ok(n), + Err(err) => { + let Some(ref status) = self.status else { + // No decoder, pass on as is + return Err(err); + }; + match status.read_error.take() { + // If an I/O error happened, return that. + Some(read_error) => Err(read_error), + // If the input was empty, ignore the decoder error. + None if !status.has_read_data.get() => Ok(0), + // Otherwise, decorate the decoder error with a message. 
+ None => Err(io::Error::new( + io::ErrorKind::InvalidData, + DecodeError { + msg: status.error_msg, + err, + }, + )), + } + } } } } +struct InnerReader<R: Read> { + reader: R, + status: Rc<Status>, +} + impl<R: Read> Read for InnerReader<R> { fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { - self.has_errored = false; + self.status.read_error.set(None); match self.reader.read(buf) { Ok(0) => Ok(0), Ok(len) => { - self.has_read_data = true; + self.status.has_read_data.set(true); Ok(len) } - Err(e) => { - self.has_errored = true; - Err(e) + Err(err) => { + // Store the real error and return a placeholder. + // The placeholder is intercepted and replaced by the real error + // before leaving this module. + // We store the whole error instead of setting a flag because of zstd: + // - ZstdDecoder::new() fails with a custom error type and it's hard + // to extract the underlying io::Error + // - ZstdDecoder::read() (unlike the other decoders) wraps custom errors + // around the underlying io::Error + let msg = err.to_string(); + let kind = err.kind(); + self.status.read_error.set(Some(err)); + Err(io::Error::new(kind, msg)) } } } } -#[allow(clippy::large_enum_variant)] -enum Decoder<R: Read> { - PlainText(InnerReader<R>), - Gzip(GzDecoder<InnerReader<R>>), - Deflate(ZlibDecoder<InnerReader<R>>), - Brotli(BrotliDecoder<InnerReader<R>>), - Zstd(ZstdDecoder<InnerReader<R>, FrameDecoder>), +#[derive(Debug)] +struct DecodeError { + msg: &'static str, + err: io::Error, } -impl<R: Read> Read for Decoder<R> { - fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { - match self { - Decoder::PlainText(decoder) => decoder.read(buf), - Decoder::Gzip(decoder) => match decoder.read(buf) { - Ok(n) => Ok(n), - Err(e) if decoder.get_ref().has_errored => Err(e), - Err(_) if !decoder.get_ref().has_read_data => Ok(0), - Err(e) => Err(io::Error::new( - e.kind(), - format!("error decoding gzip response body: {}", e), - )), - }, - Decoder::Deflate(decoder) => match decoder.read(buf) { - Ok(n) => Ok(n), - Err(e) if decoder.get_ref().has_errored => Err(e), - Err(_) if !decoder.get_ref().has_read_data => Ok(0), - Err(e) => Err(io::Error::new( - e.kind(), - format!("error decoding deflate response body: {}", e), - )), - }, - Decoder::Brotli(decoder) => match decoder.read(buf) { - Ok(n) => Ok(n), - Err(e) if decoder.get_ref().has_errored => Err(e), - Err(_) if !decoder.get_ref().has_read_data => Ok(0), - Err(e) => Err(io::Error::new( - e.kind(), - format!("error decoding brotli response body: {}", e), - )), - }, - Decoder::Zstd(decoder) => match decoder.read(buf) { - Ok(n) => Ok(n), - Err(e) if decoder.get_ref().has_errored => Err(e), - Err(_) if !decoder.get_ref().has_read_data => Ok(0), - Err(e) => Err(io::Error::new( - e.kind(), - format!("error decoding zstd response body: {}", e), - )), - }, - } +impl std::fmt::Display for DecodeError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(self.msg) + } +} + +impl std::error::Error for DecodeError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + Some(&self.err) } } @@ -142,18 +173,71 @@ reader: &mut impl Read, compression_type: Option<CompressionType>, ) -> impl Read + '_ { - let reader = InnerReader::new(reader); - match compression_type { - Some(CompressionType::Gzip) => Decoder::Gzip(GzDecoder::new(reader)), - Some(CompressionType::Deflate) => Decoder::Deflate(ZlibDecoder::new(reader)), - Some(CompressionType::Brotli) => Decoder::Brotli(BrotliDecoder::new(reader, 4096)), - 
Some(CompressionType::Zstd) => Decoder::Zstd(ZstdDecoder::new(reader).unwrap()), - None => Decoder::PlainText(reader), + let Some(compression_type) = compression_type else { + return OuterReader { + decoder: Box::new(reader), + status: None, + }; + }; + + let status = Rc::new(Status { + has_read_data: Cell::new(false), + read_error: Cell::new(None), + error_msg: match compression_type { + CompressionType::Gzip => "error decoding gzip response body", + CompressionType::Deflate => "error decoding deflate response body", + CompressionType::Brotli => "error decoding brotli response body", + CompressionType::Zstd => "error decoding zstd response body", + }, + }); + let reader = InnerReader { + reader, + status: Rc::clone(&status), + }; + OuterReader { + decoder: match compression_type { + CompressionType::Gzip => Box::new(GzDecoder::new(reader)), + CompressionType::Deflate => Box::new(ZlibDecoder::new(reader)), + // 32K is the default buffer size for gzip and deflate + CompressionType::Brotli => Box::new(BrotliDecoder::new(reader, 32 * 1024)), + CompressionType::Zstd => Box::new(LazyZstdDecoder::Uninit(Some(reader))), + }, + status: Some(status), + } +} + +/// [ZstdDecoder] reads from its input during construction. +/// +/// We need to delay construction until [Read] so read errors stay read errors. +enum LazyZstdDecoder<R: Read> { + Uninit(Option<R>), + Init(ZstdDecoder<R, FrameDecoder>), +} + +impl<R: Read> Read for LazyZstdDecoder<R> { + fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { + match self { + LazyZstdDecoder::Uninit(reader) => match reader.take() { + Some(reader) => match ZstdDecoder::new(reader) { + Ok(decoder) => { + *self = LazyZstdDecoder::Init(decoder); + self.read(buf) + } + Err(err) => Err(io::Error::other(err)), + }, + // We seem to get here in --stream mode because another layer tries + // to read again after Ok(0). 
+ None => Err(io::Error::other("failed to construct ZstdDecoder")), + }, + LazyZstdDecoder::Init(streaming_decoder) => streaming_decoder.read(buf), + } } } #[cfg(test)] mod tests { + use std::error::Error; + use super::*; #[test] @@ -167,7 +251,7 @@ Err(e) => { assert!(e .to_string() - .starts_with("error decoding gzip response body:")) + .starts_with("error decoding gzip response body")) } } } @@ -195,23 +279,128 @@ #[test] fn interrupts_are_handled_gracefully() { struct InterruptedReader { - init: bool, + step: u8, } impl Read for InterruptedReader { - fn read(&mut self, _buf: &mut [u8]) -> io::Result<usize> { - if !self.init { - self.init = true; - Err(io::Error::new(io::ErrorKind::Interrupted, "interrupted")) - } else { - Ok(0) + fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { + self.step += 1; + match self.step { + 1 => Read::read(&mut b"abc".as_slice(), buf), + 2 => Err(io::Error::new(io::ErrorKind::Interrupted, "interrupted")), + 3 => Read::read(&mut b"def".as_slice(), buf), + _ => Ok(0), } } } - let mut base_reader = InterruptedReader { init: false }; - let mut reader = decompress(&mut base_reader, Some(CompressionType::Gzip)); - let mut buffer = Vec::new(); - reader.read_to_end(&mut buffer).unwrap(); - assert_eq!(buffer, b""); + for compression_type in [ + None, + Some(CompressionType::Brotli), + Some(CompressionType::Deflate), + Some(CompressionType::Gzip), + Some(CompressionType::Zstd), + ] { + let mut base_reader = InterruptedReader { step: 0 }; + let mut reader = decompress(&mut base_reader, compression_type); + let mut buffer = Vec::with_capacity(16); + let res = reader.read_to_end(&mut buffer); + if compression_type.is_none() { + res.unwrap(); + assert_eq!(buffer, b"abcdef"); + } else { + res.unwrap_err(); + } + } + } + + #[test] + fn empty_inputs_do_not_cause_errors() { + for compression_type in [ + None, + Some(CompressionType::Brotli), + Some(CompressionType::Deflate), + Some(CompressionType::Gzip), + Some(CompressionType::Zstd), + ] { + let mut input: &[u8] = b""; + let mut reader = decompress(&mut input, compression_type); + let mut buf = Vec::new(); + reader.read_to_end(&mut buf).unwrap(); + assert_eq!(buf, b""); + + // Must accept repeated read attempts after EOF (this happens with --stream) + for _ in 0..10 { + reader.read_to_end(&mut buf).unwrap(); + assert_eq!(buf, b""); + } + } + } + + #[test] + fn read_errors_keep_their_context() { + #[derive(Debug)] + struct SpecialErr; + impl std::fmt::Display for SpecialErr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", self) + } + } + impl std::error::Error for SpecialErr {} + + struct SadReader; + impl Read for SadReader { + fn read(&mut self, _buf: &mut [u8]) -> io::Result<usize> { + Err(io::Error::new(io::ErrorKind::WouldBlock, SpecialErr)) + } + } + + for compression_type in [ + None, + Some(CompressionType::Brotli), + Some(CompressionType::Deflate), + Some(CompressionType::Gzip), + Some(CompressionType::Zstd), + ] { + let mut input = SadReader; + let mut reader = decompress(&mut input, compression_type); + let mut buf = Vec::new(); + let err = reader.read_to_end(&mut buf).unwrap_err(); + assert_eq!(err.kind(), io::ErrorKind::WouldBlock); + err.get_ref().unwrap().downcast_ref::<SpecialErr>().unwrap(); + } + } + + #[test] + fn true_decode_errors_are_preserved() { + for compression_type in [ + CompressionType::Brotli, + CompressionType::Deflate, + CompressionType::Gzip, + CompressionType::Zstd, + ] { + let mut input: &[u8] = b"bad"; + let mut reader = decompress(&mut 
input, Some(compression_type)); + let mut buf = Vec::new(); + let err = reader.read_to_end(&mut buf).unwrap_err(); + + assert_eq!(err.kind(), io::ErrorKind::InvalidData); + let decode_err = err + .get_ref() + .unwrap() + .downcast_ref::<DecodeError>() + .unwrap(); + let real_err = decode_err.source().unwrap(); + let real_err = real_err.downcast_ref::<io::Error>().unwrap(); + + // All four decoders make a different choice here... + // Still the easiest way to check that we're preserving the error + let expected_kind = match compression_type { + CompressionType::Gzip => io::ErrorKind::UnexpectedEof, + CompressionType::Deflate => io::ErrorKind::InvalidInput, + CompressionType::Brotli => io::ErrorKind::InvalidData, + CompressionType::Zstd => io::ErrorKind::Other, + }; + assert_eq!(real_err.kind(), expected_kind); + } } } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xh-0.24.0/src/download.rs new/xh-0.24.1/src/download.rs --- old/xh-0.24.0/src/download.rs 2025-02-18 09:15:49.000000000 +0100 +++ new/xh-0.24.1/src/download.rs 2025-05-02 16:20:05.000000000 +0200 @@ -3,6 +3,9 @@ use std::path::{Path, PathBuf}; use std::time::Instant; +use crate::content_disposition; +use crate::decoder::{decompress, get_compression_type}; +use crate::utils::{copy_largebuf, test_pretend_term, HeaderValueExt}; use anyhow::{anyhow, Context, Result}; use indicatif::{HumanBytes, ProgressBar, ProgressStyle}; use mime2ext::mime2ext; @@ -13,9 +16,6 @@ StatusCode, }; -use crate::decoder::{decompress, get_compression_type}; -use crate::utils::{copy_largebuf, test_pretend_term, HeaderValueExt}; - fn get_content_length(headers: &HeaderMap) -> Option<u64> { headers .get(CONTENT_LENGTH) @@ -27,20 +27,12 @@ // of PathBufs fn get_file_name(response: &Response, orig_url: &reqwest::Url) -> String { fn from_header(response: &Response) -> Option<String> { - let quoted = Regex::new("filename=\"([^\"]*)\"").unwrap(); - // Alternative form: - let unquoted = Regex::new("filename=([^;=\"]*)").unwrap(); - // TODO: support "filename*" version - let header = response .headers() .get(CONTENT_DISPOSITION)? 
.to_utf8_str() .ok()?; - let caps = quoted - .captures(header) - .or_else(|| unquoted.captures(header))?; - Some(caps[1].to_string()) + content_disposition::parse_filename_from_content_disposition(header) } fn from_url(url: &reqwest::Url) -> Option<String> { @@ -60,7 +52,13 @@ .or_else(|| from_url(orig_url)) .unwrap_or_else(|| "index".to_string()); - let filename = filename.split(std::path::is_separator).next_back().unwrap(); + let filename = sanitize_filename::sanitize_with_options( + &filename, + sanitize_filename::Options { + replacement: "_", + ..Default::default() + }, + ); let mut filename = filename.trim().trim_start_matches('.').to_string(); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xh-0.24.0/src/error_reporting.rs new/xh-0.24.1/src/error_reporting.rs --- old/xh-0.24.0/src/error_reporting.rs 1970-01-01 01:00:00.000000000 +0100 +++ new/xh-0.24.1/src/error_reporting.rs 2025-05-02 16:20:05.000000000 +0200 @@ -0,0 +1,76 @@ +use std::process::ExitCode; + +pub(crate) fn additional_messages(err: &anyhow::Error, native_tls: bool) -> Vec<String> { + let mut msgs = Vec::new(); + + #[cfg(feature = "rustls")] + msgs.extend(format_rustls_error(err)); + + if native_tls && err.root_cause().to_string() == "invalid minimum TLS version for backend" { + msgs.push("Try running without the --native-tls flag.".into()); + } + + msgs +} + +/// Format certificate expired/not valid yet messages. By default these print +/// human-unfriendly Unix timestamps. +/// +/// Other rustls error messages (e.g. wrong host) are readable enough. +#[cfg(feature = "rustls")] +fn format_rustls_error(err: &anyhow::Error) -> Option<String> { + use humantime::format_duration; + use rustls::pki_types::UnixTime; + use rustls::CertificateError; + use time::OffsetDateTime; + + // Multiple layers of io::Error for some reason? 
+ // This may be fragile + let err = err.root_cause().downcast_ref::<std::io::Error>()?; + let err = err.get_ref()?.downcast_ref::<std::io::Error>()?; + let err = err.get_ref()?.downcast_ref::<rustls::Error>()?; + let rustls::Error::InvalidCertificate(err) = err else { + return None; + }; + + fn conv_time(unix_time: &UnixTime) -> Option<OffsetDateTime> { + OffsetDateTime::from_unix_timestamp(unix_time.as_secs() as i64).ok() + } + + match err { + CertificateError::ExpiredContext { time, not_after } => { + let time = conv_time(time)?; + let not_after = conv_time(not_after)?; + let diff = format_duration((time - not_after).try_into().ok()?); + Some(format!( + "Certificate not valid after {not_after} ({diff} ago).", + )) + } + CertificateError::NotValidYetContext { time, not_before } => { + let time = conv_time(time)?; + let not_before = conv_time(not_before)?; + let diff = format_duration((not_before - time).try_into().ok()?); + Some(format!( + "Certificate not valid before {not_before} ({diff} from now).", + )) + } + _ => None, + } +} + +pub(crate) fn exit_code(err: &anyhow::Error) -> ExitCode { + if let Some(err) = err.downcast_ref::<reqwest::Error>() { + if err.is_timeout() { + return ExitCode::from(2); + } + } + + if err + .downcast_ref::<crate::redirect::TooManyRedirects>() + .is_some() + { + return ExitCode::from(6); + } + + ExitCode::FAILURE +} diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xh-0.24.0/src/main.rs new/xh-0.24.1/src/main.rs --- old/xh-0.24.0/src/main.rs 2025-02-18 09:15:49.000000000 +0100 +++ new/xh-0.24.1/src/main.rs 2025-05-02 16:20:05.000000000 +0200 @@ -2,8 +2,10 @@ mod auth; mod buffer; mod cli; +mod content_disposition; mod decoder; mod download; +mod error_reporting; mod formatting; mod generation; mod middleware; @@ -22,7 +24,7 @@ use std::io::{self, IsTerminal, Read, Write as _}; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr}; use std::path::PathBuf; -use std::process; +use std::process::ExitCode; use std::str::FromStr; use std::sync::Arc; @@ -61,7 +63,7 @@ } } -fn main() { +fn main() -> ExitCode { let args = Cli::parse(); if args.debug { @@ -77,39 +79,30 @@ let bin_name = args.bin_name.clone(); match run(args) { - Ok(exit_code) => { - process::exit(exit_code); - } + Ok(exit_code) => exit_code, Err(err) => { log::debug!("{err:#?}"); eprintln!("{bin_name}: error: {err:?}"); - let msg = err.root_cause().to_string(); - if native_tls && msg == "invalid minimum TLS version for backend" { + + for message in error_reporting::additional_messages(&err, native_tls) { eprintln!(); - eprintln!("Try running without the --native-tls flag."); - } - if let Some(err) = err.downcast_ref::<reqwest::Error>() { - if err.is_timeout() { - process::exit(2); - } + eprintln!("{message}"); } - if msg.starts_with("Too many redirects") { - process::exit(6); - } - process::exit(1); + + error_reporting::exit_code(&err) } } } -fn run(args: Cli) -> Result<i32> { +fn run(args: Cli) -> Result<ExitCode> { if let Some(generate) = args.generate { generation::generate(&args.bin_name, generate); - return Ok(0); + return Ok(ExitCode::SUCCESS); } if args.curl { to_curl::print_curl_translation(args)?; - return Ok(0); + return Ok(ExitCode::SUCCESS); } let (mut headers, headers_to_unset) = args.request_items.headers()?; @@ -189,7 +182,7 @@ return Err(anyhow!("This binary was built without native-tls support")); } - let mut exit_code: i32 = 0; + let mut failure_code = None; let mut resume: Option<u64> = None; let mut auth = None; let mut 
save_auth_in_session = true; @@ -622,16 +615,17 @@ let status = response.status(); if args.check_status.unwrap_or(!args.httpie_compat_mode) { - exit_code = match status.as_u16() { - 300..=399 if !args.follow => 3, - 400..=499 => 4, - 500..=599 => 5, - _ => 0, - }; + match status.as_u16() { + 300..=399 if !args.follow => failure_code = Some(ExitCode::from(3)), + 400..=499 => failure_code = Some(ExitCode::from(4)), + 500..=599 => failure_code = Some(ExitCode::from(5)), + _ => (), + } + // Print this if the status code isn't otherwise ending up in the terminal. // HTTPie looks at --quiet, since --quiet always suppresses the response // headers even if you pass --print=h. But --print takes precedence for us. - if exit_code != 0 && (is_output_redirected || !print.response_headers) { + if failure_code.is_some() && (is_output_redirected || !print.response_headers) { log::warn!("HTTP {} {}", status.as_u16(), reason_phrase(&response)); } } @@ -640,7 +634,7 @@ printer.print_response_headers(&response)?; } if args.download { - if exit_code == 0 { + if failure_code.is_none() { download_file( response, args.output, @@ -670,7 +664,7 @@ .with_context(|| format!("couldn't persist session {}", s.path.display()))?; } - Ok(exit_code) + Ok(failure_code.unwrap_or(ExitCode::SUCCESS)) } /// Configure backtraces for standard panics and anyhow using `$RUST_BACKTRACE`. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xh-0.24.0/src/printer.rs new/xh-0.24.1/src/printer.rs --- old/xh-0.24.0/src/printer.rs 2025-02-18 09:15:49.000000000 +0100 +++ new/xh-0.24.1/src/printer.rs 2025-05-02 16:20:05.000000000 +0200 @@ -44,6 +44,17 @@ checked: bool, } +#[derive(Debug)] +struct FoundBinaryData; + +impl std::fmt::Display for FoundBinaryData { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str("binary data not shown in terminal") + } +} + +impl std::error::Error for FoundBinaryData {} + impl<'a, T: Read> BinaryGuard<'a, T> { fn new(reader: &'a mut T, checked: bool) -> Self { Self { @@ -78,10 +89,7 @@ Err(e) => return Err(e), }; if self.checked && buf.contains(&b'\0') { - return Err(io::Error::new( - io::ErrorKind::InvalidData, - "Found binary data", - )); + return Err(io::Error::new(io::ErrorKind::InvalidData, FoundBinaryData)); } else if buf.is_empty() { if self.buffer.is_empty() { return Ok(None); @@ -454,7 +462,7 @@ Ok(_) => { self.buffer.print("\n")?; } - Err(err) if err.kind() == io::ErrorKind::InvalidData => { + Err(err) if err.get_ref().is_some_and(|err| err.is::<FoundBinaryData>()) => { self.buffer.print(BINARY_SUPPRESSOR)?; } Err(err) => return Err(err.into()), diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xh-0.24.0/src/redirect.rs new/xh-0.24.1/src/redirect.rs --- old/xh-0.24.0/src/redirect.rs 2025-02-18 09:15:49.000000000 +0100 +++ new/xh-0.24.1/src/redirect.rs 2025-05-02 16:20:05.000000000 +0200 @@ -1,4 +1,4 @@ -use anyhow::{anyhow, Result}; +use anyhow::Result; use reqwest::blocking::{Request, Response}; use reqwest::header::{ HeaderMap, AUTHORIZATION, CONTENT_ENCODING, CONTENT_LENGTH, CONTENT_TYPE, COOKIE, LOCATION, @@ -31,10 +31,10 @@ if remaining_redirects > 0 { remaining_redirects -= 1; } else { - return Err(anyhow!( - "Too many redirects (--max-redirects={})", - self.max_redirects - )); + return Err(TooManyRedirects { + max_redirects: self.max_redirects, + } + .into()); } log::info!("Following redirect to {}", next_request.url()); log::trace!("Remaining redirects: {}", 
remaining_redirects); @@ -48,6 +48,23 @@ } } +#[derive(Debug)] +pub(crate) struct TooManyRedirects { + max_redirects: usize, +} + +impl std::fmt::Display for TooManyRedirects { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "Too many redirects (--max-redirects={})", + self.max_redirects, + ) + } +} + +impl std::error::Error for TooManyRedirects {} + // See https://github.com/seanmonstar/reqwest/blob/bbeb1ede4e8098481c3de6f2cafb8ecca1db4ede/src/async_impl/client.rs#L1500-L1607 fn get_next_request(mut request: Request, response: &Response) -> Option<Request> { let get_next_url = |request: &Request| { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xh-0.24.0/tests/cases/download.rs new/xh-0.24.1/tests/cases/download.rs --- old/xh-0.24.0/tests/cases/download.rs 2025-02-18 09:15:49.000000000 +0100 +++ new/xh-0.24.1/tests/cases/download.rs 2025-05-02 16:20:05.000000000 +0200 @@ -120,6 +120,101 @@ } #[test] +fn download_support_filename_rfc_5987() { + let dir = tempdir().unwrap(); + let server = server::http(|_req| async move { + hyper::Response::builder() + .header( + "Content-Disposition", + r#"attachment; filename*=UTF-8''abcd1234.txt"#, + ) + .body("file".into()) + .unwrap() + }); + + get_command() + .args(["--download", &server.base_url()]) + .current_dir(&dir) + .assert() + .success(); + assert_eq!( + fs::read_to_string(dir.path().join("abcd1234.txt")).unwrap(), + "file" + ); +} +#[test] +fn download_support_filename_rfc_5987_percent_encoded() { + let dir = tempdir().unwrap(); + let server = server::http(|_req| async move { + hyper::Response::builder() + .header( + "Content-Disposition", + r#"attachment; filename*=UTF-8''%E6%B5%8B%E8%AF%95.txt"#, + ) + .body("file".into()) + .unwrap() + }); + + get_command() + .args(["--download", &server.base_url()]) + .current_dir(&dir) + .assert() + .success(); + assert_eq!( + fs::read_to_string(dir.path().join("测试.txt")).unwrap(), + "file" + ); +} + +#[test] +fn download_support_filename_rfc_5987_percent_encoded_with_iso_8859_1() { + let dir = tempdir().unwrap(); + let server = server::http(|_req| async move { + hyper::Response::builder() + .header( + "Content-Disposition", + r#"attachment; filename*=iso-8859-1'en'%A3%20rates.txt"#, + ) + .body("file".into()) + .unwrap() + }); + + get_command() + .args(["--download", &server.base_url()]) + .current_dir(&dir) + .assert() + .success(); + assert_eq!( + fs::read_to_string(dir.path().join("£ rates.txt")).unwrap(), + "file" + ); +} + +#[test] +fn download_filename_star_with_high_priority() { + let dir = tempdir().unwrap(); + let server = server::http(|_req| async move { + hyper::Response::builder() + .header( + "Content-Disposition", + r#"attachment; filename="fallback.txt"; filename*=UTF-8''%E6%B5%8B%E8%AF%95.txt"#, + ) + .body("file".into()) + .unwrap() + }); + + get_command() + .args(["--download", &server.base_url()]) + .current_dir(&dir) + .assert() + .success(); + assert_eq!( + fs::read_to_string(dir.path().join("测试.txt")).unwrap(), + "file" + ); +} + +#[test] fn download_supplied_unquoted_filename() { let dir = tempdir().unwrap(); let server = server::http(|_req| async move { @@ -158,7 +253,10 @@ .current_dir(&dir) .assert() .success(); - assert_eq!(fs::read_to_string(dir.path().join("bar")).unwrap(), "file"); + assert_eq!( + fs::read_to_string(dir.path().join("foo_baz_bar")).unwrap(), + "file" + ); } #[cfg(windows)] @@ -180,7 +278,10 @@ .current_dir(&dir) .assert() .success(); - 
assert_eq!(fs::read_to_string(dir.path().join("bar")).unwrap(), "file"); + assert_eq!( + fs::read_to_string(dir.path().join("foo_baz_bar")).unwrap(), + "file" + ); } // TODO: test implicit download filenames diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xh-0.24.0/tests/cli.rs new/xh-0.24.1/tests/cli.rs --- old/xh-0.24.0/tests/cli.rs 2025-02-18 09:15:49.000000000 +0100 +++ new/xh-0.24.1/tests/cli.rs 2025-05-02 16:20:05.000000000 +0200 @@ -92,6 +92,7 @@ cmd.env("XH_CONFIG_DIR", ""); #[cfg(target_os = "windows")] cmd.env("XH_TEST_MODE_WIN_HOME_DIR", ""); + cmd.env("RUST_BACKTRACE", "0"); cmd } @@ -1137,7 +1138,6 @@ // temporarily disabled for builds not using rustls #[cfg(all(feature = "online-tests", feature = "rustls"))] -#[ignore = "endpoint is randomly timing out"] #[test] fn verify_default_yes() { use predicates::boolean::PredicateBooleanExt; @@ -1153,7 +1153,6 @@ // temporarily disabled for builds not using rustls #[cfg(all(feature = "online-tests", feature = "rustls"))] -#[ignore = "endpoint is randomly timing out"] #[test] fn verify_explicit_yes() { use predicates::boolean::PredicateBooleanExt; @@ -1168,7 +1167,6 @@ } #[cfg(feature = "online-tests")] -#[ignore = "endpoint is randomly timing out"] #[test] fn verify_no() { get_command() @@ -1180,7 +1178,6 @@ } #[cfg(all(feature = "rustls", feature = "online-tests"))] -#[ignore = "endpoint is randomly timing out"] #[test] fn verify_valid_file() { get_command() @@ -1196,7 +1193,6 @@ // This test may fail if https://github.com/seanmonstar/reqwest/issues/1260 is fixed // If that happens make sure to remove the warning, not just this test #[cfg(all(feature = "native-tls", feature = "online-tests"))] -#[ignore = "endpoint is randomly timing out"] #[test] fn verify_valid_file_native_tls() { get_command() @@ -1217,6 +1213,16 @@ .stderr(predicates::str::is_empty()); } +#[cfg(all(feature = "rustls", feature = "online-tests"))] +#[test] +fn formatted_certificate_expired_message() { + get_command() + .arg("https://expired.badssl.com") + .assert() + .failure() + .stderr(contains("Certificate not valid after 2015-04-12")); +} + #[test] fn override_dns_resolution() { let server = server::http(|req| async move { @@ -3565,6 +3571,59 @@ Content-Length: 100 Date: N/A + + "#}); +} + +/// Regression test: this used to crash because ZstdDecoder::new() is fallible +#[test] +fn empty_zstd_response_with_content_encoding_and_content_length() { + let server = server::http(|_req| async move { + hyper::Response::builder() + .header("date", "N/A") + .header("content-encoding", "zstd") + .header("content-length", "100") + .body("".into()) + .unwrap() + }); + + get_command() + .arg("head") + .arg(server.base_url()) + .assert() + .stdout(indoc! {r#" + HTTP/1.1 200 OK + Content-Encoding: zstd + Content-Length: 100 + Date: N/A + + + "#}); +} + +/// After an initial fix this scenario still crashed +#[test] +fn streaming_empty_zstd_response_with_content_encoding_and_content_length() { + let server = server::http(|_req| async move { + hyper::Response::builder() + .header("date", "N/A") + .header("content-encoding", "zstd") + .header("content-length", "100") + .body("".into()) + .unwrap() + }); + + get_command() + .arg("--stream") + .arg("head") + .arg(server.base_url()) + .assert() + .stdout(indoc! {r#" + HTTP/1.1 200 OK + Content-Encoding: zstd + Content-Length: 100 + Date: N/A + "#}); }
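A note for readers of the Content-Disposition change above: the new src/content_disposition.rs module implements the RFC 5987 rule that a filename* value has the shape charset'language'percent-encoded-bytes, with the bytes decoded according to the declared charset. Below is a minimal standalone sketch of that decoding step (UTF-8 only), assuming the percent-encoding crate that the patch adds as a dependency; the helper name decode_rfc5987_utf8 is illustrative and not part of the patch.

use percent_encoding::percent_decode_str;

// Decode a value like UTF-8''%E6%B5%8B%E8%AF%95.pdf as described by RFC 5987.
// This sketch handles only the UTF-8 charset; xh's actual parser also accepts
// ISO-8859-1 (decoded as Windows-1252) and falls back to UTF-8 for unknown charsets.
fn decode_rfc5987_utf8(value: &str) -> Option<String> {
    let mut parts = value.splitn(3, '\'');
    let charset = parts.next()?;
    let _language = parts.next()?; // often empty, e.g. UTF-8''...
    let encoded = parts.next()?;
    let bytes: Vec<u8> = percent_decode_str(encoded).collect();
    if charset.eq_ignore_ascii_case("UTF-8") {
        String::from_utf8(bytes).ok()
    } else {
        None
    }
}

fn main() {
    // Same header value as in the new download tests
    assert_eq!(
        decode_rfc5987_utf8("UTF-8''%E6%B5%8B%E8%AF%95.pdf").as_deref(),
        Some("测试.pdf")
    );
}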
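The new humantime dependency, for its part, only serves the improved rustls error messages: error_reporting.rs converts the certificate's not-before/not-after timestamps into a duration and prints it in readable form. A rough sketch of that formatting call, with a made-up duration rather than a real certificate:

use std::time::Duration;

fn main() {
    // Pretend the certificate expired roughly three days ago.
    let expired_for = Duration::from_secs(3 * 24 * 60 * 60 + 300);
    // humantime::format_duration renders a std::time::Duration in a
    // human-readable form instead of raw seconds.
    println!(
        "Certificate not valid after <not_after> ({} ago).",
        humantime::format_duration(expired_for)
    );
}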