Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package mandown for openSUSE:Factory checked in at 2026-04-21 12:44:09 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/mandown (Old) and /work/SRC/openSUSE:Factory/.mandown.new.11940 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "mandown" Tue Apr 21 12:44:09 2026 rev:4 rq:1348330 version:1.1.0 Changes: -------- --- /work/SRC/openSUSE:Factory/mandown/mandown.changes 2024-10-21 16:25:33.383335113 +0200 +++ /work/SRC/openSUSE:Factory/.mandown.new.11940/mandown.changes 2026-04-21 12:46:37.506915175 +0200 @@ -1,0 +2,9 @@ +Mon Apr 20 21:31:04 UTC 2026 - Dirk Müller <[email protected]> + +- udpate to 1.1.0: + * Strip <style> + * Improve ASCII fallback + * Improved link handling + * Fudge multi-line HTML + +------------------------------------------------------------------- Old: ---- mandown-0.1.3.tar.zst New: ---- mandown-1.1.0.tar.zst ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ mandown.spec ++++++ --- /var/tmp/diff_new_pack.1zLg96/_old 2026-04-21 12:46:38.122940728 +0200 +++ /var/tmp/diff_new_pack.1zLg96/_new 2026-04-21 12:46:38.122940728 +0200 @@ -1,7 +1,7 @@ # # spec file for package mandown # -# Copyright (c) 2024 SUSE LLC +# Copyright (c) 2026 SUSE LLC and contributors # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -17,7 +17,7 @@ Name: mandown -Version: 0.1.3 +Version: 1.1.0 Release: 0 Summary: A man page generator for markdown markup files License: Apache-2.0 ++++++ _service ++++++ --- /var/tmp/diff_new_pack.1zLg96/_old 2026-04-21 12:46:38.158942221 +0200 +++ /var/tmp/diff_new_pack.1zLg96/_new 2026-04-21 12:46:38.162942387 +0200 @@ -1,17 +1,17 @@ <services> <service name="tar_scm" mode="manual"> - <param name="url">https://gitlab.com/kornelski/mandown</param> + <param name="url">https://gitlab.com/kornelski/mandown.git</param> <param name="scm">git</param> <param name="exclude">.git</param> - <param name="revision">0.1.3</param> + <param name="revision">v1.1.0</param> <param name="versionformat">@PARENT_TAG@</param> - <param name="versionrewrite-pattern">(.*)</param> + <param name="versionrewrite-pattern">v(.*)</param> </service> <service name="set_version" mode="manual"> <param name="basename">mandown</param> </service> <service name="recomprizz" mode="manual"> - <param name="target">mandown-0.1.3.tar</param> + <param name="target">mandown-1.1.0.tar</param> </service> <service name="cargo_vendor" mode="manual"> <param name="srcdir">mandown</param> ++++++ mandown-0.1.3.tar.zst -> mandown-1.1.0.tar.zst ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mandown-0.1.3/.gitignore new/mandown-1.1.0/.gitignore --- old/mandown-0.1.3/.gitignore 2024-10-20 08:07:48.000000000 +0200 +++ new/mandown-1.1.0/.gitignore 1970-01-01 01:00:00.000000000 +0100 @@ -1 +0,0 @@ -/target diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mandown-0.1.3/Cargo.lock new/mandown-1.1.0/Cargo.lock --- old/mandown-0.1.3/Cargo.lock 1970-01-01 01:00:00.000000000 +0100 +++ new/mandown-1.1.0/Cargo.lock 2026-04-20 23:29:30.000000000 +0200 @@ -0,0 +1,69 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "bitflags" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" + +[[package]] +name = "deunicode" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc55fe0d1f6c107595572ec8b107c0999bb1a2e0b75e37429a4fb0d6474a0e7d" + +[[package]] +name = "getopts" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5" +dependencies = [ + "unicode-width", +] + +[[package]] +name = "mandown" +version = "1.1.0" +dependencies = [ + "deunicode", + "pulldown-cmark", +] + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "pulldown-cmark" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e8bbe1a966bd2f362681a44f6edce3c2310ac21e4d5067a6e7ec396297a6ea0" +dependencies = [ + "bitflags", + "getopts", + "memchr", + "pulldown-cmark-escape", + "unicase", +] + +[[package]] +name = "pulldown-cmark-escape" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "007d8adb5ddab6f8e3f491ac63566a7d5002cc7ed73901f72057943fa71ae1ae" + +[[package]] +name = "unicase" +version = "2.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539" + +[[package]] +name = "unicode-width" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mandown-0.1.3/Cargo.toml new/mandown-1.1.0/Cargo.toml --- old/mandown-0.1.3/Cargo.toml 2024-10-20 08:07:48.000000000 +0200 +++ new/mandown-1.1.0/Cargo.toml 2026-04-20 23:29:30.000000000 +0200 @@ -1,20 +1,21 @@ [package] name = "mandown" -version = "0.1.3" +version = "1.1.0" authors = ["Kornel <[email protected]>"] -edition = "2018" +edition = "2021" description = "Markdown to groff (man page) converter" license = "Apache-2.0 OR MIT" include = ["/Cargo.toml", "README.md", "/src/*.rs"] readme = "README.md" homepage = "https://lib.rs/mandown" -keywords = ["markdown", "manpages", "roff", "troff", "groff"] +keywords = ["markdown", "manpage", "roff", "troff", "groff"] categories = ["text-processing", "command-line-utilities"] repository = "https://gitlab.com/kornelski/mandown.git" +rust-version = "1.74" [dependencies] -pulldown-cmark = "0.9.0" -deunicode = "1.3.1" +pulldown-cmark = "0.13" +deunicode = "1.6.1" [profile.release] lto = true @@ -24,3 +25,4 @@ [package.metadata.docs.rs] targets = ["x86_64-unknown-linux-gnu"] +rustdoc-args = ["--generate-link-to-definition"] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mandown-0.1.3/README.md new/mandown-1.1.0/README.md --- old/mandown-0.1.3/README.md 2024-10-20 08:07:48.000000000 +0200 +++ new/mandown-1.1.0/README.md 2026-04-20 23:29:30.000000000 +0200 @@ -1,16 +1,16 @@ # Convert Markdown to man pages -This can be used [as a Rust library](https://docs.rs/mandown) or as a command-line executable. +This can be used [as a Rust library](https://docs.rs/mandown), or as a command-line executable. -## Installation +## CLI installation -* Install [Rust 1.42 or later](https://rustup.rs/) +* Install [Rust 1.74 or later](https://rustup.rs/) * Run `cargo install mandown` -## Usage +## CLI usage -The `mandown` command takes a path as an argument, and prints manpage to stdout. +The `mandown` command takes a path as an argument, and prints the manpage to stdout. ```sh mandown README.md > converted.1 @@ -23,4 +23,3 @@ cat README.md | mandown - MYPROGRAM 1 > converted.1 man ./converted.1 ``` - diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mandown-0.1.3/src/lib.rs new/mandown-1.1.0/src/lib.rs --- old/mandown-0.1.3/src/lib.rs 2024-10-20 08:07:48.000000000 +0200 +++ new/mandown-1.1.0/src/lib.rs 2026-04-20 23:29:30.000000000 +0200 @@ -1,3 +1,5 @@ +use pulldown_cmark::CowStr; + /// Converts a markdown string to a groff/troff string. /// /// * `title` is the name of the program. It's typically all-uppercase. @@ -6,148 +8,304 @@ /// /// The conversion is very rough. HTML fragments are merely stripped from tags. /// GitHub tables extension is not supported. +#[must_use] pub fn convert(markdown_markup: &str, title: &str, section: u8) -> String { - use pulldown_cmark::{Parser, Options}; use pulldown_cmark::Event::*; use pulldown_cmark::Tag::*; + use pulldown_cmark::{Options, Parser, TagEnd, LinkType, BlockQuoteKind}; let mut options = Options::empty(); options.insert(Options::ENABLE_STRIKETHROUGH); + options.insert(Options::ENABLE_TABLES); + options.insert(Options::ENABLE_GFM); + options.insert(Options::ENABLE_DEFINITION_LIST); + options.insert(Options::ENABLE_FOOTNOTES); let parser = Parser::new_ext(markdown_markup, options); - let mut out = Rough { out: String::new(), in_quotes: false }; + let mut out = Rough { + out: String::new(), + in_quotes: false, + unclosed_table_cell: false, + bold_level: 0, + italic_level: 0, + }; out.title(title, section); - let mut links = Vec::new(); - let mut images = Vec::new(); + let mut links = Links { + regular: Vec::new(), + deferred: Vec::new(), + }; let mut min_header_level = 999; - let mut link_ref_num = 1; + let mut last_header_level = 999; + let mut link_ref_num = 0; let mut list_item_num = None; let mut in_list = false; + let mut in_footnote = false; let mut first_para_in_list = false; - fn flush_links(out: &mut Rough, links: &mut Vec<(String, String, String)>) { - if links.is_empty() { - return; - } - - out.empty_line(); - for (n, url, title) in links.drain(..) { - out.text(&format!("{} {} {}", n, url, title)); - out.line_break(); - } - } + let mut html_state = TagStrip { + state: HtmlState::Text, + skip_content: false, + }; + let mut url_stack = Vec::new(); for event in parser { match event { - Rule => out.centered("----"), - Html(markup) => { - out.text(&strip_tags(&markup)); + Rule => { + html_state.reset(); + links.flush(&mut out, false); + out.centered("----") + }, + Html(markup) | DisplayMath(markup) => { + out.ensure_line_start(); + out.text(&html_state.strip_tags(&markup)); + }, + InlineHtml(markup) | InlineMath(markup) => { + html_state.reset(); + out.text(&html_state.strip_tags(&markup)); }, TaskListMarker(checked) => out.text(if checked {"[x]"} else {"[ ]"}), - Start(Heading(n, _, _)) => { - let n = n as u32; - flush_links(&mut out, &mut links); + Start(Heading { level, .. }) => { + let n = level as u32; + links.flush(&mut out, n < last_header_level); + last_header_level = n; if n < min_header_level { min_header_level = n; } - out.section_start(n + 1 - min_header_level); + out.section_title_start(n + 1 - min_header_level); + }, + End(TagEnd::Heading(n)) => { + out.section_title_end((n as u32) + 1 - min_header_level); + + links.flush(&mut out, false); }, - End(Heading(n, _, _)) => out.section_end((n as u32) + 1 - min_header_level), - Start(Link(..)) | Start(Image(..)) => {}, - End(Link(_, url, title)) => { - let marker = format!("[{}]", link_ref_num); - out.text(&marker); - links.push((marker, url.to_string(), title.to_string())); - link_ref_num += 1; - }, - End(Image(_, url, title)) => { - let marker = format!("[img{}]", link_ref_num); - out.text(&marker); - images.push((marker, url.to_string(), title.to_string())); - link_ref_num += 1; + Start(tag @ (Link { .. } | Image { .. })) => { + let is_image = matches!(tag, Image { .. }); + let (Link { dest_url, link_type, mut title, mut id } | Image { dest_url, link_type, mut title, mut id }) = tag else { + break; + }; + let to_stack = match link_type { + LinkType::Autolink | + LinkType::Email => None, + _ => { + let shortcut_reference = matches!(link_type, LinkType::Shortcut | LinkType::ShortcutUnknown); + if shortcut_reference { + out.text("["); + } + let defer = is_image || shortcut_reference || matches!(link_type, LinkType::Reference | LinkType::ReferenceUnknown | LinkType::Collapsed | LinkType::CollapsedUnknown); + if title == dest_url { + title = "".into(); + } + if !shortcut_reference && id.len() > 5 { + id = "".into(); + } + Some((dest_url, title, id, is_image, defer, shortcut_reference)) + } + }; + url_stack.push(to_stack); }, + End(TagEnd::Link | TagEnd::Image) => { + if let Some((url, title, mut id, is_image, defer, shortcut_reference)) = url_stack.pop().flatten() { + let mut matches_existing = false; + for (old_url, old_title, old_id) in links.regular.iter().chain(&links.deferred) { + if url == *old_url && (title.is_empty() || title == *old_title) && (id.is_empty() || id == *old_id) { + matches_existing = true; + if id.is_empty() { + id = old_id.clone(); + } + } + } + let id = if !id.is_empty() { id } else { + link_ref_num += 1; + format!("{}{link_ref_num}", if is_image { "img" } else { "" }).into() + }; + if !shortcut_reference { + out.text(&format!("[{id}]")); + } else { + out.text("]"); + } + if !matches_existing { + if defer { + &mut links.deferred + } else { + &mut links.regular + }.push((url, title, id)); + } + } + }, Start(CodeBlock(_)) => out.pre_start(), - End(CodeBlock(_)) => out.pre_end(), + End(TagEnd::CodeBlock) => out.pre_end(), Start(List(num)) => { list_item_num = num; - out.indent() + out.indent(); + }, + End(TagEnd::List(_)) => { + out.outdent(); + links.flush(&mut out, false); + list_item_num = None; }, - End(List(_)) => out.outdent(), Start(Item) => { in_list = true; first_para_in_list = true; - out.list_start(list_item_num); + out.list_item_start(list_item_num); if let Some(n) = &mut list_item_num { *n += 1; } }, - End(Item) => { + End(TagEnd::Item) => { in_list = false; - out.list_end(); flush_links(&mut out, &mut links); + first_para_in_list = false; + out.list_item_end(); }, - Start(BlockQuote) => out.blockquote_start(), - End(BlockQuote) => { - flush_links(&mut out, &mut links); - out.blockquote_end() + Start(BlockQuote(kind)) => { + out.blockquote_start(); + if let Some(kind) = kind { + out.italic_start(); + out.text(match kind { + BlockQuoteKind::Note => "Note", + BlockQuoteKind::Tip => "Tip", + BlockQuoteKind::Important => "Important", + BlockQuoteKind::Warning => "Warning", + BlockQuoteKind::Caution => "Caution", + }); + out.italic_end(); + out.line_break(); + } + }, + End(TagEnd::BlockQuote(_)) => { + links.flush(&mut out, false); + out.blockquote_end(); }, Start(Paragraph) => { + html_state.reset(); if in_list { if first_para_in_list { first_para_in_list = false; } else { out.empty_line(); } - } else { - out.paragraph_start() + } else if !in_footnote { + out.paragraph_start(); } }, - End(Paragraph) => {flush_links(&mut out, &mut links); out.paragraph_end();}, + End(TagEnd::Paragraph) => { + out.paragraph_end(); + }, Start(Emphasis) => out.italic_start(), - End(Emphasis) => out.italic_end(), + End(TagEnd::Emphasis) => out.italic_end(), - Start(Strikethrough) => {out.text("~"); out.italic_start();}, - End(Strikethrough) => {out.italic_end(); out.text("~");}, + Start(Strikethrough) => { + out.text("~"); out.italic_start(); + }, + End(TagEnd::Strikethrough) => { + out.italic_end(); out.text("~"); + }, Start(Strong) => out.bold_start(), - End(Strong) => out.bold_end(), + End(TagEnd::Strong) => out.bold_end(), + + Start(Superscript) => { out.text("^"); }, + End(TagEnd::Superscript) => {}, + Start(Subscript) => { out.text("_"); }, + End(TagEnd::Subscript) => {}, HardBreak => out.line_break(), SoftBreak => out.ensure_line_start(), Code(text) => out.code(&text), Text(text) => out.text(&text), - FootnoteReference(s) | Start(FootnoteDefinition(s)) => { - out.text(&format!("[*{}]", s)); + FootnoteReference(s) => { + out.text(&format!("[^{s}]")); + } + Start(FootnoteDefinition(s)) => { + in_footnote = true; + out.empty_line(); + out.text(&format!("[^{s}]: ")); + }, + End(TagEnd::FootnoteDefinition) => { + in_footnote = false; + links.flush(&mut out, true); + out.ensure_line_start(); + }, + + Start(HtmlBlock) => out.paragraph_start(), + End(TagEnd::HtmlBlock) => out.paragraph_end(), + + Start(MetadataBlock(_)) | + End(TagEnd::MetadataBlock(_)) => out.ensure_line_start(), + + Start(Table(_)) => { + html_state.reset(); + out.table_start(); + }, + End(TagEnd::Table) => out.table_end(), + Start(TableRow) | Start(TableHead) => out.table_row_start(), + End(TagEnd::TableRow) | End(TagEnd::TableHead) => out.table_row_end(), + Start(TableCell) => out.table_cell_start(), + End(TagEnd::TableCell) => out.table_cell_end(), + + Start(DefinitionList) => { + html_state.reset(); + out.ensure_line_start(); + }, + Start(DefinitionListDefinition) => { + out.indent(); + } + End(TagEnd::DefinitionListDefinition) => { + out.outdent(); + } + End(TagEnd::DefinitionList) => { + links.flush(&mut out, false); + }, + Start(DefinitionListTitle) => out.bold_start(), + End(TagEnd::DefinitionListTitle) => { + out.bold_end(); + out.ensure_line_start(); }, - End(FootnoteDefinition(_)) => {}, - - // FIXME: total fudge - Start(Table(_)) => out.paragraph_start(), - End(Table(_)) => out.paragraph_end(), - Start(TableHead) => out.paragraph_start(), - End(TableHead) => out.paragraph_end(), - Start(TableRow) | End(TableRow) => out.line_break(), - Start(TableCell) | End(TableCell) => out.text(" | "), } } - flush_links(&mut out, &mut links); - flush_links(&mut out, &mut images); + links.flush(&mut out, true); out.out } +struct Links<'a> { + regular: Vec<(CowStr<'a>, CowStr<'a>, CowStr<'a>)>, + deferred: Vec<(CowStr<'a>, CowStr<'a>, CowStr<'a>)>, +} + +impl Links<'_> { + pub fn flush(&mut self, out: &mut Rough, flush_all: bool) { + let num_links = self.regular.len() + self.deferred.len(); + if num_links < 10 && self.regular.is_empty() && (!flush_all || self.deferred.is_empty()) { + return; + } + + out.empty_line(); + for (url, title, id) in self.deferred.drain(..).chain(self.regular.drain(..)) { + out.text(&format!("[{id}]: {url}")); + if !title.is_empty() { + out.text(&format!(" {title}")) + } + out.line_break(); + } + } +} + struct Rough { out: String, in_quotes: bool, + unclosed_table_cell: bool, + bold_level: u8, + italic_level: u8, } impl Rough { @@ -157,11 +315,11 @@ self.in_quotes = true; self.text(title); self.in_quotes = false; - self.text(&format!("\" {}", man_section)); - self.out.push_str("\n"); + self.text(&format!("\" {man_section}")); + self.out.push('\n'); } - pub fn section_start(&mut self, level: u32) { + pub fn section_title_start(&mut self, level: u32) { self.ensure_line_start(); self.in_quotes = true; // extra line needed too, otherwise headers get wrapped into prev paragraph? @@ -173,19 +331,40 @@ } } - pub fn section_end(&mut self, _level: u32) { + pub fn section_title_end(&mut self, _level: u32) { self.in_quotes = false; self.out.push_str("\"\n"); } - // pub fn table_start(&mut self) { - // self.ensure_line_start(); - // self.out.push_str(".TS\n"); - // } - // pub fn table_end(&mut self) { - // self.ensure_line_start(); - // self.out.push_str(".TE\n"); - // } + pub fn table_start(&mut self) { + self.paragraph_start(); + // self.out.push_str(".TS\n"); + } + + pub fn table_end(&mut self) { + self.paragraph_end(); + // self.out.push_str(".TE\n"); + } + + pub fn table_row_start(&mut self) { + self.ensure_line_start(); + } + + pub fn table_row_end(&mut self) { + if self.unclosed_table_cell { + self.unclosed_table_cell = false; + self.text("\t|"); + } + self.line_break() + } + + pub fn table_cell_start(&mut self) { + self.text(if self.unclosed_table_cell { "\t| " } else { "| " }); + } + + pub fn table_cell_end(&mut self) { + self.unclosed_table_cell = true; + } pub fn paragraph_start(&mut self) { self.ensure_line_start(); @@ -193,7 +372,9 @@ } pub fn paragraph_end(&mut self) { - self.out.push_str("\n"); + debug_assert_eq!(0, self.bold_level); + debug_assert_eq!(0, self.italic_level); + self.out.push('\n'); } pub fn blockquote_start(&mut self) { @@ -208,30 +389,30 @@ // self.out.push_str(".QE\n"); } - pub fn list_start(&mut self, n: Option<u64>) { + pub fn list_item_start(&mut self, n: Option<u64>) { self.ensure_line_start(); self.out.push_str(".Bl\n"); if let Some(n) = n { - self.out.push_str(&format!(".IP {}. 4\n", n)); + self.out.push_str(&format!(".IP {n}. 4\n")); } else { self.out.push_str(".IP \\(bu 4\n"); } } - pub fn list_end(&mut self) { + pub fn list_item_end(&mut self) { self.ensure_line_start(); self.out.push_str(".El\n"); } pub fn ensure_line_start(&mut self) { - if self.out.is_empty() || self.out.chars().rev().next() == Some('\n') { + if self.out.is_empty() || self.out.ends_with('\n') { return; } self.out.push('\n'); } pub fn text(&mut self, text: &str) { - let text = deunicode::deunicode(&text); + let text = deunicode::deunicode_with_tofu_cow(text, "[?]"); let text = if self.in_quotes { text.replace('"', "\"\"") } else { @@ -269,16 +450,20 @@ } pub fn italic_start(&mut self) { + self.italic_level += 1; self.out.push_str("\\fI"); } pub fn italic_end(&mut self) { + self.italic_level -= 1; self.out.push_str("\\fP"); } pub fn bold_start(&mut self) { + self.bold_level += 1; self.out.push_str("\\fB"); } pub fn bold_end(&mut self) { + self.bold_level -= 1; self.out.push_str("\\fP"); } @@ -299,34 +484,108 @@ } } -fn strip_tags(txt: &str) -> String { - let mut out = String::with_capacity(txt.len()/2); - let mut in_tag = false; - let mut maybe_in_tag = false; - let mut in_arg = None; - for ch in txt.chars() { - match ch { - '>' if in_tag && in_arg.is_none() => { - in_tag = false; - }, - '<' if !in_tag && !maybe_in_tag => { - maybe_in_tag = true; - }, - 'a'..='z' | 'A'..='Z' | '/' | '!' if maybe_in_tag => { - maybe_in_tag = false; - in_tag = true; - }, - '"' | '\'' if in_tag && in_arg.is_none() => { - in_arg = Some(ch); - }, - '"' | '\'' if in_arg == Some(ch) => { - in_arg = None; - }, - _ if in_tag => {} - ch => { - out.push(ch); +struct TagStrip { + state: HtmlState, + skip_content: bool, +} + +enum HtmlState { + Text, + Lt, + Name, + InTag, + Comment(u8), + Arg(u8), +} + +impl TagStrip { + fn reset(&mut self) { + self.state = HtmlState::Text; + self.skip_content = false; + } + + fn strip_tags(&mut self, txt: &str) -> String { + let mut out = String::with_capacity(txt.len() / 2); + let mut tag_name = String::new(); + for ch in txt.chars() { + match self.state { + HtmlState::Text => match ch { + '<' => { + self.state = HtmlState::Lt; + } + _ => if !self.skip_content { + out.push(ch); + } + }, + HtmlState::Lt => match ch { + 'a'..='z' | 'A'..='Z' => { + tag_name.clear(); + tag_name.push(ch.to_ascii_lowercase()); + self.state = HtmlState::Name; + }, + '/' => { + self.skip_content = false; + self.state = HtmlState::InTag; + }, + '!' => { + self.state = HtmlState::Comment(0); + }, + _ => { + self.state = HtmlState::Text; + if !self.skip_content { + out.push('<'); + out.push(ch); + } + }, + }, + HtmlState::Name => match ch { + 'a'..='z' | 'A'..='Z' => { + tag_name.push(ch.to_ascii_lowercase()); + }, + ch => { + if tag_name == "style" || tag_name == "script" { + self.skip_content = true; + } + tag_name.clear(); + self.state = if ch == '>' { HtmlState::Text } else { HtmlState::InTag }; + } + } + HtmlState::InTag => match ch { + '"' | '\'' => { + self.state = HtmlState::Arg(ch as u8); + }, + '>' => { + self.state = HtmlState::Text; + }, + _ => {}, + } + + HtmlState::Comment(n) => match ch { + '-' => { + self.state = HtmlState::Comment((n + 1).min(2)); + }, + '>' if n >= 2 => { + self.state = HtmlState::Text; + }, + _ => {}, + }, + HtmlState::Arg(q) => match ch { + '"' | '\'' if (ch as u8) == q => { + self.state = HtmlState::InTag; + }, + _ => {}, + } } } + out } - out +} + +#[test] +fn tags() { + let mut t = TagStrip { + skip_content: false, + state: HtmlState::Text, + }; + assert_eq!("hi c X aaa 1 <> 2 end", t.strip_tags("<x>hi</x> <!-- com -->c<!--> X<x/> <a href=''>aaa</a> 1 <> 2 <script> garbage <!--> </script>end")); } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mandown-0.1.3/src/main.rs new/mandown-1.1.0/src/main.rs --- old/mandown-0.1.3/src/main.rs 2024-10-20 08:07:48.000000000 +0200 +++ new/mandown-1.1.0/src/main.rs 2026-04-20 23:29:30.000000000 +0200 @@ -11,18 +11,16 @@ let section = args.next(); let path_tmp; - let (markdown, title) = match source { - Some(ref path) if !path.starts_with('-') => { - let path: &Path = path.as_ref(); + let (markdown, title) = match &source { + Some(path) if !path.starts_with('-') => { + let path = Path::new(path); let title = match title.as_deref() { - None => { - match path.file_stem().and_then(|f| f.to_str()) { - Some(n) if n == "README" => { - path_tmp = path.canonicalize()?; - path_tmp.parent().and_then(|p| p.file_name()).and_then(|f| f.to_str()) - }, - x => x, - } + None => match path.file_stem().and_then(|f| f.to_str()) { + Some("README") => { + path_tmp = path.canonicalize()?; + path_tmp.parent().and_then(|p| p.file_name()?.to_str()) + }, + x => x, }, x => x, }; @@ -35,25 +33,27 @@ (s, None) }, _ => { - println!("Usage: {} path-to-markdown.md [title] [manpage section]\n", prog); - println!("e.g. {} README.md MYCOOLPROGRAM 1 > out.1 && man ./out.1", prog); + println!("Usage: {prog} path-to-markdown.md [title] [manpage section]\n"); + println!("e.g. {prog} README.md MYCOOLPROGRAM 1 > out.1 && man ./out.1"); println!("The path can be \"-\" to read from stdin."); return Ok(()); }, }; let section = match section { - Some(num) => num.parse().map_err(|e| format!("The section argument must be a number: {}", e))?, + Some(num) => num.parse().map_err(|e| format!("The section argument must be a number: {e}"))?, None => 1, }; - std::io::stdout().write_all(mandown::convert(&markdown, title.unwrap_or(""), section).as_bytes())?; + std::io::stdout().write_all( + mandown::convert(&markdown, title.unwrap_or(""), section).as_bytes(), + )?; Ok(()) } fn main() { if let Err(e) = run() { - eprintln!("{}", e); + eprintln!("{e}"); std::process::exit(1); } } ++++++ vendor.tar.zst ++++++ ++++ 182173 lines of diff (skipped)
