Module Name: othersrc Committed By: joerg Date: Thu Apr 30 00:28:59 UTC 2009
Added Files: othersrc/usr.bin/pod2mdoc: pod2mdoc.py Log Message: Add a Python script to convert POD markup to mdoc markup. It tries to do something sane e.g. by detecting arguments etc, but post-processing is still required. To generate a diff of this commit: cvs rdiff -u -r0 -r1.1 othersrc/usr.bin/pod2mdoc/pod2mdoc.py Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
#!/usr/pkg/bin/python2.5
# Added files: Index: othersrc/usr.bin/pod2mdoc/pod2mdoc.py
# (diff -u /dev/null othersrc/usr.bin/pod2mdoc/pod2mdoc.py:1.1,
# Thu Apr 30 00:28:58 2009, @@ -0,0 +1,606 @@)
"""Convert POD markup to mdoc markup.

The input is parsed into a small tree (Document, Section, List, Paragraph,
Display) and every node renders itself as a list of mdoc lines.  The
converter tries to detect options, arguments, file names and cross
references, but the result still needs manual post-processing.

Command line options:
    -c name     command name; used for .Dt and rendered as .Nm in the text
    -C name     additional name that is rendered as .Nm in the text
    -i file     POD input file
    -o file     output file (standard output when omitted)
    -d date     value for .Dd (defaults to today's date)
    -s section  manual section for .Dt (defaults to 1)
    -S          also treat lines starting with a space as literal display
"""
import datetime
import getopt
import re
import sys

# Punctuation that is passed as a trailing / leading macro argument instead
# of being left on a text line of its own.
nroff_post_punctuation = (".", ",", ":", ";", ")", "]", "?", "!")
nroff_pre_punctuation = ("(", "[")


class Node(object):
    """Base class of all tree nodes.

    Subclasses provide output(), which returns the node as a list of lines.
    """

    def __str__(self):
        return "\n".join(self.output())


def _flag_name(name):
    """Return *name* as an .Fl argument; "Os" is escaped so that it is not
    taken for the .Os macro."""
    if name == "Os":
        return "\\&Os"
    return name


def _flag_words(flags):
    """Turn ["-a", "-b"] into the arguments following an initial .Fl:
    ["a", "Fl", "b"]."""
    words = []
    for n, flag in enumerate(flags):
        if n:
            words.append("Fl")
        words.append(_flag_name(flag[1:]))
    return words


def convert_bracket(line):
    """Replace stand-alone "[" / "]" words of a macro line by Oo / Oc.

    A macro line that is bracketed as a whole is collapsed into .Op.
    Lines that do not start with "." (including the empty line) are
    returned unchanged.
    """
    # startswith() instead of line[0]: an empty line must not raise.
    if not line.startswith("."):
        return line

    args = line.split()
    output = []
    if len(args) > 1:
        if args[1] == "[":
            # ".Fl [ ..." becomes ".Oo Fl ..."
            output.append(".Oo")
            output.append(args[0][1:])
        else:
            output.append(args[0])
            if args[1] == "]":
                output.append("Oc")
            else:
                output.append(args[1])
        for a in args[2:]:
            if a == "[":
                output.append("Oo")
            elif a == "]":
                output.append("Oc")
            else:
                output.append(a)
    else:
        output.append(args[0])
    if (output[0] == ".Oo" and output[-1] == "Oc"
            and "Oo" not in output[1:-1] and "Oc" not in output[1:-1]):
        # A single enclosing pair: use the one-line form.
        output[0] = ".Op"
        output.pop()
    return " ".join(output)


def apply_expand(lines, f):
    """Apply *f* (line -> list of lines) to every line and concatenate."""
    output = []
    for l in lines:
        output += f(l)
    return output


def expand_generic(line, regex, macro, sub):
    """Split *line* at every match of *regex* and emit a macro line for it.

    line  -- text to process
    regex -- pattern searched with re.search()
    macro -- macro name (with leading dot) starting each generated line
    sub   -- callable taking the match object, returning the macro arguments

    Opening punctuation directly before a match and closing punctuation
    directly after it are moved onto the macro line.  Returns the list of
    resulting lines; text without any match is returned as [line].
    """
    output = []
    while True:
        m = re.search(regex, line)
        if m is None:
            break
        pre = line[:m.start()].rstrip()
        post = line[m.end():].lstrip()
        args = [macro]
        while pre and pre[-1] in nroff_pre_punctuation:
            args.append(pre[-1])
            pre = pre[:-1].rstrip()
        if pre:
            output.append(pre)
        args += sub(m)
        while post and post[0] in nroff_post_punctuation:
            args.append(post[0])
            post = post[1:].lstrip()
        output.append(" ".join(args))
        line = post
    if line:
        output.append(line)
    return output


def expand_static_tag(line, marker, macro, replacement):
    """Replace every literal *marker* by "macro replacement"."""
    return expand_generic(line, re.escape(marker), macro,
                          lambda m: [replacement])


def expand_url(line):
    """Turn <B<http://...>> / <B<ftp://...>> (with the angle brackets already
    written as \\*[Lt] and \\*[Gt]) into .Lk."""
    # 8 == len("\*[Lt]B<"), 7 == len(">\*[Gt]")
    return expand_generic(line, r"\\\*\[Lt\]B<(http|ftp)://.*?>\\\*\[Gt\]",
                          ".Lk", lambda m: [m.group(0)[8:-7].strip()])


def expand_crossref(line):
    """Turn L<name(N)|name(N)> into ".Xr name N"."""
    return expand_generic(line,
                          r"L<(([-a-zA-Z0-9_\.]+)\(([0-9]+)\))\|\1>",
                          ".Xr",
                          lambda m: [m.group(2).strip(), m.group(3)])


def expand_code(line):
    """Turn C<text> into ".Va text"."""
    return expand_generic(line, r"C<([^<]*)>",
                          ".Va", lambda m: [m.group(1).strip()])


def expand_code2(line):
    """Handle the special case C<...I<file>...>."""
    return expand_generic(
        line, r"C<([^<]*)I<file>([^<]*)>", ".Va",
        lambda m: [".Sy %sfile%s" % (m.group(1).strip(), m.group(2).strip())])


def expand_options_fixed(line):
    """Handle B<-opt>[B<a>|B<b>|...]: an option with a bracketed list of
    fixed values."""
    def split_options(m):
        output = [_flag_name(m.group(1)), "Ns", "Oo", "Ns", "Ar", m.group(2)]
        if m.group(3):
            # group(3) is "|B<b>|B<c>..."; the first split element is empty.
            for a in m.group(3).split("|")[1:]:
                output += ["Ns", "|", "Ns", "Ar", a[2:-1]]
        output.append("Ns")
        output.append("Oc")
        return output

    # The alternatives are captured by ONE group wrapped around the
    # repetition.  A repeated capturing group would keep only its last
    # repetition and silently drop the alternatives in the middle.
    return expand_generic(
        line,
        r"B<-([-a-zA-Z0-9=#\+,]+)>\[B<([^<>]*)>((?:\|B<[^<>]*>)*)\]",
        ".Fl", split_options)


def expand_options_optional(line):
    """Handle B<-opt ...>[B<->I<arg>]."""
    def split_options(m):
        output = _flag_words(m.group(1)[2:-1].split())
        # The "-" from inside the brackets is glued to the last flag.
        output[-1] = output[-1] + "-"
        output.append("Ns")
        output.append("Ar")
        output.append(m.group(3))
        return output

    return expand_generic(
        line,
        r"(B<-[-a-zA-Z0-9=#\+,]+( -[-a-zA-Z0-9=#\+,]+)*>)\[B<->I<([^<>]*)>\]",
        ".Fl", split_options)


def expand_options_optional2(line):
    """Handle B<-opt[=>I<arg>B<]...>: an optional "=arg" inside a bold run.

    Works like expand_generic(), except that the match ends in the middle
    of a B<...> tag, so the tag is reopened for the remaining text.
    """
    def split_options(m):
        output = _flag_words(m.group(1)[2:-3].split())
        output += ["Ns", "Oo", "=", "Ns", "Ar", m.group(3), "Oc"]
        return output

    regex = r"(B<-[-a-zA-Z0-9=#\+,]+( -[-a-zA-Z0-9=#\+,]+)*\[=>)I<([^<>]*)>B<\]"
    macro = ".Fl"

    output = []
    while True:
        m = re.search(regex, line)
        if m is None:
            break
        pre = line[:m.start()].rstrip()
        args = [macro]
        while pre and pre[-1] in nroff_pre_punctuation:
            args.append(pre[-1])
            pre = pre[:-1].rstrip()
        if pre:
            output.append(pre)
        args += split_options(m)
        output.append(" ".join(args))
        # The rest always starts with the reopened "B<", so there is never
        # trailing punctuation to move onto the macro line here.
        line = "B<" + line[m.end():].lstrip()
    if line:
        output.append(line)
    return output


def expand_options(line):
    """Handle the general option forms: B<-opt ...> optionally followed by
    I<arg>, [=I<arg>] / [B<=>I<arg>] and B<=>I<arg>."""
    def split_options(m):
        output = []
        first = True
        for a in m.group(1)[2:-1].split():
            if a[0] != "-":
                output.append("Ar")
                output.append(a)
                continue
            # The first flag directly follows the leading .Fl macro.
            if not first:
                output.append("Fl")
            else:
                first = False
            output.append(_flag_name(a[1:]))
        if m.group(4):
            # I<arg> follows; without a separating space it is glued on.
            if not m.group(5):
                output.append("Ns")
            for a in m.group(7).split():
                output.append("Ar")
                output.append(a)
            if m.group(8):
                output[-1] = output[-1] + "..."
        if m.group(9):
            output += ["Ns", "=", "Ns"]
            for a in m.group(11).split():
                output.append("Ar")
                output.append(a)
            if m.group(12):
                output[-1] = output[-1] + "..."
        if m.group(13):
            output += ["Ns", "=", "Ns", "Ar", m.group(14)]
        return output

    return expand_generic(
        line,
        r"(B<-[-a-zA-Z0-9=#\+,_]+?(\.\.\.)?( [-a-zA-Z0-9=#\+,_]+)*>)"
        r"(( ?)(I<([^<>]*)>(\.\.\.)?))?"
        r"(\[(B<=>|=)I<([^<>]*)>\](\.\.\.)?)?"
        r"(B<=>I<([^<>]*)>)?",
        ".Fl", split_options)


def expand_filename(line):
    """Turn F<name> into ".Pa name"; I<> and B<> inside the name are
    stripped."""
    def expand_file(m):
        rest = m.group(1)
        parts = []
        while True:
            inner = re.search(r"[BI]<([^<>]*)>", rest)
            if inner is None:
                break
            parts.append(rest[:inner.start()])
            parts.append(inner.group(1))
            rest = rest[inner.end():]
        parts.append(rest)
        return ["".join(parts)]

    # Plain text is matched one character at a time.  A starred "[^<>]*"
    # alternative inside the outer "*" accepts the same strings but
    # backtracks exponentially when the closing ">" is missing.
    return expand_generic(line,
                          r"F<((?:I<[^<>]*>|B<[^<>]*>|[^<>])*)>",
                          ".Pa", expand_file)


def expand_remaining_italic(line):
    """Turn any I<text> that is left into ".Em text"."""
    return expand_generic(line, r"I<([^<>]*)>", ".Em",
                          lambda m: [m.group(1)])


def expand_remaining_bold(line):
    """Turn any B<text> that is left into ".Sy text"."""
    return expand_generic(line, r"B<([^<>]*)>", ".Sy",
                          lambda m: [m.group(1)])


def macro_options(line):
    """Convert the leading option descriptions of a list item label into
    Fl / Ar arguments; returns a single string."""
    output = []
    while True:
        m = re.match(r"B<-([-a-zA-Z0-9=#\+,_]+)( ([- a-zA-Z0-9+]*))?>( +)?"
                     r"(I<([^<>]*)>(B<(=|-)>I<([^<>]*)>)?)?(B< +>)?", line)
        if m is None:
            break

        output.append("Fl")
        output.append(_flag_name(m.group(1)))
        if m.group(2):
            # Further words inside the same B<>: flags or plain arguments.
            for a in m.group(3).split():
                if a[0] == "-":
                    output.append("Fl")
                    output.append(_flag_name(a[1:]))
                else:
                    output.append("Ar")
                    output.append(a)
        if m.group(5):
            if not m.group(4):
                output.append("Ns")
            output.append("Ar")
            output.append(m.group(6).strip())
            if m.group(7):
                output.append("Ns")
                output.append(m.group(8))
                output.append("Ns")
                output.append("Ar")
                output.append(m.group(9).strip())
        line = line[m.end():].lstrip()
    if line:
        output.append(line)
    return " ".join(output)


def macro_bold_italic(line):
    """Convert I<>, B<> and C<> in a list item label into Em / Sy arguments;
    returns a single string."""
    output = []
    while True:
        m = re.search(r"[IBC]<([^<>]*)>", line)
        if m is None:
            break

        if line[:m.start()]:
            output.append(line[:m.start()])
        if m.group(0)[0] == "I":
            output.append("Em")
        else:
            output.append("Sy")
        output.append(m.group(1))
        output.append("Ns")
        line = line[m.end():].lstrip()
    if line:
        output.append(line)

    return " ".join(output)


def expand_S(string):
    """Replace S< > and B< > by \\~ and strip all other S<...> wrappers,
    keeping their content.

    Raises SyntaxError if the <> of an S<...> are not balanced.
    """
    string = string.replace("S< >", "\\~")
    string = string.replace("B< >", "\\~")
    while True:
        if "S<" not in string:
            return string
        pre, tag, post = string.partition("S<")
        # Find the ">" closing this S<, skipping over nested tags.
        open_tags = 1
        i = 0
        while i < len(post):
            if post[i] == '<':
                open_tags += 1
            elif post[i] == '>':
                open_tags -= 1
                if not open_tags:
                    break
            i += 1
        if open_tags:
            raise SyntaxError("Unbalanced <>")
        string = pre + post[:i] + post[i + 1:]


class Document(Node):
    """Root node; renders the mdoc prologue followed by all children."""

    def __init__(self):
        self._children = []

    def append(self, c):
        self._children.append(c)

    def output(self):
        """Return the complete document as a list of lines.

        Reads the module level settings mdoc_date, mdoc_command and
        mdoc_section.
        """
        output = []
        output.append(".\\\" $NetBSD: pod2mdoc.py,v 1.1 2009/04/30 00:28:58 joerg Exp $")
        output.append(".Dd %s" % mdoc_date)
        output.append(".Dt %s %s" % (mdoc_command.upper(), mdoc_section))
        output.append(".Os")
        # Assumes that Section level=3 doesn't happen
        # without Section level < 3 before it.
        for c in self._children:
            output += c.output()
        if output[-1] == ".Pp":
            output.pop()
        # Empty last element: the joined text ends with a newline.
        output.append("")
        output2 = []
        for l in output:
            if l.startswith("."):
                # Escape words that start with a double quote on macro lines.
                output2.append(l.replace(' "', ' \\&"'))
            else:
                output2.append(l)
        return output2


class Section(Node):
    """=head1 / =head2 / =head3 with the nodes that follow it."""

    def __init__(self, level, title):
        self.level = level
        self.title = expand_S(title)
        self._children = []

    def append(self, c):
        self._children.append(c)

    def output(self):
        """Render as .Sh (level 1), .Ss (level 2) or a tagged list item
        (level 3).  Consecutive level 3 children are wrapped into one
        .Bl/.El list by their parent section."""
        output = []
        if self.level == 1:
            output.append(".Sh %s" % self.title)
        elif self.level == 2:
            output.append(".Ss %s" % self.title)
        elif self.level == 3:
            output.append(".It Sy %s" % self.title)
        in_sect3 = False
        for c in self._children:
            is_sect3 = isinstance(c, Section) and c.level == 3
            if in_sect3 and not is_sect3:
                output.append(".El")
                in_sect3 = False
            elif not in_sect3 and is_sect3:
                output.append(".Bl -tag -width xx")
                in_sect3 = True
            output += c.output()
        if output[-1] == ".Pp":
            output.pop()
        if in_sect3:
            output.append(".El")
        return output


class Display(Node):
    """Verbatim block, rendered as a literal display."""

    def __init__(self, lines):
        lines = [expand_S(l).replace("<", "\\*[Lt]").replace(">", "\\*[Gt]")
                 for l in lines]
        self.lines = []
        for l in lines:
            if l.startswith("."):
                # Keep text lines from being read as macro lines.
                self.lines.append("\\&" + l)
            else:
                self.lines.append(l)
        # Drop trailing empty lines.
        while self.lines and not self.lines[-1]:
            self.lines.pop()

    def output(self):
        return [".Bd -literal -offset indent"] + self.lines + [".Ed"]


class Paragraph(Node):
    """Ordinary text paragraph.

    The text is split into sentences and every known POD construct is
    converted to a macro line by the expand_* functions.  Reads the module
    level list command_names.
    """

    def __init__(self, lines):
        line = " ".join(lines)
        if ". " in line:
            # One sentence per line.  Every piece gets back the "." removed
            # by split(); the last piece never lost one.
            lines = [l.strip() + "." for l in line.split(". ")]
            lines[-1] = lines[-1][:-1]
        else:
            lines = [line]

        def kill_space(x):
            # Collapse runs of spaces into a single space.
            while "  " in x:
                x = x.replace("  ", " ")
            return x

        lines = [expand_S(x) for x in lines]
        lines = [kill_space(x) for x in lines]
        for name in command_names:
            lines = apply_expand(
                lines,
                lambda l, name=name: expand_static_tag(l, "B<%s>" % name,
                                                       ".Nm", name))
        # The order matters: specific constructs first, generic ones last.
        for expander in (expand_url,
                         expand_crossref,
                         expand_code,
                         expand_code2,
                         expand_options_fixed,
                         expand_options_optional,
                         expand_options_optional2,
                         expand_options,
                         expand_filename,
                         expand_remaining_italic,
                         expand_remaining_bold):
            lines = apply_expand(lines, expander)
        lines = [convert_bracket(x) for x in lines]

        self.lines = lines

    def output(self):
        return self.lines + [".Pp"]


class List(Node):
    """=over ... =back list.

    _children holds (label, nodes) pairs, one per =item.  _type is "*" or
    "-" for bullet / dash lists and None for tagged lists; it is determined
    by finalize().
    """

    def __init__(self):
        self._children = []
        self._type = None

    def appendItem(self, c):
        """Start a new item with the label *c*."""
        self._children.append((expand_S(c), []))

    def append(self, c):
        """Add the node *c* to the body of the current item.

        Raises SyntaxError if no item has been started yet.
        """
        if not self._children:
            raise SyntaxError("Text in list before first =item")
        self._children[-1][1].append(c)

    def finalize(self):
        """Determine the list type and convert the item labels."""
        for i, l in self._children:
            if not i:
                break
            if self._type and i[:1] != self._type:
                # Mixed markers: fall back to a tagged list.
                self._type = None
                break
            if i[:1] not in ("*", "-"):
                break
            self._type = i[0]
        else:
            # Every label starts with the same marker (or there are no
            # items): bullet / dash list, the labels are not printed.
            return
        for i, l in self._children:
            if i[:2] != "I<" or i[-1] != ">":
                break
            if "<" in i[2:-1] or ">" in i[2:-1]:
                break
        else:
            # Every label is a plain I<...>.
            self._children = [("Sy %s" % i[2:-1], l)
                              for (i, l) in self._children]
            return

        self._children = [
            (convert_bracket(macro_bold_italic(macro_options(i))), l)
            for (i, l) in self._children]

    def output(self):
        output = []
        if self._type == "*":
            output.append(".Bl -bullet")
        elif self._type == "-":
            output.append(".Bl -dash")
        else:
            output.append(".Bl -tag -width xx")
        for i, l in self._children:
            if self._type:
                output.append(".It")
            else:
                output.append(".It %s" % i)
            for c in l:
                output += c.output()
        output.append(".El")
        return output


def convert_to_tree(data):
    """Parse the POD lines *data* and return the resulting Document.

    Processing stops at the first =cut.  Reads the module level flag
    mdoc_display_space.  Raises SyntaxError for =back / =item outside of a
    list, for a heading inside of a list and for unknown commands.
    """
    in_display = False
    in_paragraph = False
    pending = []            # lines of the paragraph / display being read

    # Stack of open containers; new nodes are added to the innermost one.
    document = [Document()]

    def append(c):
        document[-1].append(c)

    for line in data:
        line = line.rstrip()
        if not line or line.isspace():
            # An empty line ends a paragraph but is part of a display.
            if in_paragraph:
                append(Paragraph(pending))
                in_paragraph = False
                pending = []
            if in_display:
                pending.append(line)
        elif line[0] == "\t" or (mdoc_display_space and line[0] == " "):
            if in_paragraph:
                append(Paragraph(pending))
                in_paragraph = False
                pending = []
            in_display = True
            pending.append(line[1:])
        elif line[0] == '=':
            if in_display:
                append(Display(pending))
                pending = []
                in_display = False
            if in_paragraph:
                append(Paragraph(pending))
                pending = []
                in_paragraph = False
            if line.startswith("=pod"):
                continue
            if line.startswith("=cut"):
                break
            if line.startswith(("=head1", "=head2", "=head3")):
                level = int(line[5])
                # Close all sections of the same or a deeper level.
                while not isinstance(document[-1], Document):
                    if not isinstance(document[-1], Section):
                        raise SyntaxError("Bad nesting")
                    if document[-1].level >= level:
                        document.pop()
                    else:
                        break
                c = Section(level, line[6:].strip())
                append(c)
                document.append(c)
            elif line.startswith("=over"):
                c = List()
                append(c)
                document.append(c)
            elif line.startswith("=back"):
                if not isinstance(document[-1], List):
                    raise SyntaxError("Bad nesting")
                document[-1].finalize()
                document.pop()
            elif line.startswith("=item"):
                if not isinstance(document[-1], List):
                    raise SyntaxError("Bad nesting")
                document[-1].appendItem(line[6:].strip())
            else:
                raise SyntaxError("Bad meta command: %s" % line)
        else:
            if in_display:
                append(Display(pending))
                pending = []
                in_display = False
            in_paragraph = True
            pending.append(line)
    if in_display:
        append(Display(pending))
    if in_paragraph:
        append(Paragraph(pending))

    return document[0]


# Settings; main() fills them in from the command line.
command_names = []
input_name = None
# Month name, day without padding, year.  Built by hand because the "%e"
# strftime directive is not available on every platform.
_today = datetime.date.today()
mdoc_date = "%s %d, %d" % (_today.strftime("%B"), _today.day, _today.year)
mdoc_command = None
mdoc_section = 1
mdoc_display_space = False
output_name = None


def main(argv=None):
    """Parse the options in *argv* (default: sys.argv[1:]) and convert the
    input file.

    Exits with a usage message if -c or -i is missing.
    """
    global input_name, mdoc_date, mdoc_command, mdoc_section
    global mdoc_display_space, output_name

    if argv is None:
        argv = sys.argv[1:]
    opts, args = getopt.getopt(argv, "C:c:d:i:o:Ss:")
    for o, a in opts:
        if o == "-c":
            mdoc_command = a
            command_names.append(a)
        elif o == "-i":
            input_name = a
        elif o == "-d":
            mdoc_date = a
        elif o == "-C":
            command_names.append(a)
        elif o == "-s":
            mdoc_section = a
        elif o == "-S":
            mdoc_display_space = True
        elif o == "-o":
            output_name = a

    if input_name is None or mdoc_command is None:
        sys.exit("usage: pod2mdoc.py -c command -i input [-C name] "
                 "[-d date] [-o output] [-S] [-s section]")

    infile = open(input_name)
    try:
        raw = infile.readlines()
    finally:
        infile.close()

    # Escape backslashes and resolve the angle bracket escapes before any
    # <...> markup is interpreted.
    data = [x.replace("\\", "\\e")
             .replace("E<gt>", "\\*[Gt]")
             .replace("E<lt>", "\\*[Lt]") for x in raw]

    # Convert first, so that a failing conversion does not leave an empty
    # output file behind.
    text = str(convert_to_tree(data))

    if output_name:
        outfile = open(output_name, "w")
        try:
            outfile.write(text)
        finally:
            outfile.close()
    else:
        sys.stdout.write(text)


if __name__ == "__main__":
    main()