Package: linuxdoc-tools
Version: 0.9.21-0.8
Severity: wishlist
Hi,
I have noticed that TOC creation in fmt_txt.pl has some
minor annoyances:
* Does not work well with chapters (creates numbering after sections)
* Long lines are not wrapped
* Tabs and extra whitespace are not stripped, making the result look
strange.
I have been playing with the txt preASP code related to TOC creation and
ended up mostly rewriting it to my taste, following similar ideas but with
a number of improvements. In my preliminary tests, things work with chapters,
long lines are wrapped, and a lot of noise (stray tabs, ...) is removed,
giving a much better look.
This is still very experimental, but I am attaching a patch showing the
current changes. I will test them extensively in the meantime.
Cheers,
--
Agustin
Index: lib/dist/fmt_txt.pl
===================================================================
RCS file: /home/agmartin/CVSROOT/debian/linuxdoc-tools/lib/dist/fmt_txt.pl,v
retrieving revision 1.3
retrieving revision 1.1.1.1.2.18
diff -u -r1.3 -r1.1.1.1.2.18
--- lib/dist/fmt_txt.pl 7 May 2007 10:17:00 -0000 1.3
+++ lib/dist/fmt_txt.pl 14 May 2007 11:55:38 -0000 1.1.1.1.2.18
@@ -14,6 +14,7 @@
use File::Copy;
use Text::EntityMap;
+use Text::Wrap;
use LinuxDocTools::CharEnts;
use LinuxDocTools::Lang;
use LinuxDocTools::Vars;
@@ -88,12 +89,11 @@
# ---------------------------------------------------------------
$txt->{preASP} = sub
# ---------------------------------------------------------------
-# Run the file through the genertoc utility before sgmlsasp. Not needed
-# when producing a manpage. A lot of code from FJM, untested by me.
+# Pre-process file before sgmlsasp and create a TOC unless producing
+# a manpage. Code based on the genertoc utility and on code from FJM.
# ---------------------------------------------------------------
{
- my ($infile, $outfile) = @_;
- my (@toc, @lines);
+ my ($INFILE, $OUTFILE) = @_;
my $char_maps = load_char_maps ('.2tr', [ Text::EntityMap::sdata_dirs() ]);
if ( $global->{charset} eq "latin1" ){
@@ -101,10 +101,10 @@
}
if ($txt->{manpage}){
- while (<$infile>){
+ while (<$INFILE>){
if ( s/^-// ){
chomp;
- print $outfile "-" . &parse_data ($_, $char_maps, $txt_escape) . "\n";
+ print $OUTFILE "-" . &parse_data ($_, $char_maps, $txt_escape) . "\n";
} elsif (/^A/) {
/^A(\S+) (IMPLIED|CDATA|NOTATION|ENTITY|TOKEN)( (.*))?$/
|| die "bad attribute data: $_\n";
@@ -113,9 +113,9 @@
# CDATA attributes get translated also
$value = &parse_data ($value, $char_maps, $txt_escape);
}
- print $outfile "A$name $type $value\n";
+ print $OUTFILE "A$name $type $value\n";
} else {
- print $outfile $_;
+ print $OUTFILE $_;
}
}
return;
@@ -124,190 +124,162 @@
# note the conversion of `sdata_dirs' list to an anonymous array to
# make a single argument
- #
- # Build TOC. The file is read into @lines in the meantime, we need to
- # traverse it twice.
- #
- push (@toc, "(HLINE\n");
- push (@toc, ")HLINE\n");
- push (@toc, "(P\n");
- push (@toc, "-" . Xlat ("Table of Contents") . "\n");
- push (@toc, ")P\n");
- push (@toc, "(VERB\n");
- my (@prevheader, @header);
- my $appendix = 0;
- my $nonprint = 0;
- while (<$infile>)
- {
- push (@lines, $_);
-
- if (/^\(SECT(.*)/)
- {
- @prevheader = @header;
- @header = @header[0..$1];
- if ($appendix == 1)
- {
- $header[$1] = "A";
- $appendix = 0;
- } else
- {
- $header[$1]++;
- }
- }
- if (/^\(APPEND(.*)/)
- {
- $appendix = 1;
- }
- if (/^\(HEADING/)
- {
- $_ = <$infile>;
- s/\\n/ /g;
- push (@lines, $_);
- chop;
- s/^-//;
- $_ = join(".",@header) . " " . $_;
- s/\(\\[0-9][0-9][0-9]\)/\\\1/g;
-
- if (!$#header)
- {
- # put a newline before top-level sections unless previous was also
- # a top level section
- $_ = "\\n" . $_ unless (!$#prevheader);
- # put a . and a space after top level sections
- s/ /. /;
-##### $_ = "-" . $_ . "\\n";
- $_ = "-" . $_;
- }
- else
- {
- # subsections get indentation matching hierarchy
- $_ = "-" . " " x $#header . $_;
- }
-
-# remove tags from a toc
- s/\)TT//g;
- s/\(TT//g;
- s/\)IT//g;
- s/\(IT//g;
- s/\)EM//g;
- s/\(EM//g;
- s/\)BF//g;
- s/\(BF//g;
- s/AID * CDATA.*$//g;
- s/\)LABEL//g;
- s/\(LABEL//g;
-
- push(@toc, parse_data ($_, $char_maps, $txt_escape));
-
- $_ = <$infile>;
- while (!/^\)HEADING/) {
- s/\\n/ /g; ####
- push(@lines, $_);
- chop;
- s/^-//;
-
-# remove tags from a toc
- s/\)TT//g;
- s/\(TT//g;
- s/\)IT//g;
- s/\(IT//g;
- s/\)EM//g;
- s/\(EM//g;
- s/\)BF//g;
- s/\(BF//g;
- s/AID * CDATA.*$//g;
- s/\)LABEL//g;
- s/\(LABEL//g;
-
-# remove NIDX, NCDX from a toc entry
- if (/^\(NIDX$/ || /^\(NCDX$/) { $nonprint = 1; }
- if (/^\)NIDX$/ || /^\)NCDX$/) { $nonprint = 1; }
-
-# $_ = "-" . $_ . "\\n";
- push(@toc, parse_data ($_, $char_maps, $txt_escape))
- if (! $nonprint);
- $_ = <$infile>;
- }
- s/\\n/ /g; ###
- push(@lines, $_);
- push(@toc, "\\n\n");
+ # ---------------------------------
+ # Pre-process file and extract TOC info
+ # ---------------------------------
+
+ my $inheading = 0;
+ my $headertext = '';
+ my $sectionlevel = '';
+ my $appendix = 0;
+ my $txtout = "";
+ my @tocarray = ();
+ my $thetoc = '';
+ my @header = ();
+ my @prevheader = ();
+ my $chapterskip = 0;
+
+ while (<$INFILE>) {
+ if ($inheading){
+ next if ( /^(\(|\))(BF|EM|IT|LABEL|TT)/ );
+ next if ( /^\)TOC/ );
+
+ if ( s/^-// ) { # Header text
+ chomp;
+ s/([^\\])\\n/$1 /g; # No unescaped \n in text
+ s/^\\n/ /g; # No newlines in text BOL
+ s/([^\\])\\011/$1 /g; # No tabulars in text
+ s/^[\s\n\t]*//; #
+ s/[\s\n\t]*$//; #
+ $headertext .= $_;
+ $headertext .= " ";
+ } elsif (/^\)HEADING/){ # End of header: Write full header text
+ $headertext =~ s/[ \n]*$//;
+ if ( $headertext ) {
+ $headertext = &parse_data ($headertext, $char_maps, $txt_escape);
+ $txtout .= "-" . $headertext . "\n";
+ push @tocarray, [$sectionlevel, $headertext];
+ }
+ $inheading = 0;
+ $sectionlevel = '';
+ $txtout .= $_;
+ } else { # labels and friends: copy to output
+ $txtout .= $_;
}
- }
- push (@toc, ")VERB\n");
- push (@toc, "(HLINE\n");
- push (@toc, ")HLINE\n");
-
- my $inheading = 0;
- my $tipo = '';
- for (@lines)
- {
- if ($inheading)
- {
- next if (/^\)TT/ || /^\(TT/ || /^\)IT/ || /^\(IT/ ||
- /^\)EM/ || /^\(EM/ || /^\)BF/ || /^\(BF/);
- if (/^-/)
- {
- $tipo .= $' ;
- chop ($tipo);
- $tipo .= " " unless $tipo =~ / $/;
- }
- else
- {
- $tipo =~ s/ $//;
- if ($tipo)
- {
- print $outfile "-"
- . parse_data ($tipo, $char_maps, $txt_escape)
- . "\n";
- }
- print $outfile $_;
- $tipo = '';
- }
- if (/^\)HEADING/)
- {
- $inheading = 0;
- }
- next;
+
+ } else { # --- Not in heading
+
+ if (/^\(APPEND(.*)/) { # appendix mode
+ $appendix = 1;
+ $txtout .= $_;
+ } elsif (/^\(HEADING/) { # Go into heading processing mode.
+ $headertext = '';
+ $inheading = 1;
+ $txtout .= $_;
+ } elsif (/^\(CHAPT/) {
+ $sectionlevel = 0;
+ $chapterskip = 1; # Start sectioning with chapter
+ if ( $appendix ) {
+ $sectionlevel = "A$sectionlevel";
+ $appendix = 0;
}
- if (/^\(HEADING/)
- {
- #
- # Go into heading processing mode.
- #
- $tipo = '';
- $inheading = 1;
+ $txtout .= $_;
+ } elsif (/^\(SECT(.*)/) {
+ $sectionlevel = $1 ? $1 : 0;
+ $sectionlevel += $chapterskip;
+ if ( $appendix ) {
+ $sectionlevel = "A$sectionlevel";
+ $appendix = 0;
}
- if (/^\(TOC/)
- {
- print $outfile @toc;
- next;
+ $txtout .= $_;
+ } elsif (/^\(TOC/) { # Placeholder for TOC
+ $txtout .= "##TOC##";
+ } elsif ( s/^-// ) {
+ chomp;
+ $txtout .= "-" . &parse_data ($_, $char_maps, $txt_escape) . "\n";
+ } elsif (/^A/) {
+ /^A(\S+) (IMPLIED|CDATA|NOTATION|ENTITY|TOKEN)( (.*))?$/
+ || die "bad attribute data: $_\n";
+ my ($name,$type,$value) = ($1,$2,$4);
+ if ($type eq "CDATA") {
+ # CDATA attributes get translated also
+ $value = &parse_data ($value, $char_maps, $txt_escape);
}
- if (/^-/)
- {
- my ($str) = $';
- chop ($str);
- print $outfile "-" . parse_data ($str, $char_maps, $txt_escape) . "\n";
- next;
- }
- elsif (/^A/)
- {
- /^A(\S+) (IMPLIED|CDATA|NOTATION|ENTITY|TOKEN)( (.*))?$/
- || die "bad attribute data: $_\n";
- my ($name,$type,$value) = ($1,$2,$4);
- if ($type eq "CDATA")
- {
- # CDATA attributes get translated also
- $value = parse_data ($value, $char_maps, $txt_escape);
- }
- print $outfile "A$name $type $value\n";
- next;
- }
-
- #
- # Default action if not skipped over with next: copy in to out.
- #
- print $outfile $_;
+ $txtout .= "A$name $type $value\n";
+ } else {
+ $txtout .= $_;
+ }
+ }
+ } # end of while (<$INFILE>) loop
+
+ # ----------------------------
+ # Post-process the TOC, if any
+ # ----------------------------
+
+ if ( @tocarray ) {
+ my $toclinelength = 72; # Length of a normal line
+ @header = @prevheader = ();
+ $thetoc = join ("\n",("(HLINE",
+ ")HLINE",
+ "(P",
+ "-" . Xlat ("Table of Contents"),
+ ")P",
+ "(VERB\n"));
+
+ foreach my $entry ( @tocarray ) {
+ my $level = $$entry[0]; # Section level
+ my $text = $$entry[1]; # section entry
+ my $number = ''; # Numbering of the item
+ my $nwhite = ''; # Will be length($number) times " "
+
+ $text =~ s/(\(|\))(BF|EM|IT|LABEL|TT)//g;
+ $text =~ s/AID * CDATA.*$//g;
+ $text =~ s/\s+/ /g;
+
+ @prevheader = @header;
+ @header = @header[0..$level];
+
+ if ( $level =~ s/^A// ){
+ $header[$level] = "A";
+ } else {
+ $header[$level]++;
+ }
+
+ my $number = join ('.',@header);
+
+ if ( ! $#header ) {
+ # put a . after top level sections
+ $number .= '.';
+ # put a newline before top-level sections unless previous is one
+ $number = "\\n" . $number unless (!$#prevheader);
+ $number = "-" . $number;
+ } else {
+ # subsections get indentation matching hierarchy
+ $number = "-" . " " x $#header . $number;
+ }
+ unless ( $text =~ /^(\(|\))(NCDX|NIDX)$/ ){
+ $nwhite = $number;
+ $nwhite =~ s/^[-\\n]*//;
+ $nwhite = "-" . " " x length($nwhite);
+ $Text::Wrap::columns = $toclinelength - length($nwhite);
+ foreach ( split("\n",wrap('','',$text)) ){
+ $thetoc .= "$number $_\\n\n";
+ $number = $nwhite; # Whitespaces if number is already printed
+ }
+ }
}
+ $thetoc .= join ("\n",(")VERB",
+ "(HLINE",
+ ")HLINE\n"));
+ } # Parsed @tocarray
+
+ if ( $thetoc ){
+ $txtout =~ s/^\#\#TOC\#\#/$thetoc/m;
+ } else {
+ $txtout =~ s/^\#\#TOC\#\#//m;
+ }
+ print $OUTFILE $txtout;
+ return 0;
};
# ---------------------------------------------------------------
@@ -400,5 +372,6 @@
__END__
#Local Variables:
-#perl-indent-level: 2
+# mode: perl
+# perl-indent-level: 2
#End: