tag 440946 + patch
reassign 423168 perl-modules
forcemerge 440946 423168
thanks

Attached is a patch for lib/Pod/Html.pm that fixes all issues mentioned in the 
original report and does some minor related code cleanup. The test case is now
xmllint clean and the generated HTML renders correctly.

NOTE: this patch includes the fix for #423168, therefore merging the BRs.

I have tested the patch fairly extensively, including:
- consecutive =item paras for the same definition
- unclosed lists at the end of sections and the document

I've also run it on the pod files in perl-5.8.8/pod and compared old and new 
output. I have seen no unexpected changes.

Although the patch may look fairly big, the changes are IMO not very 
invasive and the new algorithm seems a lot cleaner and more maintainable.

Details of the patch
- <dd> tags are now generated in the same way as <li> tags; the <dt>
  term is now processed as a kind of introduction to a <dd> definition
- we no longer generate redundant </dd><dd> pairs within a definition
  if the definition contains multiple paragraphs; again similar to <li>
- printing <dd> tags is delayed in case there are multiple consecutive
  terms (=item); this results in the same behavior as in the old code

Related cleanup
- handle "open lists" at the end of a section (process_head) and end of
  a document (pod2html) the same way and the same for all list types
  using the finish_list function; in the old code this was inconsistent

Coding details
- $need_dd and $After_Lpar flags are no longer needed and thus removed
- @Listend generalized to @Listtype consistent with new usage
- function emit_li renamed to new_listitem to better reflect extended
  functionality
- emit_li returned $emitted, but that value was never used, so it was dropped

Cheers,
FJP

--- perl-5.8.8/lib/Pod/Html.pm.orig	2006-12-05 12:52:37.000000000 +0100
+++ perl-5.8.8/lib/Pod/Html.pm	2007-09-05 23:20:30.000000000 +0200
@@ -232,8 +232,8 @@
 my $Doindex;
 
 my $Backlink;
-my($Listlevel, @Listend);
-my $After_Lpar;
+my($Listlevel, @Listtype);
+my $ListNewTerm;
 use vars qw($Ignore);  # need to localize it later.
 
 my(%Items_Named, @Items_Seen);
@@ -273,7 +273,7 @@
     $Htmldir = "";	    	# The directory to which the html pages
 				# will (eventually) be written.
     $Htmlfile = "";		# write to stdout by default
-    $Htmlfileurl = "" ;		# The url that other files would use to
+    $Htmlfileurl = "";		# The url that other files would use to
 				# refer to this file.  This is only used
 				# to make relative urls that point to
 				# other files.
@@ -289,8 +289,9 @@
     $Doindex = 1;   	    	# non-zero if we should generate an index
     $Backlink = '';		# text for "back to top" links
     $Listlevel = 0;		# current list depth
-    @Listend = ();		# the text to use to end the list.
-    $After_Lpar = 0;            # set to true after a par in an =item
+    @Listtype = ();		# list types for open lists
+    $ListNewTerm = 0;		# indicates new term in definition list; used
+    				# to correctly open/close <dd> tags
     $Ignore = 1;		# whether or not to format text.  we don't
 				#   format text until we hit our first pod
 				#   directive.
@@ -495,7 +496,6 @@
 
     # now convert this file
     my $after_item;             # set to true after an =item
-    my $need_dd = 0;
     warn "Converting input file $Podfile\n" if $Verbose;
     foreach my $i (0..$#poddata){
         $PTQuote = 0; # status of quote conversion
@@ -505,7 +505,6 @@
 	if (/^(=.*)/s) {	# is it a pod directive?
 	    $Ignore = 0;
 	    $after_item = 0;
-	    $need_dd = 0;
 	    $_ = $1;
 	    if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
 		process_begin($1, $2);
@@ -521,12 +520,12 @@
 		if (/^=(head[1-6])\s+(.*\S)/s) {	# =head[1-6] heading
 		    process_head( $1, $2, $Doindex && $index );
 		} elsif (/^=item\s*(.*\S)?/sm) {	# =item text
-		    $need_dd = process_item( $1 );
+		    process_item( $1 );
 		    $after_item = 1;
 		} elsif (/^=over\s*(.*)/) {		# =over N
 		    process_over();
 		} elsif (/^=back/) {		# =back
-		    process_back($need_dd);
+		    process_back();
 		} elsif (/^=for\s+(\S+)\s*(.*)/si) {# =for
 		    process_for($1,$2);
 		} else {
@@ -541,8 +540,14 @@
 	    next if $Ignore;
 	    next if @Begin_Stack && $Begin_Stack[-1] ne 'html';
 	    print HTML and next if @Begin_Stack && $Begin_Stack[-1] eq 'html';
-	    print HTML "<dd>\n" if $need_dd;
 	    my $text = $_;
+
+	    # Open tag for definition list as we have something to put in it
+	    if( $ListNewTerm ){
+		print HTML "<dd>\n";
+		$ListNewTerm = 0;
+	    }
+
 	    if( $text =~ /\A\s+/ ){
 		process_pre( \$text );
 	        print HTML "<pre>\n$text</pre>\n";
@@ -572,12 +577,8 @@
 		}
 		## end of experimental
 
-		if( $after_item ){
-		    $After_Lpar = 1;
-		}
 		print HTML "<p>$text</p>\n";
 	    }
-	    print HTML "</dd>\n" if $need_dd;
 	    $after_item = 0;
 	}
     }
@@ -1052,12 +1053,12 @@
 
 	# figure out what kind of item it is.
 	# Build string for referencing this item.
-	if ( $txt =~ /\A=item\s+\*\s*(.*)\Z/s ) { # bullet
+	if ( $txt =~ /\A=item\s+\*\s*(.*)\Z/s ) { # bulleted list
 	    next unless $1;
 	    $item = $1;
         } elsif( $txt =~ /\A=item\s+(?>\d+\.?)\s*(.*)\Z/s ) { # numbered list
 	    $item = $1;
-	} elsif( $txt =~ /\A=item\s+(.*)\Z/s ) { # plain item
+	} elsif( $txt =~ /\A=item\s+(.*)\Z/s ) { # definition list
 	    $item = $1;
 	} else {
 	    next;
@@ -1077,12 +1078,7 @@
     $tag =~ /head([1-6])/;
     my $level = $1;
 
-    if( $Listlevel ){
-	warn "$0: $Podfile: unterminated list at =head in paragraph $Paragraph.  ignoring.\n" unless $Quiet;
-        while( $Listlevel ){
-            process_back();
-        }
-    }
+    finish_list();
 
     print HTML "<p>\n";
     if( $level == 1 && ! $Top ){
@@ -1120,19 +1116,32 @@
         $name = anchorify($name);
 	print HTML qq{<a name="$name">}, process_text( \$otext ), '</a>';
     }
-    print HTML "</strong>\n";
+    print HTML "</strong>";
     undef( $EmittedItem );
 }
 
-sub emit_li {
+sub new_listitem {
     my( $tag ) = @_;
+    # Open tag for definition list as we have something to put in it
+    if( ($tag ne 'dl') && ($ListNewTerm) ){
+	print HTML "<dd>\n";
+	$ListNewTerm = 0;
+    }
+
     if( $Items_Seen[$Listlevel]++ == 0 ){
-	push( @Listend, "</$tag>" );
+	# start of new list
+	push( @Listtype, "$tag" );
 	print HTML "<$tag>\n";
+    } else {
+	# if this is not the first item, close the previous one
+	if ( $tag eq 'dl' ){
+	    print HTML "</dd>\n" unless $ListNewTerm;
+	} else {
+	    print HTML "</li>\n";
+	}
     }
-    my $emitted = $tag eq 'dl' ? 'dt' : 'li';
-    print HTML "<$emitted>";
-    return $emitted;
+    my $opentag = $tag eq 'dl' ? 'dt' : 'li';
+    print HTML "<$opentag>";
 }
 
 #
@@ -1140,7 +1149,6 @@
 #
 sub process_item {
     my( $otext ) = @_;
-    my $need_dd = 0; # set to 1 if we need a <dd></dd> after an item
 
     # lots of documents start a list without doing an =over.  this is
     # bad!  but, the proper thing to do seems to be to just assume
@@ -1150,43 +1158,43 @@
 	process_over();
     }
 
-    # formatting: insert a paragraph if preceding item has >1 paragraph
-    if( $After_Lpar ){
-	print HTML $need_dd ? "</dd>\n" : "</li>\n" if $After_Lpar;
-	$After_Lpar = 0;
-    }
-
     # remove formatting instructions from the text
     my $text = depod( $otext );
 
-    my $emitted; # the tag actually emitted, used for closing
-
     # all the list variants:
     if( $text =~ /\A\*/ ){ # bullet
-        $emitted = emit_li( 'ul' );
+        new_listitem( 'ul' );
         if ($text =~ /\A\*\s+(.+)\Z/s ) { # with additional text
             my $tag = $1;
             $otext =~ s/\A\*\s+//;
             emit_item_tag( $otext, $tag, 1 );
+            print HTML "\n";
         }
 
     } elsif( $text =~ /\A\d+/ ){ # numbered list
-        $emitted = emit_li( 'ol' );
+        new_listitem( 'ol' );
         if ($text =~ /\A(?>\d+\.?)\s*(.+)\Z/s ) { # with additional text
             my $tag = $1;
             $otext =~ s/\A\d+\.?\s*//;
             emit_item_tag( $otext, $tag, 1 );
+            print HTML "\n";
         }
 
     } else {			# definition list
-        $emitted = emit_li( 'dl' );
-        if ($text =~ /\A(.+)\Z/s ){ # should have text
+        # new_listitem takes care of opening the <dt> tag
+        new_listitem( 'dl' );
+        if( $text =~ /\A(.+)\Z/s ){ # should have text
             emit_item_tag( $otext, $text, 1 );
+        } else {
+            warn "$0: $Podfile: no term text provided for definition list in paragraph $Paragraph.  ignoring.\n" unless $Quiet;
         }
-        $need_dd = 1;
+        # write the definition term and close <dt> tag
+        print HTML "</dt>\n";
+        # trigger opening a <dd> tag for the actual definition; will not
+        # happen if next paragraph is also a definition term (=item)
+        $ListNewTerm = 1;
     }
     print HTML "\n";
-    return $need_dd;
 }
 
 #
@@ -1196,30 +1204,31 @@
     # start a new list
     $Listlevel++;
     push( @Items_Seen, 0 );
-    $After_Lpar = 0;
 }
 
 #
 # process_back - process a pod back tag and convert it to HTML format.
 #
 sub process_back {
-    my $need_dd = shift;
     if( $Listlevel == 0 ){
 	warn "$0: $Podfile: unexpected =back directive in paragraph $Paragraph.  ignoring.\n" unless $Quiet;
 	return;
     }
 
-    # close off the list.  note, I check to see if $Listend[$Listlevel] is
+    # close off the list.  note, I check to see if $Listtype[$Listlevel] is
     # defined because an =item directive may have never appeared and thus
-    # $Listend[$Listlevel] may have never been initialized.
+    # $Listtype[$Listlevel] may have never been initialized.
     $Listlevel--;
-    if( defined $Listend[$Listlevel] ){
-	print HTML $need_dd ? "</dd>\n" : "</li>\n" if $After_Lpar;
-	print HTML $Listend[$Listlevel];
-        print HTML "\n";
-        pop( @Listend );
+    if( defined $Listtype[$Listlevel] ){
+        if ( $Listtype[$Listlevel] eq 'dl' ){
+            print HTML "</dd>\n" unless $ListNewTerm;
+        } else {
+            print HTML "</li>\n";
+        }
+        print HTML "</$Listtype[$Listlevel]>\n";
+        pop( @Listtype );
+        $ListNewTerm = 0;
     }
-    $After_Lpar = 0;
 
     # clean up item count
     pop( @Items_Seen );
@@ -1964,9 +1973,11 @@
 # after the entire pod file has been read and converted.
 #
 sub finish_list {
-    while ($Listlevel > 0) {
-	print HTML "</dl>\n";
-	$Listlevel--;
+    if( $Listlevel ){
+	warn "$0: $Podfile: unterminated list(s) at =head in paragraph $Paragraph.  ignoring.\n" unless $Quiet;
+	while( $Listlevel ){
+            process_back();
+        }
     }
 }
 

Attachment: signature.asc
Description: This is a digitally signed message part.

Reply via email to