bertysentry commented on a change in pull request #49: URL: https://github.com/apache/maven-doxia/pull/49#discussion_r550357588
########## File path: doxia-modules/doxia-module-markdown/src/main/java/org/apache/maven/doxia/module/markdown/MarkdownParser.java ########## @@ -130,133 +184,98 @@ public void parse( Reader source, Sink sink ) * @return HTML content generated by flexmark-java * @throws IOException passed through */ - String toHtml( Reader source ) + CharSequence toHtml( Reader source ) throws IOException { + // Read the source String text = IOUtil.toString( source ); - MutableDataHolder flexmarkOptions = PegdownOptionsAdapter.flexmarkOptions( - Extensions.ALL & ~( Extensions.HARDWRAPS | Extensions.ANCHORLINKS ) ).toMutable(); - ArrayList<Extension> extensions = new ArrayList<>(); - for ( Extension extension : flexmarkOptions.get( com.vladsch.flexmark.parser.Parser.EXTENSIONS ) ) - { - extensions.add( extension ); - } - - extensions.add( FlexmarkDoxiaExtension.create() ); - flexmarkOptions.set( com.vladsch.flexmark.parser.Parser.EXTENSIONS, extensions ); - flexmarkOptions.set( HtmlRenderer.HTML_BLOCK_OPEN_TAG_EOL, false ); - flexmarkOptions.set( HtmlRenderer.HTML_BLOCK_CLOSE_TAG_EOL, false ); - flexmarkOptions.set( HtmlRenderer.MAX_TRAILING_BLANK_LINES, -1 ); - - com.vladsch.flexmark.parser.Parser parser = com.vladsch.flexmark.parser.Parser.builder( flexmarkOptions ) - .build(); - HtmlRenderer renderer = HtmlRenderer.builder( flexmarkOptions ) - .linkResolverFactory( new FlexmarkDoxiaLinkResolver.Factory() ) - .build(); - + // Now, build the HTML document StringBuilder html = new StringBuilder( 1000 ); html.append( "<html>" ); html.append( "<head>" ); - Pattern metadataPattern = Pattern.compile( MULTI_MARKDOWN_METADATA_SECTION, Pattern.MULTILINE ); - Matcher metadataMatcher = metadataPattern.matcher( text ); + + // First, we interpret the "metadata" section of the document and add the corresponding HTML headers + Matcher metadataMatcher = METADATA_SECTION_PATTERN.matcher( text ); boolean haveTitle = false; if ( metadataMatcher.find() ) { - metadataPattern = Pattern.compile( 
MULTI_MARKDOWN_METADATA_ENTRY, Pattern.MULTILINE ); - Matcher lineMatcher = metadataPattern.matcher( metadataMatcher.group( 1 ) ); - boolean first = true; - while ( lineMatcher.find() ) + Matcher entryMatcher = METADATA_ENTRY_PATTERN.matcher( metadataMatcher.group( 0 ) ); + while ( entryMatcher.find() ) { - String key = StringUtils.trimToEmpty( lineMatcher.group( 1 ) ); - if ( first ) - { - boolean found = false; - for ( String k : STANDARD_METADATA_KEYS ) - { - if ( k.equalsIgnoreCase( key ) ) - { - found = true; - break; - } - } - if ( !found ) - { - break; - } - first = false; - } - String value = StringUtils.trimToEmpty( lineMatcher.group( 2 ) ); + String key = entryMatcher.group( 1 ); + String value = entryMatcher.group( 2 ); if ( "title".equalsIgnoreCase( key ) ) { haveTitle = true; html.append( "<title>" ); - html.append( StringEscapeUtils.escapeXml( value ) ); + html.append( HtmlTools.escapeHTML( value, false ) ); Review comment: The only difference between *false* and *true* in *HtmlTools.escapeHTML()* is the handling of the apostrophe (should it be replaced with `&apos;` or not). As we're inside a `<title>` element, there is no need to replace the apostrophe. However, on lines 218 and 221, we're inside the attribute of an element, i.e. inside quotes, and that's why we need to escape the apostrophe there. Note that the presence (and absence) of an apostrophe in the title and metadata is covered by unit tests (see *testMetadataSinkEvent()* in **MarkdownParserTest.java**) ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org