Add script to convert C API docs to mdtext

Project: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/commit/4c7a29ab
Tree: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/tree/4c7a29ab
Diff: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/diff/4c7a29ab

Branch: refs/heads/master
Commit: 4c7a29ab7c0c8eb2dab4efce4edd4f3d2ee1b58f
Parents: 992bdf8
Author: Nick Wellnhofer <wellnho...@aevum.de>
Authored: Mon Apr 4 15:15:25 2016 +0200
Committer: Nick Wellnhofer <wellnho...@aevum.de>
Committed: Mon Apr 4 15:15:25 2016 +0200

----------------------------------------------------------------------
 devel/bin/html2mdtext.pl | 94 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 94 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/4c7a29ab/devel/bin/html2mdtext.pl
----------------------------------------------------------------------
diff --git a/devel/bin/html2mdtext.pl b/devel/bin/html2mdtext.pl
new file mode 100755
index 0000000..0b53569
--- /dev/null
+++ b/devel/bin/html2mdtext.pl
@@ -0,0 +1,94 @@
+#!/usr/bin/perl
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+=head1 NAME
+
+html2mdtext.pl - Convert HTML to mdtext for the Apache CMS
+
+=head1 SYNOPSIS
+
+    html2mdtext.pl
+
+=head1 DESCRIPTION
+
+This script creates mdtext files from HTML. It must be run in the C<c>
+directory and scans all .html files found in C<autogen/share/doc/clownfish>.
+The resulting mdtext files are stored in a directory named C<mdtext>.
+
+=cut
+
+use strict;
+use warnings;
+use utf8;
+
+use File::Find;
+use File::Path qw( make_path );
+use File::Slurp;
+
+# Input tree of generated HTML docs and output tree for the mdtext files.
+# Both paths are relative to the current working directory.
+my $src_dir  = 'autogen/share/doc/clownfish';
+my $out_root = 'mdtext';
+
+# Fail early with a clear message. Without this check File::Find merely
+# warns that it cannot stat the directory and the script finishes without
+# converting anything.
+-d $src_dir
+    or die "Source directory '$src_dir' not found; "
+         . "run this script from the 'c' directory\n";
+
+# no_chdir keeps the working directory fixed, so $_ inside process_file is
+# the path of each visited entry including the $src_dir prefix.
+find( { wanted => \&process_file, no_chdir => 1 }, $src_dir );
+
+# File::Find callback: hands every *.html file below the search root to
+# html2mdtext().
+#
+# find() is invoked with no_chdir, so $_ holds the path of the current
+# entry including the top-level directory. That prefix is stripped so that
+# html2mdtext() receives the base directory and the relative file name
+# separately. Directories and files without an .html suffix are ignored.
+sub process_file {
+    my $path = $_;
+    my $dir  = $File::Find::topdir;
+
+    return if -d $path;
+    return if $path !~ /\.html\z/;
+
+    # \Q...\E makes the directory name match literally; without it any
+    # regex metacharacter in the path (".", "+", ...) would be interpreted
+    # as pattern syntax.
+    ( my $rel_path = $path ) =~ s|^\Q$dir\E/||;
+
+    html2mdtext( $dir, $rel_path );
+}
+
+# Converts one HTML file into an mdtext file for the Apache CMS.
+#
+# Parameters:
+#   $base_dir - directory containing the HTML tree
+#   $filename - path of the HTML file relative to $base_dir
+#
+# The page title becomes the "Title:" header. The contents of <body>, with
+# every heading demoted by one level, are wrapped in <div class="c-api">.
+# The result is written below $out_root using the same relative path with
+# the extension replaced by ".mdtext". Files lacking a title or a body are
+# skipped with a warning.
+sub html2mdtext {
+    my ( $base_dir, $filename ) = @_;
+
+    my $content = read_file( "$base_dir/$filename", binmode => ':utf8' );
+
+    if ($content !~ m|<title>([^<]+)</title>|) {
+        warn("No title found in $filename");
+        return;
+    }
+    my $title = $1;
+
+    # The title is emitted on a single "Title:" header line, so line breaks
+    # and surrounding whitespace must not leak into it.
+    $title =~ s/\s+/ /g;
+    $title =~ s/\A | \z//g;
+
+    # Accept a <body> tag with or without attributes.
+    if ($content !~ m|<body(?:\s[^>]*)?>\s*(.+?)\s*</body>|s) {
+        warn("No body found in $filename");
+        return;
+    }
+    $content = $1;
+
+    # Increase header level. The lookahead lets opening tags that carry
+    # attributes match too, so "<h2 id=...>" and its "</h2>" are bumped
+    # together instead of ending up as a mismatched pair.
+    $content =~ s{(</?h)(\d)(?=[\s>])}{ $1 . ($2 + 1) }ge;
+
+    $content = <<"EOF";
+Title: $title
+
+<div class="c-api">
+$content
+</div>
+EOF
+
+    # Mirror the source directory structure below $out_root.
+    my @path_comps = split('/', $filename);
+    pop(@path_comps);
+    my $out_dir = join('/', $out_root, @path_comps);
+    make_path($out_dir);
+
+    # Replace the extension of the last path component (or append one if
+    # there is none) with ".mdtext".
+    my $out_filename = "$out_root/$filename";
+    $out_filename =~ s|(\.[^/.]*)?\z|.mdtext|;
+    write_file( $out_filename, { binmode => ':utf8' }, \$content );
+}
+

Reply via email to