Traditionnally, pages named Foo:Bar are page 'Bar' in namespace 'Foo'.
However, it is also possible to call a page Foo:Bar if 'Foo' is not a
namespace. In this case, the actual name of the page is 'Foo:Bar', in the
main namespace. Since we can't tell with only the filename, query the
wiki for a namespace 'Foo' in these cases, but deal with the case where
no such namespace is found.

Signed-off-by: Matthieu Moy <matthieu....@imag.fr>
---
 contrib/mw-to-git/git-remote-mediawiki      | 49 +++++++++++++++++++----------
 contrib/mw-to-git/t/t9362-mw-to-git-utf8.sh | 20 ++++++++++++
 2 files changed, 53 insertions(+), 16 deletions(-)

diff --git a/contrib/mw-to-git/git-remote-mediawiki 
b/contrib/mw-to-git/git-remote-mediawiki
index d6be188..47d4878 100755
--- a/contrib/mw-to-git/git-remote-mediawiki
+++ b/contrib/mw-to-git/git-remote-mediawiki
@@ -955,8 +955,11 @@ sub mw_import_revids {
                # Differentiates classic pages and media files.
                my ($namespace, $filename) = $page_title =~ /^([^:]*):(.*)$/;
                my %mediafile;
-               if ($namespace && get_mw_namespace_id($namespace) == 
get_mw_namespace_id("File")) {
-                       %mediafile = 
get_mw_mediafile_for_page_revision($filename, $rev->{timestamp});
+               if ($namespace) {
+                       my $id = get_mw_namespace_id($namespace);
+                       if ($id && $id == get_mw_namespace_id("File")) {
+                               %mediafile = 
get_mw_mediafile_for_page_revision($filename, $rev->{timestamp});
+                       }
                }
                # If this is a revision of the media page for new version
                # of a file do one common commit for both file and media page.
@@ -1306,7 +1309,11 @@ sub get_mw_namespace_id {
                chomp(@temp);
                foreach my $ns (@temp) {
                        my ($n, $id) = split(/:/, $ns);
-                       $namespace_id{$n} = $id;
+                       if ($id eq 'notANameSpace') {
+                               $namespace_id{$n} = {is_namespace => 0};
+                       } else {
+                               $namespace_id{$n} = {is_namespace => 1, id => 
$id};
+                       }
                        $cached_mw_namespace_id{$n} = 1;
                }
        }
@@ -1324,28 +1331,38 @@ sub get_mw_namespace_id {
 
                while (my ($id, $ns) = each(%{$result->{query}->{namespaces}})) 
{
                        if (defined($ns->{id}) && defined($ns->{canonical})) {
-                               $namespace_id{$ns->{canonical}} = $ns->{id};
+                               $namespace_id{$ns->{canonical}} = {is_namespace 
=> 1, id => $ns->{id}};
                                if ($ns->{'*'}) {
                                        # alias (e.g. french Fichier: as alias 
for canonical File:)
-                                       $namespace_id{$ns->{'*'}} = $ns->{id};
+                                       $namespace_id{$ns->{'*'}} = 
{is_namespace => 1, id => $ns->{id}};
                                }
                        }
                }
        }
 
-       my $id = $namespace_id{$name};
+       my $ns = $namespace_id{$name};
+       my $id;
 
-       if (defined $id) {
-               # Store explicitely requested namespaces on disk
-               if (!exists $cached_mw_namespace_id{$name}) {
-                       run_git("config --add remote.". $remotename
-                               .".namespaceCache \"". $name .":". $id ."\"");
-                       $cached_mw_namespace_id{$name} = 1;
-               }
-               return $id;
-       } else {
-               die "No such namespace $name on MediaWiki.";
+       unless (defined $ns) {
+               print STDERR "No such namespace $name on MediaWiki.\n";
+               $ns = {is_namespace => 0};
+               $namespace_id{$name} = $ns;
+       }
+
+       if ($ns->{is_namespace}) {
+               $id = $ns->{id};
+       }
+
+       # Store "notANameSpace" as special value for inexisting namespaces
+       my $store_id = ($id || 'notANameSpace');
+
+       # Store explicitely requested namespaces on disk
+       if (!exists $cached_mw_namespace_id{$name}) {
+               run_git("config --add remote.". $remotename
+                       .".namespaceCache \"". $name .":". $store_id ."\"");
+               $cached_mw_namespace_id{$name} = 1;
        }
+       return $id;
 }
 
 sub get_mw_namespace_id_for_page {
diff --git a/contrib/mw-to-git/t/t9362-mw-to-git-utf8.sh 
b/contrib/mw-to-git/t/t9362-mw-to-git-utf8.sh
index 8635878..246d47d 100755
--- a/contrib/mw-to-git/t/t9362-mw-to-git-utf8.sh
+++ b/contrib/mw-to-git/t/t9362-mw-to-git-utf8.sh
@@ -169,6 +169,26 @@ test_expect_failure 'special character at the begining of 
file name from mw to g
        test_path_is_file mw_dir_11/[char_2
 '
 
+test_expect_success 'Pull page with title containing ":" other than namespace 
separator' '
+       wiki_editpage Foo:Bar content false &&
+       (
+               cd mw_dir_11 &&
+               git pull
+       ) &&
+       test_path_is_file mw_dir_11/Foo:Bar.mw
+'
+
+test_expect_success 'Push page with title containing ":" other than namespace 
separator' '
+       (
+               cd mw_dir_11 &&
+               echo content >NotANameSpace:Page.mw &&
+               git add NotANameSpace:Page.mw &&
+               git commit -m "add page with colon" &&
+               git push
+       ) &&
+       wiki_page_exist NotANameSpace:Page
+'
+
 test_expect_success 'test of correct formating for file name from mw to git' '
        wiki_reset &&
        git clone mediawiki::'"$WIKI_URL"' mw_dir_12 &&
-- 
1.7.11.2.258.g5ff3cdf.dirty

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to