Hi, Here's a patch contributed through bug #29259 (https://savannah.nongnu.org/bugs/index.php?29259) about the German prefix rules. If the author of this patch is on this list, could he make himself known so that we can attribute the work to him correctly?
I'd like some additional review on it though. Thanks! - Maxime -- Maxime Petazzoni <http://www.bulix.org> ``One by one, the penguins took away my sanity.'' Linux kernel and software developer at MontaVista Software
From b9dc98d21881ea1b2bf292962233d4177bbf2018 Mon Sep 17 00:00:00 2001 From: Maxime Petazzoni <[email protected]> Date: Tue, 11 May 2010 17:10:51 +0200 Subject: [PATCH ocitysmap] German prefix rules Signed-off-by: Maxime Petazzoni <[email protected]> --- ocitysmap/i18n.py | 76 +++++++++++++++++++++++++++++++++++++++++++++++++--- 1 files changed, 71 insertions(+), 5 deletions(-) diff --git a/ocitysmap/i18n.py b/ocitysmap/i18n.py index 37563cf..a9c1344 100644 --- a/ocitysmap/i18n.py +++ b/ocitysmap/i18n.py @@ -640,6 +640,72 @@ class i18n_pl_generic(i18n): return a == b +class i18n_de_generic(i18n): + # + # German streets are often named after people and include a title. + # The title will be captured as part of the <prefix> + # Covering airport names and "New"/"Old" as prefixes as well + # + APPELLATIONS = [ u"Alte", u"Alter", u"Doktor", u"Dr.", + u"Flughafen", u"Flugplatz", u"Gen.,", u"General", + u"Neue", u"Neuer", u"Platz", + u"Prinz", u"Prinzessin", u"Prof.", + u"Professor" ] + # + # Surnames in german streets named after people tend to have the middle name + # listed after the rest of the surname, + # e.g. "Platz der deutschen Einheit" => "deutschen Einheit (Platz der)" + # Likewise, articles are captured as part of the prefix, + # e.g. 
"An der Märchenwiese" => "Märchenwiese (An der)" + # + DETERMINANTS = [ u"\s?An den", u"\s?An der", u"\s?Am", + u"\s?Auf den" , u"\s?Auf der" + u" an", u" des", u" der", u" von", u" vor"] + + SPACE_REDUCE = re.compile(r"\s+") + PREFIX_REGEXP = re.compile(r"^(?P<prefix>(%s)(%s)?)\s?\b(?P<name>.+)" % + ("|".join(APPELLATIONS), + "|".join(DETERMINANTS)), re.IGNORECASE + | re.UNICODE) + + # for IndexPageGenerator._upper_unaccent_string + E_ACCENT = re.compile(ur"[éèêëẽ]", re.IGNORECASE | re.UNICODE) + I_ACCENT = re.compile(ur"[íìîïĩ]", re.IGNORECASE | re.UNICODE) + A_ACCENT = re.compile(ur"[áàâäã]", re.IGNORECASE | re.UNICODE) + O_ACCENT = re.compile(ur"[óòôöõ]", re.IGNORECASE | re.UNICODE) + U_ACCENT = re.compile(ur"[úùûüũ]", re.IGNORECASE | re.UNICODE) + + def __init__(self, language, locale_path): + self.language = str(language) + _install_language(language, locale_path) + + def _upper_unaccent_string(self, s): + s = self.E_ACCENT.sub("e", s) + s = self.I_ACCENT.sub("i", s) + s = self.A_ACCENT.sub("a", s) + s = self.O_ACCENT.sub("o", s) + s = self.U_ACCENT.sub("u", s) + return s.upper() + + def language_code(self): + return self.language + + def user_readable_street(self, name): + # + # Make sure name actually contains something, + # the PREFIX_REGEXP.match fails on zero-length strings + # + if len(name) == 0: + return name + + name = name.strip() + name = self.SPACE_REDUCE.sub(" ", name) + name = self.PREFIX_REGEXP.sub(r"\g<name> (\g<prefix>)", name) + return name + + def first_letter_equal(self, a, b): + return self._upper_unaccent_string(a) == self._upper_unaccent_string(b) + class i18n_generic(i18n): def __init__(self, language, locale_path): self.language = str(language) @@ -678,15 +744,15 @@ language_class_map = { 'en_US.UTF-8': i18n_generic, 'en_ZA.UTF-8': i18n_generic, 'en_ZW.UTF-8': i18n_generic, - 'de_BE.UTF-8': i18n_generic, 'nl_BE.UTF-8': i18n_nl_generic, 'nl_NL.UTF-8': i18n_nl_generic, 'it_IT.UTF-8': i18n_it_generic, 'it_CH.UTF-8': i18n_it_generic, - 
'de_AT.UTF-8': i18n_generic, - 'de_DE.UTF-8': i18n_generic, - 'de_LU.UTF-8': i18n_generic, - 'de_CH.UTF-8': i18n_generic, + 'de_AT.UTF-8': i18n_de_generic, + 'de_BE.UTF-8': i18n_de_generic, + 'de_DE.UTF-8': i18n_de_generic, + 'de_LU.UTF-8': i18n_de_generic, + 'de_CH.UTF-8': i18n_de_generic, 'es_ES.UTF-8': i18n_es_generic, 'es_AR.UTF-8': i18n_es_generic, 'es_BO.UTF-8': i18n_es_generic, -- 1.6.3.3.341.g9b22d
signature.asc
Description: Digital signature
