Hi, It is known that po/Rules-quot does not work properly with BSD Sed:
[bug-gettext] msgfilter: Rules-quot implicity depends on GNU Sed.
https://lists.gnu.org/archive/html/bug-gettext/2013-04/msg00028.html

The file basically converts ASCII quotations ("...", `...', '...') to Unicode quotations (“...”, ‘...’) by running sed through msgfilter. So I wonder whether this conversion might be worth adding to the built-in filters. What do people think?

I'm attaching an initial patch for this. If it makes sense, I'll prepare a filter for boldquot as well (and docs and tests).

Regards,
--
Daiki Ueno
>From 6daf24b4c3c56915057796c7de2e518bc7d58dfb Mon Sep 17 00:00:00 2001 From: Daiki Ueno <[email protected]> Date: Wed, 9 Apr 2014 19:25:58 +0900 Subject: [PATCH] msgfilter: Add 'quot' filter --- gettext-tools/src/Makefile.am | 1 + gettext-tools/src/filter-quote.c | 153 +++++++++++++++++++++++++++++++++++++++ gettext-tools/src/filters.h | 8 ++ gettext-tools/src/msgfilter.c | 7 ++ 4 files changed, 169 insertions(+) create mode 100755 gettext-tools/src/filter-quote.c diff --git a/gettext-tools/src/Makefile.am b/gettext-tools/src/Makefile.am index fe44293..3d50c71 100644 --- a/gettext-tools/src/Makefile.am +++ b/gettext-tools/src/Makefile.am @@ -214,6 +214,7 @@ else msgfilter_SOURCES = ../woe32dll/c++msgfilter.cc endif msgfilter_SOURCES += filter-sr-latin.c +msgfilter_SOURCES += filter-quote.c if !WOE32DLL msggrep_SOURCES = msggrep.c else diff --git a/gettext-tools/src/filter-quote.c b/gettext-tools/src/filter-quote.c new file mode 100755 index 0000000..bdfb3c3 --- /dev/null +++ b/gettext-tools/src/filter-quote.c @@ -0,0 +1,153 @@ +/* Convert ASCII quotation marks to Unicode quotation marks. + Copyright (C) 2014 Free Software Foundation, Inc. + Written by Daiki Ueno <[email protected]>, 2014. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +/* Specification. 
*/
+#include "filters.h"
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include "xalloc.h"
+
+/* Convert ASCII quotation marks in INPUT (INPUT_LEN bytes) to Unicode
+   quotation marks, written as three-byte UTF-8 sequences:
+     "..."  becomes  U+201C ... U+201D  (an empty "" is left unchanged);
+     `...'  becomes  U+2018 ... U+2019;
+     '...'  becomes  U+2018 ... U+2019, but only when the left quote is
+            preceded by a space and the right quote is followed by a
+            space or is the last byte, or when the left quote is the
+            first byte and the right quote is followed by a space.
+   All other bytes are copied unchanged.
+   Store the freshly allocated, NUL-terminated result in *OUTPUT_P and
+   its length in bytes (not counting the NUL) in *OUTPUT_LEN_P.  */
+void
+ascii_quote_to_unicode (const char *input, size_t input_len,
+                        char **output_p, size_t *output_len_p)
+{
+  size_t i;
+  const char *start, *p;
+  char *output, *r;
+  /* True while a quotation is open; START then points at the opening
+     quote character.  */
+  bool state = false;
+
+  /* START is the first input byte that has not been written to the
+     output yet.  */
+  start = input;
+
+  /* Large enough: each input byte yields at most 3 output bytes (a
+     one-byte ASCII quote becomes a three-byte UTF-8 sequence), plus
+     one byte for the terminating NUL.  */
+  r = output = XNMALLOC (3 * input_len + 1, char);
+
+  for (i = 0; i < input_len; i++)
+    {
+      p = &input[i];
+      switch (*p)
+        {
+        case '"':
+          if (state)
+            {
+              /* Only '"' closes a quotation opened by '"'.  Inside a
+                 quotation opened by another character it is left in
+                 the pending text.  */
+              if (*start == '"')
+                {
+                  if (p > start + 1)
+                    {
+                      /* U+201C: LEFT DOUBLE QUOTATION MARK */
+                      memcpy (r, "\xe2\x80\x9c", 3);
+                      r += 3;
+                      memcpy (r, start + 1, p - start - 1);
+                      r += p - start - 1;
+                      /* U+201D: RIGHT DOUBLE QUOTATION MARK */
+                      memcpy (r, "\xe2\x80\x9d", 3);
+                      r += 3;
+                    }
+                  else
+                    {
+                      /* An empty pair "" is emitted unchanged.  */
+                      memcpy (r, "\"\"", 2);
+                      r += 2;
+                    }
+                  start = p + 1;
+                  state = false;
+                }
+            }
+          else
+            {
+              /* Flush the text before the opening quote and remember
+                 where the quotation starts.  */
+              memcpy (r, start, p - start);
+              r += p - start;
+              start = p;
+              state = true;
+            }
+          break;
+
+        case '`':
+          if (state)
+            {
+              if (*start == '`')
+                {
+                  /* A second '`' before any closing '\'': emit the text
+                     from the first '`' literally and restart the
+                     quotation here.  R has to advance past the copied
+                     bytes; otherwise the next write overwrites them.  */
+                  memcpy (r, start, p - start);
+                  r += p - start;
+                  start = p;
+                }
+            }
+          else
+            {
+              /* Flush the text before the opening quote and remember
+                 where the quotation starts.  */
+              memcpy (r, start, p - start);
+              r += p - start;
+              start = p;
+              state = true;
+            }
+          break;
+
+        case '\'':
+          if (state)
+            {
+              /* Convert if the quotation was opened by '`', or if it
+                 was opened by '\'' and the pair is delimited by spaces
+                 or the ends of INPUT as described above the function.
+                 I is compared against INPUT_LEN - 1 before P + 1 is
+                 read, so the read stays inside INPUT.  */
+              if (*start == '`'
+                  || (*start == '\''
+                      && (((start > input && *(start - 1) == ' ')
+                           && (i == input_len - 1 || *(p + 1) == ' '))
+                          || (start == input && i < input_len - 1
+                              && *(p + 1) == ' '))))
+                {
+                  /* U+2018: LEFT SINGLE QUOTATION MARK */
+                  memcpy (r, "\xe2\x80\x98", 3);
+                  r += 3;
+                  memcpy (r, start + 1, p - start - 1);
+                  r += p - start - 1;
+                  /* U+2019: RIGHT SINGLE QUOTATION MARK */
+                  memcpy (r, "\xe2\x80\x99", 3);
+                  r += 3;
+                  start = p + 1;
+                }
+              else
+                {
+                  /* Not a recognized pair: emit the pending text
+                     literally; this '\'' stays pending.  */
+                  memcpy (r, start, p - start);
+                  r += p - start;
+                  start = p;
+                }
+              state = false;
+            }
+          else if (start == input || *(start - 1) == ' ')
+            {
+              /* NOTE(review): the test above looks at the byte before
+                 START (the first byte not yet copied), not the byte
+                 before P; confirm that this is the intended condition
+                 for an opening quote.  */
+              memcpy (r, start, p - start);
+              r += p - start;
+              start = p;
+              state = true;
+            }
+          break;
+        }
+    }
+
+  /* Here I == INPUT_LEN, so P is one past the last input byte.  Flush
+     whatever is still pending, including an unclosed opening quote.  */
+  p = &input[i];
+  if (p > start)
+    {
+      memcpy (r, start, p - start);
+      r += p - start;
+    }
+  *r = '\0';
+
+  *output_p = output;
+  *output_len_p = r - output;
+}
diff --git a/gettext-tools/src/filters.h b/gettext-tools/src/filters.h
index 93128b0..1d47fbe 100644
--- a/gettext-tools/src/filters.h
+++ b/gettext-tools/src/filters.h
@@ -29,6 +29,14 @@ extern "C" {
 extern void serbian_to_latin (const char *input, size_t input_len,
                               char **output_p, size_t *output_len_p);
 
+/* Convert a string INPUT of INPUT_LEN bytes, converting ASCII quotation
+   marks to Unicode quotation marks.
+   Store the freshly allocated result in *OUTPUT_P and its length (in bytes)
+   in *OUTPUT_LEN_P.
+   Input and output are in UTF-8 encoding. */
+extern void ascii_quote_to_unicode (const char *input, size_t input_len,
+                                    char **output_p, size_t *output_len_p);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/gettext-tools/src/msgfilter.c b/gettext-tools/src/msgfilter.c
index b92eef0..0cf76b8 100644
--- a/gettext-tools/src/msgfilter.c
+++ b/gettext-tools/src/msgfilter.c
@@ -349,6 +349,13 @@ There is NO WARRANTY, to the extent permitted by law.\n\
       /* Convert the input to UTF-8 first. */
       result = iconv_msgdomain_list (result, po_charset_utf8, true, input_file);
     }
+  else if (strcmp (sub_name, "quot") == 0 && sub_argc == 1)
+    {
+      filter = ascii_quote_to_unicode;
+
+      /* Convert the input to UTF-8 first. */
+      result = iconv_msgdomain_list (result, po_charset_utf8, true, input_file);
+    }
   else
     {
       filter = generic_filter;
-- 
1.9.0
