Updated patch attached. Note that caching the abbreviated month names like this speeds `ls -l` up by around 10%, which is nice, though not that important, as ls output is mainly meant to be read by humans.
cheers, Pádraig.
>From dec4db2e8e07f90845f66858de21b0446c591746 Mon Sep 17 00:00:00 2001 From: =?utf-8?q?P=C3=A1draig=20Brady?= <p...@draigbrady.com> Date: Tue, 24 Mar 2009 14:29:21 +0000 Subject: [PATCH] ls: Fix alignment when month names have varying widths * NEWS: Mention the fix * gl/lib/mbsalign.c: A new module to align and or truncate a string in a specified number of screen cells. * gl/lib/mbsalign.h: Ditto * gl/modules/mbsalign: Ditto * bootstrap.conf: Reference the new module * src/ls.c (abmon_init): New function, precompute the abbreviated months aligned left in a minimum width column <= 5 screen cells. Note this caching of the month strings speeds up `ls -l` by around 10% on glibc-2.7-2 on linux at least. (align_nstrftime): A new function to replace use of %b in the format specification to strftime with the precomputed month strings. --- NEWS | 3 + bootstrap.conf | 1 + gl/lib/mbsalign.c | 169 +++++++++++++++++++++++++++++++++++++++++++++++++++ gl/lib/mbsalign.h | 23 +++++++ gl/modules/mbsalign | 26 ++++++++ src/ls.c | 83 ++++++++++++++++++++++++- 6 files changed, 303 insertions(+), 2 deletions(-) create mode 100644 gl/lib/mbsalign.c create mode 100644 gl/lib/mbsalign.h create mode 100644 gl/modules/mbsalign diff --git a/NEWS b/NEWS index 766f271..b71452c 100644 --- a/NEWS +++ b/NEWS @@ -22,6 +22,9 @@ GNU coreutils NEWS -*- outline -*- ls --sort=version (-v) sorted names beginning with "." inconsistently. Now, names that start with "." are always listed before those that don't. + ls now aligns output correctly in the presence of abbreviated month names + from the locale database that have differing widths. 
+ pr: fix the bug whereby --indent=N (-o) did not indent header lines [bug introduced in coreutils-6.9.90] diff --git a/bootstrap.conf b/bootstrap.conf index 0747bb8..ae033d9 100644 --- a/bootstrap.conf +++ b/bootstrap.conf @@ -70,6 +70,7 @@ gnulib_modules=" long-options lstat malloc manywarnings mbrtowc + mbsalign mbswidth memcasecmp memcmp2 mempcpy memrchr mgetgroups diff --git a/gl/lib/mbsalign.c b/gl/lib/mbsalign.c new file mode 100644 index 0000000..cc1339d --- /dev/null +++ b/gl/lib/mbsalign.c @@ -0,0 +1,169 @@ +/* Align/Truncate a string in a given screen width + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Pádraig Brady. */ + +#include <config.h> +#include "mbsalign.h" + +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include <wchar.h> +#include <wctype.h> +#include "xalloc.h" + +static int wc_ensure_printable (wchar_t * wchars); +static size_t wc_truncate (wchar_t * wchars, size_t width); +static int rpl_wcswidth (const wchar_t *s, size_t n); + +/* Align a string in a given screen width, handling multibyte characters. + In addition if the string is too large for the width it's truncated. + When centering the number of trailing spaces may be 1 less than the + number of leading spaces. + Returns number of bytes written to dest (not including the trailing NUL). 
+ Returns number of display cells used in the width parameter. */ + +int +mbsalign (const char *src, char *dest, size_t dest_size, + int *width, mbs_align_t align) +{ + int src_len = strlen (src); + int ret = 0; + char *newstr = NULL; + wchar_t *str_wc = NULL; + const char *str_to_print = src; + int used = src_len, spaces, wc_conversion = 0, wc_enabled = 0; + + if (MB_CUR_MAX > 1) + { + int src_chars = mbstowcs (NULL, src, 0) + 1; + str_wc = xmalloc (src_chars * sizeof (wchar_t)); + if (mbstowcs (str_wc, src, src_chars) > 0) + { + str_wc[src_chars - 1] = L'\0'; + wc_enabled = 1; + wc_conversion = wc_ensure_printable (str_wc); + used = rpl_wcswidth (str_wc, src_chars); + } + } + + if (wc_conversion || used > *width) + { + newstr = xmalloc (src_len); + str_to_print = newstr; + if (wc_enabled) + { + used = wc_truncate (str_wc, *width); + wcstombs (newstr, str_wc, src_len); + } + else + { + memcpy (newstr, src, *width); + newstr[*width] = '\0'; + } + } + + spaces = *width - used; + spaces = (spaces < 0 ? 0 : spaces); + *width = used; /* indicate to called how many cells used. */ + + /* FIXME: Should I be padding with "figure space" (\u2007) + rather than spaces below? (only if non ascii data present) */ + switch (align) + { + case MBS_ALIGN_CENTER: + ret = snprintf (dest, dest_size, "%*s%s%*s", + spaces / 2 + spaces % 2, "", + str_to_print, spaces / 2, ""); + break; + case MBS_ALIGN_LEFT: + ret = snprintf (dest, dest_size, "%s%*s", str_to_print, spaces, ""); + break; + case MBS_ALIGN_RIGHT: + ret = snprintf (dest, dest_size, "%*s%s", spaces, "", str_to_print); + break; + } + + free (str_wc); + free (newstr); + + return ret; +} + +/* Replace non printable chars. + return 1 if replacement made, 0 otherwise. 
*/ + +static int +wc_ensure_printable (wchar_t * wchars) +{ + int replaced = 0; + wchar_t *wc = wchars; + while (*wc) + { + if (!iswprint ((wint_t) * wc)) + { + *wc = 0xFFFD; /* L'\uFFFD' (replacement char) */ + replaced = 1; + } + wc++; + } + return replaced; +} + +/* truncate wchar string to width cells. + * returns number of cells used. */ + +static size_t +wc_truncate (wchar_t * wc, size_t width) +{ + int cells = 0, next_cells = 0; + while (*wc) + { + next_cells = wcwidth (*wc); + if (next_cells == -1) /* non printable */ + { + *wc = 0xFFFD; /* L'\uFFFD' (replacement char) */ + next_cells = 1; + } + if (cells + next_cells > width) + break; + cells += next_cells; + wc++; + } + *wc = L'\0'; + return cells; +} + +/* FIXME: move this function to gnulib as it's missing on: + OpenBSD 3.8, IRIX 5.3, Solaris 2.5.1, mingw, BeOS */ + +static int +rpl_wcswidth (const wchar_t *s, size_t n) +{ + int ret = 0; + + while (n-- > 0 && *s != L'\0') + { + int nwidth = wcwidth (*s++); + if (nwidth == -1) /* non printable */ + return -1; + ret += nwidth; + } + + return ret; +} + diff --git a/gl/lib/mbsalign.h b/gl/lib/mbsalign.h new file mode 100644 index 0000000..d9c0c0a --- /dev/null +++ b/gl/lib/mbsalign.h @@ -0,0 +1,23 @@ +/* Align/Truncate a string in a given screen width + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ + +#include <stddef.h> + +typedef enum { MBS_ALIGN_LEFT, MBS_ALIGN_RIGHT, MBS_ALIGN_CENTER } mbs_align_t; + +int +mbsalign (const char *src, char *dest, size_t dest_size, + int *width, mbs_align_t align); diff --git a/gl/modules/mbsalign b/gl/modules/mbsalign new file mode 100644 index 0000000..895945c --- /dev/null +++ b/gl/modules/mbsalign @@ -0,0 +1,26 @@ +Description: +Align/Truncate a string in a given screen width. + +Files: +lib/mbsalign.c +lib/mbsalign.h + +Depends-on: +xalloc +wchar +wctype +wcwidth + +configure.ac: + +Makefile.am: +lib_SOURCES += mbsalign.c mbsalign.h + +Include: +"mbsalign.h" + +License: +LGPL + +Maintainer: +Pádraig Brady diff --git a/src/ls.c b/src/ls.c index fa6a59a..74ee2ca 100644 --- a/src/ls.c +++ b/src/ls.c @@ -67,6 +67,10 @@ #include <selinux/selinux.h> #include <wchar.h> +#if HAVE_LANGINFO_CODESET +# include <langinfo.h> +#endif + /* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is present. */ #ifndef SA_NOCLDSTOP @@ -105,6 +109,7 @@ #include "strftime.h" #include "xstrtol.h" #include "areadlink.h" +#include "mbsalign.h" #define PROGRAM_NAME (ls_mode == LS_LS ? "ls" \ : (ls_mode == LS_MULTI_COL \ @@ -695,6 +700,11 @@ static char const *long_time_format[2] = screen columns small, because many people work in windows with only 80 columns. But make this as wide as the other string below, for recent files. */ + /* TRANSLATORS: ls output needs to be aligned for ease of reading, + so be wary of using variable width fields from the locale. + Note %b is handled specially by ls and aligned correctly. + Note also that specifying a width as in %5b is erroneous as strftime + will count bytes rather than characters in multibyte locales. */ N_("%b %e %Y"), /* strftime format for recent files (younger than 6 months), in -l output. This should contain the month, day and time (at @@ -703,6 +713,11 @@ static char const *long_time_format[2] = screen columns small, because many people work in windows with only 80 columns. 
But make this as wide as the other string above, for non-recent files. */ + /* TRANSLATORS: ls output needs to be aligned for ease of reading, + so be wary of using variable width fields from the locale. + Note %b is handled specially by ls and aligned correctly. + Note also that specifying a width as in %5b is erroneous as strftime + will count bytes rather than characters in multibyte locales. */ N_("%b %e %H:%M") }; @@ -933,6 +948,41 @@ static struct obstack dev_ino_obstack; } \ while (0) +/* Read the abbreviated month names from the locale, to determine the + max width of the field and truncate names greater than our max allowed. + Note even though this handles multibyte locales correctly + it's not restricted to them as single byte locales can have + variable width abbreviated months and also precomputing/caching + the names was seen to increase the performance of ls by around 10% + on glibc-2.7-2 on linux at least. */ +enum { MAX_MON_WIDTH = 5 }; /* max number of display cells to use */ +static char abmon[12][MAX_MON_WIDTH * MB_LEN_MAX + 1]; +static int required_mon_width; +void +abmon_init (void) +{ +#ifdef HAVE_NL_LANGINFO + required_mon_width = MAX_MON_WIDTH; + int curr_max_width; + do + { + curr_max_width = required_mon_width; + required_mon_width = 0; + for (int i = 0; i < 12; i++) + { + int width = curr_max_width; + + mbsalign (nl_langinfo (ABMON_1 + i), + abmon[i], sizeof (abmon[i]), + &width, MBS_ALIGN_LEFT); + + required_mon_width = MAX (required_mon_width, width); + } + } + while (curr_max_width > required_mon_width); +#endif +} + /* Pop a dev/ino struct off the global dev_ino_obstack and return that struct. */ static struct dev_ino @@ -1953,6 +2003,9 @@ decode_switches (int argc, char **argv) } } } + /* Note we leave %5b etc. alone so user widths/flags are honoured. 
*/ + if (strstr(long_time_format[0],"%b") || strstr(long_time_format[1],"%b")) + abmon_init(); } return optind; @@ -3317,6 +3370,32 @@ print_current_files (void) } } +/* Replace %b with precomputed aligned data. */ + +static size_t +align_nstrftime (char *src, size_t size, char const *fmt, struct tm const *tm, + int __utc, int __ns) +{ + const char *nfmt = fmt; + char *rpl_fmt = NULL; + if (required_mon_width) + { + rpl_fmt = xmalloc (strlen (fmt) + MAX_MON_WIDTH - 2 + 1); + char *pb = strstr (fmt, "%b"); + char *pfmt = rpl_fmt; + nfmt = rpl_fmt; + + memcpy (pfmt, fmt, pb - fmt); + pfmt += pb - fmt; + strcpy (pfmt, abmon[tm->tm_mon]); + pfmt = strchr (pfmt, '\0'); + memcpy (pfmt, pb + 2, strlen (pb + 2) + 1); + } + size_t ret = nstrftime (src, size, nfmt, tm, __utc, __ns); + free (rpl_fmt); + return ret; +} + /* Return the expected number of columns in a long-format time stamp, or zero if it cannot be calculated. */ @@ -3341,7 +3420,7 @@ long_time_expected_width (void) if (tm) { size_t len = - nstrftime (buf, sizeof buf, long_time_format[0], tm, 0, 0); + align_nstrftime (buf, sizeof buf, long_time_format[0], tm, 0, 0); if (len != 0) width = mbsnwidth (buf, len, 0); } @@ -3616,7 +3695,7 @@ print_long_format (const struct fileinfo *f) /* We assume here that all time zones are offset from UTC by a whole number of seconds. */ - s = nstrftime (p, TIME_STAMP_LEN_MAXIMUM + 1, fmt, + s = align_nstrftime (p, TIME_STAMP_LEN_MAXIMUM + 1, fmt, when_local, 0, when_timespec.tv_nsec); } -- 1.5.3.6
_______________________________________________ Bug-coreutils mailing list Bug-coreutils@gnu.org http://lists.gnu.org/mailman/listinfo/bug-coreutils