Updated patch attached.

Note that caching the abbreviated month names like this speeds
`ls -l` up by around 10%, which is nice, though not that important,
as ls output is mainly for processing by humans.

cheers,
Pádraig.

>From dec4db2e8e07f90845f66858de21b0446c591746 Mon Sep 17 00:00:00 2001
From: =?utf-8?q?P=C3=A1draig=20Brady?= <p...@draigbrady.com>
Date: Tue, 24 Mar 2009 14:29:21 +0000
Subject: [PATCH] ls: Fix alignment when month names have varying widths

* NEWS: Mention the fix.
* gl/lib/mbsalign.c: A new module to align and/or truncate a
string in a specified number of screen cells.
* gl/lib/mbsalign.h: Ditto.
* gl/modules/mbsalign: Ditto.
* bootstrap.conf: Reference the new module.
* src/ls.c (abmon_init): New function, precompute the abbreviated
months aligned left in a minimum width column <= 5 screen cells.
Note this caching of the month strings speeds up `ls -l` by around
10% on glibc-2.7-2 on linux at least.
(align_nstrftime): A new function to replace use of %b in the
format specification to strftime with the precomputed month strings.
---
 NEWS                |    3 +
 bootstrap.conf      |    1 +
 gl/lib/mbsalign.c   |  169 +++++++++++++++++++++++++++++++++++++++++++++++++++
 gl/lib/mbsalign.h   |   23 +++++++
 gl/modules/mbsalign |   26 ++++++++
 src/ls.c            |   83 ++++++++++++++++++++++++-
 6 files changed, 303 insertions(+), 2 deletions(-)
 create mode 100644 gl/lib/mbsalign.c
 create mode 100644 gl/lib/mbsalign.h
 create mode 100644 gl/modules/mbsalign

diff --git a/NEWS b/NEWS
index 766f271..b71452c 100644
--- a/NEWS
+++ b/NEWS
@@ -22,6 +22,9 @@ GNU coreutils NEWS                                    -*- outline -*-
   ls --sort=version (-v) sorted names beginning with "." inconsistently.
   Now, names that start with "." are always listed before those that don't.
 
+  ls now aligns output correctly in the presence of abbreviated month names
+  from the locale database that have differing widths.
+
   pr: fix the bug whereby --indent=N (-o) did not indent header lines
   [bug introduced in coreutils-6.9.90]
 
diff --git a/bootstrap.conf b/bootstrap.conf
index 0747bb8..ae033d9 100644
--- a/bootstrap.conf
+++ b/bootstrap.conf
@@ -70,6 +70,7 @@ gnulib_modules="
 	long-options lstat malloc
 	manywarnings
 	mbrtowc
+	mbsalign
 	mbswidth
 	memcasecmp memcmp2 mempcpy
 	memrchr mgetgroups
diff --git a/gl/lib/mbsalign.c b/gl/lib/mbsalign.c
new file mode 100644
index 0000000..cc1339d
--- /dev/null
+++ b/gl/lib/mbsalign.c
@@ -0,0 +1,169 @@
+/* Align/Truncate a string in a given screen width
+   Copyright (C) 2009 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/* Written by Pádraig Brady.  */
+
+#include <config.h>
+#include "mbsalign.h"
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <wchar.h>
+#include <wctype.h>
+#include "xalloc.h"
+
+static int wc_ensure_printable (wchar_t * wchars);
+static size_t wc_truncate (wchar_t * wchars, size_t width);
+static int rpl_wcswidth (const wchar_t *s, size_t n);
+
+/* Align SRC in a field of *WIDTH screen cells, handling multibyte characters,
+   and write the result to DEST (at most DEST_SIZE bytes including the NUL).
+   SRC is truncated if too wide for the field, and non printable characters
+   are replaced.  When centering, the number of trailing spaces may be 1 less
+   than the number of leading spaces.  Return the snprintf result: the bytes
+   needed for the aligned string, not including the trailing NUL.
+   On return *WIDTH holds the number of screen cells used by SRC itself;
+   bytes are counted instead in single byte locales or for invalid strings.  */
+
+int
+mbsalign (const char *src, char *dest, size_t dest_size,
+          int *width, mbs_align_t align)
+{
+  int src_len = strlen (src);
+  char *newstr = NULL;
+  wchar_t *str_wc = NULL;
+  const char *str_to_print = src;
+  int used = src_len, spaces, ret = 0, wc_conversion = 0, wc_enabled = 0;
+  /* (size_t) -1 here means SRC is not a valid multibyte string.  */
+  size_t src_chars = MB_CUR_MAX > 1 ? mbstowcs (NULL, src, 0) : (size_t) -1;
+
+  if (src_chars != (size_t) -1 && src_chars > 0)
+    {
+      str_wc = xmalloc ((src_chars + 1) * sizeof (wchar_t));
+      if (mbstowcs (str_wc, src, src_chars + 1) == src_chars)
+        {
+          str_wc[src_chars] = L'\0';
+          wc_enabled = 1;
+          wc_conversion = wc_ensure_printable (str_wc);
+          used = rpl_wcswidth (str_wc, src_chars);
+        }
+    }
+
+  if (wc_conversion || used > *width)
+    {
+      /* Replacement chars may need more bytes than SRC: size for worst case.  */
+      size_t newsize = src_len + (wc_enabled ? src_chars * MB_CUR_MAX : 0) + 1;
+      newstr = xmalloc (newsize);
+      str_to_print = newstr;
+      if (wc_enabled)
+        used = wc_truncate (str_wc, *width);
+      if (!wc_enabled || wcstombs (newstr, str_wc, newsize) == (size_t) -1)
+        {
+          /* Fall back to truncating bytes, reporting the bytes kept.  */
+          used = src_len < *width ? src_len : *width;
+          memcpy (newstr, src, used);
+          newstr[used] = '\0';
+        }
+    }
+
+  spaces = *width > used ? *width - used : 0;
+  *width = used;  /* indicate to caller how many cells used.  */
+
+  /* FIXME: Should I be padding with "figure space" (\u2007)
+     rather than spaces below? (only if non ascii data present)  */
+  switch (align)
+    {
+    case MBS_ALIGN_CENTER:
+      ret = snprintf (dest, dest_size, "%*s%s%*s",
+                      spaces / 2 + spaces % 2, "",
+                      str_to_print, spaces / 2, "");
+      break;
+    case MBS_ALIGN_LEFT:
+      ret = snprintf (dest, dest_size, "%s%*s", str_to_print, spaces, "");
+      break;
+    case MBS_ALIGN_RIGHT:
+      ret = snprintf (dest, dest_size, "%*s%s", spaces, "", str_to_print);
+      break;
+    }
+  free (str_wc);
+  free (newstr);
+  return ret;
+}
+
+/* Overwrite each non printable character in the NUL terminated wide
+   string WCHARS with U+FFFD.  Return 1 if any was replaced, else 0.  */
+
+static int
+wc_ensure_printable (wchar_t * wchars)
+{
+  int changed = 0;
+  wchar_t *p;
+  for (p = wchars; *p != L'\0'; p++)
+    {
+      if (iswprint ((wint_t) *p))
+        continue;
+      /* L'\uFFFD' is the Unicode replacement character.  */
+      *p = 0xFFFD;
+      changed = 1;
+    }
+
+  return changed;
+}
+
+/* Truncate the wide string WC to at most WIDTH screen cells, replacing any
+   char wcwidth reports as -1 with U+FFFD.  Return the cells used.  */
+
+static size_t
+wc_truncate (wchar_t * wc, size_t width)
+{
+  int total = 0;
+  for (; *wc != L'\0'; wc++)
+    {
+      int w = wcwidth (*wc);
+      if (w == -1)
+        {
+          /* Non printable: substitute L'\uFFFD' (replacement char).  */
+          *wc = 0xFFFD;
+          w = 1;
+        }
+      if (total + w > width)
+        break;
+      total += w;
+    }
+  *wc = L'\0';
+  return total;
+}
+
+/* Return the screen cells used by at most N chars of S, or -1 if any is
+   non printable.  FIXME: move this function to gnulib as it's missing on:
+   OpenBSD 3.8, IRIX 5.3, Solaris 2.5.1, mingw, BeOS  */
+
+static int
+rpl_wcswidth (const wchar_t *s, size_t n)
+{
+  int total = 0;
+  size_t i;
+  for (i = 0; i < n && s[i] != L'\0'; i++)
+    {
+      int w = wcwidth (s[i]);
+      if (w == -1)
+        return -1;
+      total += w;
+    }
+  return total;
+}
+
diff --git a/gl/lib/mbsalign.h b/gl/lib/mbsalign.h
new file mode 100644
index 0000000..d9c0c0a
--- /dev/null
+++ b/gl/lib/mbsalign.h
@@ -0,0 +1,23 @@
+/* Align/Truncate a string in a given screen width
+   Copyright (C) 2009 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <stddef.h>
+
+typedef enum { MBS_ALIGN_LEFT, MBS_ALIGN_RIGHT, MBS_ALIGN_CENTER } mbs_align_t;
+/* Align SRC in DEST.  *WIDTH is cells wanted on entry, cells used on exit.  */
+int
+mbsalign (const char *src, char *dest, size_t dest_size,
+          int *width, mbs_align_t align);
diff --git a/gl/modules/mbsalign b/gl/modules/mbsalign
new file mode 100644
index 0000000..895945c
--- /dev/null
+++ b/gl/modules/mbsalign
@@ -0,0 +1,26 @@
+Description:
+Align/Truncate a string in a given screen width.
+
+Files:
+lib/mbsalign.c
+lib/mbsalign.h
+
+Depends-on:
+xalloc
+wchar
+wctype
+wcwidth
+
+configure.ac:
+
+Makefile.am:
+lib_SOURCES += mbsalign.c mbsalign.h
+
+Include:
+"mbsalign.h"
+
+License:
+LGPL
+
+Maintainer:
+Pádraig Brady
diff --git a/src/ls.c b/src/ls.c
index fa6a59a..74ee2ca 100644
--- a/src/ls.c
+++ b/src/ls.c
@@ -67,6 +67,10 @@
 #include <selinux/selinux.h>
 #include <wchar.h>
 
+#if HAVE_LANGINFO_CODESET
+# include <langinfo.h>
+#endif
+
 /* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is
    present.  */
 #ifndef SA_NOCLDSTOP
@@ -105,6 +109,7 @@
 #include "strftime.h"
 #include "xstrtol.h"
 #include "areadlink.h"
+#include "mbsalign.h"
 
 #define PROGRAM_NAME (ls_mode == LS_LS ? "ls" \
 		      : (ls_mode == LS_MULTI_COL \
@@ -695,6 +700,11 @@ static char const *long_time_format[2] =
        screen columns small, because many people work in windows with
        only 80 columns.  But make this as wide as the other string
        below, for recent files.  */
+    /* TRANSLATORS: ls output needs to be aligned for ease of reading,
+       so be wary of using variable width fields from the locale.
+       Note %b is handled specially by ls and aligned correctly.
+       Note also that specifying a width as in %5b is erroneous as strftime
+       will count bytes rather than characters in multibyte locales.  */
     N_("%b %e  %Y"),
     /* strftime format for recent files (younger than 6 months), in -l
        output.  This should contain the month, day and time (at
@@ -703,6 +713,11 @@ static char const *long_time_format[2] =
        screen columns small, because many people work in windows with
        only 80 columns.  But make this as wide as the other string
        above, for non-recent files.  */
+    /* TRANSLATORS: ls output needs to be aligned for ease of reading,
+       so be wary of using variable width fields from the locale.
+       Note %b is handled specially by ls and aligned correctly.
+       Note also that specifying a width as in %5b is erroneous as strftime
+       will count bytes rather than characters in multibyte locales.  */
     N_("%b %e %H:%M")
   };
 
@@ -933,6 +948,41 @@ static struct obstack dev_ino_obstack;
     }									\
   while (0)
 
+/* Cache the locale's abbreviated month names in abmon[], left aligned in
+   the narrowest common column of at most MAX_MON_WIDTH screen cells.
+   This is also done for single byte locales as their month widths can vary,
+   and caching sped up ls by around 10% on glibc-2.7-2 on linux at least.
+   If a name doesn't fit in abmon[], required_mon_width is left at 0.  */
+enum { MAX_MON_WIDTH = 5 }; /* max number of display cells to use */
+static char abmon[12][MAX_MON_WIDTH * MB_LEN_MAX + 1];
+static int required_mon_width; /* 0 means abmon[] must not be used */
+static void
+abmon_init (void)
+{
+#ifdef HAVE_NL_LANGINFO
+  required_mon_width = MAX_MON_WIDTH;
+  int curr_max_width;
+  do
+    {
+      curr_max_width = required_mon_width;
+      required_mon_width = 0;
+      for (int i = 0; i < 12; i++)
+	{
+	  int width = curr_max_width;
+	  int req = mbsalign (nl_langinfo (ABMON_1 + i), abmon[i],
+			      sizeof (abmon[i]), &width, MBS_ALIGN_LEFT);
+	  if (req < 0 || (size_t) req >= sizeof (abmon[i]))
+	    {
+	      required_mon_width = 0; /* name cut short: don't use cache */
+	      return;
+	    }
+	  required_mon_width = MAX (required_mon_width, width);
+	}
+    }
+  while (curr_max_width > required_mon_width);
+#endif
+}
+
 /* Pop a dev/ino struct off the global dev_ino_obstack
    and return that struct.  */
 static struct dev_ino
@@ -1953,6 +2003,9 @@ decode_switches (int argc, char **argv)
 		  }
 	      }
 	  }
+      /* Note we leave %5b etc. alone so user widths/flags are honoured.  */
+      if (strstr(long_time_format[0],"%b") || strstr(long_time_format[1],"%b"))
+	abmon_init();
     }
 
   return optind;
@@ -3317,6 +3370,32 @@ print_current_files (void)
     }
 }
 
+/* Like nstrftime, but replace the first %b in FMT with the month name
+   cached by abmon_init.  FMT is used as is if nothing was cached
+   (required_mon_width is 0) or if it has no %b.  */
+
+static size_t
+align_nstrftime (char *src, size_t size, char const *fmt, struct tm const *tm,
+		 int __utc, int __ns)
+{
+  char const *nfmt = fmt;
+  char *rpl_fmt = NULL;
+  char const *pb = required_mon_width ? strstr (fmt, "%b") : NULL;
+  if (pb)
+    {
+      /* Size by the cached name's byte length, which can exceed its width.  */
+      char const *mon = abmon[tm->tm_mon];
+      size_t pre_len = pb - fmt, mon_len = strlen (mon);
+      nfmt = rpl_fmt = xmalloc (pre_len + mon_len + strlen (pb + 2) + 1);
+      memcpy (rpl_fmt, fmt, pre_len);
+      memcpy (rpl_fmt + pre_len, mon, mon_len);
+      strcpy (rpl_fmt + pre_len + mon_len, pb + 2);
+    }
+  size_t ret = nstrftime (src, size, nfmt, tm, __utc, __ns);
+  free (rpl_fmt);
+  return ret;
+}
+
 /* Return the expected number of columns in a long-format time stamp,
    or zero if it cannot be calculated.  */
 
@@ -3341,7 +3420,7 @@ long_time_expected_width (void)
       if (tm)
 	{
 	  size_t len =
-	    nstrftime (buf, sizeof buf, long_time_format[0], tm, 0, 0);
+	    align_nstrftime (buf, sizeof buf, long_time_format[0], tm, 0, 0);
 	  if (len != 0)
 	    width = mbsnwidth (buf, len, 0);
 	}
@@ -3616,7 +3695,7 @@ print_long_format (const struct fileinfo *f)
 
       /* We assume here that all time zones are offset from UTC by a
 	 whole number of seconds.  */
-      s = nstrftime (p, TIME_STAMP_LEN_MAXIMUM + 1, fmt,
+      s = align_nstrftime (p, TIME_STAMP_LEN_MAXIMUM + 1, fmt,
 		     when_local, 0, when_timespec.tv_nsec);
     }
 
-- 
1.5.3.6

_______________________________________________
Bug-coreutils mailing list
Bug-coreutils@gnu.org
http://lists.gnu.org/mailman/listinfo/bug-coreutils

Reply via email to