[PATCH] contrib/git-jump: allow to configure the grep command

2017-11-09 Thread Beat Bolli
Add the configuration option "jump.grepCmd" that allows configuring the
command that is used to search in grep mode. This allows the users of
git-jump to use ag(1) or ack(1) as search engines.

Signed-off-by: Beat Bolli 
---
 contrib/git-jump/README   | 3 +++
 contrib/git-jump/git-jump | 7 +++++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/contrib/git-jump/README b/contrib/git-jump/README
index 225e3f095..9f58d5db8 100644
--- a/contrib/git-jump/README
+++ b/contrib/git-jump/README
@@ -63,6 +63,9 @@ git jump grep foo_bar
 # same as above, but case-insensitive; you can give
 # arbitrary grep options
 git jump grep -i foo_bar
+
+# use the silver searcher for git jump grep
+git config jump.grepCmd "ag --column"
 --
 
 
diff --git a/contrib/git-jump/git-jump b/contrib/git-jump/git-jump
index 427f206a4..80ab0590b 100755
--- a/contrib/git-jump/git-jump
+++ b/contrib/git-jump/git-jump
@@ -11,7 +11,8 @@ diff: elements are diff hunks. Arguments are given to diff.
 
 merge: elements are merge conflicts. Arguments are ignored.
 
-grep: elements are grep hits. Arguments are given to grep.
+grep: elements are grep hits. Arguments are given to git grep or, if
+  configured, to the command in `jump.grepCmd`.
 
 ws: elements are whitespace errors. Arguments are given to diff --check.
 EOF
@@ -50,7 +51,9 @@ mode_merge() {
 # but let's clean up extra whitespace, so they look better if the
 # editor shows them to us in the status bar.
 mode_grep() {
-   git grep -n "$@" |
+   cmd=$(git config jump.grepCmd)
+   test -n "$cmd" || cmd="git grep -n"
+   $cmd "$@" |
perl -pe '
s/[ \t]+/ /g;
s/^ *//;
-- 
2.15.0.rc1.299.gda03b47c3



[PATCH] contrib/git-jump: allow to configure the grep command

2017-11-19 Thread Beat Bolli
Add the configuration option "jump.grepCmd" that allows configuring the
command that is used to search in grep mode. This allows the users of
git-jump to use ag(1) or ack(1) as search engines.

Signed-off-by: Beat Bolli 
---
 contrib/git-jump/README   | 3 +++
 contrib/git-jump/git-jump | 7 +++++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/contrib/git-jump/README b/contrib/git-jump/README
index 225e3f095..9f58d5db8 100644
--- a/contrib/git-jump/README
+++ b/contrib/git-jump/README
@@ -63,6 +63,9 @@ git jump grep foo_bar
 # same as above, but case-insensitive; you can give
 # arbitrary grep options
 git jump grep -i foo_bar
+
+# use the silver searcher for git jump grep
+git config jump.grepCmd "ag --column"
 --
 
 
diff --git a/contrib/git-jump/git-jump b/contrib/git-jump/git-jump
index 427f206a4..80ab0590b 100755
--- a/contrib/git-jump/git-jump
+++ b/contrib/git-jump/git-jump
@@ -11,7 +11,8 @@ diff: elements are diff hunks. Arguments are given to diff.
 
 merge: elements are merge conflicts. Arguments are ignored.
 
-grep: elements are grep hits. Arguments are given to grep.
+grep: elements are grep hits. Arguments are given to git grep or, if
+  configured, to the command in `jump.grepCmd`.
 
 ws: elements are whitespace errors. Arguments are given to diff --check.
 EOF
@@ -50,7 +51,9 @@ mode_merge() {
 # but let's clean up extra whitespace, so they look better if the
 # editor shows them to us in the status bar.
 mode_grep() {
-   git grep -n "$@" |
+   cmd=$(git config jump.grepCmd)
+   test -n "$cmd" || cmd="git grep -n"
+   $cmd "$@" |
perl -pe '
s/[ \t]+/ /g;
s/^ *//;
-- 
2.15.0.rc1.299.gda03b47c3



[PATCH] unicode: update the width tables to Unicode 10

2017-07-07 Thread Beat Bolli
Now that the Unicode 10 has been announced[0], update the character
width tables to the new version.

[0] http://blog.unicode.org/2017/06/announcing-unicode-standard-version-100.html

Signed-off-by: Beat Bolli 
---
 unicode_width.h | 42 +++++++++++++++++++++++++++++-------------
 1 file changed, 29 insertions(+), 13 deletions(-)

diff --git a/unicode_width.h b/unicode_width.h
index 02207be4f..6dee2c77c 100644
--- a/unicode_width.h
+++ b/unicode_width.h
@@ -51,6 +51,7 @@ static const struct interval zero_width[] = {
 { 0x0AC7, 0x0AC8 },
 { 0x0ACD, 0x0ACD },
 { 0x0AE2, 0x0AE3 },
+{ 0x0AFA, 0x0AFF },
 { 0x0B01, 0x0B01 },
 { 0x0B3C, 0x0B3C },
 { 0x0B3F, 0x0B3F },
@@ -73,7 +74,8 @@ static const struct interval zero_width[] = {
 { 0x0CC6, 0x0CC6 },
 { 0x0CCC, 0x0CCD },
 { 0x0CE2, 0x0CE3 },
-{ 0x0D01, 0x0D01 },
+{ 0x0D00, 0x0D01 },
+{ 0x0D3B, 0x0D3C },
 { 0x0D41, 0x0D44 },
 { 0x0D4D, 0x0D4D },
 { 0x0D62, 0x0D63 },
@@ -158,7 +160,7 @@ static const struct interval zero_width[] = {
 { 0x1CED, 0x1CED },
 { 0x1CF4, 0x1CF4 },
 { 0x1CF8, 0x1CF9 },
-{ 0x1DC0, 0x1DF5 },
+{ 0x1DC0, 0x1DF9 },
 { 0x1DFB, 0x1DFF },
 { 0x200B, 0x200F },
 { 0x202A, 0x202E },
@@ -262,6 +264,15 @@ static const struct interval zero_width[] = {
 { 0x1171D, 0x1171F },
 { 0x11722, 0x11725 },
 { 0x11727, 0x1172B },
+{ 0x11A01, 0x11A06 },
+{ 0x11A09, 0x11A0A },
+{ 0x11A33, 0x11A38 },
+{ 0x11A3B, 0x11A3E },
+{ 0x11A47, 0x11A47 },
+{ 0x11A51, 0x11A56 },
+{ 0x11A59, 0x11A5B },
+{ 0x11A8A, 0x11A96 },
+{ 0x11A98, 0x11A99 },
 { 0x11C30, 0x11C36 },
 { 0x11C38, 0x11C3D },
 { 0x11C3F, 0x11C3F },
@@ -269,6 +280,11 @@ static const struct interval zero_width[] = {
 { 0x11CAA, 0x11CB0 },
 { 0x11CB2, 0x11CB3 },
 { 0x11CB5, 0x11CB6 },
+{ 0x11D31, 0x11D36 },
+{ 0x11D3A, 0x11D3A },
+{ 0x11D3C, 0x11D3D },
+{ 0x11D3F, 0x11D45 },
+{ 0x11D47, 0x11D47 },
 { 0x16AF0, 0x16AF4 },
 { 0x16B30, 0x16B36 },
 { 0x16F8F, 0x16F92 },
@@ -339,7 +355,7 @@ static const struct interval double_width[] = {
 { 0x3000, 0x303E },
 { 0x3041, 0x3096 },
 { 0x3099, 0x30FF },
-{ 0x3105, 0x312D },
+{ 0x3105, 0x312E },
 { 0x3131, 0x318E },
 { 0x3190, 0x31BA },
 { 0x31C0, 0x31E3 },
@@ -358,10 +374,11 @@ static const struct interval double_width[] = {
 { 0xFE68, 0xFE6B },
 { 0xFF01, 0xFF60 },
 { 0xFFE0, 0xFFE6 },
-{ 0x16FE0, 0x16FE0 },
+{ 0x16FE0, 0x16FE1 },
 { 0x17000, 0x187EC },
 { 0x18800, 0x18AF2 },
-{ 0x1B000, 0x1B001 },
+{ 0x1B000, 0x1B11E },
+{ 0x1B170, 0x1B2FB },
 { 0x1F004, 0x1F004 },
 { 0x1F0CF, 0x1F0CF },
 { 0x1F18E, 0x1F18E },
@@ -370,6 +387,7 @@ static const struct interval double_width[] = {
 { 0x1F210, 0x1F23B },
 { 0x1F240, 0x1F248 },
 { 0x1F250, 0x1F251 },
+{ 0x1F260, 0x1F265 },
 { 0x1F300, 0x1F320 },
 { 0x1F32D, 0x1F335 },
 { 0x1F337, 0x1F37C },
@@ -392,15 +410,13 @@ static const struct interval double_width[] = {
 { 0x1F6CC, 0x1F6CC },
 { 0x1F6D0, 0x1F6D2 },
 { 0x1F6EB, 0x1F6EC },
-{ 0x1F6F4, 0x1F6F6 },
-{ 0x1F910, 0x1F91E },
-{ 0x1F920, 0x1F927 },
-{ 0x1F930, 0x1F930 },
-{ 0x1F933, 0x1F93E },
-{ 0x1F940, 0x1F94B },
-{ 0x1F950, 0x1F95E },
-{ 0x1F980, 0x1F991 },
+{ 0x1F6F4, 0x1F6F8 },
+{ 0x1F910, 0x1F93E },
+{ 0x1F940, 0x1F94C },
+{ 0x1F950, 0x1F96B },
+{ 0x1F980, 0x1F997 },
 { 0x1F9C0, 0x1F9C0 },
+{ 0x1F9D0, 0x1F9E6 },
 { 0x20000, 0x2FFFD },
 { 0x30000, 0x3FFFD }
 };
-- 
2.13.2.753.g7f5404b



Re: [PATCH] unicode: update the width tables to Unicode 10

2017-07-07 Thread Beat Bolli


-- 
„It takes love over gold” — Dire Straits

> On 7 Jul 2017, at 17:43, Junio C Hamano  wrote:
> 
> Beat Bolli  writes:
> 
>> Now that the Unicode 10 has been announced[0], update the character
>> width tables to the new version.

Typo! Could you drop the first "the" from the message?

Thanks,
Beat

>> 
>> [0] 
>> http://blog.unicode.org/2017/06/announcing-unicode-standard-version-100.html
>> 
>> Signed-off-by: Beat Bolli 
>> ---
> 
> Thanks, again, for keeping an eye on the progress in the external
> world ;-)  Will apply.



Re: SEC_E_BUFFER_TOO_SMALL on Windows

2018-04-23 Thread Beat Bolli
On Mon, Apr 23, 2018 at 11:13:41AM -0500, Jason B. Nance wrote:
> Hello all,
> 
> We are seeing intermittent errors with Git 2.16.2.windows.1 on Windows
> 7 connecting to TFS 2017 (running in a Jenkins slave process):
> 
> ERROR: Error cloning remote repo 'origin'
> hudson.plugins.git.GitException: Command "C:\tools\Git\bin\git.exe
> fetch --tags --progress
> https://internal-tfs-server/tfs/project/_git/repo
> +refs/heads/*:refs/remotes/origin/*" returned status code 128:
> stdout: stderr: fatal: unable to access
> 'https://internal-tfs-server/tfs/project/_git/repo/': schannel:
> next InitializeSecurityContext failed: SEC_E_BUFFER_TOO_SMALL
> (0x80090321) - The buffers supplied to a function was too small.
> 
> I found the following thread from 2015 on a cURL list that seems to be
> similar:
> 
> https://curl.haxx.se/mail/lib-2015-04/0136.html
> 
> However, it looks like a patch was released for that issue:
> 
> https://curl.haxx.se/mail/lib-2015-04/0152.html
> 
> Rebooting the Windows client appears to resolve the issue for a time.
> 
> Has anyone else experienced this and found a resolution or workaround?

This answer seems relevant: https://stackoverflow.com/a/39217099/232775 .
The link in the answer is no longer available; the current link is
https://developer.microsoft.com/en-us/microsoft-edge/platform/issues/4906705/ .

The obvious workaround would be to retry the request, because the error
happens randomly depending on the value of a Diffie-Hellman ephemeral
key parameter.

Cheers,
Beat


signature.asc
Description: PGP signature


Re: [PATCH] git: add -N as a short option for --no-pager

2018-04-24 Thread Beat Bolli
On Wed, Apr 25, 2018 at 09:05:56AM +0900, Junio C Hamano wrote:
> Johannes Sixt  writes:
> 
> > In modern setups, less, the pager, uses alternate screen to show
> > the content. When it is closed, it switches back to the original
> > screen, and all content is gone.
> >
> > It is not uncommon to request that the output remains visible in
> > the terminal. For this, the option --no-pager can be used. But
> > it is a bit cumbersome to type, even when command completion is
> > available. Provide a short option, -N, to make the option easier
> > accessible.
> >
> > Signed-off-by: Johannes Sixt 
> > ---
> 
> Heh, I used to append "|cat", which is four keystrokes that is a bit
> shorter than " --no-pager", but that is only acceptable when you do
> not care about colored output ;-)
> 
> I am not absolutely certain about the choice of a single letter. I
> already checked we do not use "git -N cmd" for anything else right
> now, so I am certain about the availability, but I am not sure if
> capital 'N' is the best choice, when the other side is lowercase 'p'
> (and more importantly, the other side 'p' has mnemonic value for
> 'pagination', but 'N' merely stands for 'no' and could be negating
> anything, not related to pagination). But I agree that a short-hand
> would be welcome.
> 

I'm quite fond of the notation "-p-", but that would set a precedent for
all other "--no-" options.

Maybe the option parser could be enhanced to allow for both?

Thanks,
Beat

> > diff --git a/Documentation/git.txt b/Documentation/git.txt
> > index 4767860e72..17b50b0dc6 100644
> > --- a/Documentation/git.txt
> > +++ b/Documentation/git.txt
> > @@ -11,7 +11,7 @@ SYNOPSIS
> >  [verse]
> >  'git' [--version] [--help] [-C <path>] [-c <name>=<value>]
> >  [--exec-path[=<path>]] [--html-path] [--man-path] [--info-path]
> > -[-p|--paginate|--no-pager] [--no-replace-objects] [--bare]
> > +[-p|--paginate|-N|--no-pager] [--no-replace-objects] [--bare]
> >  [--git-dir=<path>] [--work-tree=<path>] [--namespace=<name>]
> >  [--super-prefix=<path>]
> >   <command> [<args>]
> > @@ -103,6 +103,7 @@ foo.bar= ...`) sets `foo.bar` to the empty string which 
> > `git config
> > configuration options (see the "Configuration Mechanism" section
> > below).
> >  
> > +-N::
> >  --no-pager::
> > Do not pipe Git output into a pager.


signature.asc
Description: PGP signature


Re: git https and github

2018-04-25 Thread Beat Bolli
On 25.04.18 02:32, Lev wrote:
> Hi list,
> 
> 
> I'm struggling with git connecting to Github.
> 
> The problem might be SSL/TLS related.
> 
> https://githubengineering.com/crypto-removal-notice/
> 
> I suspect that my setup still uses tlsv1 or tlsv1.1.
> 
> I've tried to explicitly set git to use tlsv1.2 in my .gitconfig file
> like this:
> 
> [http]
>   sslVersion = tlsv1.2

This is the default, so this setting should not be needed, unless it's
overridden in some higher prioritized git config file. Have you tried

git -c http.sslVersion=tlsv1.2 clone 

? This should override any settings files.

> I've tried to re-compile git with OpenSSL and GnuTLS. All give the
> same error.
> 
> git clone https://github.com/OnionIoT/source.git
> Cloning into 'source'...
> * Couldn't find host github.com in the .netrc file; using defaults
> *   Trying 192.30.253.112...
> * TCP_NODELAY set
> * Connected to github.com (192.30.253.112) port 443 (#0)
> * ALPN, offering http/1.1
> * Cipher selection:
> ALL:!EXPORT:!EXPORT40:!EXPORT56:!aNULL:!LOW:!RC4:@STRENGTH
> * successfully set certificate verify locations:
> *   CAfile: /etc/ssl/certs/ca-certificates.crt
>   CApath: /etc/ssl/certs
> * error:1409442E:SSL routines:ssl3_read_bytes:tlsv1 alert protocol
> version
> * Curl_http_done: called premature == 1
> * stopped the pause stream!
> * Closing connection 0
> fatal: unable to access 'https://github.com/OnionIoT/source.git/':
> error:1409442E:SSL routines:ssl3_read_bytes:tlsv1 alert protocol
> version lev@jive:~/git$ unset GIT_SSL_VERSION lev@jive:~/git$ git clone
> https://github.com/OnionIoT/source.git Cloning into 'source'...
> * Couldn't find host github.com in the .netrc file; using defaults
> *   Trying 192.30.253.112...
> * TCP_NODELAY set
> * Connected to github.com (192.30.253.112) port 443 (#0)
> * ALPN, offering http/1.1
> * Cipher selection:
> ALL:!EXPORT:!EXPORT40:!EXPORT56:!aNULL:!LOW:!RC4:@STRENGTH
> * successfully set certificate verify locations:
> *   CAfile: /etc/ssl/certs/ca-certificates.crt
>   CApath: /etc/ssl/certs
> * error:1409442E:SSL routines:ssl3_read_bytes:tlsv1 alert protocol
> version
> * Curl_http_done: called premature == 1
> * stopped the pause stream!
> * Closing connection 0
> fatal: unable to access 'https://github.com/OnionIoT/source.git/':
> error:1409442E:SSL routines:ssl3_read_bytes:tlsv1 alert protocol version
> 
> 
> I can connect to other git servers without any error. This is a debian
> stable system with the following components:
> 
>   git version 2.11.0
>   libcurl 7.52.1
>   OpenSSL 1.0.2l

This OpenSSL version is certainly recent enough to support TLSv1.2. Are
you sure you ran the newly compiled git binary?

(Sorry for asking stupid questions; it's sometimes difficult to get to
the root of a problem)

> 
> 
> Is there any way to know what is the exact protocol used? Are there any
> workaround, fix for this issue?
> 
> Any help welcome. Thank you,
> Levente
> 


Cheers,
Beat



[PATCH] builtin/config: work around an unsized array forward declaration

2018-07-05 Thread Beat Bolli
As reported here[0], Microsoft Visual Studio 2017.2 and "gcc -pedantic"
don't understand the forward declaration of an unsized static array.
They insist on an array size:

d:\git\src\builtin\config.c(70,46): error C2133: 'builtin_config_options': 
unknown size

The thread [1] explains that this is due to the single-pass nature of
old compilers.

To work around this error, introduce the forward-declared function
usage_builtin_config() instead that uses the array
builtin_config_options only after it has been defined.

Also use this function in all other places where usage_with_options() is
called with the same arguments.

[0]: https://github.com/git-for-windows/git/issues/1735
[1]: https://groups.google.com/forum/#!topic/comp.lang.c.moderated/bmiF2xMz51U

Fixes https://github.com/git-for-windows/git/issues/1735

Reported-By: Karen Huang (via GitHub)
Signed-off-by: Beat Bolli 
---
 builtin/config.c | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/builtin/config.c b/builtin/config.c
index b29d26dede..2c93a289a7 100644
--- a/builtin/config.c
+++ b/builtin/config.c
@@ -67,7 +67,7 @@ static int show_origin;
{ OPTION_CALLBACK, (s), (l), (v), NULL, (h), PARSE_OPT_NOARG | \
PARSE_OPT_NONEG, option_parse_type, (i) }
 
-static struct option builtin_config_options[];
+static NORETURN void usage_builtin_config(void);
 
 static int option_parse_type(const struct option *opt, const char *arg,
 int unset)
@@ -111,8 +111,7 @@ static int option_parse_type(const struct option *opt, 
const char *arg,
 * --type=int'.
 */
error("only one type at a time.");
-   usage_with_options(builtin_config_usage,
-   builtin_config_options);
+   usage_builtin_config();
}
*to_type = new_type;
 
@@ -157,11 +156,16 @@ static struct option builtin_config_options[] = {
OPT_END(),
 };
 
+static NORETURN void usage_builtin_config(void)
+{
+   usage_with_options(builtin_config_usage, builtin_config_options);
+}
+
 static void check_argc(int argc, int min, int max) {
if (argc >= min && argc <= max)
return;
error("wrong number of arguments");
-   usage_with_options(builtin_config_usage, builtin_config_options);
+   usage_builtin_config();
 }
 
 static void show_config_origin(struct strbuf *buf)
@@ -596,7 +600,7 @@ int cmd_config(int argc, const char **argv, const char 
*prefix)
if (use_global_config + use_system_config + use_local_config +
!!given_config_source.file + !!given_config_source.blob > 1) {
error("only one config file at a time.");
-   usage_with_options(builtin_config_usage, 
builtin_config_options);
+   usage_builtin_config();
}
 
if (use_local_config && nongit)
@@ -660,12 +664,12 @@ int cmd_config(int argc, const char **argv, const char 
*prefix)
 
if ((actions & (ACTION_GET_COLOR|ACTION_GET_COLORBOOL)) && type) {
error("--get-color and variable type are incoherent");
-   usage_with_options(builtin_config_usage, 
builtin_config_options);
+   usage_builtin_config();
}
 
if (HAS_MULTI_BITS(actions)) {
error("only one action at a time.");
-   usage_with_options(builtin_config_usage, 
builtin_config_options);
+   usage_builtin_config();
}
if (actions == 0)
switch (argc) {
@@ -673,25 +677,24 @@ int cmd_config(int argc, const char **argv, const char 
*prefix)
case 2: actions = ACTION_SET; break;
case 3: actions = ACTION_SET_ALL; break;
default:
-   usage_with_options(builtin_config_usage, 
builtin_config_options);
+   usage_builtin_config();
}
if (omit_values &&
!(actions == ACTION_LIST || actions == ACTION_GET_REGEXP)) {
error("--name-only is only applicable to --list or 
--get-regexp");
-   usage_with_options(builtin_config_usage, 
builtin_config_options);
+   usage_builtin_config();
}
 
if (show_origin && !(actions &
(ACTION_GET|ACTION_GET_ALL|ACTION_GET_REGEXP|ACTION_LIST))) {
error("--show-origin is only applicable to --get, --get-all, "
  "--get-regexp, and --list.");
-   usage_with_options(builtin_config_usage, 
builtin_config_options);
+   usage_builtin_config();
}
 
if (default_value && !(actions & ACTION_GET)) {
error("--default is only applicable to --get");
-   usage_with_options(builtin_config_usage,
-   builtin_config_options);
+   usage_builtin_config();
}
 
if (actions & PAGING_ACTIONS)
-- 
2.15.0.rc1.299.gda03b47c3



Re: [PATCH] builtin/config: work around an unsized array forward declaration

2018-07-05 Thread Beat Bolli
Hi Peff

On 05.07.18 21:38, Jeff King wrote:
> On Thu, Jul 05, 2018 at 08:34:45PM +0200, Beat Bolli wrote:
> 
>> As reported here[0], Microsoft Visual Studio 2017.2 and "gcc -pedantic"
>> don't understand the forward declaration of an unsized static array.
>> They insist on an array size:
>>
>> d:\git\src\builtin\config.c(70,46): error C2133: 
>> 'builtin_config_options': unknown size
>>
>> The thread [1] explains that this is due to the single-pass nature of
>> old compilers.
> 
> Right, that makes sense.
> 
>> To work around this error, introduce the forward-declared function
>> usage_builtin_config() instead that uses the array
>> builtin_config_options only after it has been defined.
>>
>> Also use this function in all other places where usage_with_options() is
>> called with the same arguments.
> 
> Your patch is obviously correct, but I think here there might be an even
> simpler solution: just bump option_parse_type() below the declaration,
> since it's the only one that needs it. That hunk is bigger, but the
> overall diff is simpler, and we don't need to carry that extra wrapper
> function.

That was dscho's first try in the GitHub issue. It doesn't compile
because the OPT_CALLBACK* macros in the builtin_config_options
declaration insert a pointer to option_parse_type into the array items.
We need at least one forward declaration, and my patch seemed the least
intrusive.

> As a general rule for this case (because reordering isn't always an
> option), I also wonder if we should prefer just introducing a pointer
> alias:
> 
>   /* forward declaration is a pointer */
>   static struct option *builtin_config_options;
> 
>   /* later, declare the actual storage and its alias */
>   static struct option builtin_config_options_storage[] = {
>   ...
>   };
>   static struct option *builtin_config_options = 
> builtin_config_options_storage;
> 
> There are occasionally cases where the caller really wants an array and
> not a pointer, but in practice those are pretty rare.
> 
> I have a slight preference for the reordering solution in this case, but
> any of them would be OK with me.
> 
> -Peff 

Regards, Beat



[RFC PATCH 0/6] Compile cleanly in pedantic mode

2018-07-08 Thread Beat Bolli
While developing 6aaded550 ("builtin/config: work around an unsized
array forward declaration", 2018-07-05), I have compiled Git with
CFLAGS="-std=c99 -pedantic".

This is an RFC patch series that fixes a few compiler warnings when
compiling with these options, always assuming that this is a worthwhile
goal.

Note that all warnings were produced by -pedantic; the C99 standard
option by itself didn't cause any of them.

The warnings were:

1) Char arrays initialized from a parenthesized string.

Suppressed by defining USE_PARENS_AROUND_GETTEXT_N to 0
globally. This was done just to keep the amount of warnings
manageable; this series leaves that knob alone. The advantage of
not mistakenly concatenating two translated strings is greater.

2) connect.h, refs/refs-internal.h: Forward reference to an enum.

Added two #includes that define the enums. This was already
(inconclusively) talked about in [0].

3) convert.c: Invalid escape sequence "\e".

Replaced with "\033".

4) sequencer.c: Empty statements at top level.

Removed the extra semicolons.

5) string-list.c: Forbidden to cast from void * to a function pointer and
   vice versa.

Encapsulated the function pointer in a context struct. This is
controversial because it has a performance impact, namely one
additional pointer dereference per string comparison. An
alternative might be to use multiple casts via intptr_t. But
I'm not sure if this is worth the trouble.

6) utf8.c: overflow of char values.

Use unsigned char for the BOM constants.

This series has patches for 2) to 6).

Regards,
Beat

[0] 
https://public-inbox.org/git/53ab8626-f862-a732-b369-abeab69a4...@ramsayjones.plus.com/T/


[RFC PATCH 2/6] refs/refs-internal.h: avoid forward declaration of an enum

2018-07-08 Thread Beat Bolli
Include iterator.h to define enum iterator_selection.

Signed-off-by: Beat Bolli 
---
 refs/refs-internal.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/refs/refs-internal.h b/refs/refs-internal.h
index dd834314bd..a78b5cb803 100644
--- a/refs/refs-internal.h
+++ b/refs/refs-internal.h
@@ -1,6 +1,8 @@
 #ifndef REFS_REFS_INTERNAL_H
 #define REFS_REFS_INTERNAL_H
 
+#include "iterator.h"   /* for enum iterator_selection */
+
 /*
  * Data structures and functions for the internal use of the refs
  * module. Code outside of the refs module should use only the public
-- 
2.15.0.rc1.299.gda03b47c3



[RFC PATCH 6/6] utf8.c: avoid char overflow

2018-07-08 Thread Beat Bolli
In ISO C, char constants must be in the range -128..127. Change the BOM
constants to unsigned char to avoid overflow.

Signed-off-by: Beat Bolli 
---
 utf8.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/utf8.c b/utf8.c
index d55e20c641..833ce00617 100644
--- a/utf8.c
+++ b/utf8.c
@@ -561,15 +561,15 @@ char *reencode_string_len(const char *in, int insz,
 #endif
 
 static int has_bom_prefix(const char *data, size_t len,
- const char *bom, size_t bom_len)
+ const unsigned char *bom, size_t bom_len)
 {
return data && bom && (len >= bom_len) && !memcmp(data, bom, bom_len);
 }
 
-static const char utf16_be_bom[] = {0xFE, 0xFF};
-static const char utf16_le_bom[] = {0xFF, 0xFE};
-static const char utf32_be_bom[] = {0x00, 0x00, 0xFE, 0xFF};
-static const char utf32_le_bom[] = {0xFF, 0xFE, 0x00, 0x00};
+static const unsigned char utf16_be_bom[] = {0xFE, 0xFF};
+static const unsigned char utf16_le_bom[] = {0xFF, 0xFE};
+static const unsigned char utf32_be_bom[] = {0x00, 0x00, 0xFE, 0xFF};
+static const unsigned char utf32_le_bom[] = {0xFF, 0xFE, 0x00, 0x00};
 
 int has_prohibited_utf_bom(const char *enc, const char *data, size_t len)
 {
-- 
2.15.0.rc1.299.gda03b47c3



[RFC PATCH 4/6] sequencer.c: avoid empty statements at top level

2018-07-08 Thread Beat Bolli
The macro GIT_PATH_FUNC expands to a complete statement including the
semicolon. Remove two extra trailing semicolons.

Signed-off-by: Beat Bolli 
---
 sequencer.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sequencer.c b/sequencer.c
index 5354d4d51e..66e7073995 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -62,12 +62,12 @@ static GIT_PATH_FUNC(rebase_path_done, "rebase-merge/done")
  * The file to keep track of how many commands were already processed (e.g.
  * for the prompt).
  */
-static GIT_PATH_FUNC(rebase_path_msgnum, "rebase-merge/msgnum");
+static GIT_PATH_FUNC(rebase_path_msgnum, "rebase-merge/msgnum")
 /*
  * The file to keep track of how many commands are to be processed in total
  * (e.g. for the prompt).
  */
-static GIT_PATH_FUNC(rebase_path_msgtotal, "rebase-merge/end");
+static GIT_PATH_FUNC(rebase_path_msgtotal, "rebase-merge/end")
 /*
  * The commit message that is planned to be used for any changes that
  * need to be committed following a user interaction.
-- 
2.15.0.rc1.299.gda03b47c3



[RFC PATCH 1/6] connect.h: avoid forward declaration of an enum

2018-07-08 Thread Beat Bolli
Include protocol.h to define enum protocol_version.

Signed-off-by: Beat Bolli 
---
 connect.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/connect.h b/connect.h
index 0e69c6709c..c86f862f2f 100644
--- a/connect.h
+++ b/connect.h
@@ -1,6 +1,8 @@
 #ifndef CONNECT_H
 #define CONNECT_H
 
+#include "protocol.h"   /* for enum protocol_version */
+
 #define CONNECT_VERBOSE   (1u << 0)
 #define CONNECT_DIAG_URL  (1u << 1)
 #define CONNECT_IPV4  (1u << 2)
-- 
2.15.0.rc1.299.gda03b47c3



[RFC PATCH 5/6] string-list.c: avoid conversion from void * to function pointer

2018-07-08 Thread Beat Bolli
ISO C forbids the conversion of void pointers to function pointers.
Introduce a context struct that encapsulates the function pointer.

Signed-off-by: Beat Bolli 
---
 string-list.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/string-list.c b/string-list.c
index a0cf0cfe88..771c455098 100644
--- a/string-list.c
+++ b/string-list.c
@@ -224,18 +224,28 @@ struct string_list_item *string_list_append(struct 
string_list *list,
list->strdup_strings ? xstrdup(string) : (char 
*)string);
 }
 
+/*
+ * Encapsulate the compare function pointer because ISO C99 forbids
+ * casting from void * to a function pointer and vice versa.
+ */
+struct string_list_sort_ctx
+{
+   compare_strings_fn cmp;
+};
+
 static int cmp_items(const void *a, const void *b, void *ctx)
 {
-   compare_strings_fn cmp = ctx;
+   struct string_list_sort_ctx *sort_ctx = ctx;
const struct string_list_item *one = a;
const struct string_list_item *two = b;
-   return cmp(one->string, two->string);
+   return sort_ctx->cmp(one->string, two->string);
 }
 
 void string_list_sort(struct string_list *list)
 {
-   QSORT_S(list->items, list->nr, cmp_items,
-   list->cmp ? list->cmp : strcmp);
+   struct string_list_sort_ctx sort_ctx = {list->cmp ? list->cmp : strcmp};
+
+   QSORT_S(list->items, list->nr, cmp_items, &sort_ctx);
 }
 
 struct string_list_item *unsorted_string_list_lookup(struct string_list *list,
-- 
2.15.0.rc1.299.gda03b47c3



[RFC PATCH 3/6] convert.c: replace "\e" escapes with "\033".

2018-07-08 Thread Beat Bolli
The "\e" escape is not defined in ISO C.

While on this line, add a missing space after the comma.

Signed-off-by: Beat Bolli 
---
 convert.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/convert.c b/convert.c
index 64d0d30e08..edebb946f5 100644
--- a/convert.c
+++ b/convert.c
@@ -334,7 +334,7 @@ static void trace_encoding(const char *context, const char 
*path,
strbuf_addf(&trace, "%s (%s, considered %s):\n", context, path, 
encoding);
for (i = 0; i < len && buf; ++i) {
strbuf_addf(
-   &trace,"| \e[2m%2i:\e[0m %2x \e[2m%c\e[0m%c",
+   &trace, "| \033[2m%2i:\033[0m %2x \033[2m%c\033[0m%c",
i,
(unsigned char) buf[i],
(buf[i] > 32 && buf[i] < 127 ? buf[i] : ' '),
-- 
2.15.0.rc1.299.gda03b47c3



Re: [RFC PATCH 6/6] utf8.c: avoid char overflow

2018-07-09 Thread Beat Bolli

Hi Dscho

Am 09.07.2018 15:14, schrieb Johannes Schindelin:

Hi Beat,

On Sun, 8 Jul 2018, Beat Bolli wrote:

In ISO C, char constants must be in the range -128..127. Change the 
BOM

constants to unsigned char to avoid overflow.

Signed-off-by: Beat Bolli 
---
 utf8.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/utf8.c b/utf8.c
index d55e20c641..833ce00617 100644
--- a/utf8.c
+++ b/utf8.c
@@ -561,15 +561,15 @@ char *reencode_string_len(const char *in, int 
insz,

 #endif

 static int has_bom_prefix(const char *data, size_t len,
- const char *bom, size_t bom_len)
+ const unsigned char *bom, size_t bom_len)
 {
 	return data && bom && (len >= bom_len) && !memcmp(data, bom, 
bom_len);

 }

-static const char utf16_be_bom[] = {0xFE, 0xFF};
-static const char utf16_le_bom[] = {0xFF, 0xFE};
-static const char utf32_be_bom[] = {0x00, 0x00, 0xFE, 0xFF};
-static const char utf32_le_bom[] = {0xFF, 0xFE, 0x00, 0x00};
+static const unsigned char utf16_be_bom[] = {0xFE, 0xFF};
+static const unsigned char utf16_le_bom[] = {0xFF, 0xFE};
+static const unsigned char utf32_be_bom[] = {0x00, 0x00, 0xFE, 0xFF};
+static const unsigned char utf32_le_bom[] = {0xFF, 0xFE, 0x00, 0x00};


An alternative approach that might be easier to read (and avoids the
confusion arising from our use of (signed) chars for strings pretty 
much

everywhere):

#define FE ((char)0xfe)
#define FF ((char)0xff)

...


I have tried this first (without the macros, though), and thought it looked
really ugly. That's why I chose this solution. The usage is pretty local and
close to function has_bom_prefix().

Would an explaining comment help?

Beat


Re: [RFC PATCH 6/6] utf8.c: avoid char overflow

2018-07-09 Thread Beat Bolli

Am 09.07.2018 16:48, schrieb Beat Bolli:

Hi Dscho

Am 09.07.2018 15:14, schrieb Johannes Schindelin:

Hi Beat,

On Sun, 8 Jul 2018, Beat Bolli wrote:

In ISO C, char constants must be in the range -128..127. Change the 
BOM

constants to unsigned char to avoid overflow.

Signed-off-by: Beat Bolli 
---
 utf8.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/utf8.c b/utf8.c
index d55e20c641..833ce00617 100644
--- a/utf8.c
+++ b/utf8.c
@@ -561,15 +561,15 @@ char *reencode_string_len(const char *in, int 
insz,

 #endif

 static int has_bom_prefix(const char *data, size_t len,
- const char *bom, size_t bom_len)
+ const unsigned char *bom, size_t bom_len)
 {
 	return data && bom && (len >= bom_len) && !memcmp(data, bom, 
bom_len);

 }

-static const char utf16_be_bom[] = {0xFE, 0xFF};
-static const char utf16_le_bom[] = {0xFF, 0xFE};
-static const char utf32_be_bom[] = {0x00, 0x00, 0xFE, 0xFF};
-static const char utf32_le_bom[] = {0xFF, 0xFE, 0x00, 0x00};
+static const unsigned char utf16_be_bom[] = {0xFE, 0xFF};
+static const unsigned char utf16_le_bom[] = {0xFF, 0xFE};
+static const unsigned char utf32_be_bom[] = {0x00, 0x00, 0xFE, 
0xFF};
+static const unsigned char utf32_le_bom[] = {0xFF, 0xFE, 0x00, 
0x00};


An alternative approach that might be easier to read (and avoids the
confusion arising from our use of (signed) chars for strings pretty 
much

everywhere):

#define FE ((char)0xfe)
#define FF ((char)0xff)

...


I have tried this first (without the macros, though), and thought it looked
really ugly. That's why I chose this solution. The usage is pretty local and
close to function has_bom_prefix().

Would an explaining comment help?


I have found an even simpler solution. Use proper char literals.

I will put this into v2.

Regards,
Beat


diff --git a/utf8.c b/utf8.c
index d55e20c641..982217eec9 100644
--- a/utf8.c
+++ b/utf8.c
@@ -566,10 +566,10 @@ static int has_bom_prefix(const char *data, size_t 
len,
return data && bom && (len >= bom_len) && !memcmp(data, bom, 
bom_len);

 }

-static const char utf16_be_bom[] = {0xFE, 0xFF};
-static const char utf16_le_bom[] = {0xFF, 0xFE};
-static const char utf32_be_bom[] = {0x00, 0x00, 0xFE, 0xFF};
-static const char utf32_le_bom[] = {0xFF, 0xFE, 0x00, 0x00};
+static const char utf16_be_bom[] = {'\xFE', '\xFF'};
+static const char utf16_le_bom[] = {'\xFF', '\xFE'};
+static const char utf32_be_bom[] = {'\0', '\0', '\xFE', '\xFF'};
+static const char utf32_le_bom[] = {'\xFF', '\xFE', '\0', '\0'};

 int has_prohibited_utf_bom(const char *enc, const char *data, size_t 
len)

 {


Re: [RFC PATCH 6/6] utf8.c: avoid char overflow

2018-07-09 Thread Beat Bolli
On 09.07.18 18:33, Junio C Hamano wrote:
> Beat Bolli  writes:
> 
>>>> -static const char utf16_be_bom[] = {0xFE, 0xFF};
>>>> -static const char utf16_le_bom[] = {0xFF, 0xFE};
>>>> -static const char utf32_be_bom[] = {0x00, 0x00, 0xFE, 0xFF};
>>>> -static const char utf32_le_bom[] = {0xFF, 0xFE, 0x00, 0x00};
>>>> +static const unsigned char utf16_be_bom[] = {0xFE, 0xFF};
>>>> +static const unsigned char utf16_le_bom[] = {0xFF, 0xFE};
>>>> +static const unsigned char utf32_be_bom[] = {0x00, 0x00, 0xFE, 0xFF};
>>>> +static const unsigned char utf32_le_bom[] = {0xFF, 0xFE, 0x00, 0x00};
>>>
>>> An alternative approach that might be easier to read (and avoids the
>>> confusion arising from our use of (signed) chars for strings pretty
>>> much
>>> everywhere):
>>>
>>> #define FE ((char)0xfe)
>>> #define FF ((char)0xff)
>>>
>>> ...
>>
>> I have tried this first (without the macros, though), and thought
>> it looked really ugly. That's why I chose this solution. The usage
>> is pretty local and close to function has_bom_prefix().
> 
> I found that what you posted was already OK, as has_bom_prefix()
> appears only locally in this file and that is the only thing that
> cares about these foo_bom[] constants.  Casting the elements in
> these arrays to (char) type is also fine and not all that ugly,
> I think, and between the two (but without the macro) I have no
> strong preference.  I wonder if writing them as '\376' and '\377',
> as old timers would, helps the compiler, though.
> 

Yes, it does, as I found out in
https://public-inbox.org/git/e3df2644b59b170e26b2a7c0d3978...@drbeat.li/

But I prefer hex; it's closer to the usual definition of the BOM bytes.

Beat



[PATCH 0/6] Compile cleanly in pedantic mode

2018-07-09 Thread Beat Bolli
While developing 6aaded550 ("builtin/config: work around an unsized
array forward declaration", 2018-07-05), I have compiled Git with
CFLAGS="-std=c99 -pedantic".

This series fixes a few compiler warnings when compiling with these
options.

Note that all warnings were produced by -pedantic; the C99 standard
option by itself didn't cause any of them.

The warnings were:

1) Char arrays initialized from a parenthesized string.

Suppressed by defining USE_PARENS_AROUND_GETTEXT_N to 0
globally. This was done just to keep the amount of warnings
manageable; this series leaves that knob alone. The advantage of
not mistakenly concatenating two translated strings is greater.

2) connect.h, refs/refs-internal.h: Forward reference to an enum.

Added two #includes that define the enums. This was already
(inconclusively) talked about in [0].

3) convert.c: Invalid escape sequence "\e".

Replaced with "\033".

4) sequencer.c: Empty statements at top level.

Removed the extra semicolons.

5) string-list.c: Forbidden to cast from void * to a function pointer and
   vice versa.

Encapsulated the function pointer in a context struct. This is
controversial because it has a performance impact, namely one
additional pointer dereference per string comparison. An
alternative might be to use multiple casts via intptr_t. But
I'm not sure if this is worth the trouble.

6) utf8.c: overflow of char values.

Used proper char literals for the BOM constants.

This series has patches for 2) to 6).

Regards,
Beat

[0] 
https://public-inbox.org/git/53ab8626-f862-a732-b369-abeab69a4...@ramsayjones.plus.com/T/


Beat Bolli (6):
  connect.h: avoid forward declaration of an enum
  refs/refs-internal.h: avoid forward declaration of an enum
  convert.c: replace "\e" escapes with "\033".
  sequencer.c: avoid empty statements at top level
  string-list.c: avoid conversion from void * to function pointer
  utf8.c: avoid char overflow

 connect.h|  2 ++
 convert.c|  2 +-
 path.h   |  2 +-
 refs/refs-internal.h |  2 ++
 sequencer.c  |  4 ++--
 string-list.c| 18 ++
 utf8.c   |  8 
 7 files changed, 26 insertions(+), 12 deletions(-)


Interdiff from the RFC series:

diff --git a/path.h b/path.h
index 1ccd0373c9..fc9d3487a0 100644
--- a/path.h
+++ b/path.h
@@ -147,7 +147,7 @@ extern void report_linked_checkout_garbage(void);
 /*
  * You can define a static memoized git path like:
  *
- *static GIT_PATH_FUNC(git_path_foo, "FOO");
+ *static GIT_PATH_FUNC(git_path_foo, "FOO")
  *
  * or use one of the global ones below.
  */
diff --git a/utf8.c b/utf8.c
index 833ce00617..982217eec9 100644
--- a/utf8.c
+++ b/utf8.c
@@ -561,15 +561,15 @@ char *reencode_string_len(const char *in, int insz,
 #endif

 static int has_bom_prefix(const char *data, size_t len,
- const unsigned char *bom, size_t bom_len)
+ const char *bom, size_t bom_len)
 {
return data && bom && (len >= bom_len) && !memcmp(data, bom, bom_len);
 }

-static const unsigned char utf16_be_bom[] = {0xFE, 0xFF};
-static const unsigned char utf16_le_bom[] = {0xFF, 0xFE};
-static const unsigned char utf32_be_bom[] = {0x00, 0x00, 0xFE, 0xFF};
-static const unsigned char utf32_le_bom[] = {0xFF, 0xFE, 0x00, 0x00};
+static const char utf16_be_bom[] = {'\xFE', '\xFF'};
+static const char utf16_le_bom[] = {'\xFF', '\xFE'};
+static const char utf32_be_bom[] = {'\0', '\0', '\xFE', '\xFF'};
+static const char utf32_le_bom[] = {'\xFF', '\xFE', '\0', '\0'};

 int has_prohibited_utf_bom(const char *enc, const char *data, size_t len)
 {

-- 
2.18.0.203.gfac676dfb9


[PATCH 5/6] string-list.c: avoid conversion from void * to function pointer

2018-07-09 Thread Beat Bolli
ISO C forbids the conversion of void pointers to function pointers.
Introduce a context struct that encapsulates the function pointer.

Signed-off-by: Beat Bolli 
---
 string-list.c | 18 ++
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/string-list.c b/string-list.c
index a0cf0cfe88..771c455098 100644
--- a/string-list.c
+++ b/string-list.c
@@ -224,18 +224,28 @@ struct string_list_item *string_list_append(struct 
string_list *list,
list->strdup_strings ? xstrdup(string) : (char 
*)string);
 }
 
+/*
+ * Encapsulate the compare function pointer because ISO C99 forbids
+ * casting from void * to a function pointer and vice versa.
+ */
+struct string_list_sort_ctx
+{
+   compare_strings_fn cmp;
+};
+
 static int cmp_items(const void *a, const void *b, void *ctx)
 {
-   compare_strings_fn cmp = ctx;
+   struct string_list_sort_ctx *sort_ctx = ctx;
const struct string_list_item *one = a;
const struct string_list_item *two = b;
-   return cmp(one->string, two->string);
+   return sort_ctx->cmp(one->string, two->string);
 }
 
 void string_list_sort(struct string_list *list)
 {
-   QSORT_S(list->items, list->nr, cmp_items,
-   list->cmp ? list->cmp : strcmp);
+   struct string_list_sort_ctx sort_ctx = {list->cmp ? list->cmp : strcmp};
+
+   QSORT_S(list->items, list->nr, cmp_items, &sort_ctx);
 }
 
 struct string_list_item *unsorted_string_list_lookup(struct string_list *list,
-- 
2.18.0.203.gfac676dfb9



[PATCH 4/6] sequencer.c: avoid empty statements at top level

2018-07-09 Thread Beat Bolli
The macro GIT_PATH_FUNC expands to a function definition that ends with
a closing brace. Remove two extra semicolons.

While at it, fix the example in path.h.

Signed-off-by: Beat Bolli 
---
 path.h  | 2 +-
 sequencer.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/path.h b/path.h
index 1ccd0373c9..fc9d3487a0 100644
--- a/path.h
+++ b/path.h
@@ -147,7 +147,7 @@ extern void report_linked_checkout_garbage(void);
 /*
  * You can define a static memoized git path like:
  *
- *static GIT_PATH_FUNC(git_path_foo, "FOO");
+ *static GIT_PATH_FUNC(git_path_foo, "FOO")
  *
  * or use one of the global ones below.
  */
diff --git a/sequencer.c b/sequencer.c
index 5354d4d51e..66e7073995 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -62,12 +62,12 @@ static GIT_PATH_FUNC(rebase_path_done, "rebase-merge/done")
  * The file to keep track of how many commands were already processed (e.g.
  * for the prompt).
  */
-static GIT_PATH_FUNC(rebase_path_msgnum, "rebase-merge/msgnum");
+static GIT_PATH_FUNC(rebase_path_msgnum, "rebase-merge/msgnum")
 /*
  * The file to keep track of how many commands are to be processed in total
  * (e.g. for the prompt).
  */
-static GIT_PATH_FUNC(rebase_path_msgtotal, "rebase-merge/end");
+static GIT_PATH_FUNC(rebase_path_msgtotal, "rebase-merge/end")
 /*
  * The commit message that is planned to be used for any changes that
  * need to be committed following a user interaction.
-- 
2.18.0.203.gfac676dfb9



[PATCH 6/6] utf8.c: avoid char overflow

2018-07-09 Thread Beat Bolli
In ISO C, char constants must be in the range -128..127. Change the BOM
constants to char literals to avoid overflow.

Signed-off-by: Beat Bolli 
---
 utf8.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/utf8.c b/utf8.c
index d55e20c641..982217eec9 100644
--- a/utf8.c
+++ b/utf8.c
@@ -566,10 +566,10 @@ static int has_bom_prefix(const char *data, size_t len,
return data && bom && (len >= bom_len) && !memcmp(data, bom, bom_len);
 }
 
-static const char utf16_be_bom[] = {0xFE, 0xFF};
-static const char utf16_le_bom[] = {0xFF, 0xFE};
-static const char utf32_be_bom[] = {0x00, 0x00, 0xFE, 0xFF};
-static const char utf32_le_bom[] = {0xFF, 0xFE, 0x00, 0x00};
+static const char utf16_be_bom[] = {'\xFE', '\xFF'};
+static const char utf16_le_bom[] = {'\xFF', '\xFE'};
+static const char utf32_be_bom[] = {'\0', '\0', '\xFE', '\xFF'};
+static const char utf32_le_bom[] = {'\xFF', '\xFE', '\0', '\0'};
 
 int has_prohibited_utf_bom(const char *enc, const char *data, size_t len)
 {
-- 
2.18.0.203.gfac676dfb9



[PATCH 2/6] refs/refs-internal.h: avoid forward declaration of an enum

2018-07-09 Thread Beat Bolli
Include iterator.h to define enum iterator_selection.

Signed-off-by: Beat Bolli 
---
 refs/refs-internal.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/refs/refs-internal.h b/refs/refs-internal.h
index dd834314bd..a78b5cb803 100644
--- a/refs/refs-internal.h
+++ b/refs/refs-internal.h
@@ -1,6 +1,8 @@
 #ifndef REFS_REFS_INTERNAL_H
 #define REFS_REFS_INTERNAL_H
 
+#include "iterator.h"   /* for enum iterator_selection */
+
 /*
  * Data structures and functions for the internal use of the refs
  * module. Code outside of the refs module should use only the public
-- 
2.18.0.203.gfac676dfb9



[PATCH 3/6] convert.c: replace "\e" escapes with "\033".

2018-07-09 Thread Beat Bolli
The "\e" escape is not defined in ISO C.

While on this line, add a missing space after the comma.

Signed-off-by: Beat Bolli 
---
 convert.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/convert.c b/convert.c
index 64d0d30e08..edebb946f5 100644
--- a/convert.c
+++ b/convert.c
@@ -334,7 +334,7 @@ static void trace_encoding(const char *context, const char 
*path,
strbuf_addf(&trace, "%s (%s, considered %s):\n", context, path, 
encoding);
for (i = 0; i < len && buf; ++i) {
strbuf_addf(
-   &trace,"| \e[2m%2i:\e[0m %2x \e[2m%c\e[0m%c",
+   &trace, "| \033[2m%2i:\033[0m %2x \033[2m%c\033[0m%c",
i,
(unsigned char) buf[i],
(buf[i] > 32 && buf[i] < 127 ? buf[i] : ' '),
-- 
2.18.0.203.gfac676dfb9



[PATCH 1/6] connect.h: avoid forward declaration of an enum

2018-07-09 Thread Beat Bolli
Include protocol.h to define enum protocol_version.

Signed-off-by: Beat Bolli 
---
 connect.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/connect.h b/connect.h
index 0e69c6709c..c86f862f2f 100644
--- a/connect.h
+++ b/connect.h
@@ -1,6 +1,8 @@
 #ifndef CONNECT_H
 #define CONNECT_H
 
+#include "protocol.h"   /* for enum protocol_version */
+
 #define CONNECT_VERBOSE   (1u << 0)
 #define CONNECT_DIAG_URL  (1u << 1)
 #define CONNECT_IPV4  (1u << 2)
-- 
2.18.0.203.gfac676dfb9



Re: [RFC PATCH 2/6] refs/refs-internal.h: avoid forward declaration of an enum

2018-07-09 Thread Beat Bolli
On 09.07.18 20:46, Jeff King wrote:
> On Sun, Jul 08, 2018 at 04:43:38PM +0200, Beat Bolli wrote:
> 
>> diff --git a/refs/refs-internal.h b/refs/refs-internal.h
>> index dd834314bd..a78b5cb803 100644
>> --- a/refs/refs-internal.h
>> +++ b/refs/refs-internal.h
>> @@ -1,6 +1,8 @@
>>  #ifndef REFS_REFS_INTERNAL_H
>>  #define REFS_REFS_INTERNAL_H
>>  
>> +#include "iterator.h"   /* for enum iterator_selection */
> 
> IMHO this kind of comment does more harm than good, because it is so
> prone to going stale (nobody is going to bother updating it when they
> add new dependencies on iterator.h). Anybody who is interested in the
> original reason can use "git blame" to dig up your commit message. And
> anybody who is thinking about deleting that line would need to dig into
> whether anything had been added in the meantime that also requires the
> include.
> 
> So at best it's redundant, and at worst it's slightly misleading. :)
> 
> Not worth a re-roll by itself, but it looked like you had a few other
> bits in the other patches to address.
> 
> Other than this minor quibble, the whole series looks good to me, modulo
> the existing review.
> 
> -Peff
> 

Oops, I've just sent the non-RFC reroll without this change.

Junio, would you squash this into [1/6] and [2/6], please (if you agree,
of course :-))?

Beat


[PATCH] unicode: update the width tables to Unicode 11

2018-07-09 Thread Beat Bolli
Now that Unicode 11 has been announced[0], update the character
width tables to the new version.

[0] http://blog.unicode.org/2018/06/announcing-unicode-standard-version-110.html

Signed-off-by: Beat Bolli 
---
 unicode-width.h | 41 -
 1 file changed, 28 insertions(+), 13 deletions(-)

diff --git a/unicode-width.h b/unicode-width.h
index 6dee2c77ce..7c643760f8 100644
--- a/unicode-width.h
+++ b/unicode-width.h
@@ -20,12 +20,13 @@ static const struct interval zero_width[] = {
 { 0x0730, 0x074A },
 { 0x07A6, 0x07B0 },
 { 0x07EB, 0x07F3 },
+{ 0x07FD, 0x07FD },
 { 0x0816, 0x0819 },
 { 0x081B, 0x0823 },
 { 0x0825, 0x0827 },
 { 0x0829, 0x082D },
 { 0x0859, 0x085B },
-{ 0x08D4, 0x0902 },
+{ 0x08D3, 0x0902 },
 { 0x093A, 0x093A },
 { 0x093C, 0x093C },
 { 0x0941, 0x0948 },
@@ -37,6 +38,7 @@ static const struct interval zero_width[] = {
 { 0x09C1, 0x09C4 },
 { 0x09CD, 0x09CD },
 { 0x09E2, 0x09E3 },
+{ 0x09FE, 0x09FE },
 { 0x0A01, 0x0A02 },
 { 0x0A3C, 0x0A3C },
 { 0x0A41, 0x0A42 },
@@ -63,6 +65,7 @@ static const struct interval zero_width[] = {
 { 0x0BC0, 0x0BC0 },
 { 0x0BCD, 0x0BCD },
 { 0x0C00, 0x0C00 },
+{ 0x0C04, 0x0C04 },
 { 0x0C3E, 0x0C40 },
 { 0x0C46, 0x0C48 },
 { 0x0C4A, 0x0C4D },
@@ -182,6 +185,7 @@ static const struct interval zero_width[] = {
 { 0xA825, 0xA826 },
 { 0xA8C4, 0xA8C5 },
 { 0xA8E0, 0xA8F1 },
+{ 0xA8FF, 0xA8FF },
 { 0xA926, 0xA92D },
 { 0xA947, 0xA951 },
 { 0xA980, 0xA982 },
@@ -219,19 +223,22 @@ static const struct interval zero_width[] = {
 { 0x10A38, 0x10A3A },
 { 0x10A3F, 0x10A3F },
 { 0x10AE5, 0x10AE6 },
+{ 0x10D24, 0x10D27 },
+{ 0x10F46, 0x10F50 },
 { 0x11001, 0x11001 },
 { 0x11038, 0x11046 },
 { 0x1107F, 0x11081 },
 { 0x110B3, 0x110B6 },
 { 0x110B9, 0x110BA },
 { 0x110BD, 0x110BD },
+{ 0x110CD, 0x110CD },
 { 0x11100, 0x11102 },
 { 0x11127, 0x1112B },
 { 0x1112D, 0x11134 },
 { 0x11173, 0x11173 },
 { 0x11180, 0x11181 },
 { 0x111B6, 0x111BE },
-{ 0x111CA, 0x111CC },
+{ 0x111C9, 0x111CC },
 { 0x1122F, 0x11231 },
 { 0x11234, 0x11234 },
 { 0x11236, 0x11237 },
@@ -239,13 +246,14 @@ static const struct interval zero_width[] = {
 { 0x112DF, 0x112DF },
 { 0x112E3, 0x112EA },
 { 0x11300, 0x11301 },
-{ 0x1133C, 0x1133C },
+{ 0x1133B, 0x1133C },
 { 0x11340, 0x11340 },
 { 0x11366, 0x1136C },
 { 0x11370, 0x11374 },
 { 0x11438, 0x1143F },
 { 0x11442, 0x11444 },
 { 0x11446, 0x11446 },
+{ 0x1145E, 0x1145E },
 { 0x114B3, 0x114B8 },
 { 0x114BA, 0x114BA },
 { 0x114BF, 0x114C0 },
@@ -264,8 +272,9 @@ static const struct interval zero_width[] = {
 { 0x1171D, 0x1171F },
 { 0x11722, 0x11725 },
 { 0x11727, 0x1172B },
-{ 0x11A01, 0x11A06 },
-{ 0x11A09, 0x11A0A },
+{ 0x1182F, 0x11837 },
+{ 0x11839, 0x1183A },
+{ 0x11A01, 0x11A0A },
 { 0x11A33, 0x11A38 },
 { 0x11A3B, 0x11A3E },
 { 0x11A47, 0x11A47 },
@@ -285,6 +294,10 @@ static const struct interval zero_width[] = {
 { 0x11D3C, 0x11D3D },
 { 0x11D3F, 0x11D45 },
 { 0x11D47, 0x11D47 },
+{ 0x11D90, 0x11D91 },
+{ 0x11D95, 0x11D95 },
+{ 0x11D97, 0x11D97 },
+{ 0x11EF3, 0x11EF4 },
 { 0x16AF0, 0x16AF4 },
 { 0x16B30, 0x16B36 },
 { 0x16F8F, 0x16F92 },
@@ -355,7 +368,7 @@ static const struct interval double_width[] = {
 { 0x3000, 0x303E },
 { 0x3041, 0x3096 },
 { 0x3099, 0x30FF },
-{ 0x3105, 0x312E },
+{ 0x3105, 0x312F },
 { 0x3131, 0x318E },
 { 0x3190, 0x31BA },
 { 0x31C0, 0x31E3 },
@@ -375,7 +388,7 @@ static const struct interval double_width[] = {
 { 0xFF01, 0xFF60 },
 { 0xFFE0, 0xFFE6 },
 { 0x16FE0, 0x16FE1 },
-{ 0x17000, 0x187EC },
+{ 0x17000, 0x187F1 },
 { 0x18800, 0x18AF2 },
 { 0x1B000, 0x1B11E },
 { 0x1B170, 0x1B2FB },
@@ -410,13 +423,15 @@ static const struct interval double_width[] = {
 { 0x1F6CC, 0x1F6CC },
 { 0x1F6D0, 0x1F6D2 },
 { 0x1F6EB, 0x1F6EC },
-{ 0x1F6F4, 0x1F6F8 },
+{ 0x1F6F4, 0x1F6F9 },
 { 0x1F910, 0x1F93E },
-{ 0x1F940, 0x1F94C },
-{ 0x1F950, 0x1F96B },
-{ 0x1F980, 0x1F997 },
-{ 0x1F9C0, 0x1F9C0 },
-{ 0x1F9D0, 0x1F9E6 },
+{ 0x1F940, 0x1F970 },
+{ 0x1F973, 0x1F976 },
+{ 0x1F97A, 0x1F97A },
+{ 0x1F97C, 0x1F9A2 },
+{ 0x1F9B0, 0x1F9B9 },
+{ 0x1F9C0, 0x1F9C2 },
+{ 0x1F9D0, 0x1F9FF },
 { 0x20000, 0x2FFFD },
 { 0x30000, 0x3FFFD }
 };
-- 
2.18.0.203.gfac676dfb9



Re: [PATCH 0/6] Compile cleanly in pedantic mode

2018-07-09 Thread Beat Bolli
On 09.07.18 21:25, Beat Bolli wrote:
> While developing 6aaded550 ("builtin/config: work around an unsized
> array forward declaration", 2018-07-05), I have compiled Git with
> CFLAGS="-std=c99 -pedantic".
> 
> This series fixes a few compiler warnings when compiling with these
> options.

As a small aside, I have also compiled all of Git with -std=c11 using
gcc 8.1. This didn't turn up any new warnings, so we're looking pretty
future-proof in this regard.

Cheers,
Beat


Re: [RFC PATCH 4/6] sequencer.c: avoid empty statements at top level

2018-07-09 Thread Beat Bolli
On 09.07.18 23:34, Junio C Hamano wrote:
> Beat Bolli  writes:
> 
>> The macro GIT_PATH_FUNC expands to a complete statement including the
>> semicolon. Remove two extra trailing semicolons.
> 
> Wait a bit.  The observation in the log message and the
> implementation of GIT_PATH_FUNC() do not match.
> 
> #define GIT_PATH_FUNC(func, filename) \
> const char *func(void) \
> { \
> static char *ret; \
> if (!ret) \
> ret = git_pathdup(filename); \
> return ret; \
> }
> 
> The code generated does "include semicolon" but that is not why the
> caller should place semicolon after the closing parens.  Perhaps
> replace "including the semicolon." with something else, like ", and
> adding a semicolon after it not only is unnecessary but is wrong."
> or something like that?

This message is fixed in the non-RFC series that I sent at 19:25 UTC. I
noticed the error after the message from Philip Oakley.

Beat


Re: [PATCH 0/6] Compile cleanly in pedantic mode

2018-07-09 Thread Beat Bolli
On 09.07.18 23:45, Junio C Hamano wrote:
> Beat Bolli  writes:
> 
>> While developing 6aaded550 ("builtin/config: work around an unsized
>> array forward declaration", 2018-07-05), I have compiled Git with
>> CFLAGS="-std=c99 -pedantic".
> 
> Nicely done.  
> 
> With these 6 patches and the USE_PARENS_AROUND_GETTEXT_N hack, the
> forward decl of the unsized static array you dealt with separately
> becomes the only remaining violation in the codebase, which is good.
> 
> Will queue.  Thanks.

Thanks!

Beat


Re: [PATCH 0/6] Compile cleanly in pedantic mode

2018-07-10 Thread Beat Bolli

Hi Junio

Am 09.07.2018 23:45, schrieb Junio C Hamano:

Beat Bolli  writes:


While developing 6aaded550 ("builtin/config: work around an unsized
array forward declaration", 2018-07-05), I have compiled Git with
CFLAGS="-std=c99 -pedantic".


Nicely done.

With these 6 patches and the USE_PARENS_AROUND_GETTEXT_N hack, the
forward decl of the unsized static array you dealt with separately
becomes the only remaining violation in the codebase, which is good.

Will queue.  Thanks.


Should we add a "pedantic" flag to DEVOPTS that would simplify building 
pedantically? It would also have to set USE_PARENS_AROUND_GETTEXT_N so 
as to not overwhelm the developer with too much output.


Beat


Re: [PATCH v3 4/4] builtin/rebase: support running "git rebase <upstream>"

2018-07-17 Thread Beat Bolli
On 06.07.18 14:08, Pratik Karki wrote:
> +static GIT_PATH_FUNC(apply_dir, "rebase-apply");
> +static GIT_PATH_FUNC(merge_dir, "rebase-merge");

Maybe fix this up with

-static GIT_PATH_FUNC(apply_dir, "rebase-apply");
-static GIT_PATH_FUNC(merge_dir, "rebase-merge");
+static GIT_PATH_FUNC(apply_dir, "rebase-apply")
+static GIT_PATH_FUNC(merge_dir, "rebase-merge")

?

(See
https://public-inbox.org/git/20180709192537.18564-5-dev+...@drbeat.li/#t)

Cheers, Beat



Re: [PATCH v3 4/4] builtin/rebase: support running "git rebase <upstream>"

2018-07-17 Thread Beat Bolli
On 17.07.18 23:49, Beat Bolli wrote:
> On 06.07.18 14:08, Pratik Karki wrote:
>> +static GIT_PATH_FUNC(apply_dir, "rebase-apply");
>> +static GIT_PATH_FUNC(merge_dir, "rebase-merge");
> 
> Maybe fix this up with
> 
> -static GIT_PATH_FUNC(apply_dir, "rebase-apply");
> -static GIT_PATH_FUNC(merge_dir, "rebase-merge");
> +static GIT_PATH_FUNC(apply_dir, "rebase-apply")
> +static GIT_PATH_FUNC(merge_dir, "rebase-merge")
> 
> ?

Sorry, this should have been a reply to [PATCH v4 4/4]. The remark still
applies, though.

> (See https://public-inbox.org/git/20180709192537.18564-5-dev+...@drbeat.li/#t)

Cheers, Beat



[PATCH] fixup! builtin/rebase: support running "git rebase <upstream>"

2018-07-21 Thread Beat Bolli
The macro GIT_PATH_FUNC expands to a function definition that ends with
a closing brace. The extra semicolon produces a warning when compiling
with -pedantic.

Signed-off-by: Beat Bolli 
---

Junio, this applies on top of pk/rebase-in-c.

Thanks, Beat

 builtin/rebase.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/builtin/rebase.c b/builtin/rebase.c
index e38ea80874..6aaae2436f 100644
--- a/builtin/rebase.c
+++ b/builtin/rebase.c
@@ -14,8 +14,8 @@
 #include "refs.h"
 #include "quote.h"
 
-static GIT_PATH_FUNC(apply_dir, "rebase-apply");
-static GIT_PATH_FUNC(merge_dir, "rebase-merge");
+static GIT_PATH_FUNC(apply_dir, "rebase-apply")
+static GIT_PATH_FUNC(merge_dir, "rebase-merge")
 
 enum rebase_type {
REBASE_AM,
-- 
2.18.0.203.gfac676dfb9



[PATCH] Makefile: add a DEVOPTS flag to get pedantic compilation

2018-07-21 Thread Beat Bolli
In the interest of code hygiene, make it easier to compile Git with the
flag -pedantic.

Pure pedantic compilation results in one warning per use of the
translation macro `N_`, therefore also disable the parenthesising of
i18n strings with -DUSE_PARENS_AROUND_GETTEXT_N=0 to show only real
warnings.

Signed-off-by: Beat Bolli 
---

This is the convenience knob for all developers that led to the series
bb/pedantic[1]. It does not depend on this series, though.

[1] https://public-inbox.org/git/20180708144342.11922-1-dev+...@drbeat.li/T/#u

 Makefile   | 4 
 config.mak.dev | 4 
 2 files changed, 8 insertions(+)

diff --git a/Makefile b/Makefile
index 0cb6590f24..f800054379 100644
--- a/Makefile
+++ b/Makefile
@@ -484,6 +484,10 @@ all::
 #The DEVELOPER mode enables -Wextra with a few exceptions. By
 #setting this flag the exceptions are removed, and all of
 #-Wextra is used.
+#
+#pedantic:
+#
+#Enable -pedantic compilation.
 
 GIT-VERSION-FILE: FORCE
@$(SHELL_PATH) ./GIT-VERSION-GEN
diff --git a/config.mak.dev b/config.mak.dev
index 2d244ca470..f21f0d0209 100644
--- a/config.mak.dev
+++ b/config.mak.dev
@@ -1,6 +1,10 @@
 ifeq ($(filter no-error,$(DEVOPTS)),)
 CFLAGS += -Werror
 endif
+ifneq ($(filter pedantic,$(DEVOPTS)),)
+CFLAGS += -pedantic
+# don't warn for each N_ use
+CFLAGS += -DUSE_PARENS_AROUND_GETTEXT_N=0
+endif
 CFLAGS += -Wdeclaration-after-statement
 CFLAGS += -Wno-format-zero-length
 CFLAGS += -Wold-style-definition
-- 
2.18.0.203.gfac676dfb9



[PATCH v2] Makefile: add a DEVOPTS flag to get pedantic compilation

2018-07-21 Thread Beat Bolli
In the interest of code hygiene, make it easier to compile Git with the
flag -pedantic.

Pure pedantic compilation with GCC 7.3 results in one warning per use of
the translation macro `N_`:

warning: array initialized from parenthesized string constant [-Wpedantic]

Therefore also disable the parenthesising of i18n strings with
-DUSE_PARENS_AROUND_GETTEXT_N=no.

Signed-off-by: Beat Bolli 
---

This is the convenience knob for all developers that led to the series
bb/pedantic[1]. It does not depend on this series, though.

[1] https://public-inbox.org/git/20180708144342.11922-1-dev+...@drbeat.li/T/#u

 Makefile   | 6 ++
 config.mak.dev | 5 +
 2 files changed, 11 insertions(+)

diff --git a/Makefile b/Makefile
index 0cb6590f24..2bfc051652 100644
--- a/Makefile
+++ b/Makefile
@@ -484,6 +484,12 @@ all::
 #The DEVELOPER mode enables -Wextra with a few exceptions. By
 #setting this flag the exceptions are removed, and all of
 #-Wextra is used.
+#
+#pedantic:
+#
+#Enable -pedantic compilation. This also disables
+#USE_PARENS_AROUND_GETTEXT_N to produce only relevant warnings.
 
 GIT-VERSION-FILE: FORCE
@$(SHELL_PATH) ./GIT-VERSION-GEN
diff --git a/config.mak.dev b/config.mak.dev
index 2d244ca470..e11dd94741 100644
--- a/config.mak.dev
+++ b/config.mak.dev
@@ -1,6 +1,11 @@
 ifeq ($(filter no-error,$(DEVOPTS)),)
 CFLAGS += -Werror
 endif
+ifneq ($(filter pedantic,$(DEVOPTS)),)
+CFLAGS += -pedantic
+# don't warn for each N_ use
+CFLAGS += -DUSE_PARENS_AROUND_GETTEXT_N=no
+endif
 CFLAGS += -Wdeclaration-after-statement
 CFLAGS += -Wno-format-zero-length
 CFLAGS += -Wold-style-definition
-- 
2.18.0.203.gfac676dfb9



Re: [PATCH v2] Makefile: add a DEVOPTS flag to get pedantic compilation

2018-07-23 Thread Beat Bolli
On 23.07.18 20:53, Junio C Hamano wrote:
> Beat Bolli  writes:
> 
>> In the interest of code hygiene, make it easier to compile Git with the
>> flag -pedantic.
>>
>> Pure pedantic compilation with GCC 7.3 results in one warning per use of
>> the translation macro `N_`:
>>
>> warning: array initialized from parenthesized string constant 
>> [-Wpedantic]
>>
>> Therefore also disable the parenthesising of i18n strings with
>> -DUSE_PARENS_AROUND_GETTEXT_N=no.
>>
>> Signed-off-by: Beat Bolli 
>> ---
>>
>> This is the convenience knob for all developers that led to the series
>> bb/pedantic[1]. It does not depend on this series, though.
> 
> Yup, but "make DEVELOPER=Yes" build won't pass unless this patch is
> queued after those clean-up ;-)

Then there's a bug in this patch. It should only have an effect if we
"make DEVELOPER=Yes DEVOPTS=pedantic". Did you try this?

> Remind me if I forget to tweak =no back to =0 before pushing the
> result out.

No problem, I can send a v3 with this change reverted.

Beat

> 
> Thanks.
> 
>> [1] 
>> https://public-inbox.org/git/20180708144342.11922-1-dev+...@drbeat.li/T/#u
>>
>>  Makefile   | 6 ++
>>  config.mak.dev | 5 +
>>  2 files changed, 11 insertions(+)
>>
>> diff --git a/Makefile b/Makefile
>> index 0cb6590f24..2bfc051652 100644
>> --- a/Makefile
>> +++ b/Makefile
>> @@ -484,6 +484,12 @@ all::
>>  #The DEVELOPER mode enables -Wextra with a few exceptions. By
>>  #setting this flag the exceptions are removed, and all of
>>  #-Wextra is used.
>> +#
>> +#pedantic:
>> +#
>> +#Enable -pedantic compilation. This also disables
>> +#USE_PARENS_AROUND_GETTEXT_N to produce only relevant warnings.
>>  
>>  GIT-VERSION-FILE: FORCE
>>  @$(SHELL_PATH) ./GIT-VERSION-GEN
>> diff --git a/config.mak.dev b/config.mak.dev
>> index 2d244ca470..e11dd94741 100644
>> --- a/config.mak.dev
>> +++ b/config.mak.dev
>> @@ -1,6 +1,11 @@
>>  ifeq ($(filter no-error,$(DEVOPTS)),)
>>  CFLAGS += -Werror
>>  endif
>> +ifneq ($(filter pedantic,$(DEVOPTS)),)
>> +CFLAGS += -pedantic
>> +# don't warn for each N_ use
>> +CFLAGS += -DUSE_PARENS_AROUND_GETTEXT_N=no
>> +endif
>>  CFLAGS += -Wdeclaration-after-statement
>>  CFLAGS += -Wno-format-zero-length
>>  CFLAGS += -Wold-style-definition



[PATCH v3] Makefile: add a DEVOPTS flag to get pedantic compilation

2018-07-24 Thread Beat Bolli
In the interest of code hygiene, make it easier to compile Git with the
flag -pedantic.

Pure pedantic compilation with GCC 7.3 results in one warning per use of
the translation macro `N_`:

warning: array initialized from parenthesized string constant [-Wpedantic]

Therefore also disable the parenthesising of i18n strings with
-DUSE_PARENS_AROUND_GETTEXT_N=0.

Signed-off-by: Beat Bolli 
---

Now with -DUSE_PARENS_AROUND_GETTEXT_N=0 instead of =no.

This is the convenience knob for all developers that led to the series
bb/pedantic[1]. It does not depend on this series, though.

[1] https://public-inbox.org/git/20180708144342.11922-1-dev+...@drbeat.li/T/#u

 Makefile   | 6 ++
 config.mak.dev | 5 +
 2 files changed, 11 insertions(+)

diff --git a/Makefile b/Makefile
index 0cb6590f24..2bfc051652 100644
--- a/Makefile
+++ b/Makefile
@@ -484,6 +484,12 @@ all::
 #The DEVELOPER mode enables -Wextra with a few exceptions. By
 #setting this flag the exceptions are removed, and all of
 #-Wextra is used.
+#
+#pedantic:
+#
+#Enable -pedantic compilation. This also disables
+#USE_PARENS_AROUND_GETTEXT_N to produce only relevant warnings.
 
 GIT-VERSION-FILE: FORCE
@$(SHELL_PATH) ./GIT-VERSION-GEN
diff --git a/config.mak.dev b/config.mak.dev
index 2d244ca470..e11dd94741 100644
--- a/config.mak.dev
+++ b/config.mak.dev
@@ -1,6 +1,11 @@
 ifeq ($(filter no-error,$(DEVOPTS)),)
 CFLAGS += -Werror
 endif
+ifneq ($(filter pedantic,$(DEVOPTS)),)
+CFLAGS += -pedantic
+# don't warn for each N_ use
+CFLAGS += -DUSE_PARENS_AROUND_GETTEXT_N=0
+endif
 CFLAGS += -Wdeclaration-after-statement
 CFLAGS += -Wno-format-zero-length
 CFLAGS += -Wold-style-definition
-- 
2.18.0.203.gfac676dfb9



Re: [PATCH v1] msvc: fix non-standard escape sequence in source

2018-07-24 Thread Beat Bolli
Hi Jeff

On 24.07.18 16:42, g...@jeffhostetler.com wrote:
> From: Jeff Hostetler 
> 
> Replace non-standard "\e" escape sequence with "\x1B".

This was already fixed in <20180708144342.11922-4-dev+...@drbeat.li>.

Cheers,
Beat


> 
> In commit 7a17918c34f4e83982456ffe22d880c3cda5384f a trace message with
> several "\e" escape sequences was added.  This causes a compiler warning
> under MSVC.
> 
> According to [1], the "\e" sequence is an extension supported by GCC,
> clang, and tcc.
> 
> [1] https://en.wikipedia.org/wiki/Escape_sequences_in_C
> 
> Signed-off-by: Jeff Hostetler 
> ---
>  convert.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/convert.c b/convert.c
> index 56cfe31..52092be 100644
> --- a/convert.c
> +++ b/convert.c
> @@ -335,7 +335,7 @@ static void trace_encoding(const char *context, const 
> char *path,
>   strbuf_addf(&trace, "%s (%s, considered %s):\n", context, path, 
> encoding);
>   for (i = 0; i < len && buf; ++i) {
>   strbuf_addf(
> - &trace,"| \e[2m%2i:\e[0m %2x \e[2m%c\e[0m%c",
> + &trace,"| \x1B[2m%2i:\x1B[0m %2x \x1B[2m%c\x1B[0m%c",
>   i,
>   (unsigned char) buf[i],
>   (buf[i] > 32 && buf[i] < 127 ? buf[i] : ' '),
> 




Re: [PATCH v1] config.c: fix msvc compile error

2018-07-24 Thread Beat Bolli
Hi Jeff

On 24.07.18 17:30, g...@jeffhostetler.com wrote:
> From: Jeff Hostetler 
> 
> In commit fb0dc3bac135e9f6243bd6d293e8c9293c73b9cd code was added
> to builtin/config.c to define a new function and a forward declaration
> for an array of unknown size.  This causes a compile error under MSVC.
> 
> Reorder the code to forward declare the function instead of the array.

This was already fixed (differently) in
<20180705183445.30901-1-dev+...@drbeat.li>.

Cheers,
Beat


> Signed-off-by: Jeff Hostetler 
> ---
>  builtin/config.c | 79 
> 
>  1 file changed, 40 insertions(+), 39 deletions(-)
> 
> diff --git a/builtin/config.c b/builtin/config.c
> index b29d26d..564f18f 100644
> --- a/builtin/config.c
> +++ b/builtin/config.c
> @@ -67,7 +67,46 @@ static int show_origin;
>   { OPTION_CALLBACK, (s), (l), (v), NULL, (h), PARSE_OPT_NOARG | \
>   PARSE_OPT_NONEG, option_parse_type, (i) }
>  
> -static struct option builtin_config_options[];
> +static int option_parse_type(const struct option *opt, const char *arg,
> +  int unset);
> +
> +static struct option builtin_config_options[] = {
> + OPT_GROUP(N_("Config file location")),
> + OPT_BOOL(0, "global", &use_global_config, N_("use global config file")),
> + OPT_BOOL(0, "system", &use_system_config, N_("use system config file")),
> + OPT_BOOL(0, "local", &use_local_config, N_("use repository config 
> file")),
> + OPT_STRING('f', "file", &given_config_source.file, N_("file"), N_("use 
> given config file")),
> + OPT_STRING(0, "blob", &given_config_source.blob, N_("blob-id"), 
> N_("read config from given blob object")),
> + OPT_GROUP(N_("Action")),
> + OPT_BIT(0, "get", &actions, N_("get value: name [value-regex]"), 
> ACTION_GET),
> + OPT_BIT(0, "get-all", &actions, N_("get all values: key 
> [value-regex]"), ACTION_GET_ALL),
> + OPT_BIT(0, "get-regexp", &actions, N_("get values for regexp: 
> name-regex [value-regex]"), ACTION_GET_REGEXP),
> + OPT_BIT(0, "get-urlmatch", &actions, N_("get value specific for the 
> URL: section[.var] URL"), ACTION_GET_URLMATCH),
> + OPT_BIT(0, "replace-all", &actions, N_("replace all matching variables: 
> name value [value_regex]"), ACTION_REPLACE_ALL),
> + OPT_BIT(0, "add", &actions, N_("add a new variable: name value"), 
> ACTION_ADD),
> + OPT_BIT(0, "unset", &actions, N_("remove a variable: name 
> [value-regex]"), ACTION_UNSET),
> + OPT_BIT(0, "unset-all", &actions, N_("remove all matches: name 
> [value-regex]"), ACTION_UNSET_ALL),
> + OPT_BIT(0, "rename-section", &actions, N_("rename section: old-name 
> new-name"), ACTION_RENAME_SECTION),
> + OPT_BIT(0, "remove-section", &actions, N_("remove a section: name"), 
> ACTION_REMOVE_SECTION),
> + OPT_BIT('l', "list", &actions, N_("list all"), ACTION_LIST),
> + OPT_BIT('e', "edit", &actions, N_("open an editor"), ACTION_EDIT),
> + OPT_BIT(0, "get-color", &actions, N_("find the color configured: slot 
> [default]"), ACTION_GET_COLOR),
> + OPT_BIT(0, "get-colorbool", &actions, N_("find the color setting: slot 
> [stdout-is-tty]"), ACTION_GET_COLORBOOL),
> + OPT_GROUP(N_("Type")),
> + OPT_CALLBACK('t', "type", &type, "", N_("value is given this type"), 
> option_parse_type),
> + OPT_CALLBACK_VALUE(0, "bool", &type, N_("value is \"true\" or 
> \"false\""), TYPE_BOOL),
> + OPT_CALLBACK_VALUE(0, "int", &type, N_("value is decimal number"), 
> TYPE_INT),
> + OPT_CALLBACK_VALUE(0, "bool-or-int", &type, N_("value is --bool or 
> --int"), TYPE_BOOL_OR_INT),
> + OPT_CALLBACK_VALUE(0, "path", &type, N_("value is a path (file or 
> directory name)"), TYPE_PATH),
> + OPT_CALLBACK_VALUE(0, "expiry-date", &type, N_("value is an expiry 
> date"), TYPE_EXPIRY_DATE),
> + OPT_GROUP(N_("Other")),
> + OPT_BOOL('z', "null", &end_null, N_("terminate values with NUL byte")),
> + OPT_BOOL(0, "name-only", &omit_values, N_("show variable names only")),
> + OPT_BOOL(0, "includes", &respect_includes_opt, N_("respect include 
> directives on lookup")),
> + OPT_BOOL(0, "show-origin", &show_origin, N_("show origin of config 
> (file, standard input, blob, command line)")),
> + OPT_STRING(0, "default", &default_value, N_("value"), N_("with --get, 
> use default value when missing entry")),
> + OPT_END(),
> +};
>  
>  static int option_parse_type(const struct option *opt, const char *arg,
>int unset)
> @@ -119,44 +158,6 @@ static int option_parse_type(const struct option *opt, 
> const char *arg,
>   return 0;
>  }
>  
> -static struct option builtin_config_options[] = {
> - OPT_GROUP(N_("Config file location")),
> - OPT_BOOL(0, "global", &use_global_config, N_("use global config file")),
> - OPT_BOOL(0, "system", &use_system_config, N_("use system config file")),
> - OPT_BOOL(0, "local", &use_local_config, N_("use repository config 
> file"))

Re: [PATCH v1] config.c: fix msvc compile error

2018-07-24 Thread Beat Bolli
On 24.07.18 20:22, Junio C Hamano wrote:
> Beat Bolli  writes:
> 
>> Hi Jeff
>>
>> On 24.07.18 17:30, g...@jeffhostetler.com wrote:
>>> From: Jeff Hostetler 
>>>
>>> In commit fb0dc3bac135e9f6243bd6d293e8c9293c73b9cd code was added
>>> to builtin/config.c to define a new function and a forward declaration
>>> for an array of unknown size.  This causes a compile error under MSVC.
>>>
>>> Reorder the code to forward declare the function instead of the array.
>>
>> This was already fixed (differently) in
>> <20180705183445.30901-1-dev+...@drbeat.li>.
> 
> Thanks for saving me from having to dig the list archive myself.
> Yes, it is already applied to the tip of the topic that originally
> caused the breakage.
> 
Just a general question:

Is it OK to refer to patches on pu with the Message-ID, or would you
prefer the commit hash? The hash changes whenever you recreate pu,
doesn't it?

Beat



Re: [PATCH v1] config.c: fix msvc compile error

2018-07-24 Thread Beat Bolli
On 24.07.18 20:50, Junio C Hamano wrote:
> Beat Bolli  writes:
> 
>> On 24.07.18 20:22, Junio C Hamano wrote:
>>
>>>> This was already fixed (differently) in
>>>> <20180705183445.30901-1-dev+...@drbeat.li>.
>>>
>>> Thanks for saving me from having to dig the list archive myself.
>>> Yes, it is already applied to the tip of the topic that originally
>>> caused the breakage.
>>>
>> Just a general question:
>>
>> Is it OK to refer to patches on pu with the Message-ID, or would you
>> prefer the commit hash? The hash changes whenever you recreate pu,
>> doesn't it?
> 
> Either is fine in practice.  The commits themselves on a topic
> branch that is not yet in 'next' usually stay the same once the tip
> of 'pu' that contains them gets published.  Even though I often use
> "git rebase -i", "git commit --amend", etc. to fix up posted patches
> while turning them into commits on topic branches, I usually stop
> doing so once I push out day's integration result.
> 
> Until a new version of the series is posted to replace them on the
> topic branch, that is.  But at that point we are talking about new
> patches with different message-ids that got turned into different
> commit objects, so either commit object name or message id that
> refer to older iteration would still name the same old version, and
> new names would refer to the same new version.
> 

Ok, thanks!



[PATCH v4] Makefile: add a DEVOPTS flag to get pedantic compilation

2018-07-24 Thread Beat Bolli
In the interest of code hygiene, make it easier to compile Git with the
flag -pedantic.

Pure pedantic compilation with GCC 7.3 results in one warning per use of
the translation macro `N_`:

warning: array initialized from parenthesized string constant [-Wpedantic]

Therefore also disable the parenthesising of i18n strings with
-DUSE_PARENS_AROUND_GETTEXT_N=0.

Signed-off-by: Beat Bolli 
---

Now with -DUSE_PARENS_AROUND_GETTEXT_N=0 instead of =No.

This is the convenience knob for all developers that led to the series
bb/pedantic[1]. It does not depend on this series, though.

[1] https://public-inbox.org/git/20180708144342.11922-1-dev+...@drbeat.li/T/#u

 Makefile   | 6 ++
 config.mak.dev | 5 +
 2 files changed, 11 insertions(+)

diff --git a/Makefile b/Makefile
index 0cb6590f24..2bfc051652 100644
--- a/Makefile
+++ b/Makefile
@@ -484,6 +484,12 @@ all::
 #The DEVELOPER mode enables -Wextra with a few exceptions. By
 #setting this flag the exceptions are removed, and all of
 #-Wextra is used.
+#
+#pedantic:
+#
+#Enable -pedantic compilation. This also disables
+#USE_PARENS_AROUND_GETTEXT_N to produce only relevant warnings.
 
 GIT-VERSION-FILE: FORCE
@$(SHELL_PATH) ./GIT-VERSION-GEN
diff --git a/config.mak.dev b/config.mak.dev
index 2d244ca470..e11dd94741 100644
--- a/config.mak.dev
+++ b/config.mak.dev
@@ -1,6 +1,11 @@
 ifeq ($(filter no-error,$(DEVOPTS)),)
 CFLAGS += -Werror
 endif
+ifneq ($(filter pedantic,$(DEVOPTS)),)
+CFLAGS += -pedantic
+# don't warn for each N_ use
+CFLAGS += -DUSE_PARENS_AROUND_GETTEXT_N=0
+endif
 CFLAGS += -Wdeclaration-after-statement
 CFLAGS += -Wno-format-zero-length
 CFLAGS += -Wold-style-definition
-- 
2.18.0.203.gfac676dfb9



[PATCH 1/2] packfile: drop a repeated enum declaration

2018-07-24 Thread Beat Bolli
When compiling under Apple LLVM version 9.1.0 (clang-902.0.39.2) with
"make DEVELOPER=1 DEVOPTS=pedantic", the compiler says

error: redeclaration of already-defined enum 'object_type' is a GNU
extension [-Werror,-Wgnu-redeclared-enum]

According to https://en.cppreference.com/w/c/language/declarations
(section "Redeclaration"), a repeated declaration after the definition
is only legal for structs and unions, but not for enums.

Drop the belated declaration of enum object_type. It seems that each
includer of packfile.h includes the definition of the enum before
including packfile.h.

Signed-off-by: Beat Bolli 
---
 packfile.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/packfile.h b/packfile.h
index 51383774ec72..9b6198c4c7e0 100644
--- a/packfile.h
+++ b/packfile.h
@@ -6,7 +6,6 @@
 /* in object-store.h */
 struct packed_git;
 struct object_info;
-enum object_type;
 
 /*
  * Generate the filename to be used for a pack file with checksum "sha1" and
-- 
2.18.0



[PATCH 0/2] Pedantic fixes for Apple clang

2018-07-24 Thread Beat Bolli
Following up on my previous series bb/pedantic for gcc, here are two
fixes for pedantic compilation under MacOS 10.13.6 (High Sierra) with
the command line tools of Xcode Version 9.4.1 (9F2000).

Beat Bolli (2):
  packfile: drop a repeated enum declaration
  remote-odb: un-inline function remote_odb_reinit

 packfile.h   | 1 -
 remote-odb.c | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

-- 
2.18.0



[PATCH 2/2] remote-odb: un-inline function remote_odb_reinit

2018-07-24 Thread Beat Bolli
When compiling under Apple LLVM version 9.1.0 (clang-902.0.39.2) with
"make DEVELOPER=1 DEVOPTS=pedantic", the compiler says

remote-odb.c:87:2: error: static function 'remote_odb_do_init' is
used in an inline function with external linkage
[-Werror,-Wstatic-in-inline]

Remove the inline specifier that would only make sense if
remote_odb_reinit were defined in the header file. Moving it into the
header is not possible because the called function remote_odb_do_init is
static and thus not visible from the includers of the header.

Signed-off-by: Beat Bolli 
---
 remote-odb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/remote-odb.c b/remote-odb.c
index 847a86505778..49cf8e30aa92 100644
--- a/remote-odb.c
+++ b/remote-odb.c
@@ -82,7 +82,7 @@ static inline void remote_odb_init(void)
remote_odb_do_init(0);
 }
 
-inline void remote_odb_reinit(void)
+void remote_odb_reinit(void)
 {
remote_odb_do_init(1);
 }
-- 
2.18.0



Re: [PATCH 2/2] remote-odb: un-inline function remote_odb_reinit

2018-07-24 Thread Beat Bolli
Hi Jonathan

On 24.07.18 23:59, Jonathan Nieder wrote:
> Hi,
> 
> Beat Bolli wrote:
> 
>> When compiling under Apple LLVM version 9.1.0 (clang-902.0.39.2) with
>> "make DEVELOPER=1 DEVOPTS=pedantic", the compiler says
>>
>> remote-odb.c:87:2: error: static function 'remote_odb_do_init' is
>> used in an inline function with external linkage
>> [-Werror,-Wstatic-in-inline]
>>
>> Remove the inline specifier that would only make sense if
>> remote_odb_reinit were defined in the header file. Moving it into the
>> header is not possible because the called function remote_odb_do_init is
>> static and thus not visible from the includers of the header.
>>
>> Signed-off-by: Beat Bolli 
>> ---
>>  remote-odb.c | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> What branch does this apply to?

To pu as of today.

> [...]
>> --- a/remote-odb.c
>> +++ b/remote-odb.c
>> @@ -82,7 +82,7 @@ static inline void remote_odb_init(void)
>>  remote_odb_do_init(0);
>>  }
>>  
>> -inline void remote_odb_reinit(void)
>> +void remote_odb_reinit(void)
> 
> This looks like an oversight in
> https://public-inbox.org/git/20180713174959.16748-6-chrisc...@tuxfamily.org/:
> there isn't any reason for this function to be inline.
> 
> Christian, can you squash it in on your next reroll?

That would probably make sense. I didn't check how mature the topics
were that caused errors.

Beat


Re: [PATCH v4] Makefile: add a DEVOPTS flag to get pedantic compilation

2018-07-25 Thread Beat Bolli
On 25.07.18 18:57, Junio C Hamano wrote:
> Beat Bolli  writes:
> 
>> In the interest of code hygiene, make it easier to compile Git with the
>> flag -pedantic.
>>
>> Pure pedantic compilation with GCC 7.3 results in one warning per use of
>> the translation macro `N_`:
>>
>> warning: array initialized from parenthesized string constant 
>> [-Wpedantic]
>>
>> Therefore also disable the parenthesising of i18n strings with
>> -DUSE_PARENS_AROUND_GETTEXT_N=0.
>>
>> Signed-off-by: Beat Bolli 
>> ---
> 
> Hmph, what did you change between v3 and v4?

Just the commit text. In v3, it still said =No instead of =0.

>> diff --git a/Makefile b/Makefile
>> index 0cb6590f24..2bfc051652 100644
>> --- a/Makefile
>> +++ b/Makefile
>> @@ -484,6 +484,12 @@ all::
> 
> The postimage of this hunk is supposed to be 11 lines long, as you
> have five additional lines in the middle of 6 original context lines.
> Where did this 12 come from?  I am only interested in finding out if
> our patch generation tool(s) have some bugs with this question.
> 
> If this is only because you hand-edit your patch, then we have no
> tool breakage to worry about, but please refrain from doing so in
> the future (instead always go back to the commit, amend it, and
> re-run format-patch).
> 
> Thanks.

You got me there :-/

Won't happen again, sorry.

>>  #The DEVELOPER mode enables -Wextra with a few exceptions. By
>>  #setting this flag the exceptions are removed, and all of
>>  #-Wextra is used.
>> +#
>> +#pedantic:
>> +#
>> +#Enable -pedantic compilation. This also disables
>> +#USE_PARENS_AROUND_GETTEXT_N to produce only relevant warnings.
>>  
>>  GIT-VERSION-FILE: FORCE
>>  @$(SHELL_PATH) ./GIT-VERSION-GEN
> 




[PATCH v2 0/1] Pedantic fix for Apple clang

2018-07-25 Thread Beat Bolli
Following up on my previous series bb/pedantic for gcc, this is a fix
for pedantic compilation under MacOS 10.13.6 (High Sierra) with the
command line tools of Xcode Version 9.4.1 (9F2000).

Changes against v1:

- [1/2]: include cache.h in packfile.h.

- [2/2]: drop it. Christian Couder is going to include this in the next
  version of the cc/remote-odb topic.

Beat Bolli (1):
  packfile: drop a repeated enum declaration

 packfile.h   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

-- 
2.18.0



[PATCH v2] packfile: ensure that enum object_type is defined

2018-07-25 Thread Beat Bolli
When compiling under Apple LLVM version 9.1.0 (clang-902.0.39.2) with
"make DEVELOPER=1 DEVOPTS=pedantic", the compiler says

error: redeclaration of already-defined enum 'object_type' is a GNU
extension [-Werror,-Wgnu-redeclared-enum]

According to https://en.cppreference.com/w/c/language/declarations
(section "Redeclaration"), a repeated declaration after the definition
is only legal for structs and unions, but not for enums.

Drop the belated declaration of enum object_type and include cache.h
instead to make sure the enum is defined.

Helped-by: Jonathan Nieder 
Signed-off-by: Beat Bolli 
---
 packfile.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packfile.h b/packfile.h
index 51383774ec72..28318c5c7c42 100644
--- a/packfile.h
+++ b/packfile.h
@@ -1,12 +1,12 @@
 #ifndef PACKFILE_H
 #define PACKFILE_H
 
+#include "cache.h"
 #include "oidset.h"
 
 /* in object-store.h */
 struct packed_git;
 struct object_info;
-enum object_type;
 
 /*
  * Generate the filename to be used for a pack file with checksum "sha1" and
-- 
2.18.0



[PATCH] t3900: add some more quotes

2018-01-10 Thread Beat Bolli
In 89a70b80 ("t0302 & t3900: add forgotten quotes", 2018-01-03), quotes
were added to protect against spaces in $HOME. In the test_when_finished
hander, two files are deleted which must be quoted individually.

Signed-off-by: Beat Bolli 
---
 t/t3900-i18n-commit.sh | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/t/t3900-i18n-commit.sh b/t/t3900-i18n-commit.sh
index 9e4e694d9..dc00db87b 100755
--- a/t/t3900-i18n-commit.sh
+++ b/t/t3900-i18n-commit.sh
@@ -40,7 +40,7 @@ test_expect_success 'UTF-16 refused because of NULs' '
 '
 
 test_expect_success 'UTF-8 invalid characters refused' '
-   test_when_finished "rm -f \"$HOME/stderr $HOME/invalid\"" &&
+   test_when_finished "rm -f \"$HOME/stderr\" \"$HOME/invalid\"" &&
echo "UTF-8 characters" >F &&
printf "Commit message\n\nInvalid surrogate:\355\240\200\n" \
>"$HOME/invalid" &&
@@ -49,7 +49,7 @@ test_expect_success 'UTF-8 invalid characters refused' '
 '
 
 test_expect_success 'UTF-8 overlong sequences rejected' '
-   test_when_finished "rm -f \"$HOME/stderr $HOME/invalid\"" &&
+   test_when_finished "rm -f \"$HOME/stderr\" \"$HOME/invalid\"" &&
rm -f "$HOME/stderr" "$HOME/invalid" &&
echo "UTF-8 overlong" >F &&
printf "\340\202\251ommit message\n\nThis is not a space:\300\240\n" \
@@ -59,7 +59,7 @@ test_expect_success 'UTF-8 overlong sequences rejected' '
 '
 
 test_expect_success 'UTF-8 non-characters refused' '
-   test_when_finished "rm -f \"$HOME/stderr $HOME/invalid\"" &&
+   test_when_finished "rm -f \"$HOME/stderr\" \"$HOME/invalid\"" &&
echo "UTF-8 non-character 1" >F &&
printf "Commit message\n\nNon-character:\364\217\277\276\n" \
>"$HOME/invalid" &&
@@ -68,7 +68,7 @@ test_expect_success 'UTF-8 non-characters refused' '
 '
 
 test_expect_success 'UTF-8 non-characters refused' '
-   test_when_finished "rm -f \"$HOME/stderr $HOME/invalid\"" &&
+   test_when_finished "rm -f \"$HOME/stderr\" \"$HOME/invalid\"" &&
echo "UTF-8 non-character 2." >F &&
printf "Commit message\n\nNon-character:\357\267\220\n" \
>"$HOME/invalid" &&
-- 
2.15.0.rc1.299.gda03b47c3



[PATCH v2] t3900: add some more quotes

2018-01-10 Thread Beat Bolli
In 89a70b80 ("t0302 & t3900: add forgotten quotes", 2018-01-03), quotes
were added to protect against spaces in $HOME. In the test_when_finished
handler, two files are deleted which must be quoted individually.

Signed-off-by: Beat Bolli 
---

Diff to v1:

s/hander/handler/ in the message.

 t/t3900-i18n-commit.sh | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/t/t3900-i18n-commit.sh b/t/t3900-i18n-commit.sh
index 9e4e694d9..dc00db87b 100755
--- a/t/t3900-i18n-commit.sh
+++ b/t/t3900-i18n-commit.sh
@@ -40,7 +40,7 @@ test_expect_success 'UTF-16 refused because of NULs' '
 '
 
 test_expect_success 'UTF-8 invalid characters refused' '
-   test_when_finished "rm -f \"$HOME/stderr $HOME/invalid\"" &&
+   test_when_finished "rm -f \"$HOME/stderr\" \"$HOME/invalid\"" &&
echo "UTF-8 characters" >F &&
printf "Commit message\n\nInvalid surrogate:\355\240\200\n" \
>"$HOME/invalid" &&
@@ -49,7 +49,7 @@ test_expect_success 'UTF-8 invalid characters refused' '
 '
 
 test_expect_success 'UTF-8 overlong sequences rejected' '
-   test_when_finished "rm -f \"$HOME/stderr $HOME/invalid\"" &&
+   test_when_finished "rm -f \"$HOME/stderr\" \"$HOME/invalid\"" &&
rm -f "$HOME/stderr" "$HOME/invalid" &&
echo "UTF-8 overlong" >F &&
printf "\340\202\251ommit message\n\nThis is not a space:\300\240\n" \
@@ -59,7 +59,7 @@ test_expect_success 'UTF-8 overlong sequences rejected' '
 '
 
 test_expect_success 'UTF-8 non-characters refused' '
-   test_when_finished "rm -f \"$HOME/stderr $HOME/invalid\"" &&
+   test_when_finished "rm -f \"$HOME/stderr\" \"$HOME/invalid\"" &&
echo "UTF-8 non-character 1" >F &&
printf "Commit message\n\nNon-character:\364\217\277\276\n" \
>"$HOME/invalid" &&
@@ -68,7 +68,7 @@ test_expect_success 'UTF-8 non-characters refused' '
 '
 
 test_expect_success 'UTF-8 non-characters refused' '
-   test_when_finished "rm -f \"$HOME/stderr $HOME/invalid\"" &&
+   test_when_finished "rm -f \"$HOME/stderr\" \"$HOME/invalid\"" &&
echo "UTF-8 non-character 2." >F &&
printf "Commit message\n\nNon-character:\357\267\220\n" \
>"$HOME/invalid" &&
-- 
2.15.0.rc1.299.gda03b47c3



[PATCH] git-gui: search for all current SSH key types

2018-02-24 Thread Beat Bolli
OpenSSH has supported Ed25519 keys since version 6.4 (2014-01-30), and
ECDSA keys since version 5.7 (2011-01-24). git-gui fails to find these
key types in its Help/Show SSH Key dialog.

Teach git-gui to show Ed25519 and ECDSA keys as well.

This was originally reported in
https://github.com/git-for-windows/git/issues/1487 and subsequently in
https://public-inbox.org/git/f65780f29e48994380e2bce87c6f071101146...@deerlm99ex2msx.ww931.my-it-solutions.net/

Signed-off-by: Beat Bolli 
---
Cc: Alexander Gavrilov 
Cc: Pat Thoyts 

 git-gui/lib/sshkey.tcl | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/git-gui/lib/sshkey.tcl b/git-gui/lib/sshkey.tcl
index aa6457bbb..589ff8f78 100644
--- a/git-gui/lib/sshkey.tcl
+++ b/git-gui/lib/sshkey.tcl
@@ -2,7 +2,10 @@
 # Copyright (C) 2006, 2007 Shawn Pearce
 
 proc find_ssh_key {} {
-   foreach name {~/.ssh/id_dsa.pub ~/.ssh/id_rsa.pub ~/.ssh/identity.pub} {
+   foreach name {
+   ~/.ssh/id_dsa.pub ~/.ssh/id_ecdsa.pub ~/.ssh/id_ed25519.pub
+   ~/.ssh/id_rsa.pub ~/.ssh/identity.pub
+   } {
if {[file exists $name]} {
set fh[open $name r]
set cont  [read $fh]
-- 
2.15.0.rc1.299.gda03b47c3



[PATCH] gitk: adjust the menu line numbers to compensate for the new entry

2015-08-11 Thread Beat Bolli
The previous commit[1] added a new context menu entry. Therefore, the
line numbers of the following entries need to be incremented when their
text or state is changed.

[1] <1437218139-7031-1-git-send-email-dev+...@drbeat.li>,
http://article.gmane.org/gmane.comp.version-control.git/274161

Signed-off-by: Beat Bolli 
Cc: Paul Mackerras 
---
Paul, feel free to squash this commit into my previous one.

Signed-off-by: Beat Bolli 
---
 gitk-git/gitk | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gitk-git/gitk b/gitk-git/gitk
index d05169a..bc0e586 100755
--- a/gitk-git/gitk
+++ b/gitk-git/gitk
@@ -8877,13 +8877,13 @@ proc rowmenu {x y id} {
 if {$id ne $nullid && $id ne $nullid2} {
set menu $rowctxmenu
if {$mainhead ne {}} {
-   $menu entryconfigure 7 -label [mc "Reset %s branch to here" 
$mainhead] -state normal
+   $menu entryconfigure 8 -label [mc "Reset %s branch to here" 
$mainhead] -state normal
} else {
-   $menu entryconfigure 7 -label [mc "Detached head: can't reset" 
$mainhead] -state disabled
+   $menu entryconfigure 8 -label [mc "Detached head: can't reset" 
$mainhead] -state disabled
}
-   $menu entryconfigure 9 -state $mstate
$menu entryconfigure 10 -state $mstate
$menu entryconfigure 11 -state $mstate
+   $menu entryconfigure 12 -state $mstate
 } else {
set menu $fakerowmenu
 }
-- 
2.5.0.492.g918e48c
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4] gitk: Add a "Copy commit summary" command

2015-08-13 Thread Beat Bolli
On 13.08.15 09:37, Paul Mackerras wrote:
> On Sat, Jul 18, 2015 at 01:15:39PM +0200, Beat Bolli wrote:
>> When referring to earlier commits in commit messages or other text, one
>> of the established formats is
>>
>>     <abbrev-sha> ("<summary>", <author-date>)
>>
>> Add a "Copy commit summary" command to the context menu that puts this
>> text for the currently selected commit on the clipboard. This makes it
>> easy for our users to create well-formatted commit references.
>>
>> The <abbrev-sha> is produced with the %h format specifier to make it
>> unique. Its length can be controlled with the gitk preference
>> "Auto-select SHA1 (length)", or, if this preference is set to its
>> default value (40), with the Git config setting core.abbrev.
>>
>> Signed-off-by: Beat Bolli 
> 
> Thanks, applied.
> 
Please also apply the follow-up patch in this thread [1]. It fixes menu
entry numbers that were changed by this patch.

Thanks,
Beat


[1] http://article.gmane.org/gmane.comp.version-control.git/275729
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH, RESEND] gitk: adjust the menu line numbers to compensate for the new entry

2015-09-07 Thread Beat Bolli
Commit d835dbb9 ("gitk: Add a "Copy commit summary" command",
2015-08-13) in the upstream gitk repo added a new context menu entry.
Therefore, the line numbers of the entries below the new one need to be
adjusted when their text or state is changed.

Signed-off-by: Beat Bolli 
Cc: Paul Mackerras 
---
 gitk-git/gitk | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gitk-git/gitk b/gitk-git/gitk
index d05169a..bc0e586 100755
--- a/gitk-git/gitk
+++ b/gitk-git/gitk
@@ -8877,13 +8877,13 @@ proc rowmenu {x y id} {
 if {$id ne $nullid && $id ne $nullid2} {
set menu $rowctxmenu
if {$mainhead ne {}} {
-   $menu entryconfigure 7 -label [mc "Reset %s branch to here" 
$mainhead] -state normal
+   $menu entryconfigure 8 -label [mc "Reset %s branch to here" 
$mainhead] -state normal
} else {
-   $menu entryconfigure 7 -label [mc "Detached head: can't reset" 
$mainhead] -state disabled
+   $menu entryconfigure 8 -label [mc "Detached head: can't reset" 
$mainhead] -state disabled
}
-   $menu entryconfigure 9 -state $mstate
$menu entryconfigure 10 -state $mstate
$menu entryconfigure 11 -state $mstate
+   $menu entryconfigure 12 -state $mstate
 } else {
set menu $fakerowmenu
 }
-- 
2.5.0.492.g918e48c
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] git-gui: remove the garbage collection hint

2015-09-29 Thread Beat Bolli
Git core has had automatic garbage collection for a long time.
Git-gui does not need a similar heuristic.

Signed-off-by: Beat Bolli 
Cc: Pat Thoyts 
---
 git-gui.sh   |  3 ---
 lib/database.tcl | 26 --
 2 files changed, 29 deletions(-)

diff --git a/git-gui.sh b/git-gui.sh
index 11048c7..2866777 100755
--- a/git-gui.sh
+++ b/git-gui.sh
@@ -4004,9 +4004,6 @@ after 1 {
$ui_comm configure -state disabled -background gray
}
 }
-if {[is_enabled multicommit] && ![is_config_false gui.gcwarning]} {
-   after 1000 hint_gc
-}
 if {[is_enabled retcode]} {
bind .  {+terminate_me %W}
 }
diff --git a/lib/database.tcl b/lib/database.tcl
index 1f187ed..808bdf6 100644
--- a/lib/database.tcl
+++ b/lib/database.tcl
@@ -87,29 +87,3 @@ proc do_fsck_objects {} {
lappend cmd --strict
console::exec $w $cmd
 }
-
-proc hint_gc {} {
-   set ndirs 1
-   set limit 8
-   if {[is_Windows]} {
-   set ndirs 4
-   set limit 1
-   }
-
-   set count [llength [glob \
-   -nocomplain \
-   -- \
-   [gitdir objects 4\[0-[expr {$ndirs-1}]\]/*]]]
-
-   if {$count >= $limit * $ndirs} {
-   set objects_current [expr {$count * 256/$ndirs}]
-   if {[ask_popup \
-   [mc "This repository currently has approximately %i 
loose objects.
-
-To maintain optimal performance it is strongly recommended that you compress 
the database.
-
-Compress the database now?" $objects_current]] eq yes} {
-   do_gc
-   }
-   }
-}
-- 
2.5.0.492.g918e48c
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] gitk: add missing accelerators

2015-09-30 Thread Beat Bolli
In d99b4b0de27a ("gitk: Accelerators for the main menu", 2015-09-09),
accelerators were added to allow efficient keyboard navigation. One
instance each of the strings "Edit view..." and "Delete view" was left
without the ampersand.

Add the missing ampersand characters to unbreak our international
users.

Signed-off-by: Beat Bolli 
Cc: Paul Mackerras 
---
 gitk-git/gitk | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gitk-git/gitk b/gitk-git/gitk
index 2028b55..fcc606e 100755
--- a/gitk-git/gitk
+++ b/gitk-git/gitk
@@ -12452,8 +12452,8 @@ if {$cmdline_files ne {} || $revtreeargs ne {} || 
$revtreeargscmd ne {}} {
 set viewchanged(1) 0
 set vdatemode(1) 0
 addviewmenu 1
-.bar.view entryconf [mca "Edit view..."] -state normal
-.bar.view entryconf [mca "Delete view"] -state normal
+.bar.view entryconf [mca "&Edit view..."] -state normal
+.bar.view entryconf [mca "&Delete view"] -state normal
 }
 
 if {[info exists permviews]} {
-- 
2.6.0
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/2] SubmittingPatches: hint at gitk's "Copy commit summary" command

2016-08-26 Thread Beat Bolli
Amend the section on referencing previous commits with a hint to the
gitk command that was added exactly for this purpose.

Signed-off-by: Beat Bolli 
---
 Documentation/SubmittingPatches | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches
index 500230c..94a1661 100644
--- a/Documentation/SubmittingPatches
+++ b/Documentation/SubmittingPatches
@@ -124,7 +124,8 @@ archive, summarize the relevant points of the discussion.
 If you want to reference a previous commit in the history of a stable
 branch use the format "abbreviated sha1 (subject, date)". So for example
 like this: "Commit f86a374 (pack-bitmap.c: fix a memleak, 2015-03-30)
-noticed [...]".
+noticed [...]". The "Copy commit summary" command of gitk generates this
+format.
 
 
 (3) Generate your patch using Git tools out of your commits.
-- 
2.7.2
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/2] gitk: align the commit summary format to the documentation

2016-08-26 Thread Beat Bolli
In 175d38c (SubmittingPatches: document how to reference previous commits,
2016-07-28) the format for referring to older commits was specified.

Make the text generated by the "Copy commit summary" command match this
format.

Signed-off-by: Beat Bolli 
Cc: Paul Mackerras 
---
 gitk-git/gitk | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gitk-git/gitk b/gitk-git/gitk
index 805a1c7..a27bf99 100755
--- a/gitk-git/gitk
+++ b/gitk-git/gitk
@@ -9382,7 +9382,7 @@ proc mktaggo {} {
 proc copysummary {} {
 global rowmenuid autosellen
 
-set format "%h (\"%s\", %ad)"
+set format "%h (%s, %ad)"
 set cmd [list git show -s --pretty=format:$format --date=short]
 if {$autosellen < 40} {
 lappend cmd --abbrev=$autosellen
-- 
2.7.2
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/2] gitk: align the commit summary format to the documentation

2016-08-26 Thread Beat Bolli
On 26.08.16 21:16, Stefan Beller wrote:
> On Fri, Aug 26, 2016 at 11:24 AM, Junio C Hamano  wrote:
>> Beat Bolli  writes:
>>
>>> In 175d38c (SubmittingPatches: document how to reference previous commits,
>>> 2016-07-28) the format for referring to older commits was specified.
>>>
>>> Make the text generated by the "Copy commit summary" command match this
>>> format.
>>
>> Hmph.  I didn't know gitk already had its own command to produce a
>> short string.  I actually think what it produces
> 
> It was added in d835dbb91fe (gitk: Add a "Copy commit summary" command,
> 2015-07-18), it doesn't seem to be in your tree yet, so maybe wait
> with this patch
> until you pulled gitk?

This commit was part of release 2.6.0.

>>> In 175d38c ("SubmittingPatches: document how to reference previous commits",
>>> 2016-07-28) the format for referring to older commits was specified.
>>
>> is easier to read when pasted into a sentence than what the recent
>> update 175d38ca ("SubmittingPatches: document how to reference
>> previous commits", 2016-07-28) suggests to do, i.e.
>>
>>> In 175d38c (SubmittingPatches: document how to reference previous commits,
>>> 2016-07-28) the format for referring to older commits was specified.
>>
>> Heiko, Stefan, I think you two were involved in adding that new
>> paragraph.   What do you think?
> 
> So the subtle difference is adding '"' around the commit message subject?
> 
> I agree we should fix that.

So would you prepare an amendment to your documentation commit so that
Junio can disregard my two patches?

Thanks,
Beat
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2] gitk: Add a "Copy commit summary" command

2015-07-16 Thread Beat Bolli
When referring to earlier commits in commit messages or other text, one
of the established formats is

    <abbrev-sha> ("<summary>", <author-date>)

Add a "Copy commit summary" command to the context menu that puts this
text for the currently selected commit on the clipboard. This makes it
easy for our users to create well-formatted commit references.

Signed-off-by: Beat Bolli 
Cc: Paul Mackerras 
---
 gitk-git/gitk | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/gitk-git/gitk b/gitk-git/gitk
index 9a2daf3..72a2756 100755
--- a/gitk-git/gitk
+++ b/gitk-git/gitk
@@ -2617,6 +2617,7 @@ proc makewindow {} {
{mc "Diff selected -> this" command {diffvssel 1}}
{mc "Make patch" command mkpatch}
{mc "Create tag" command mktag}
+   {mc "Copy commit summary" command copysummary}
{mc "Write commit to file" command writecommit}
{mc "Create new branch" command mkbranch}
{mc "Cherry-pick this commit" command cherrypick}
@@ -9341,6 +9342,19 @@ proc mktaggo {} {
 mktagcan
 }
 
+proc copysummary {} {
+global rowmenuid commitinfo
+
+set id [string range $rowmenuid 0 7]
+set info $commitinfo($rowmenuid)
+set commit [lindex $info 0]
+set date [formatdate [lindex $info 2]]
+set summary "$id (\"$commit\", $date)"
+
+clipboard clear
+clipboard append $summary
+}
+
 proc writecommit {} {
 global rowmenuid wrcomtop commitinfo wrcomcmd NS
 
-- 
2.1.4
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3] gitk: Add a "Copy commit summary" command

2015-07-17 Thread Beat Bolli

When referring to earlier commits in commit messages or other text, one
of the established formats is

    <abbrev-sha> ("<summary>", <author-date>)

Add a "Copy commit summary" command to the context menu that puts this
text for the currently selected commit on the clipboard. This makes it
easy for our users to create well-formatted commit references.

The <abbrev-sha> is produced with the %h format specifier to make it
unique. Its minimum length can be controlled with the config setting
core.abbrev.

Signed-off-by: Beat Bolli 
Reviewed-by: Eric Sunshine 
Reviewed-by: Johannes Sixt 
Cc: Paul Mackerras 

---
Changes since v2:
- call git log to produce a unique <abbrev-sha>
- use the short date format

Changes since v1:
- drop the "commit " literal in front of the <abbrev-sha>
---
 gitk-git/gitk | 12 
 1 file changed, 12 insertions(+)

diff --git a/gitk-git/gitk b/gitk-git/gitk
index 9a2daf3..4915f53 100755
--- a/gitk-git/gitk
+++ b/gitk-git/gitk
@@ -2617,6 +2617,7 @@ proc makewindow {} {
{mc "Diff selected -> this" command {diffvssel 1}}
{mc "Make patch" command mkpatch}
{mc "Create tag" command mktag}
+   {mc "Copy commit summary" command copysummary}
{mc "Write commit to file" command writecommit}
{mc "Create new branch" command mkbranch}
{mc "Cherry-pick this commit" command cherrypick}
@@ -9341,6 +9342,17 @@ proc mktaggo {} {
 mktagcan
 }

+proc copysummary {} {
+global rowmenuid
+
+set format "%h (\"%s\", %ad)"
+set summary [exec git show -s --pretty=format:$format --date=short \
+ $rowmenuid]
+
+clipboard clear
+clipboard append $summary
+}
+
 proc writecommit {} {
 global rowmenuid wrcomtop commitinfo wrcomcmd NS

--
2.4.0
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] gitk: Add a "Copy commit summary" command

2015-07-17 Thread Beat Bolli

On 2015-07-17 10:50, li...@haller-berlin.de wrote:

Junio C Hamano  wrote:


Beat Bolli  writes:

> When referring to earlier commits in commit messages or other text, one
> of the established formats is
>
>     <abbrev-sha> ("<summary>", <author-date>)
> ...
> +proc copysummary {} {
> +global rowmenuid commitinfo
> +
> +set id [string range $rowmenuid 0 7]
> +set info $commitinfo($rowmenuid)
> +set commit [lindex $info 0]

7 hexdigits is not always an appropriate value for all projects.
The minimum necessary to guarantee uniqueness varies on project, and
it is not a good idea to hardcode such a small value.  Not-so-old
Linux kernel history seems to use at least 12, for example.

I believe that the "one of the established formats" comes from a
"git one" alias I published somewhere long time ago, that did
something like this:

  git show -s --abbrev=8 --pretty='format:%h (%s, %ai' "$@" |
  sed -e 's/ [012][0-9]:[0-5][0-9]:[0-5][0-9] [-+][0-9][0-9][0-9][0-9]$/)/'


where the combination of --abbrev=8 and format:%h asks for a unique
abbreviation that is at least 8 hexdigits long but can use more than
8 if it is not long enough to uniquely identify the given commit.


For the intended use case of this feature (referring to earlier commits
in commit messages), guaranteeing uniqueness isn't sufficient either.
What is unique at the time of creating the commit might no longer be
unique a few years later.


This is true, but the purpose of the format with the summary text and date
is exactly to make it redundant enough that the hash doesn't have to be unique
in eternity.

So one strategy would be to add one or two digits to what %h returns, to
give some future leeway; or rely on the user to configure core.abbrev
appropriately for their project; or just make the hard-coded value
configurable, as Hannes suggests.

FWIW, a discussion of this that I find useful can be found here:
<http://blog.cuviper.com/2013/11/10/how-short-can-git-abbreviate/>.

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3] gitk: Add a "Copy commit summary" command

2015-07-17 Thread Beat Bolli
On 17.07.15 19:28, Junio C Hamano wrote:
> Eric Sunshine  writes:
> 
>>> Signed-off-by: Beat Bolli 
>>> Reviewed-by: Eric Sunshine 
>>> Reviewed-by: Johannes Sixt 
>>
>> You should drop these Reviewed-by: footers, as they imply that the
>> code was thoroughly digested and the implementation deemed correct.
> 
> ... and the most importantly, the named people said that themselves.
> 
> I do not think that happened here (yet).
> 
>>> +proc copysummary {} {
>>> +global rowmenuid
>>> +
>>> +set format "%h (\"%s\", %ad)"
>>> +set summary [exec git show -s --pretty=format:$format --date=short \
>>> + $rowmenuid]
>>> +
>>> +clipboard clear
>>> +clipboard append $summary
>>> +}
>>> +
> 
> I think this is a reasonable implementation.  The usual "spawning a
> process for each commit is too expensive" would not apply, because
> it is done on demand only for the single commit that the end-user
> specified.

Thanks, Junio! That was my thought as well.

So, the question remains now if adding something like
--abbrev=$autosellen (maybe only if it's not set to its default value),
as Paul suggested, would make sense.

Beat
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4] gitk: Add a "Copy commit summary" command

2015-07-18 Thread Beat Bolli
When referring to earlier commits in commit messages or other text, one
of the established formats is

    <abbrev-sha> ("<summary>", <author-date>)

Add a "Copy commit summary" command to the context menu that puts this
text for the currently selected commit on the clipboard. This makes it
easy for our users to create well-formatted commit references.

The <abbrev-sha> is produced with the %h format specifier to make it
unique. Its length can be controlled with the gitk preference
"Auto-select SHA1 (length)", or, if this preference is set to its
default value (40), with the Git config setting core.abbrev.

Signed-off-by: Beat Bolli 
Cc: Paul Mackerras 
---
Changes since v3:
- consider $autosellen for the --abbrev value

Changes since v2:
- call git show to produce a unique <abbrev-sha>
- use the short date format

Changes since v1:
- drop the "commit " literal in front of the <abbrev-sha>

Signed-off-by: Beat Bolli 
---
 gitk-git/gitk | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/gitk-git/gitk b/gitk-git/gitk
index 9a2daf3..d05169a 100755
--- a/gitk-git/gitk
+++ b/gitk-git/gitk
@@ -2617,6 +2617,7 @@ proc makewindow {} {
{mc "Diff selected -> this" command {diffvssel 1}}
{mc "Make patch" command mkpatch}
{mc "Create tag" command mktag}
+   {mc "Copy commit summary" command copysummary}
{mc "Write commit to file" command writecommit}
{mc "Create new branch" command mkbranch}
{mc "Cherry-pick this commit" command cherrypick}
@@ -9341,6 +9342,20 @@ proc mktaggo {} {
 mktagcan
 }
 
+proc copysummary {} {
+global rowmenuid autosellen
+
+set format "%h (\"%s\", %ad)"
+set cmd [list git show -s --pretty=format:$format --date=short]
+if {$autosellen < 40} {
+lappend cmd --abbrev=$autosellen
+}
+set summary [eval exec $cmd $rowmenuid]
+
+clipboard clear
+clipboard append $summary
+}
+
 proc writecommit {} {
 global rowmenuid wrcomtop commitinfo wrcomcmd NS
 
-- 
2.1.4
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] gitk: Add a "Copy commit summary" command

2015-07-18 Thread Beat Bolli
On 18.07.15 14:23, Paul Mackerras wrote:
> On Fri, Jul 17, 2015 at 08:30:24AM -0700, Junio C Hamano wrote:
>> Paul Mackerras  writes:
>>
>>> We have an item in the preferences menu to control the SHA1 length
>>> that is automatically selected when going to a new commit.  It's
>>> stored in the variable $autosellen.  That seems like it would be a
>>> reasonable choice for the SHA1 length to use here.
>>
>> Reusing a configuration that is used to control something similar
>> sounds sensible to me.
>>
>>> The only possible
>>> problem is that it defaults to 40 and so might give an overly long
>>> result for some users.  Maybe you could use $autosellen but limit it
>>> to at most 12 or 16 or something like that.
>>
>> How is the thing that is "automatically selected when going to a new
>> commit" used by the end user?  What is the reason why people may
>> want to configure it?  I understand that this is the string that
>> goes into the selection buffer, so presumably people are using this
>> selection to paste elsewhere?  If so, that sounds like very similar
>> to Beat's use case---perhaps if 40 is too long for Beat's use case
>> as a sensible default, then it is also too long for its original use
>> case?
> 
> It's used for pasting into commit messages and emails, and it's used
> for pasting onto the command line when typing git commands.  For the
> second, the length doesn't matter; the limit was added for the first
> case.
> 
>> Or do you expect it to be common to want to use autosellen set to 40
>> and Beat's abbrev len set to much shorter, e.g. 16?  If so they may
>> deserve two different settings, with different defaults.
> 
> I would think that if $autosellen is 40 it's almost certainly because
> the user hasn't found that control in the preferences window. :)
> 
>> Artificially limiting it to 12 or 16 does not sound all that
>> sensible, though.
> 
> Adding --abbrev=$autosellen if $autosellen is not 40 sounds like it
> would do what we want.

That's exactly what I did in v4 of the patch:
http://article.gmane.org/gmane.comp.version-control.git/274161

Thanks,
Beat
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4] gitk: Add a "Copy commit summary" command

2015-07-21 Thread Beat Bolli
Guys,

can I get a Yea or Nay for this patch?

Does it go in via Paul's gitk repo or directly through Junio?


Thanks,
Beat

On 18.07.15 13:15, Beat Bolli wrote:
> When referring to earlier commits in commit messages or other text, one
> of the established formats is
> 
>     <abbrev-sha> ("<summary>", <author-date>)
> 
> Add a "Copy commit summary" command to the context menu that puts this
> text for the currently selected commit on the clipboard. This makes it
> easy for our users to create well-formatted commit references.
> 
> The <abbrev-sha> is produced with the %h format specifier to make it
> unique. Its length can be controlled with the gitk preference
> "Auto-select SHA1 (length)", or, if this preference is set to its
> default value (40), with the Git config setting core.abbrev.
> 
> Signed-off-by: Beat Bolli 
> Cc: Paul Mackerras 
> ---
> Changes since v3:
> - consider $autosellen for the --abbrev value
> 
> Changes since v2:
> - call git show to produce a unique <abbrev-sha>
> - use the short date format
> 
> Changes since v1:
> - drop the "commit " literal in front of the <abbrev-sha>
> 
> Signed-off-by: Beat Bolli 
> ---
>  gitk-git/gitk | 15 +++
>  1 file changed, 15 insertions(+)
> 
> diff --git a/gitk-git/gitk b/gitk-git/gitk
> index 9a2daf3..d05169a 100755
> --- a/gitk-git/gitk
> +++ b/gitk-git/gitk
> @@ -2617,6 +2617,7 @@ proc makewindow {} {
>   {mc "Diff selected -> this" command {diffvssel 1}}
>   {mc "Make patch" command mkpatch}
>   {mc "Create tag" command mktag}
> + {mc "Copy commit summary" command copysummary}
>   {mc "Write commit to file" command writecommit}
>   {mc "Create new branch" command mkbranch}
>   {mc "Cherry-pick this commit" command cherrypick}
> @@ -9341,6 +9342,20 @@ proc mktaggo {} {
>  mktagcan
>  }
>  
> +proc copysummary {} {
> +global rowmenuid autosellen
> +
> +set format "%h (\"%s\", %ad)"
> +set cmd [list git show -s --pretty=format:$format --date=short]
> +if {$autosellen < 40} {
> +lappend cmd --abbrev=$autosellen
> +}
> +set summary [eval exec $cmd $rowmenuid]
> +
> +clipboard clear
> +clipboard append $summary
> +}
> +
>  proc writecommit {} {
>  global rowmenuid wrcomtop commitinfo wrcomcmd NS
>  
> 
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4] gitk: Add a "Copy commit summary" command

2015-07-21 Thread Beat Bolli
On 21.07.15 12:28, Paul Mackerras wrote:
> On Tue, Jul 21, 2015 at 12:19:23PM +0200, Beat Bolli wrote:
>> Guys,
>>
>> can I get a Yea or Nay for this patch?
>>
>> Does it go in via Paul's gitk repo or directly through Junio?
> 
> I'll put it in.  It goes into my repo and from there into Junio's.
> I'm on vacation and travelling this week, so please be patient.

No problem, enjoy your vacation!

Beat
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/2] update-unicode.sh: automatically download newer definition files

2016-12-02 Thread Beat Bolli
Checking just for the files' existence is not enough; we should also
download them if a newer version exists on the Unicode servers.

Signed-off-by: Beat Bolli 
---
 update_unicode.sh | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/update_unicode.sh b/update_unicode.sh
index 27af77c..3c84270 100755
--- a/update_unicode.sh
+++ b/update_unicode.sh
@@ -10,12 +10,8 @@ if ! test -d unicode; then
mkdir unicode
 fi &&
 ( cd unicode &&
-   if ! test -f UnicodeData.txt; then
-   wget http://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
-   fi &&
-   if ! test -f EastAsianWidth.txt; then
-   wget http://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt
-   fi &&
+   wget -N http://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt \
+   http://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt &&
if ! test -d uniset; then
git clone https://github.com/depp/uniset.git
fi &&
-- 
2.7.2


[PATCH 2/2] unicode: update the tables to Unicode 9.0

2016-12-02 Thread Beat Bolli
A rerun of the previously fixed update-unicode.sh produces these new
tables.

Signed-off-by: Beat Bolli 
---
 unicode_width.h | 122 +++-
 1 file changed, 111 insertions(+), 11 deletions(-)

diff --git a/unicode_width.h b/unicode_width.h
index 47cdd23..73b5fd6 100644
--- a/unicode_width.h
+++ b/unicode_width.h
@@ -25,7 +25,7 @@ static const struct interval zero_width[] = {
 { 0x0825, 0x0827 },
 { 0x0829, 0x082D },
 { 0x0859, 0x085B },
-{ 0x08E4, 0x0902 },
+{ 0x08D4, 0x0902 },
 { 0x093A, 0x093A },
 { 0x093C, 0x093C },
 { 0x0941, 0x0948 },
@@ -120,6 +120,7 @@ static const struct interval zero_width[] = {
 { 0x17C9, 0x17D3 },
 { 0x17DD, 0x17DD },
 { 0x180B, 0x180E },
+{ 0x1885, 0x1886 },
 { 0x18A9, 0x18A9 },
 { 0x1920, 0x1922 },
 { 0x1927, 0x1928 },
@@ -158,7 +159,7 @@ static const struct interval zero_width[] = {
 { 0x1CF4, 0x1CF4 },
 { 0x1CF8, 0x1CF9 },
 { 0x1DC0, 0x1DF5 },
-{ 0x1DFC, 0x1DFF },
+{ 0x1DFB, 0x1DFF },
 { 0x200B, 0x200F },
 { 0x202A, 0x202E },
 { 0x2060, 0x2064 },
@@ -171,13 +172,13 @@ static const struct interval zero_width[] = {
 { 0x3099, 0x309A },
 { 0xA66F, 0xA672 },
 { 0xA674, 0xA67D },
-{ 0xA69F, 0xA69F },
+{ 0xA69E, 0xA69F },
 { 0xA6F0, 0xA6F1 },
 { 0xA802, 0xA802 },
 { 0xA806, 0xA806 },
 { 0xA80B, 0xA80B },
 { 0xA825, 0xA826 },
-{ 0xA8C4, 0xA8C4 },
+{ 0xA8C4, 0xA8C5 },
 { 0xA8E0, 0xA8F1 },
 { 0xA926, 0xA92D },
 { 0xA947, 0xA951 },
@@ -204,7 +205,7 @@ static const struct interval zero_width[] = {
 { 0xABED, 0xABED },
 { 0xFB1E, 0xFB1E },
 { 0xFE00, 0xFE0F },
-{ 0xFE20, 0xFE2D },
+{ 0xFE20, 0xFE2F },
 { 0xFEFF, 0xFEFF },
 { 0xFFF9, 0xFFFB },
 { 0x101FD, 0x101FD },
@@ -228,16 +229,21 @@ static const struct interval zero_width[] = {
 { 0x11173, 0x11173 },
 { 0x11180, 0x11181 },
 { 0x111B6, 0x111BE },
+{ 0x111CA, 0x111CC },
 { 0x1122F, 0x11231 },
 { 0x11234, 0x11234 },
 { 0x11236, 0x11237 },
+{ 0x1123E, 0x1123E },
 { 0x112DF, 0x112DF },
 { 0x112E3, 0x112EA },
-{ 0x11301, 0x11301 },
+{ 0x11300, 0x11301 },
 { 0x1133C, 0x1133C },
 { 0x11340, 0x11340 },
 { 0x11366, 0x1136C },
 { 0x11370, 0x11374 },
+{ 0x11438, 0x1143F },
+{ 0x11442, 0x11444 },
+{ 0x11446, 0x11446 },
 { 0x114B3, 0x114B8 },
 { 0x114BA, 0x114BA },
 { 0x114BF, 0x114C0 },
@@ -245,6 +251,7 @@ static const struct interval zero_width[] = {
 { 0x115B2, 0x115B5 },
 { 0x115BC, 0x115BD },
 { 0x115BF, 0x115C0 },
+{ 0x115DC, 0x115DD },
 { 0x11633, 0x1163A },
 { 0x1163D, 0x1163D },
 { 0x1163F, 0x11640 },
@@ -252,6 +259,16 @@ static const struct interval zero_width[] = {
 { 0x116AD, 0x116AD },
 { 0x116B0, 0x116B5 },
 { 0x116B7, 0x116B7 },
+{ 0x1171D, 0x1171F },
+{ 0x11722, 0x11725 },
+{ 0x11727, 0x1172B },
+{ 0x11C30, 0x11C36 },
+{ 0x11C38, 0x11C3D },
+{ 0x11C3F, 0x11C3F },
+{ 0x11C92, 0x11CA7 },
+{ 0x11CAA, 0x11CB0 },
+{ 0x11CB2, 0x11CB3 },
+{ 0x11CB5, 0x11CB6 },
 { 0x16AF0, 0x16AF4 },
 { 0x16B30, 0x16B36 },
 { 0x16F8F, 0x16F92 },
@@ -262,16 +279,28 @@ static const struct interval zero_width[] = {
 { 0x1D185, 0x1D18B },
 { 0x1D1AA, 0x1D1AD },
 { 0x1D242, 0x1D244 },
+{ 0x1DA00, 0x1DA36 },
+{ 0x1DA3B, 0x1DA6C },
+{ 0x1DA75, 0x1DA75 },
+{ 0x1DA84, 0x1DA84 },
+{ 0x1DA9B, 0x1DA9F },
+{ 0x1DAA1, 0x1DAAF },
+{ 0x1E000, 0x1E006 },
+{ 0x1E008, 0x1E018 },
+{ 0x1E01B, 0x1E021 },
+{ 0x1E023, 0x1E024 },
+{ 0x1E026, 0x1E02A },
 { 0x1E8D0, 0x1E8D6 },
+{ 0x1E944, 0x1E94A },
 { 0xE0001, 0xE0001 },
 { 0xE0020, 0xE007F },
 { 0xE0100, 0xE01EF }
 };
 static const struct interval double_width[] = {
-{ /* plane */ 0x0, 0x1C },
-{ /* plane */ 0x1C, 0x21 },
-{ /* plane */ 0x21, 0x22 },
-{ /* plane */ 0x22, 0x23 },
+{ /* plane */ 0x0, 0x3D },
+{ /* plane */ 0x3D, 0x68 },
+{ /* plane */ 0x68, 0x69 },
+{ /* plane */ 0x69, 0x6A },
 { /* plane */ 0x0, 0x0 },
 { /* plane */ 0x0, 0x0 },
 { /* plane */ 0x0, 0x0 },
@@ -286,7 +315,40 @@ static const struct interval double_width[] = {
 { /* plane */ 0x0, 0x0 },
 { /* plane */ 0x0, 0x0 },
 { 0x1100, 0x115F },
+{ 0x231A, 0x231B },
 { 0x2329, 0x232A },
+{ 0x23E9, 0x23EC },
+{ 0x23F0, 0x23F0 },
+{ 0x23F3, 0x23F3 },
+{ 0x25FD, 0x25FE },
+{ 0x2614, 0x2615 },
+{ 0x2648, 0x2653 },
+{ 0x267F, 0x267F },
+{ 0x2693, 0x2693 },
+{ 0x26A1, 0x26A1 },
+{ 0x26AA, 0x26AB },
+{ 0x26BD, 0x26BE },
+{ 0x26C4, 0x26C5 },
+{ 0x26CE, 0x26CE },
+{ 0x26D4, 0x26D4 },
+{ 0x26EA, 0x26EA },
+{ 0x26F2, 0x26F3 },
+{ 0x26F5, 0x26F5 },
+{ 0x26FA, 0x26FA },
+{ 0x26FD, 0x26FD },
+{ 0x2705, 0x2705 },
+{ 0x270A, 0x270B },
+{ 0x2728, 0x2728 },
+{ 0x274C, 0x274C },
+{ 0x274E, 0x274E },
+{ 0x2753, 0x2755 },
+{ 0x2757, 0x2757 },
+{ 0x2795, 0x2797 },
+{ 0x27B0, 0x27B0 },
+{ 0x27BF, 0x27BF },
+{ 0x2B1B, 0x2B1C },
+{ 0x2B50, 0x2B50 },
+{ 0x2B55, 0x2B55 },
 { 0x2E80, 0x2E99 },
 { 0x2E9B, 0x2EF3 },
 { 0x2F00, 0x2FD5 },
@@ -313,11 +375,49 @@ static const struct interval double_width[] = {
 { 0xFE68, 0xFE6B },
 { 0xFF01, 0xFF60 },
 { 0xFFE0, 0xFFE6 },
+{ 0x16FE0, 0x16FE0 },
+{ 0x17000, 0x187EC },
+{ 0x18800, 0x18AF2 },
 { 0x1B000, 0x1B001 },
+{ 0x1F004, 0x1F004 },
+{ 0x1F0CF, 0x1F0CF },
+{ 0x1F18E, 0x1F18E },
+{ 0x1F191

[PATCH 3/3] unicode_width.h: fix the double_width[] table

2016-12-03 Thread Beat Bolli
The function bisearch() in utf8.c does a pure binary search in
double_width. It does not care about the 17 plane offsets which
unicode/uniset/uniset prepends. Leaving the plane offsets in the table
may cause wrong results.

Filter out the plane offsets in update-unicode.sh and regenerate
the table.

Signed-off-by: Beat Bolli 
---
 unicode_width.h   | 17 -
 update_unicode.sh |  2 +-
 2 files changed, 1 insertion(+), 18 deletions(-)

diff --git a/unicode_width.h b/unicode_width.h
index 73b5fd6..02207be 100644
--- a/unicode_width.h
+++ b/unicode_width.h
@@ -297,23 +297,6 @@ static const struct interval zero_width[] = {
 { 0xE0100, 0xE01EF }
 };
 static const struct interval double_width[] = {
-{ /* plane */ 0x0, 0x3D },
-{ /* plane */ 0x3D, 0x68 },
-{ /* plane */ 0x68, 0x69 },
-{ /* plane */ 0x69, 0x6A },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
 { 0x1100, 0x115F },
 { 0x231A, 0x231B },
 { 0x2329, 0x232A },
diff --git a/update_unicode.sh b/update_unicode.sh
index 3c84270..4c1ec8d 100755
--- a/update_unicode.sh
+++ b/update_unicode.sh
@@ -30,7 +30,7 @@ fi &&
  grep -v plane)
};
static const struct interval double_width[] = {
-   $(uniset/uniset --32 eaw:F,W)
+   $(uniset/uniset --32 eaw:F,W | grep -v plane)
};
EOF
 )
-- 
2.7.2


[PATCH v2 2/3] unicode_width.h: update the tables to Unicode 9.0

2016-12-03 Thread Beat Bolli
Rerunning update-unicode.sh fixed in the previous commit produces these new
tables.

Signed-off-by: Beat Bolli 
---
Diff to v1:
  - reword the commit message

 unicode_width.h | 122 +++-
 1 file changed, 111 insertions(+), 11 deletions(-)

diff --git a/unicode_width.h b/unicode_width.h
index 47cdd23..73b5fd6 100644
--- a/unicode_width.h
+++ b/unicode_width.h
@@ -25,7 +25,7 @@ static const struct interval zero_width[] = {
 { 0x0825, 0x0827 },
 { 0x0829, 0x082D },
 { 0x0859, 0x085B },
-{ 0x08E4, 0x0902 },
+{ 0x08D4, 0x0902 },
 { 0x093A, 0x093A },
 { 0x093C, 0x093C },
 { 0x0941, 0x0948 },
@@ -120,6 +120,7 @@ static const struct interval zero_width[] = {
 { 0x17C9, 0x17D3 },
 { 0x17DD, 0x17DD },
 { 0x180B, 0x180E },
+{ 0x1885, 0x1886 },
 { 0x18A9, 0x18A9 },
 { 0x1920, 0x1922 },
 { 0x1927, 0x1928 },
@@ -158,7 +159,7 @@ static const struct interval zero_width[] = {
 { 0x1CF4, 0x1CF4 },
 { 0x1CF8, 0x1CF9 },
 { 0x1DC0, 0x1DF5 },
-{ 0x1DFC, 0x1DFF },
+{ 0x1DFB, 0x1DFF },
 { 0x200B, 0x200F },
 { 0x202A, 0x202E },
 { 0x2060, 0x2064 },
@@ -171,13 +172,13 @@ static const struct interval zero_width[] = {
 { 0x3099, 0x309A },
 { 0xA66F, 0xA672 },
 { 0xA674, 0xA67D },
-{ 0xA69F, 0xA69F },
+{ 0xA69E, 0xA69F },
 { 0xA6F0, 0xA6F1 },
 { 0xA802, 0xA802 },
 { 0xA806, 0xA806 },
 { 0xA80B, 0xA80B },
 { 0xA825, 0xA826 },
-{ 0xA8C4, 0xA8C4 },
+{ 0xA8C4, 0xA8C5 },
 { 0xA8E0, 0xA8F1 },
 { 0xA926, 0xA92D },
 { 0xA947, 0xA951 },
@@ -204,7 +205,7 @@ static const struct interval zero_width[] = {
 { 0xABED, 0xABED },
 { 0xFB1E, 0xFB1E },
 { 0xFE00, 0xFE0F },
-{ 0xFE20, 0xFE2D },
+{ 0xFE20, 0xFE2F },
 { 0xFEFF, 0xFEFF },
 { 0xFFF9, 0xFFFB },
 { 0x101FD, 0x101FD },
@@ -228,16 +229,21 @@ static const struct interval zero_width[] = {
 { 0x11173, 0x11173 },
 { 0x11180, 0x11181 },
 { 0x111B6, 0x111BE },
+{ 0x111CA, 0x111CC },
 { 0x1122F, 0x11231 },
 { 0x11234, 0x11234 },
 { 0x11236, 0x11237 },
+{ 0x1123E, 0x1123E },
 { 0x112DF, 0x112DF },
 { 0x112E3, 0x112EA },
-{ 0x11301, 0x11301 },
+{ 0x11300, 0x11301 },
 { 0x1133C, 0x1133C },
 { 0x11340, 0x11340 },
 { 0x11366, 0x1136C },
 { 0x11370, 0x11374 },
+{ 0x11438, 0x1143F },
+{ 0x11442, 0x11444 },
+{ 0x11446, 0x11446 },
 { 0x114B3, 0x114B8 },
 { 0x114BA, 0x114BA },
 { 0x114BF, 0x114C0 },
@@ -245,6 +251,7 @@ static const struct interval zero_width[] = {
 { 0x115B2, 0x115B5 },
 { 0x115BC, 0x115BD },
 { 0x115BF, 0x115C0 },
+{ 0x115DC, 0x115DD },
 { 0x11633, 0x1163A },
 { 0x1163D, 0x1163D },
 { 0x1163F, 0x11640 },
@@ -252,6 +259,16 @@ static const struct interval zero_width[] = {
 { 0x116AD, 0x116AD },
 { 0x116B0, 0x116B5 },
 { 0x116B7, 0x116B7 },
+{ 0x1171D, 0x1171F },
+{ 0x11722, 0x11725 },
+{ 0x11727, 0x1172B },
+{ 0x11C30, 0x11C36 },
+{ 0x11C38, 0x11C3D },
+{ 0x11C3F, 0x11C3F },
+{ 0x11C92, 0x11CA7 },
+{ 0x11CAA, 0x11CB0 },
+{ 0x11CB2, 0x11CB3 },
+{ 0x11CB5, 0x11CB6 },
 { 0x16AF0, 0x16AF4 },
 { 0x16B30, 0x16B36 },
 { 0x16F8F, 0x16F92 },
@@ -262,16 +279,28 @@ static const struct interval zero_width[] = {
 { 0x1D185, 0x1D18B },
 { 0x1D1AA, 0x1D1AD },
 { 0x1D242, 0x1D244 },
+{ 0x1DA00, 0x1DA36 },
+{ 0x1DA3B, 0x1DA6C },
+{ 0x1DA75, 0x1DA75 },
+{ 0x1DA84, 0x1DA84 },
+{ 0x1DA9B, 0x1DA9F },
+{ 0x1DAA1, 0x1DAAF },
+{ 0x1E000, 0x1E006 },
+{ 0x1E008, 0x1E018 },
+{ 0x1E01B, 0x1E021 },
+{ 0x1E023, 0x1E024 },
+{ 0x1E026, 0x1E02A },
 { 0x1E8D0, 0x1E8D6 },
+{ 0x1E944, 0x1E94A },
 { 0xE0001, 0xE0001 },
 { 0xE0020, 0xE007F },
 { 0xE0100, 0xE01EF }
 };
 static const struct interval double_width[] = {
-{ /* plane */ 0x0, 0x1C },
-{ /* plane */ 0x1C, 0x21 },
-{ /* plane */ 0x21, 0x22 },
-{ /* plane */ 0x22, 0x23 },
+{ /* plane */ 0x0, 0x3D },
+{ /* plane */ 0x3D, 0x68 },
+{ /* plane */ 0x68, 0x69 },
+{ /* plane */ 0x69, 0x6A },
 { /* plane */ 0x0, 0x0 },
 { /* plane */ 0x0, 0x0 },
 { /* plane */ 0x0, 0x0 },
@@ -286,7 +315,40 @@ static const struct interval double_width[] = {
 { /* plane */ 0x0, 0x0 },
 { /* plane */ 0x0, 0x0 },
 { 0x1100, 0x115F },
+{ 0x231A, 0x231B },
 { 0x2329, 0x232A },
+{ 0x23E9, 0x23EC },
+{ 0x23F0, 0x23F0 },
+{ 0x23F3, 0x23F3 },
+{ 0x25FD, 0x25FE },
+{ 0x2614, 0x2615 },
+{ 0x2648, 0x2653 },
+{ 0x267F, 0x267F },
+{ 0x2693, 0x2693 },
+{ 0x26A1, 0x26A1 },
+{ 0x26AA, 0x26AB },
+{ 0x26BD, 0x26BE },
+{ 0x26C4, 0x26C5 },
+{ 0x26CE, 0x26CE },
+{ 0x26D4, 0x26D4 },
+{ 0x26EA, 0x26EA },
+{ 0x26F2, 0x26F3 },
+{ 0x26F5, 0x26F5 },
+{ 0x26FA, 0x26FA },
+{ 0x26FD, 0x26FD },
+{ 0x2705, 0x2705 },
+{ 0x270A, 0x270B },
+{ 0x2728, 0x2728 },
+{ 0x274C, 0x274C },
+{ 0x274E, 0x274E },
+{ 0x2753, 0x2755 },
+{ 0x2757, 0x2757 },
+{ 0x2795, 0x2797 },
+{ 0x27B0, 0x27B0 },
+{ 0x27BF, 0x27BF },
+{ 0x2B1B, 0x2B1C },
+{ 0x2B50, 0x2B50 },
+{ 0x2B55, 0x2B55 },
 { 0x2E80, 0x2E99 },
 { 0x2E9B, 0x2EF3 },
 { 0x2F00, 0x2FD5 },
@@ -313,11 +375,49 @@ static const struct interval double_width[] = {
 { 0xFE68, 0xFE6B },
 { 0xFF01, 0xFF60 },
 { 0xFFE0, 0xFFE6 },
+{ 0x16FE0, 0x16FE0 },
+{ 0x17000, 0x187EC },
+{ 0x18800, 0x18AF2 },
 { 0x1B000, 0x1B001 },
+{ 0x1F004, 0x1F004 },
+{ 0x1F0CF

[PATCH v2 3/3] unicode_width.h: fix the double_width[] table

2016-12-03 Thread Beat Bolli
The function bisearch() in utf8.c does a pure binary search in
double_width. It does not care about the 17 plane offsets which
unicode/uniset/uniset prepends. Leaving the plane offsets in the table
may cause wrong results.

Filter out the plane offsets in update-unicode.sh and regenerate the
table.

Cc: Torsten Bögershausen 
Signed-off-by: Beat Bolli 
---
Diff to v1:
  - add Torsten's Cc:

 unicode_width.h   | 17 -
 update_unicode.sh |  2 +-
 2 files changed, 1 insertion(+), 18 deletions(-)

diff --git a/unicode_width.h b/unicode_width.h
index 73b5fd6..02207be 100644
--- a/unicode_width.h
+++ b/unicode_width.h
@@ -297,23 +297,6 @@ static const struct interval zero_width[] = {
 { 0xE0100, 0xE01EF }
 };
 static const struct interval double_width[] = {
-{ /* plane */ 0x0, 0x3D },
-{ /* plane */ 0x3D, 0x68 },
-{ /* plane */ 0x68, 0x69 },
-{ /* plane */ 0x69, 0x6A },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
 { 0x1100, 0x115F },
 { 0x231A, 0x231B },
 { 0x2329, 0x232A },
diff --git a/update_unicode.sh b/update_unicode.sh
index 3c84270..4c1ec8d 100755
--- a/update_unicode.sh
+++ b/update_unicode.sh
@@ -30,7 +30,7 @@ fi &&
  grep -v plane)
};
static const struct interval double_width[] = {
-   $(uniset/uniset --32 eaw:F,W)
+   $(uniset/uniset --32 eaw:F,W | grep -v plane)
};
EOF
 )
-- 
2.7.2


[PATCH v2 1/3] update-unicode.sh: automatically download newer definition files

2016-12-03 Thread Beat Bolli
Checking just for the unicode data files' existence is not sufficient;
we should also download them if a newer version exists on the Unicode
consortium's servers. Option -N of wget does this nicely for us.

Cc: Torsten Bögershausen 
Signed-off-by: Beat Bolli 
---
Diff to v1:
  - reword the commit message
  - add Torsten's Cc:

 update_unicode.sh | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/update_unicode.sh b/update_unicode.sh
index 27af77c..3c84270 100755
--- a/update_unicode.sh
+++ b/update_unicode.sh
@@ -10,12 +10,8 @@ if ! test -d unicode; then
mkdir unicode
 fi &&
 ( cd unicode &&
-   if ! test -f UnicodeData.txt; then
-   wget 
http://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
-   fi &&
-   if ! test -f EastAsianWidth.txt; then
-   wget 
http://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt
-   fi &&
+   wget -N http://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt \
+   http://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt 
&&
if ! test -d uniset; then
git clone https://github.com/depp/uniset.git
fi &&
-- 
2.7.2


[PATCH v3 1/3] update-unicode.sh: automatically download newer definition files

2016-12-03 Thread Beat Bolli
Checking just for the unicode data files' existence is not sufficient;
we should also download them if a newer version exists on the Unicode
consortium's servers. Option -N of wget does this nicely for us.

Cc: Torsten Bögershausen 
Signed-off-by: Beat Bolli 
---
Diff to v2:
  - reorder the commits: fix all of update-unicode.sh first, then
regenerate unicode_width.h only once

 update_unicode.sh | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/update_unicode.sh b/update_unicode.sh
index 27af77c..3c84270 100755
--- a/update_unicode.sh
+++ b/update_unicode.sh
@@ -10,12 +10,8 @@ if ! test -d unicode; then
mkdir unicode
 fi &&
 ( cd unicode &&
-   if ! test -f UnicodeData.txt; then
-   wget 
http://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
-   fi &&
-   if ! test -f EastAsianWidth.txt; then
-   wget 
http://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt
-   fi &&
+   wget -N http://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt \
+   http://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt 
&&
if ! test -d uniset; then
git clone https://github.com/depp/uniset.git
fi &&
-- 
2.7.2


[PATCH v3 2/3] update-unicode.sh: strip the plane offsets from the double_width[] table

2016-12-03 Thread Beat Bolli
The function bisearch() in utf8.c does a pure binary search in
double_width. It does not care about the 17 plane offsets which
unicode/uniset/uniset prepends. Leaving the plane offsets in the table
may cause wrong results.

Filter out the plane offsets in update-unicode.sh.

Cc: Torsten Bögershausen 
Signed-off-by: Beat Bolli 
---
 update_unicode.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/update_unicode.sh b/update_unicode.sh
index 3c84270..4c1ec8d 100755
--- a/update_unicode.sh
+++ b/update_unicode.sh
@@ -30,7 +30,7 @@ fi &&
  grep -v plane)
};
static const struct interval double_width[] = {
-   $(uniset/uniset --32 eaw:F,W)
+   $(uniset/uniset --32 eaw:F,W | grep -v plane)
};
EOF
 )
-- 
2.7.2


[PATCH v3 3/3] unicode_width.h: update the tables to Unicode 9.0

2016-12-03 Thread Beat Bolli
Rerunning update-unicode.sh that we fixed in the two previous commits
produces these new tables.

Signed-off-by: Beat Bolli 
---
 unicode_width.h | 131 +---
 1 file changed, 107 insertions(+), 24 deletions(-)

diff --git a/unicode_width.h b/unicode_width.h
index 47cdd23..02207be 100644
--- a/unicode_width.h
+++ b/unicode_width.h
@@ -25,7 +25,7 @@ static const struct interval zero_width[] = {
 { 0x0825, 0x0827 },
 { 0x0829, 0x082D },
 { 0x0859, 0x085B },
-{ 0x08E4, 0x0902 },
+{ 0x08D4, 0x0902 },
 { 0x093A, 0x093A },
 { 0x093C, 0x093C },
 { 0x0941, 0x0948 },
@@ -120,6 +120,7 @@ static const struct interval zero_width[] = {
 { 0x17C9, 0x17D3 },
 { 0x17DD, 0x17DD },
 { 0x180B, 0x180E },
+{ 0x1885, 0x1886 },
 { 0x18A9, 0x18A9 },
 { 0x1920, 0x1922 },
 { 0x1927, 0x1928 },
@@ -158,7 +159,7 @@ static const struct interval zero_width[] = {
 { 0x1CF4, 0x1CF4 },
 { 0x1CF8, 0x1CF9 },
 { 0x1DC0, 0x1DF5 },
-{ 0x1DFC, 0x1DFF },
+{ 0x1DFB, 0x1DFF },
 { 0x200B, 0x200F },
 { 0x202A, 0x202E },
 { 0x2060, 0x2064 },
@@ -171,13 +172,13 @@ static const struct interval zero_width[] = {
 { 0x3099, 0x309A },
 { 0xA66F, 0xA672 },
 { 0xA674, 0xA67D },
-{ 0xA69F, 0xA69F },
+{ 0xA69E, 0xA69F },
 { 0xA6F0, 0xA6F1 },
 { 0xA802, 0xA802 },
 { 0xA806, 0xA806 },
 { 0xA80B, 0xA80B },
 { 0xA825, 0xA826 },
-{ 0xA8C4, 0xA8C4 },
+{ 0xA8C4, 0xA8C5 },
 { 0xA8E0, 0xA8F1 },
 { 0xA926, 0xA92D },
 { 0xA947, 0xA951 },
@@ -204,7 +205,7 @@ static const struct interval zero_width[] = {
 { 0xABED, 0xABED },
 { 0xFB1E, 0xFB1E },
 { 0xFE00, 0xFE0F },
-{ 0xFE20, 0xFE2D },
+{ 0xFE20, 0xFE2F },
 { 0xFEFF, 0xFEFF },
 { 0xFFF9, 0xFFFB },
 { 0x101FD, 0x101FD },
@@ -228,16 +229,21 @@ static const struct interval zero_width[] = {
 { 0x11173, 0x11173 },
 { 0x11180, 0x11181 },
 { 0x111B6, 0x111BE },
+{ 0x111CA, 0x111CC },
 { 0x1122F, 0x11231 },
 { 0x11234, 0x11234 },
 { 0x11236, 0x11237 },
+{ 0x1123E, 0x1123E },
 { 0x112DF, 0x112DF },
 { 0x112E3, 0x112EA },
-{ 0x11301, 0x11301 },
+{ 0x11300, 0x11301 },
 { 0x1133C, 0x1133C },
 { 0x11340, 0x11340 },
 { 0x11366, 0x1136C },
 { 0x11370, 0x11374 },
+{ 0x11438, 0x1143F },
+{ 0x11442, 0x11444 },
+{ 0x11446, 0x11446 },
 { 0x114B3, 0x114B8 },
 { 0x114BA, 0x114BA },
 { 0x114BF, 0x114C0 },
@@ -245,6 +251,7 @@ static const struct interval zero_width[] = {
 { 0x115B2, 0x115B5 },
 { 0x115BC, 0x115BD },
 { 0x115BF, 0x115C0 },
+{ 0x115DC, 0x115DD },
 { 0x11633, 0x1163A },
 { 0x1163D, 0x1163D },
 { 0x1163F, 0x11640 },
@@ -252,6 +259,16 @@ static const struct interval zero_width[] = {
 { 0x116AD, 0x116AD },
 { 0x116B0, 0x116B5 },
 { 0x116B7, 0x116B7 },
+{ 0x1171D, 0x1171F },
+{ 0x11722, 0x11725 },
+{ 0x11727, 0x1172B },
+{ 0x11C30, 0x11C36 },
+{ 0x11C38, 0x11C3D },
+{ 0x11C3F, 0x11C3F },
+{ 0x11C92, 0x11CA7 },
+{ 0x11CAA, 0x11CB0 },
+{ 0x11CB2, 0x11CB3 },
+{ 0x11CB5, 0x11CB6 },
 { 0x16AF0, 0x16AF4 },
 { 0x16B30, 0x16B36 },
 { 0x16F8F, 0x16F92 },
@@ -262,31 +279,59 @@ static const struct interval zero_width[] = {
 { 0x1D185, 0x1D18B },
 { 0x1D1AA, 0x1D1AD },
 { 0x1D242, 0x1D244 },
+{ 0x1DA00, 0x1DA36 },
+{ 0x1DA3B, 0x1DA6C },
+{ 0x1DA75, 0x1DA75 },
+{ 0x1DA84, 0x1DA84 },
+{ 0x1DA9B, 0x1DA9F },
+{ 0x1DAA1, 0x1DAAF },
+{ 0x1E000, 0x1E006 },
+{ 0x1E008, 0x1E018 },
+{ 0x1E01B, 0x1E021 },
+{ 0x1E023, 0x1E024 },
+{ 0x1E026, 0x1E02A },
 { 0x1E8D0, 0x1E8D6 },
+{ 0x1E944, 0x1E94A },
 { 0xE0001, 0xE0001 },
 { 0xE0020, 0xE007F },
 { 0xE0100, 0xE01EF }
 };
 static const struct interval double_width[] = {
-{ /* plane */ 0x0, 0x1C },
-{ /* plane */ 0x1C, 0x21 },
-{ /* plane */ 0x21, 0x22 },
-{ /* plane */ 0x22, 0x23 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
 { 0x1100, 0x115F },
+{ 0x231A, 0x231B },
 { 0x2329, 0x232A },
+{ 0x23E9, 0x23EC },
+{ 0x23F0, 0x23F0 },
+{ 0x23F3, 0x23F3 },
+{ 0x25FD, 0x25FE },
+{ 0x2614, 0x2615 },
+{ 0x2648, 0x2653 },
+{ 0x267F, 0x267F },
+{ 0x2693, 0x2693 },
+{ 0x26A1, 0x26A1 },
+{ 0x26AA, 0x26AB },
+{ 0x26BD, 0x26BE },
+{ 0x26C4, 0x26C5 },
+{ 0x26CE, 0x26CE },
+{ 0x26D4, 0x26D4 },
+{ 0x26EA, 0x26EA },
+{ 0x26F2, 0x26F3 },
+{ 0x26F5, 0x26F5 },
+{ 0x26FA, 0x26FA },
+{ 0x26FD, 0x26FD },
+{ 0x2705, 0x2705 },
+{ 0x270A, 0x270B },
+{ 0x2728, 0x2728 },
+{ 0x274C, 0x274C },
+{ 0x274E, 0x274E },
+{ 0x2753, 0x2755 },
+{ 0x2757, 0x2757 },
+{ 0x2795, 0x2797 },
+{ 0x27B0, 0x27B0 },
+{ 0x27BF, 0x27BF },
+{ 0x2B1B, 0x2B1C },
+{ 0x2B50, 0x2B50 },
+{ 0x2B55, 0x2B55 },
 { 0x2E80, 0x2E99 },
 { 0x2E9B, 0x2EF3 },
 { 0x2F00, 0x2FD5 },
@@ -313,11 +358,49 @@ static const struct interval double_width[] = {
 { 0xFE68, 0xFE6B },
 { 0xFF01, 0xFF60 },
 { 0xFFE0, 0xFFE6 },
+{ 0x16FE0, 0x16FE0 },
+{ 0x17000, 0x187EC },
+{ 0x18800, 0x18AF2 },
 { 0x1B000, 0x1B001 },
+{ 0x1F004, 0x1F004 },
+{ 0x1F0CF

Re: [PATCH v3 1/3] update-unicode.sh: automatically download newer definition files

2016-12-03 Thread Beat Bolli
On 03.12.16 17:40, Torsten Bögershausen wrote:
> On Sat, Dec 03, 2016 at 02:19:31PM +0100, Beat Bolli wrote:
>> Checking just for the unicode data files' existence is not sufficient;
>> we should also download them if a newer version exists on the Unicode
>> consortium's servers. Option -N of wget does this nicely for us.
>>
>> Cc: Torsten Bögershausen
> 
> The V3 series makes perfect sense, thanks for cleaning up my mess.
Yeah, it took me three tries, too :-)

> (And can we remove the Cc: line, or replace it with Reviewed-by?)
If you prefer, sure.

Do you have any other comments?

Beat


[PATCH v4 1/3] update-unicode.sh: automatically download newer definition files

2016-12-03 Thread Beat Bolli
Checking just for the unicode data files' existence is not sufficient;
we should also download them if a newer version exists on the Unicode
consortium's servers. Option -N of wget does this nicely for us.

Reviewed-by: Torsten Boegershausen 
Signed-off-by: Beat Bolli 
---
Diff to v3:
  - change the Cc: into Reviewed-by: on Torsten's request
  - include the old reroll diffs

Diff to v2:
  - reorder the commits: fix all of update-unicode.sh first, then
regenerate unicode_width.h only once

Diff to v1:
  - reword the commit message
  - add Torsten's Cc:

 update_unicode.sh | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/update_unicode.sh b/update_unicode.sh
index 27af77c..3c84270 100755
--- a/update_unicode.sh
+++ b/update_unicode.sh
@@ -10,12 +10,8 @@ if ! test -d unicode; then
mkdir unicode
 fi &&
 ( cd unicode &&
-   if ! test -f UnicodeData.txt; then
-   wget 
http://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
-   fi &&
-   if ! test -f EastAsianWidth.txt; then
-   wget 
http://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt
-   fi &&
+   wget -N http://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt \
+   http://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt 
&&
if ! test -d uniset; then
git clone https://github.com/depp/uniset.git
fi &&
-- 
2.7.2


[PATCH v4 3/3] unicode_width.h: update the tables to Unicode 9.0

2016-12-03 Thread Beat Bolli
Rerunning update-unicode.sh that we fixed in the two previous commits
produces these new tables.

Signed-off-by: Beat Bolli 
---
 unicode_width.h | 131 +---
 1 file changed, 107 insertions(+), 24 deletions(-)

diff --git a/unicode_width.h b/unicode_width.h
index 47cdd23..02207be 100644
--- a/unicode_width.h
+++ b/unicode_width.h
@@ -25,7 +25,7 @@ static const struct interval zero_width[] = {
 { 0x0825, 0x0827 },
 { 0x0829, 0x082D },
 { 0x0859, 0x085B },
-{ 0x08E4, 0x0902 },
+{ 0x08D4, 0x0902 },
 { 0x093A, 0x093A },
 { 0x093C, 0x093C },
 { 0x0941, 0x0948 },
@@ -120,6 +120,7 @@ static const struct interval zero_width[] = {
 { 0x17C9, 0x17D3 },
 { 0x17DD, 0x17DD },
 { 0x180B, 0x180E },
+{ 0x1885, 0x1886 },
 { 0x18A9, 0x18A9 },
 { 0x1920, 0x1922 },
 { 0x1927, 0x1928 },
@@ -158,7 +159,7 @@ static const struct interval zero_width[] = {
 { 0x1CF4, 0x1CF4 },
 { 0x1CF8, 0x1CF9 },
 { 0x1DC0, 0x1DF5 },
-{ 0x1DFC, 0x1DFF },
+{ 0x1DFB, 0x1DFF },
 { 0x200B, 0x200F },
 { 0x202A, 0x202E },
 { 0x2060, 0x2064 },
@@ -171,13 +172,13 @@ static const struct interval zero_width[] = {
 { 0x3099, 0x309A },
 { 0xA66F, 0xA672 },
 { 0xA674, 0xA67D },
-{ 0xA69F, 0xA69F },
+{ 0xA69E, 0xA69F },
 { 0xA6F0, 0xA6F1 },
 { 0xA802, 0xA802 },
 { 0xA806, 0xA806 },
 { 0xA80B, 0xA80B },
 { 0xA825, 0xA826 },
-{ 0xA8C4, 0xA8C4 },
+{ 0xA8C4, 0xA8C5 },
 { 0xA8E0, 0xA8F1 },
 { 0xA926, 0xA92D },
 { 0xA947, 0xA951 },
@@ -204,7 +205,7 @@ static const struct interval zero_width[] = {
 { 0xABED, 0xABED },
 { 0xFB1E, 0xFB1E },
 { 0xFE00, 0xFE0F },
-{ 0xFE20, 0xFE2D },
+{ 0xFE20, 0xFE2F },
 { 0xFEFF, 0xFEFF },
 { 0xFFF9, 0xFFFB },
 { 0x101FD, 0x101FD },
@@ -228,16 +229,21 @@ static const struct interval zero_width[] = {
 { 0x11173, 0x11173 },
 { 0x11180, 0x11181 },
 { 0x111B6, 0x111BE },
+{ 0x111CA, 0x111CC },
 { 0x1122F, 0x11231 },
 { 0x11234, 0x11234 },
 { 0x11236, 0x11237 },
+{ 0x1123E, 0x1123E },
 { 0x112DF, 0x112DF },
 { 0x112E3, 0x112EA },
-{ 0x11301, 0x11301 },
+{ 0x11300, 0x11301 },
 { 0x1133C, 0x1133C },
 { 0x11340, 0x11340 },
 { 0x11366, 0x1136C },
 { 0x11370, 0x11374 },
+{ 0x11438, 0x1143F },
+{ 0x11442, 0x11444 },
+{ 0x11446, 0x11446 },
 { 0x114B3, 0x114B8 },
 { 0x114BA, 0x114BA },
 { 0x114BF, 0x114C0 },
@@ -245,6 +251,7 @@ static const struct interval zero_width[] = {
 { 0x115B2, 0x115B5 },
 { 0x115BC, 0x115BD },
 { 0x115BF, 0x115C0 },
+{ 0x115DC, 0x115DD },
 { 0x11633, 0x1163A },
 { 0x1163D, 0x1163D },
 { 0x1163F, 0x11640 },
@@ -252,6 +259,16 @@ static const struct interval zero_width[] = {
 { 0x116AD, 0x116AD },
 { 0x116B0, 0x116B5 },
 { 0x116B7, 0x116B7 },
+{ 0x1171D, 0x1171F },
+{ 0x11722, 0x11725 },
+{ 0x11727, 0x1172B },
+{ 0x11C30, 0x11C36 },
+{ 0x11C38, 0x11C3D },
+{ 0x11C3F, 0x11C3F },
+{ 0x11C92, 0x11CA7 },
+{ 0x11CAA, 0x11CB0 },
+{ 0x11CB2, 0x11CB3 },
+{ 0x11CB5, 0x11CB6 },
 { 0x16AF0, 0x16AF4 },
 { 0x16B30, 0x16B36 },
 { 0x16F8F, 0x16F92 },
@@ -262,31 +279,59 @@ static const struct interval zero_width[] = {
 { 0x1D185, 0x1D18B },
 { 0x1D1AA, 0x1D1AD },
 { 0x1D242, 0x1D244 },
+{ 0x1DA00, 0x1DA36 },
+{ 0x1DA3B, 0x1DA6C },
+{ 0x1DA75, 0x1DA75 },
+{ 0x1DA84, 0x1DA84 },
+{ 0x1DA9B, 0x1DA9F },
+{ 0x1DAA1, 0x1DAAF },
+{ 0x1E000, 0x1E006 },
+{ 0x1E008, 0x1E018 },
+{ 0x1E01B, 0x1E021 },
+{ 0x1E023, 0x1E024 },
+{ 0x1E026, 0x1E02A },
 { 0x1E8D0, 0x1E8D6 },
+{ 0x1E944, 0x1E94A },
 { 0xE0001, 0xE0001 },
 { 0xE0020, 0xE007F },
 { 0xE0100, 0xE01EF }
 };
 static const struct interval double_width[] = {
-{ /* plane */ 0x0, 0x1C },
-{ /* plane */ 0x1C, 0x21 },
-{ /* plane */ 0x21, 0x22 },
-{ /* plane */ 0x22, 0x23 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
 { 0x1100, 0x115F },
+{ 0x231A, 0x231B },
 { 0x2329, 0x232A },
+{ 0x23E9, 0x23EC },
+{ 0x23F0, 0x23F0 },
+{ 0x23F3, 0x23F3 },
+{ 0x25FD, 0x25FE },
+{ 0x2614, 0x2615 },
+{ 0x2648, 0x2653 },
+{ 0x267F, 0x267F },
+{ 0x2693, 0x2693 },
+{ 0x26A1, 0x26A1 },
+{ 0x26AA, 0x26AB },
+{ 0x26BD, 0x26BE },
+{ 0x26C4, 0x26C5 },
+{ 0x26CE, 0x26CE },
+{ 0x26D4, 0x26D4 },
+{ 0x26EA, 0x26EA },
+{ 0x26F2, 0x26F3 },
+{ 0x26F5, 0x26F5 },
+{ 0x26FA, 0x26FA },
+{ 0x26FD, 0x26FD },
+{ 0x2705, 0x2705 },
+{ 0x270A, 0x270B },
+{ 0x2728, 0x2728 },
+{ 0x274C, 0x274C },
+{ 0x274E, 0x274E },
+{ 0x2753, 0x2755 },
+{ 0x2757, 0x2757 },
+{ 0x2795, 0x2797 },
+{ 0x27B0, 0x27B0 },
+{ 0x27BF, 0x27BF },
+{ 0x2B1B, 0x2B1C },
+{ 0x2B50, 0x2B50 },
+{ 0x2B55, 0x2B55 },
 { 0x2E80, 0x2E99 },
 { 0x2E9B, 0x2EF3 },
 { 0x2F00, 0x2FD5 },
@@ -313,11 +358,49 @@ static const struct interval double_width[] = {
 { 0xFE68, 0xFE6B },
 { 0xFF01, 0xFF60 },
 { 0xFFE0, 0xFFE6 },
+{ 0x16FE0, 0x16FE0 },
+{ 0x17000, 0x187EC },
+{ 0x18800, 0x18AF2 },
 { 0x1B000, 0x1B001 },
+{ 0x1F004, 0x1F004 },
+{ 0x1F0CF

[PATCH v4 2/3] update-unicode.sh: strip the plane offsets from the double_width[] table

2016-12-03 Thread Beat Bolli
The function bisearch() in utf8.c does a pure binary search in
double_width. It does not care about the 17 plane offsets which
unicode/uniset/uniset prepends. Leaving the plane offsets in the table
may cause wrong results.

Filter out the plane offsets in update-unicode.sh.

Reviewed-by: Torsten Bögershausen 
Signed-off-by: Beat Bolli 
---
 update_unicode.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/update_unicode.sh b/update_unicode.sh
index 3c84270..4c1ec8d 100755
--- a/update_unicode.sh
+++ b/update_unicode.sh
@@ -30,7 +30,7 @@ fi &&
  grep -v plane)
};
static const struct interval double_width[] = {
-   $(uniset/uniset --32 eaw:F,W)
+   $(uniset/uniset --32 eaw:F,W | grep -v plane)
};
EOF
 )
-- 
2.7.2


Re: [PATCH v4 1/3] update-unicode.sh: automatically download newer definition files

2016-12-06 Thread Beat Bolli
On 05.12.16 21:31, Junio C Hamano wrote:
> Torsten Bögershausen  writes:
> 
>> On Sat, Dec 03, 2016 at 10:00:47PM +0100, Beat Bolli wrote:
>>> Checking just for the unicode data files' existence is not sufficient;
>>> we should also download them if a newer version exists on the Unicode
>>> consortium's servers. Option -N of wget does this nicely for us.
>>>
>>> Reviewed-by: Torsten Boegershausen 
>>
>> Minor remark (Not sure if this motivates v5, maybe Junio can fix it
>> locally?)
>> s/oe/ö/
>>
>> Beside this: Thanks again (and I learned about the -N option of wget)
> 
> Will fix up while queuing (only 1/3 needs it, 2/3 has it right).
> 
> Also, I'll do s/update-unicode.sh/update_unicode.sh/ on the title
> and the message to match the reality.  At some point we might want
> to fix the reality to match people's expectations, though.

Thanks, Junio. This was a bit sloppy of me.

I really appreciate your regard for the small things!

Cheers, Beat


[PATCH 2/3] update_unicode.sh: remove the plane filters

2016-12-11 Thread Beat Bolli
The uniset upstream has accepted my patches that eliminate the Unicode
plane offsets from the output in '--32' mode.

Remove the corresponding filter in update_unicode.sh.

Signed-off-by: Beat Bolli 
---
 update_unicode.sh | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/update_unicode.sh b/update_unicode.sh
index 9ca7d8b..e595bf8 100755
--- a/update_unicode.sh
+++ b/update_unicode.sh
@@ -31,11 +31,10 @@ fi &&
UNICODE_DIR=. && export UNICODE_DIR &&
cat >$UNICODEWIDTH_H <<-EOF
static const struct interval zero_width[] = {
-   $(uniset/uniset --32 cat:Me,Mn,Cf + U+1160..U+11FF - U+00AD |
- grep -v plane)
+   $(uniset/uniset --32 cat:Me,Mn,Cf + U+1160..U+11FF - U+00AD)
};
static const struct interval double_width[] = {
-   $(uniset/uniset --32 eaw:F,W | grep -v plane)
+   $(uniset/uniset --32 eaw:F,W)
};
EOF
 )
-- 
2.7.2


[PATCH 1/3] update_unicode.sh: update the uniset repo if it exists

2016-12-11 Thread Beat Bolli
We need to track the new commits in uniset, otherwise their and our code
get out of sync.

Signed-off-by: Beat Bolli 
---

Junio, these go on top of my bb/unicode-9.0 branch, please.

Thanks!

 update_unicode.sh | 5 +
 1 file changed, 5 insertions(+)

diff --git a/update_unicode.sh b/update_unicode.sh
index 4c1ec8d..9ca7d8b 100755
--- a/update_unicode.sh
+++ b/update_unicode.sh
@@ -14,6 +14,11 @@ fi &&
http://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt 
&&
if ! test -d uniset; then
git clone https://github.com/depp/uniset.git
+   else
+   (
+   cd uniset &&
+   git pull
+   )
fi &&
(
cd uniset &&
-- 
2.7.2


[PATCH 3/3] update_unicode.sh: restore hexadecimal output

2016-12-11 Thread Beat Bolli
The uniset upstream has decided that decimal numbers are The True Way, so
let's convert them back to the usual format that's closer to the U+
standard.

The generated unicode_width.h file again looks exactly the same as two
commits ago.

Signed-off-by: Beat Bolli 
---
 update_unicode.sh | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/update_unicode.sh b/update_unicode.sh
index e595bf8..d7720d5 100755
--- a/update_unicode.sh
+++ b/update_unicode.sh
@@ -5,6 +5,12 @@
 #Mn Nonspacing_Mark a nonspacing combining mark (zero advance width)
 #Cf Format  a format control character
 #
+
+dec_to_hex() {
+   # convert any decimal numbers to 4-digit hex
+   perl -pe 's/(\d+)/sprintf("0x%04X", $1)/ge'
+}
+
 UNICODEWIDTH_H=../unicode_width.h
 if ! test -d unicode; then
mkdir unicode
@@ -29,7 +35,7 @@ fi &&
make
) &&
UNICODE_DIR=. && export UNICODE_DIR &&
-   cat >$UNICODEWIDTH_H <<-EOF
+   dec_to_hex >$UNICODEWIDTH_H <<-EOF
static const struct interval zero_width[] = {
$(uniset/uniset --32 cat:Me,Mn,Cf + U+1160..U+11FF - U+00AD)
};
-- 
2.7.2


Re: [PATCH 1/3] update_unicode.sh: update the uniset repo if it exists

2016-12-12 Thread Beat Bolli

On 2016-12-12 06:53, Torsten Bögershausen wrote:

On 2016-12-12 00:34, Beat Bolli wrote:
We need to track the new commits in uniset, otherwise their and our code
get out of sync.

Signed-off-by: Beat Bolli 
---

Junio, these go on top of my bb/unicode-9.0 branch, please.

Thanks!

 update_unicode.sh | 5 +
 1 file changed, 5 insertions(+)

diff --git a/update_unicode.sh b/update_unicode.sh
index 4c1ec8d..9ca7d8b 100755
--- a/update_unicode.sh
+++ b/update_unicode.sh
@@ -14,6 +14,11 @@ fi &&
http://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt 
&&
if ! test -d uniset; then
git clone https://github.com/depp/uniset.git
+   else
+   (
+   cd uniset &&
+   git pull

If upstream has accepted your patches, that's nice.

Minor question, especially to the next commit:
Should we make sure to checkout the exact version, which has been tested?

In this case  cb97792880625e24a9f581412d03659091a0e54f

And this is for both a fresh clone and the git pull
needs to be replaced by
git fetch && git checkout cb97792880625e24a9f581412d03659091a0e54f


(Which of course is a shell variable)


I was actually wondering what the policy was for adding submodules to the
Git repo, but then decided against it. Another option would be to fork
uniset on GitHub and just let it stay on a working commit.

Junio, what's your stance on this?

Beat


Re: [PATCH 1/3] update_unicode.sh: update the uniset repo if it exists

2016-12-12 Thread Beat Bolli
On 12.12.16 19:12, Torsten Bögershausen wrote:
> 
>>> Minor question, especially to the next commit:
>>> Should we make sure to checkout the exact version, which has been tested?
>>> In this case  cb97792880625e24a9f581412d03659091a0e54f
>>>
>>> And this is for both a fresh clone and the git pull
>>> needs to be replaced by
>>> git fetch && git checkout cb97792880625e24a9f581412d03659091a0e54f
>>>
>>>
>>> (Which of course is a shell variable)
>>
>> I was actually wondering what the policy was for adding submodules to the 
>> Git repo,
>> but then decided against it. Another option would be to fork uniset on 
>> GitHub and
>> just let it stay on a working commit.
>>
>> Junio, what's your stance on this?
>>
>> Beat
> 
> If I run  ./update_unicode.sh on the latest master of   
> https://github.com/depp/uniset.git ,
> commit  a5fac4a091857dd5429cc2d, I get a diff in  unicode_width.h like this:
> 
> -{ 0x0300, 0x036F },
> 
> +{ 768, 879 },
> 
> IOW, all hex values are printed as decimal values.
> Not a problem for the compiler, but for the human
> to check the unicode tables.
> 
> So I think we should "pin" the version of uniset.

That's what patch 3/3 fixes.


Re: [PATCH 1/3] update_unicode.sh: update the uniset repo if it exists

2016-12-12 Thread Beat Bolli
On 12.12.16 19:33, Junio C Hamano wrote:
> Torsten Bögershausen  writes:
> 
>> If I run ./update_unicode.sh on the latest master of
>> https://github.com/depp/uniset.git , commit
>> a5fac4a091857dd5429cc2d, I get a diff in unicode_width.h like
>> this:
>>
>> -{ 0x0300, 0x036F },
>>
>> +{ 768, 879 },
>>
>> IOW, all hex values are printed as decimal values.
>> Not a problem for the compiler, but for the human
>> to check the unicode tables.
>>
>> So I think we should "pin" the version of uniset.
> 
> Sure, and I'd rather see the update-unicode.sh script moved
> somewhere in contrib/ while at it.  Those who are interested in
> keeping up with the unicode standard are tiny minority of the
> developer population, and most of us would treat the built width
> table as the source (after all, that is what we ship).
> 
> To be bluntly honest, I'd rather not see "update-unicode.sh"
> download and build uniset at all.  It's as if po/ hierarchy shipping
> with its own script to download and build msgmerge--that's madness.
> Needless to say, shipping the sources for uniset embedded in our
> project tree (either as a snapshot-fork or as a submodule) is even
> worse.  Those who want to muck with po/ are expected to have
> msgmerge and friends.  Why not expect the same for those who want to
> update the unicode width table?
> 
> I'd rather see a written instruction telling which snapshot to get
> and from where to build and place on their $PATH in the README file,
> sitting next to the update-unicode.sh script in contrib/uniwidth/
> directory, for those who are interested in building the width table
> "from the source", and the update-unicode.sh script to assume that
> uniset is available.
> 

OK. So please don't merge bb/unicode-9.0 to next yet; I'll prepare a
reroll following your description.

Torsten, is this alright with you?

Cheers, Beat


[PATCH v2 6/6] unicode_width.h: update the width tables to Unicode 9.0

2016-12-13 Thread Beat Bolli
Rerunning update-unicode.sh that we fixed in the previous commits
produces these new tables.

Signed-off-by: Beat Bolli 
---
 unicode_width.h | 131 +---
 1 file changed, 107 insertions(+), 24 deletions(-)

diff --git a/unicode_width.h b/unicode_width.h
index 47cdd23..02207be 100644
--- a/unicode_width.h
+++ b/unicode_width.h
@@ -25,7 +25,7 @@ static const struct interval zero_width[] = {
 { 0x0825, 0x0827 },
 { 0x0829, 0x082D },
 { 0x0859, 0x085B },
-{ 0x08E4, 0x0902 },
+{ 0x08D4, 0x0902 },
 { 0x093A, 0x093A },
 { 0x093C, 0x093C },
 { 0x0941, 0x0948 },
@@ -120,6 +120,7 @@ static const struct interval zero_width[] = {
 { 0x17C9, 0x17D3 },
 { 0x17DD, 0x17DD },
 { 0x180B, 0x180E },
+{ 0x1885, 0x1886 },
 { 0x18A9, 0x18A9 },
 { 0x1920, 0x1922 },
 { 0x1927, 0x1928 },
@@ -158,7 +159,7 @@ static const struct interval zero_width[] = {
 { 0x1CF4, 0x1CF4 },
 { 0x1CF8, 0x1CF9 },
 { 0x1DC0, 0x1DF5 },
-{ 0x1DFC, 0x1DFF },
+{ 0x1DFB, 0x1DFF },
 { 0x200B, 0x200F },
 { 0x202A, 0x202E },
 { 0x2060, 0x2064 },
@@ -171,13 +172,13 @@ static const struct interval zero_width[] = {
 { 0x3099, 0x309A },
 { 0xA66F, 0xA672 },
 { 0xA674, 0xA67D },
-{ 0xA69F, 0xA69F },
+{ 0xA69E, 0xA69F },
 { 0xA6F0, 0xA6F1 },
 { 0xA802, 0xA802 },
 { 0xA806, 0xA806 },
 { 0xA80B, 0xA80B },
 { 0xA825, 0xA826 },
-{ 0xA8C4, 0xA8C4 },
+{ 0xA8C4, 0xA8C5 },
 { 0xA8E0, 0xA8F1 },
 { 0xA926, 0xA92D },
 { 0xA947, 0xA951 },
@@ -204,7 +205,7 @@ static const struct interval zero_width[] = {
 { 0xABED, 0xABED },
 { 0xFB1E, 0xFB1E },
 { 0xFE00, 0xFE0F },
-{ 0xFE20, 0xFE2D },
+{ 0xFE20, 0xFE2F },
 { 0xFEFF, 0xFEFF },
 { 0xFFF9, 0xFFFB },
 { 0x101FD, 0x101FD },
@@ -228,16 +229,21 @@ static const struct interval zero_width[] = {
 { 0x11173, 0x11173 },
 { 0x11180, 0x11181 },
 { 0x111B6, 0x111BE },
+{ 0x111CA, 0x111CC },
 { 0x1122F, 0x11231 },
 { 0x11234, 0x11234 },
 { 0x11236, 0x11237 },
+{ 0x1123E, 0x1123E },
 { 0x112DF, 0x112DF },
 { 0x112E3, 0x112EA },
-{ 0x11301, 0x11301 },
+{ 0x11300, 0x11301 },
 { 0x1133C, 0x1133C },
 { 0x11340, 0x11340 },
 { 0x11366, 0x1136C },
 { 0x11370, 0x11374 },
+{ 0x11438, 0x1143F },
+{ 0x11442, 0x11444 },
+{ 0x11446, 0x11446 },
 { 0x114B3, 0x114B8 },
 { 0x114BA, 0x114BA },
 { 0x114BF, 0x114C0 },
@@ -245,6 +251,7 @@ static const struct interval zero_width[] = {
 { 0x115B2, 0x115B5 },
 { 0x115BC, 0x115BD },
 { 0x115BF, 0x115C0 },
+{ 0x115DC, 0x115DD },
 { 0x11633, 0x1163A },
 { 0x1163D, 0x1163D },
 { 0x1163F, 0x11640 },
@@ -252,6 +259,16 @@ static const struct interval zero_width[] = {
 { 0x116AD, 0x116AD },
 { 0x116B0, 0x116B5 },
 { 0x116B7, 0x116B7 },
+{ 0x1171D, 0x1171F },
+{ 0x11722, 0x11725 },
+{ 0x11727, 0x1172B },
+{ 0x11C30, 0x11C36 },
+{ 0x11C38, 0x11C3D },
+{ 0x11C3F, 0x11C3F },
+{ 0x11C92, 0x11CA7 },
+{ 0x11CAA, 0x11CB0 },
+{ 0x11CB2, 0x11CB3 },
+{ 0x11CB5, 0x11CB6 },
 { 0x16AF0, 0x16AF4 },
 { 0x16B30, 0x16B36 },
 { 0x16F8F, 0x16F92 },
@@ -262,31 +279,59 @@ static const struct interval zero_width[] = {
 { 0x1D185, 0x1D18B },
 { 0x1D1AA, 0x1D1AD },
 { 0x1D242, 0x1D244 },
+{ 0x1DA00, 0x1DA36 },
+{ 0x1DA3B, 0x1DA6C },
+{ 0x1DA75, 0x1DA75 },
+{ 0x1DA84, 0x1DA84 },
+{ 0x1DA9B, 0x1DA9F },
+{ 0x1DAA1, 0x1DAAF },
+{ 0x1E000, 0x1E006 },
+{ 0x1E008, 0x1E018 },
+{ 0x1E01B, 0x1E021 },
+{ 0x1E023, 0x1E024 },
+{ 0x1E026, 0x1E02A },
 { 0x1E8D0, 0x1E8D6 },
+{ 0x1E944, 0x1E94A },
 { 0xE0001, 0xE0001 },
 { 0xE0020, 0xE007F },
 { 0xE0100, 0xE01EF }
 };
 static const struct interval double_width[] = {
-{ /* plane */ 0x0, 0x1C },
-{ /* plane */ 0x1C, 0x21 },
-{ /* plane */ 0x21, 0x22 },
-{ /* plane */ 0x22, 0x23 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
-{ /* plane */ 0x0, 0x0 },
 { 0x1100, 0x115F },
+{ 0x231A, 0x231B },
 { 0x2329, 0x232A },
+{ 0x23E9, 0x23EC },
+{ 0x23F0, 0x23F0 },
+{ 0x23F3, 0x23F3 },
+{ 0x25FD, 0x25FE },
+{ 0x2614, 0x2615 },
+{ 0x2648, 0x2653 },
+{ 0x267F, 0x267F },
+{ 0x2693, 0x2693 },
+{ 0x26A1, 0x26A1 },
+{ 0x26AA, 0x26AB },
+{ 0x26BD, 0x26BE },
+{ 0x26C4, 0x26C5 },
+{ 0x26CE, 0x26CE },
+{ 0x26D4, 0x26D4 },
+{ 0x26EA, 0x26EA },
+{ 0x26F2, 0x26F3 },
+{ 0x26F5, 0x26F5 },
+{ 0x26FA, 0x26FA },
+{ 0x26FD, 0x26FD },
+{ 0x2705, 0x2705 },
+{ 0x270A, 0x270B },
+{ 0x2728, 0x2728 },
+{ 0x274C, 0x274C },
+{ 0x274E, 0x274E },
+{ 0x2753, 0x2755 },
+{ 0x2757, 0x2757 },
+{ 0x2795, 0x2797 },
+{ 0x27B0, 0x27B0 },
+{ 0x27BF, 0x27BF },
+{ 0x2B1B, 0x2B1C },
+{ 0x2B50, 0x2B50 },
+{ 0x2B55, 0x2B55 },
 { 0x2E80, 0x2E99 },
 { 0x2E9B, 0x2EF3 },
 { 0x2F00, 0x2FD5 },
@@ -313,11 +358,49 @@ static const struct interval double_width[] = {
 { 0xFE68, 0xFE6B },
 { 0xFF01, 0xFF60 },
 { 0xFFE0, 0xFFE6 },
+{ 0x16FE0, 0x16FE0 },
+{ 0x17000, 0x187EC },
+{ 0x18800, 0x18AF2 },
 { 0x1B000, 0x1B001 },
+{ 0x1F004, 0x1F004 },
+{ 0x1F0CF

[PATCH v2 0/6] unicode_width.h: update the width tables to Unicode 9.0

2016-12-13 Thread Beat Bolli
This is v2 of my Unicode 9.0 series. After a short discussion [1], we
decided to move the generator script into contrib. This is what this
series now does first. The script is then updated in contrib.

Diff to v1:
- complete commit reordering
- fix nits in the commit messages

.gitignore   |   1 -
contrib/update-unicode/.gitignore|   3 ++
contrib/update-unicode/README|  20 +++
contrib/update-unicode/update_unicode.sh |  33 ++
unicode_width.h                          | 131 ++-
update_unicode.sh|  40 --
6 files changed, 163 insertions(+), 65 deletions(-)

[1] http://public-inbox.org/git/xmqqr35dm203@gitster.mtv.corp.google.com/


[PATCH v2 4/6] update-unicode.sh: automatically download newer definition files

2016-12-13 Thread Beat Bolli
Checking just for the unicode data files' existence is not sufficient;
we should also download them if a newer version exists on the Unicode
consortium's servers. Option -N of wget does this nicely for us.

Reviewed-by: Torsten Bögershausen 
Signed-off-by: Beat Bolli 
---
 contrib/update-unicode/update_unicode.sh | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/contrib/update-unicode/update_unicode.sh 
b/contrib/update-unicode/update_unicode.sh
index 9f1bf31..56871a1 100755
--- a/contrib/update-unicode/update_unicode.sh
+++ b/contrib/update-unicode/update_unicode.sh
@@ -8,12 +8,8 @@
 cd "$(dirname "$0")"
 UNICODEWIDTH_H=$(git rev-parse --show-toplevel)/unicode_width.h
 
-if ! test -f UnicodeData.txt; then
-   wget http://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
-fi &&
-if ! test -f EastAsianWidth.txt; then
-   wget http://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt
-fi &&
+wget -N http://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt \
+   http://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt &&
 if ! test -d uniset; then
git clone https://github.com/depp/uniset.git &&
( cd uniset && git checkout 4b186196dd )
-- 
2.7.2


[PATCH v2 2/6] update_unicode.sh: remove an unnecessary subshell level

2016-12-13 Thread Beat Bolli
After the move into contrib/update-unicode, we no longer create the
unicode directory to have a clean working folder. Instead, the directory
of the script is used. This means that the subshell can be removed.

Signed-off-by: Beat Bolli 
---
 contrib/update-unicode/update_unicode.sh | 53 
 1 file changed, 26 insertions(+), 27 deletions(-)

diff --git a/contrib/update-unicode/update_unicode.sh 
b/contrib/update-unicode/update_unicode.sh
index 7b90126..ff664ec 100755
--- a/contrib/update-unicode/update_unicode.sh
+++ b/contrib/update-unicode/update_unicode.sh
@@ -7,32 +7,31 @@
 #
 cd "$(dirname "$0")"
 UNICODEWIDTH_H=$(git rev-parse --show-toplevel)/unicode_width.h
+
+if ! test -f UnicodeData.txt; then
+   wget http://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
+fi &&
+if ! test -f EastAsianWidth.txt; then
+   wget http://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt
+fi &&
+if ! test -d uniset; then
+   git clone https://github.com/depp/uniset.git
+fi &&
 (
-   if ! test -f UnicodeData.txt; then
-   wget 
http://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
+   cd uniset &&
+   if ! test -x uniset; then
+   autoreconf -i &&
+   ./configure --enable-warnings=-Werror CFLAGS='-O0 -ggdb'
fi &&
-   if ! test -f EastAsianWidth.txt; then
-   wget 
http://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt
-   fi &&
-   if ! test -d uniset; then
-   git clone https://github.com/depp/uniset.git
-   fi &&
-   (
-   cd uniset &&
-   if ! test -x uniset; then
-   autoreconf -i &&
-   ./configure --enable-warnings=-Werror CFLAGS='-O0 -ggdb'
-   fi &&
-   make
-   ) &&
-   UNICODE_DIR=. && export UNICODE_DIR &&
-   cat >$UNICODEWIDTH_H <<-EOF
-   static const struct interval zero_width[] = {
-   $(uniset/uniset --32 cat:Me,Mn,Cf + U+1160..U+11FF - U+00AD |
- grep -v plane)
-   };
-   static const struct interval double_width[] = {
-   $(uniset/uniset --32 eaw:F,W)
-   };
-   EOF
-)
+   make
+) &&
+UNICODE_DIR=. && export UNICODE_DIR &&
+cat >$UNICODEWIDTH_H <<-EOF
+static const struct interval zero_width[] = {
+   $(uniset/uniset --32 cat:Me,Mn,Cf + U+1160..U+11FF - U+00AD |
+ grep -v plane)
+};
+static const struct interval double_width[] = {
+   $(uniset/uniset --32 eaw:F,W)
+};
+EOF
-- 
2.7.2


[PATCH v2 5/6] update_unicode.sh: remove the plane filter

2016-12-13 Thread Beat Bolli
The uniset upstream has accepted my patches that eliminate the Unicode
plane offsets from the output in '--32' mode.

Remove the corresponding filter in update_unicode.sh.

This also fixes the issue that the plane offsets were not removed from
the second uniset call.

Signed-off-by: Beat Bolli 
---
 contrib/update-unicode/update_unicode.sh | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/contrib/update-unicode/update_unicode.sh 
b/contrib/update-unicode/update_unicode.sh
index 56871a1..e05db92 100755
--- a/contrib/update-unicode/update_unicode.sh
+++ b/contrib/update-unicode/update_unicode.sh
@@ -25,8 +25,7 @@ fi &&
 UNICODE_DIR=. && export UNICODE_DIR &&
 cat >$UNICODEWIDTH_H <<-EOF
 static const struct interval zero_width[] = {
-   $(uniset/uniset --32 cat:Me,Mn,Cf + U+1160..U+11FF - U+00AD |
- grep -v plane)
+   $(uniset/uniset --32 cat:Me,Mn,Cf + U+1160..U+11FF - U+00AD)
 };
 static const struct interval double_width[] = {
$(uniset/uniset --32 eaw:F,W)
-- 
2.7.2


[PATCH v2 3/6] update_unicode.sh: pin the uniset repo to a known good commit

2016-12-13 Thread Beat Bolli
The uniset upstream has added more commits that for example change the
hexadecimal output in '--32' mode to decimal. Let's pin the repo to a
commit that still outputs the width tables in the format we want.

Signed-off-by: Beat Bolli 
---
 contrib/update-unicode/update_unicode.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/contrib/update-unicode/update_unicode.sh 
b/contrib/update-unicode/update_unicode.sh
index ff664ec..9f1bf31 100755
--- a/contrib/update-unicode/update_unicode.sh
+++ b/contrib/update-unicode/update_unicode.sh
@@ -15,7 +15,8 @@ if ! test -f EastAsianWidth.txt; then
wget http://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt
 fi &&
 if ! test -d uniset; then
-   git clone https://github.com/depp/uniset.git
+   git clone https://github.com/depp/uniset.git &&
+   ( cd uniset && git checkout 4b186196dd )
 fi &&
 (
cd uniset &&
-- 
2.7.2


[PATCH v2 1/6] update_unicode.sh: move it into contrib/update-unicode

2016-12-13 Thread Beat Bolli
As it's used only by a tiny minority of the Git developer population,
this script does not belong in the main Git source directory.

Move it into contrib/ and adjust the paths to account for the new
location.

Signed-off-by: Beat Bolli 
---
 .gitignore   |  1 -
 contrib/update-unicode/.gitignore|  3 +++
 contrib/update-unicode/README| 20 
 contrib/update-unicode/update_unicode.sh | 38 ++
 update_unicode.sh| 40 
 5 files changed, 61 insertions(+), 41 deletions(-)
 create mode 100644 contrib/update-unicode/.gitignore
 create mode 100644 contrib/update-unicode/README
 create mode 100755 contrib/update-unicode/update_unicode.sh
 delete mode 100755 update_unicode.sh

diff --git a/.gitignore b/.gitignore
index f96e50e..ae0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -204,7 +204,6 @@
 /config.mak.autogen
 /config.mak.append
 /configure
-/unicode
 /tags
 /TAGS
 /cscope*
diff --git a/contrib/update-unicode/.gitignore 
b/contrib/update-unicode/.gitignore
new file mode 100644
index 000..b0ebc6a
--- /dev/null
+++ b/contrib/update-unicode/.gitignore
@@ -0,0 +1,3 @@
+uniset/
+UnicodeData.txt
+EastAsianWidth.txt
diff --git a/contrib/update-unicode/README b/contrib/update-unicode/README
new file mode 100644
index 000..b9e2fc8
--- /dev/null
+++ b/contrib/update-unicode/README
@@ -0,0 +1,20 @@
+TL;DR: Run update_unicode.sh after the publication of a new Unicode
+standard and commit the resulting unicode_width.h file.
+
+The long version
+
+
+The Git source code ships the file unicode_width.h, which contains
+tables of zero and double width Unicode code points, respectively.
+These tables are generated using update_unicode.sh in this directory.
+update_unicode.sh itself uses a third-party tool, uniset, to query two
+Unicode data files for the interesting code points.
+
+On first run, update_unicode.sh clones uniset from GitHub and builds it.
+This requires a current-ish version of autoconf (2.69 works as of December
+2016).
+
+On each run, update_unicode.sh checks whether more recent Unicode data
+files are available from the Unicode consortium, and rebuilds the header
+unicode_width.h with the new data. The new header can then be
+committed.
diff --git a/contrib/update-unicode/update_unicode.sh 
b/contrib/update-unicode/update_unicode.sh
new file mode 100755
index 000..7b90126
--- /dev/null
+++ b/contrib/update-unicode/update_unicode.sh
@@ -0,0 +1,38 @@
+#!/bin/sh
+#See http://www.unicode.org/reports/tr44/
+#
+#Me Enclosing_Mark  an enclosing combining mark
+#Mn Nonspacing_Mark a nonspacing combining mark (zero advance width)
+#Cf Format  a format control character
+#
+cd "$(dirname "$0")"
+UNICODEWIDTH_H=$(git rev-parse --show-toplevel)/unicode_width.h
+(
+   if ! test -f UnicodeData.txt; then
+   wget 
http://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
+   fi &&
+   if ! test -f EastAsianWidth.txt; then
+   wget 
http://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt
+   fi &&
+   if ! test -d uniset; then
+   git clone https://github.com/depp/uniset.git
+   fi &&
+   (
+   cd uniset &&
+   if ! test -x uniset; then
+   autoreconf -i &&
+   ./configure --enable-warnings=-Werror CFLAGS='-O0 -ggdb'
+   fi &&
+   make
+   ) &&
+   UNICODE_DIR=. && export UNICODE_DIR &&
+   cat >$UNICODEWIDTH_H <<-EOF
+   static const struct interval zero_width[] = {
+   $(uniset/uniset --32 cat:Me,Mn,Cf + U+1160..U+11FF - U+00AD |
+ grep -v plane)
+   };
+   static const struct interval double_width[] = {
+   $(uniset/uniset --32 eaw:F,W)
+   };
+   EOF
+)
diff --git a/update_unicode.sh b/update_unicode.sh
deleted file mode 100755
index 27af77c..000
--- a/update_unicode.sh
+++ /dev/null
@@ -1,40 +0,0 @@
-#!/bin/sh
-#See http://www.unicode.org/reports/tr44/
-#
-#Me Enclosing_Mark  an enclosing combining mark
-#Mn Nonspacing_Mark a nonspacing combining mark (zero advance width)
-#Cf Format  a format control character
-#
-UNICODEWIDTH_H=../unicode_width.h
-if ! test -d unicode; then
-   mkdir unicode
-fi &&
-( cd unicode &&
-   if ! test -f UnicodeData.txt; then
-   wget 
http://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
-   fi &&
-   if ! test -f EastAsianWidth.txt; then
-   wget 
http://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt
-   fi &&
-   if ! test -d uniset; then
-   git clone https://github.com/depp/uniset.git
-   fi &&
-   (
-   cd uniset &am

Re: [PATCH v2 4/6] update-unicode.sh: automatically download newer definition files

2016-12-14 Thread Beat Bolli
On 14.12.16 00:31, Beat Bolli wrote:

> [PATCH v2 4/6] update-unicode.sh: automatically download newer definition 
> files

Dang! And again I'm not capable of putting an underscore instead of the
dash...

Junio, would you please reword the subject to

Re: [PATCH v2 4/6] update_unicode.sh: automatically download newer
definition files

Thanks,
Beat


> we should also download them if a newer version exists on the Unicode
> consortium's servers. Option -N of wget does this nicely for us.
> 
> Reviewed-by: Torsten Bögershausen 
> Signed-off-by: Beat Bolli 
> ---
>  contrib/update-unicode/update_unicode.sh | 8 ++--
>  1 file changed, 2 insertions(+), 6 deletions(-)
> 
> diff --git a/contrib/update-unicode/update_unicode.sh 
> b/contrib/update-unicode/update_unicode.sh
> index 9f1bf31..56871a1 100755
> --- a/contrib/update-unicode/update_unicode.sh
> +++ b/contrib/update-unicode/update_unicode.sh
> @@ -8,12 +8,8 @@
>  cd "$(dirname "$0")"
>  UNICODEWIDTH_H=$(git rev-parse --show-toplevel)/unicode_width.h
>  
> -if ! test -f UnicodeData.txt; then
> - wget http://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
> -fi &&
> -if ! test -f EastAsianWidth.txt; then
> - wget http://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt
> -fi &&
> +wget -N http://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt \
> + http://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt &&
>  if ! test -d uniset; then
>   git clone https://github.com/depp/uniset.git &&
>   ( cd uniset && git checkout 4b186196dd )
> 


Re: [PATCH v2 1/3] mingw: adjust is_console() to work with stdin

2016-12-22 Thread Beat Bolli
On 22.12.16 18:08, Johannes Schindelin wrote:
> When determining whether a handle corresponds to a *real* Win32 Console
> (as opposed to, say, a character device such as /dev/null), we use the
> GetConsoleScreenBufferInfo() function as a tell-tale.
> 
> However, that does not work for *input* handles associated with a
> console. Let's just use the GetConsoleMode() function for input handles,
> and since it does not work on output handles fall back to the previous
> method for those.
> 
> This patch prepares for using is_console() instead of my previous
> misguided attempt in cbb3f3c9b1 (mingw: intercept isatty() to handle
> /dev/null as Git expects it, 2016-12-11) that broke everything on
> Windows.
> 
> Signed-off-by: Johannes Schindelin 
> ---
>  compat/winansi.c | 6 +-
>  1 file changed, 5 insertions(+), 1 deletion(-)
> 
> diff --git a/compat/winansi.c b/compat/winansi.c
> index cb725fb02f..590d61cb1b 100644
> --- a/compat/winansi.c
> +++ b/compat/winansi.c
> @@ -84,6 +84,7 @@ static void warn_if_raster_font(void)
>  static int is_console(int fd)
>  {
>   CONSOLE_SCREEN_BUFFER_INFO sbi;
> + DWORD mode;

Nit: can we move this definition into the block below where it's used?

>   HANDLE hcon;
>  
>   static int initialized = 0;
> @@ -98,7 +99,10 @@ static int is_console(int fd)
>   return 0;
>  
>   /* check if its a handle to a console output screen buffer */
> - if (!GetConsoleScreenBufferInfo(hcon, &sbi))
> + if (!fd) {

Right here:
+   DWORD mode;

> + if (!GetConsoleMode(hcon, &mode))
> + return 0;
> + } else if (!GetConsoleScreenBufferInfo(hcon, &sbi))
>   return 0;
>  
>   /* initialize attributes */
> 


Re: [PATCH v2 1/3] mingw: adjust is_console() to work with stdin

2016-12-23 Thread Beat Bolli

Hi Dscho

On 2016-12-23 10:30, Johannes Schindelin wrote:

Hi Beat,

On Fri, 23 Dec 2016, Beat Bolli wrote:


On 22.12.16 18:08, Johannes Schindelin wrote:
> diff --git a/compat/winansi.c b/compat/winansi.c
> index cb725fb02f..590d61cb1b 100644
> --- a/compat/winansi.c
> +++ b/compat/winansi.c
> @@ -84,6 +84,7 @@ static void warn_if_raster_font(void)
>  static int is_console(int fd)
>  {
>CONSOLE_SCREEN_BUFFER_INFO sbi;
> +  DWORD mode;

Nit: can we move this definition into the block below where it's used?

>HANDLE hcon;
>
>static int initialized = 0;
> @@ -98,7 +99,10 @@ static int is_console(int fd)
>return 0;
>
>/* check if its a handle to a console output screen buffer */
> -  if (!GetConsoleScreenBufferInfo(hcon, &sbi))
> +  if (!fd) {

Right here:
+   DWORD mode;


By that reasoning, the CONSOLE_SCREEN_BUFFER_INFO declaration that has
function-wide scope should also move below:


> +  if (!GetConsoleMode(hcon, &mode))
> +  return 0;


Right here.


> +  } else if (!GetConsoleScreenBufferInfo(hcon, &sbi))
>return 0;
>
>/* initialize attributes */


As the existing code followed a different convention, so does my patch.

If you choose to submit a change that moves the `mode` declaration to
narrow its scope, please also move the `sbi` declaration for 
consistency.


It's probably not worth it. It just jumped out at me when reading the patch, 
and, writing much C++ recently, it looked weird to have a definition so 
far away from the single use of the variable.


Cheers,
Beat


  1   2   >