[PATCH v5 02/12] ctype: support iscntrl, ispunct, isxdigit and isprint

2012-10-13 Thread Nguyễn Thái Ngọc Duy

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 ctype.c   | 18 ++
 git-compat-util.h | 13 +
 2 files changed, 31 insertions(+)

diff --git a/ctype.c b/ctype.c
index faeaf34..b4bf48a 100644
--- a/ctype.c
+++ b/ctype.c
@@ -26,6 +26,24 @@ const unsigned char sane_ctype[256] = {
/* Nothing in the 128.. range */
 };
 
+enum {
+   CN = GIT_CNTRL,
+   PU = GIT_PUNCT,
+   XD = GIT_XDIGIT,
+};
+
+const unsigned char sane_ctype2[256] = {
+   CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, /*
0..15 */
+   CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, /*   
16..31 */
+   0,  PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, /*   
32..47 */
+   XD, XD, XD, XD, XD, XD, XD, XD, XD, XD, PU, PU, PU, PU, PU, PU, /*   
48..63 */
+   PU, 0,  XD, 0,  XD, 0,  XD, 0,  0,  0,  0,  0,  0,  0,  0,  0,  /*   
64..79 */
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  PU, PU, PU, PU, PU, /*   
80..95 */
+   PU, 0,  XD, 0,  XD, 0,  XD, 0,  0,  0,  0,  0,  0,  0,  0,  0,  /*  
96..111 */
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  PU, PU, PU, PU, CN, /* 
112..127 */
+   /* Nothing in the 128.. range */
+};
+
 /* For case-insensitive kwset */
 const char tolower_trans_tbl[256] = {
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
diff --git a/git-compat-util.h b/git-compat-util.h
index f8b859c..ea11694 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -510,14 +510,23 @@ extern const char tolower_trans_tbl[256];
 #undef isupper
 #undef tolower
 #undef toupper
+#undef iscntrl
+#undef ispunct
+#undef isxdigit
+#undef isprint
 extern const unsigned char sane_ctype[256];
+extern const unsigned char sane_ctype2[256];
 #define GIT_SPACE 0x01
 #define GIT_DIGIT 0x02
 #define GIT_ALPHA 0x04
 #define GIT_GLOB_SPECIAL 0x08
 #define GIT_REGEX_SPECIAL 0x10
 #define GIT_PATHSPEC_MAGIC 0x20
+#define GIT_CNTRL 0x01
+#define GIT_PUNCT 0x02
+#define GIT_XDIGIT 0x04
 #define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)
+#define sane_istest2(x,mask) ((sane_ctype2[(unsigned char)(x)] & (mask)) != 0)
 #define isascii(x) (((x) & ~0x7f) == 0)
 #define isspace(x) sane_istest(x,GIT_SPACE)
 #define isdigit(x) sane_istest(x,GIT_DIGIT)
@@ -527,6 +536,10 @@ extern const unsigned char sane_ctype[256];
 #define isupper(x) sane_iscase(x, 0)
 #define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL)
 #define is_regex_special(x) sane_istest(x,GIT_GLOB_SPECIAL | GIT_REGEX_SPECIAL)
+#define iscntrl(x) sane_istest2(x, GIT_CNTRL)
+#define ispunct(x) sane_istest2(x, GIT_PUNCT)
+#define isxdigit(x) sane_istest2(x, GIT_XDIGIT)
+#define isprint(x) (isalnum(x) || isspace(x) || ispunct(x))
 #define tolower(x) sane_case((unsigned char)(x), 0x20)
 #define toupper(x) sane_case((unsigned char)(x), 0)
 #define is_pathspec_magic(x) sane_istest(x,GIT_PATHSPEC_MAGIC)
-- 
1.8.0.rc2.11.g2b79d01

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 02/12] ctype: support iscntrl, ispunct, isxdigit and isprint

2012-10-13 Thread Junio C Hamano
Nguyễn Thái Ngọc Duy   writes:

> Signed-off-by: Nguyễn Thái Ngọc Duy 
> ---

The description to justify why it is ctype2[] seems to have been
lost.  Intended?

>  ctype.c   | 18 ++
>  git-compat-util.h | 13 +
>  2 files changed, 31 insertions(+)
>
> diff --git a/ctype.c b/ctype.c
> index faeaf34..b4bf48a 100644
> --- a/ctype.c
> +++ b/ctype.c
> @@ -26,6 +26,24 @@ const unsigned char sane_ctype[256] = {
>   /* Nothing in the 128.. range */
>  };
>  
> +enum {
> + CN = GIT_CNTRL,
> + PU = GIT_PUNCT,
> + XD = GIT_XDIGIT,
> +};
> +
> +const unsigned char sane_ctype2[256] = {
> + CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, /*
> 0..15 */
> + CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, /*   
> 16..31 */
> + 0,  PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, /*   
> 32..47 */
> + XD, XD, XD, XD, XD, XD, XD, XD, XD, XD, PU, PU, PU, PU, PU, PU, /*   
> 48..63 */
> + PU, 0,  XD, 0,  XD, 0,  XD, 0,  0,  0,  0,  0,  0,  0,  0,  0,  /*   
> 64..79 */
> + 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  PU, PU, PU, PU, PU, /*   
> 80..95 */
> + PU, 0,  XD, 0,  XD, 0,  XD, 0,  0,  0,  0,  0,  0,  0,  0,  0,  /*  
> 96..111 */
> + 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  PU, PU, PU, PU, CN, /* 
> 112..127 */
> + /* Nothing in the 128.. range */
> +};
> +
>  /* For case-insensitive kwset */
>  const char tolower_trans_tbl[256] = {
>   0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
> diff --git a/git-compat-util.h b/git-compat-util.h
> index f8b859c..ea11694 100644
> --- a/git-compat-util.h
> +++ b/git-compat-util.h
> @@ -510,14 +510,23 @@ extern const char tolower_trans_tbl[256];
>  #undef isupper
>  #undef tolower
>  #undef toupper
> +#undef iscntrl
> +#undef ispunct
> +#undef isxdigit
> +#undef isprint
>  extern const unsigned char sane_ctype[256];
> +extern const unsigned char sane_ctype2[256];
>  #define GIT_SPACE 0x01
>  #define GIT_DIGIT 0x02
>  #define GIT_ALPHA 0x04
>  #define GIT_GLOB_SPECIAL 0x08
>  #define GIT_REGEX_SPECIAL 0x10
>  #define GIT_PATHSPEC_MAGIC 0x20
> +#define GIT_CNTRL 0x01
> +#define GIT_PUNCT 0x02
> +#define GIT_XDIGIT 0x04
>  #define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)
> +#define sane_istest2(x,mask) ((sane_ctype2[(unsigned char)(x)] & (mask)) != 
> 0)
>  #define isascii(x) (((x) & ~0x7f) == 0)
>  #define isspace(x) sane_istest(x,GIT_SPACE)
>  #define isdigit(x) sane_istest(x,GIT_DIGIT)
> @@ -527,6 +536,10 @@ extern const unsigned char sane_ctype[256];
>  #define isupper(x) sane_iscase(x, 0)
>  #define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL)
>  #define is_regex_special(x) sane_istest(x,GIT_GLOB_SPECIAL | 
> GIT_REGEX_SPECIAL)
> +#define iscntrl(x) sane_istest2(x, GIT_CNTRL)
> +#define ispunct(x) sane_istest2(x, GIT_PUNCT)
> +#define isxdigit(x) sane_istest2(x, GIT_XDIGIT)
> +#define isprint(x) (isalnum(x) || isspace(x) || ispunct(x))
>  #define tolower(x) sane_case((unsigned char)(x), 0x20)
>  #define toupper(x) sane_case((unsigned char)(x), 0)
>  #define is_pathspec_magic(x) sane_istest(x,GIT_PATHSPEC_MAGIC)
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 02/12] ctype: support iscntrl, ispunct, isxdigit and isprint

2012-10-13 Thread Nguyen Thai Ngoc Duy
On Sun, Oct 14, 2012 at 12:02 PM, Junio C Hamano  wrote:
> Nguyễn Thái Ngọc Duy   writes:
>
>> Signed-off-by: Nguyễn Thái Ngọc Duy 
>> ---
>
> The description to justify why it is ctype2[] seems to have been
> lost.  Intended?

Nope. I added the description after generating patches and forgot to
update the same to my branch. Thanks for catching.
-- 
Duy
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 02/12] ctype: support iscntrl, ispunct, isxdigit and isprint

2012-10-14 Thread René Scharfe

Am 14.10.2012 04:35, schrieb Nguyễn Thái Ngọc Duy:


Signed-off-by: Nguyễn Thái Ngọc Duy 
---
  ctype.c   | 18 ++
  git-compat-util.h | 13 +
  2 files changed, 31 insertions(+)

diff --git a/ctype.c b/ctype.c
index faeaf34..b4bf48a 100644
--- a/ctype.c
+++ b/ctype.c
@@ -26,6 +26,24 @@ const unsigned char sane_ctype[256] = {
/* Nothing in the 128.. range */
  };

+enum {
+   CN = GIT_CNTRL,
+   PU = GIT_PUNCT,
+   XD = GIT_XDIGIT,
+};
+
+const unsigned char sane_ctype2[256] = {
+   CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, /*
0..15 */
+   CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, /*   
16..31 */
+   0,  PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, /*   
32..47 */
+   XD, XD, XD, XD, XD, XD, XD, XD, XD, XD, PU, PU, PU, PU, PU, PU, /*   
48..63 */
+   PU, 0,  XD, 0,  XD, 0,  XD, 0,  0,  0,  0,  0,  0,  0,  0,  0,  /*   
64..79 */
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  PU, PU, PU, PU, PU, /*   
80..95 */
+   PU, 0,  XD, 0,  XD, 0,  XD, 0,  0,  0,  0,  0,  0,  0,  0,  0,  /*  
96..111 */
+   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  PU, PU, PU, PU, CN, /* 
112..127 */


Shouldn't [ACE] (65, 67, 69) and [ace] (97, 99, 101) be xdigits as well?

But how about using the existing hexval_table instead, like this:

#define isxdigit(x) (hexval_table[(x)] != -1)

With that, couldn't you squeeze the other two classes into the existing 
sane_ctype?


By the way, I'm working on a patch series for implementing a lot more 
character classes with table lookups.  It grew out of a desire to make 
bad_ref_char() faster but perhaps got a bit out of hand by now; it's at 
24 patches and still not finished.  I'm curious how long we have until 
it escapes. ;-)



 #define is_regex_special(x) sane_istest(x,GIT_GLOB_SPECIAL | GIT_REGEX_SPECIAL)
+#define iscntrl(x) sane_istest2(x, GIT_CNTRL)
+#define ispunct(x) sane_istest2(x, GIT_PUNCT)
+#define isxdigit(x) sane_istest2(x, GIT_XDIGIT)
+#define isprint(x) (isalnum(x) || isspace(x) || ispunct(x))


If a single table is used, you can do with a single table lookup by 
adding the bits for the component classes, like isalnum and 
is_regex_special do.


René

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 02/12] ctype: support iscntrl, ispunct, isxdigit and isprint

2012-10-14 Thread Nguyen Thai Ngoc Duy
On Sun, Oct 14, 2012 at 7:59 PM, René Scharfe
 wrote:
>> +const unsigned char sane_ctype2[256] = {
>> +   CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, /*
>> 0..15 */
>> +   CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, /*
>> 16..31 */
>> +   0,  PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, /*
>> 32..47 */
>> +   XD, XD, XD, XD, XD, XD, XD, XD, XD, XD, PU, PU, PU, PU, PU, PU, /*
>> 48..63 */
>> +   PU, 0,  XD, 0,  XD, 0,  XD, 0,  0,  0,  0,  0,  0,  0,  0,  0,  /*
>> 64..79 */
>> +   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  PU, PU, PU, PU, PU, /*
>> 80..95 */
>> +   PU, 0,  XD, 0,  XD, 0,  XD, 0,  0,  0,  0,  0,  0,  0,  0,  0,  /*
>> 96..111 */
>> +   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  PU, PU, PU, PU, CN, /*
>> 112..127 */
>
>
> Shouldn't [ACE] (65, 67, 69) and [ace] (97, 99, 101) be xdigits as well?

Hmm.. I generated it from LANG=C. I wonder where I got it wrong..

> But how about using the existing hexval_table instead, like this:
>
> #define isxdigit(x) (hexval_table[(x)] != -1)
>
> With that, couldn't you squeeze the other two classes into the existing
> sane_ctype?

No, there are still conflicts: 9, 10 and 13 as spaces (vs controls) and
123, 124 and 126 as regex/pathspec special (vs punctuation).

> By the way, I'm working on a patch series for implementing a lot more
> character classes with table lookups.  It grew out of a desire to make
> bad_ref_char() faster but perhaps got a bit out of hand by now; it's at 24
> patches and still not finished.  I'm curious how long we have until it
> escapes. ;-)

I don't think the series is going to graduate any time soon :)
-- 
Duy
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 02/12] ctype: support iscntrl, ispunct, isxdigit and isprint

2012-10-14 Thread René Scharfe

Am 14.10.2012 15:25, schrieb Nguyen Thai Ngoc Duy:

On Sun, Oct 14, 2012 at 7:59 PM, René Scharfe
 wrote:

With that, couldn't you squeeze the other two classes into the existing
sane_ctype?


No there are still conflicts: 9, 10 and 13 as spaces (vs controls) and
123, 124 and 126 as regex/pathspec special (vs punctuation).


That's not a problem, an entry in the table can have more than one bit 
set -- just OR them together in ctype.c.  It may not look as nice, but 
that's OK.  You could also define a character for GIT_SPACE | GIT_CNTRL 
etc. for cosmetic reasons.


René

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 02/12] ctype: support iscntrl, ispunct, isxdigit and isprint

2012-10-14 Thread Nguyen Thai Ngoc Duy
On Sun, Oct 14, 2012 at 03:59:31PM +0200, René Scharfe wrote:
> Am 14.10.2012 15:25, schrieb Nguyen Thai Ngoc Duy:
> > On Sun, Oct 14, 2012 at 7:59 PM, René Scharfe
> >  wrote:
> >> With that, couldn't you squeeze the other two classes into the existing
> >> sane_ctype?
> >
> > No there are still conflicts: 9, 10 and 13 as spaces (vs controls) and
> > 123, 124 and 126 as regex/pathspec special (vs punctuation).
> 
> That's not a problem, an entry in the table can have more than one bit 
> set -- just OR them together in ctype.c.  It may not look as nice, but 
> that's OK.  You could also define a character for GIT_SPACE | GIT_CNTRL 
> etc. for cosmetic reasons.

Only space chars are not a subset of control chars, which needs a new
combination. So the result does not look as bad as I thought:

-- 8< --
diff --git a/ctype.c b/ctype.c
index faeaf34..0bfebb4 100644
--- a/ctype.c
+++ b/ctype.c
@@ -11,18 +11,21 @@ enum {
D = GIT_DIGIT,
G = GIT_GLOB_SPECIAL,   /* *, ?, [, \\ */
R = GIT_REGEX_SPECIAL,  /* $, (, ), +, ., ^, {, | */
-   P = GIT_PATHSPEC_MAGIC  /* other non-alnum, except for ] and } */
+   P = GIT_PATHSPEC_MAGIC, /* other non-alnum, except for ] and } */
+   X = GIT_CNTRL,
+   U = GIT_PUNCT,
+   Z = GIT_CNTRL | GIT_SPACE
 };
 
 const unsigned char sane_ctype[256] = {
-   0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0, /*   0.. 15 */
-   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*  16.. 31 */
+   X, X, X, X, X, X, X, X, X, Z, Z, X, X, Z, X, X, /*   0.. 15 */
+   X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /*  16.. 31 */
S, P, P, P, R, P, P, P, R, R, G, R, P, P, R, P, /*  32.. 47 */
D, D, D, D, D, D, D, D, D, D, P, P, P, P, P, G, /*  48.. 63 */
P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /*  64.. 79 */
-   A, A, A, A, A, A, A, A, A, A, A, G, G, 0, R, P, /*  80.. 95 */
+   A, A, A, A, A, A, A, A, A, A, A, G, G, U, R, P, /*  80.. 95 */
P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /*  96..111 */
-   A, A, A, A, A, A, A, A, A, A, A, R, R, 0, P, 0, /* 112..127 */
+   A, A, A, A, A, A, A, A, A, A, A, R, R, U, P, X, /* 112..127 */
/* Nothing in the 128.. range */
 };
 
diff --git a/git-compat-util.h b/git-compat-util.h
index f8b859c..db77f3e 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -510,6 +510,10 @@ extern const char tolower_trans_tbl[256];
 #undef isupper
 #undef tolower
 #undef toupper
+#undef iscntrl
+#undef ispunct
+#undef isxdigit
+#undef isprint
 extern const unsigned char sane_ctype[256];
 #define GIT_SPACE 0x01
 #define GIT_DIGIT 0x02
@@ -517,6 +521,8 @@ extern const unsigned char sane_ctype[256];
 #define GIT_GLOB_SPECIAL 0x08
 #define GIT_REGEX_SPECIAL 0x10
 #define GIT_PATHSPEC_MAGIC 0x20
+#define GIT_CNTRL 0x40
+#define GIT_PUNCT 0x80
 #define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)
 #define isascii(x) (((x) & ~0x7f) == 0)
 #define isspace(x) sane_istest(x,GIT_SPACE)
@@ -527,6 +533,13 @@ extern const unsigned char sane_ctype[256];
 #define isupper(x) sane_iscase(x, 0)
 #define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL)
 #define is_regex_special(x) sane_istest(x,GIT_GLOB_SPECIAL | GIT_REGEX_SPECIAL)
+#define iscntrl(x) (sane_istest(x,GIT_CNTRL))
+#define ispunct(x) sane_istest(x, GIT_PUNCT | GIT_REGEX_SPECIAL | \
+   GIT_GLOB_SPECIAL | GIT_PATHSPEC_MAGIC)
+#define isxdigit(x) (hexval_table[x] != -1)
+#define isprint(x) (sane_istest(x, GIT_ALPHA | GIT_DIGIT | GIT_SPACE | \
+   GIT_PUNCT | GIT_REGEX_SPECIAL | GIT_GLOB_SPECIAL | \
+   GIT_PATHSPEC_MAGIC))
 #define tolower(x) sane_case((unsigned char)(x), 0x20)
 #define toupper(x) sane_case((unsigned char)(x), 0)
 #define is_pathspec_magic(x) sane_istest(x,GIT_PATHSPEC_MAGIC)
-- 8< --

-- 
Duy
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 02/12] ctype: support iscntrl, ispunct, isxdigit and isprint

2012-10-17 Thread Jan H. Schönherr
Hi Nguyen.

I just had a need for isprint() myself, and then I found
your code here.

I had a look at the POSIX locale as described here:

http://sourceware.org/git/?p=glibc.git;a=blob;f=localedata/locales/POSIX

Some remarks below.

Am 14.10.2012 16:26, schrieb Nguyen Thai Ngoc Duy:
> -- 8< --
> diff --git a/ctype.c b/ctype.c
> index faeaf34..0bfebb4 100644
> --- a/ctype.c
> +++ b/ctype.c
> @@ -11,18 +11,21 @@ enum {
>   D = GIT_DIGIT,
>   G = GIT_GLOB_SPECIAL,   /* *, ?, [, \\ */
>   R = GIT_REGEX_SPECIAL,  /* $, (, ), +, ., ^, {, | */
> - P = GIT_PATHSPEC_MAGIC  /* other non-alnum, except for ] and } */
> + P = GIT_PATHSPEC_MAGIC, /* other non-alnum, except for ] and } */
> + X = GIT_CNTRL,
> + U = GIT_PUNCT,
> + Z = GIT_CNTRL | GIT_SPACE
>  };
>  
>  const unsigned char sane_ctype[256] = {
> - 0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0, /*   0.. 15 */
> - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*  16.. 31 */
> + X, X, X, X, X, X, X, X, X, Z, Z, X, X, Z, X, X, /*   0.. 15 */
> + X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /*  16.. 31 */

"Normal" isspace() also includes vertical tab (11) and form-feed (12) as
white-space characters. Is there a reason why they are not included here?

>   S, P, P, P, R, P, P, P, R, R, G, R, P, P, R, P, /*  32.. 47 */
>   D, D, D, D, D, D, D, D, D, D, P, P, P, P, P, G, /*  48.. 63 */
>   P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /*  64.. 79 */
> - A, A, A, A, A, A, A, A, A, A, A, G, G, 0, R, P, /*  80.. 95 */
> + A, A, A, A, A, A, A, A, A, A, A, G, G, U, R, P, /*  80.. 95 */
>   P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /*  96..111 */
> - A, A, A, A, A, A, A, A, A, A, A, R, R, 0, P, 0, /* 112..127 */
> + A, A, A, A, A, A, A, A, A, A, A, R, R, U, P, X, /* 112..127 */
>   /* Nothing in the 128.. range */
>  };
>  
> diff --git a/git-compat-util.h b/git-compat-util.h
> index f8b859c..db77f3e 100644
> --- a/git-compat-util.h
> +++ b/git-compat-util.h
[...]
> @@ -527,6 +533,13 @@ extern const unsigned char sane_ctype[256];
>  #define isupper(x) sane_iscase(x, 0)
>  #define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL)
>  #define is_regex_special(x) sane_istest(x,GIT_GLOB_SPECIAL | 
> GIT_REGEX_SPECIAL)
> +#define iscntrl(x) (sane_istest(x,GIT_CNTRL))
> +#define ispunct(x) sane_istest(x, GIT_PUNCT | GIT_REGEX_SPECIAL | \
> + GIT_GLOB_SPECIAL | GIT_PATHSPEC_MAGIC)
> +#define isxdigit(x) (hexval_table[x] != -1)
> +#define isprint(x) (sane_istest(x, GIT_ALPHA | GIT_DIGIT | GIT_SPACE | \
> + GIT_PUNCT | GIT_REGEX_SPECIAL | GIT_GLOB_SPECIAL | \
> + GIT_PATHSPEC_MAGIC))

"Normal" isprint() only includes space (32) from the white-space characters.
The other white-space characters are not considered printable.

Do we want to stay close to the "original", or not?

Regards
Jan

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 02/12] ctype: support iscntrl, ispunct, isxdigit and isprint

2012-10-17 Thread Nguyen Thai Ngoc Duy
On Wed, Oct 17, 2012 at 7:09 PM, "Jan H. Schönherr"
 wrote:
>>  const unsigned char sane_ctype[256] = {
>> - 0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0, /*   0.. 15 */
>> - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*  16.. 31 */
>> + X, X, X, X, X, X, X, X, X, Z, Z, X, X, Z, X, X, /*   0.. 15 */
>> + X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /*  16.. 31 */
>
> "Normal" isspace() also includes vertical tab (11) and form-feed (12) as
> white-space characters. Is there a reason, why they are not included here?

I'm not sure. They were not classified as spaces in the very first
version in 4546738 (Unlocalized isspace and friends - 2005-10-13).
Maybe Linus had a reason to do so.

>> +#define isprint(x) (sane_istest(x, GIT_ALPHA | GIT_DIGIT | GIT_SPACE | \
>> + GIT_PUNCT | GIT_REGEX_SPECIAL | GIT_GLOB_SPECIAL | \
>> + GIT_PATHSPEC_MAGIC))
>
> "Normal" isprint() only includes space (32) from the white-space characters.
> The other white-space characters are not considered printable.
>
> Do we want to stay close to the "original", or not?

We do. I followed [1] but obviously missed the last sentence in "print"
description: "No characters specified for the keyword cntrl shall be
specified". Thanks for catching. I'll fix it soon.

[1] http://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html
-- 
Duy
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html