[PATCH v3 06/10] notmuch-restore: add 'notmuch format' support, auto detect

2012-01-14 Thread David Bremner
From: David Bremner 

This format consists of whitespace-separated tokens, encoded by
util/hex-escape.c.

The format detection heuristic relies on the fact that '(' is not part
of the character set used by hex-escape. Since hex-escape is designed
to be OK for pathnames (and shells), this seems like a reasonable
assumption.

In principle the --format argument to notmuch-restore is not much
needed at this point, but it adds literally 5 lines of argument
description, so I left it in.
---
 dump-restore-private.h |5 +-
 notmuch-restore.c  |  111 ++-
 2 files changed, 92 insertions(+), 24 deletions(-)

diff --git a/dump-restore-private.h b/dump-restore-private.h
index 34a5022..67795e5 100644
--- a/dump-restore-private.h
+++ b/dump-restore-private.h
@@ -5,8 +5,9 @@
 #include "command-line-arguments.h"

 typedef enum dump_formats {
-DUMP_FORMAT_SUP,
-DUMP_FORMAT_NOTMUCH
+DUMP_FORMAT_AUTO,
+DUMP_FORMAT_NOTMUCH,
+DUMP_FORMAT_SUP
 } dump_format_t;

 #endif
diff --git a/notmuch-restore.c b/notmuch-restore.c
index 87d9772..3fdfecc 100644
--- a/notmuch-restore.c
+++ b/notmuch-restore.c
@@ -19,6 +19,7 @@
  */

 #include "notmuch-client.h"
+#include "dump-restore-private.h"

 int
 notmuch_restore_command (unused (void *ctx), int argc, char *argv[])
@@ -35,6 +36,7 @@ notmuch_restore_command (unused (void *ctx), int argc, char 
*argv[])
 regex_t regex;
 int rerr;
 int opt_index;
+int input_format = DUMP_FORMAT_AUTO;

 config = notmuch_config_open (ctx, NULL, NULL);
 if (config == NULL)
@@ -48,6 +50,11 @@ notmuch_restore_command (unused (void *ctx), int argc, char 
*argv[])
 synchronize_flags = notmuch_config_get_maildir_synchronize_flags (config);

 notmuch_opt_desc_t options[] = {
+   { NOTMUCH_OPT_KEYWORD, _format, "format", 'f',
+ (notmuch_keyword_t []){ { "auto", DUMP_FORMAT_AUTO },
+ { "notmuch", DUMP_FORMAT_NOTMUCH },
+ { "sup", DUMP_FORMAT_SUP },
+ {0, 0} } },
{ NOTMUCH_OPT_POSITION, _file_name, 0, 0, 0 },
{ NOTMUCH_OPT_BOOLEAN,  , "accumulate", 'a', 0 },
{ 0, 0, 0, 0, 0 }
@@ -77,37 +84,85 @@ notmuch_restore_command (unused (void *ctx), int argc, char 
*argv[])
return 1;
 }

-/* Dump output is one line per message. We match a sequence of
- * non-space characters for the message-id, then one or more
- * spaces, then a list of space-separated tags as a sequence of
- * characters within literal '(' and ')'. */
-if ( xregcomp (,
-  "^([^ ]+) \\(([^)]*)\\)$",
-  REG_EXTENDED) )
-   INTERNAL_ERROR("compile time constant regex failed.");
+
+/* These are out here to re-use the buffers with hex_decode */
+
+char *message_id = NULL;
+size_t message_id_size = 0;
+char *tag = NULL;
+size_t tag_size = 0;
+notmuch_bool_t first_line = TRUE;

 while ((line_len = getline (, _size, input)) != -1) {
regmatch_t match[3];
-   char *message_id, *file_tags, *tag, *next;
+   char  *file_tags, *next;
notmuch_message_t *message = NULL;
+
notmuch_status_t status;
notmuch_tags_t *db_tags;
char *db_tags_str;

chomp_newline (line);
+   if (first_line && input_format == DUMP_FORMAT_AUTO) {
+   char *p;

-   rerr = xregexec (, line, 3, match, 0);
-   if (rerr == REG_NOMATCH)
-   {
-   fprintf (stderr, "Warning: Ignoring invalid input line: %s\n",
-line);
+   for (p = line; *p; p++) {
+   if (*p == '(')
+   input_format = DUMP_FORMAT_SUP;
+   }
+
+   if (input_format == DUMP_FORMAT_AUTO)
+   input_format = DUMP_FORMAT_NOTMUCH;
+
+   }
+
+   /* sup dump output is one line per message. We match a
+* sequence of non-space characters for the message-id, then
+* one or more spaces, then a list of space-separated tags as
+* a sequence of characters within literal '(' and ')'. */
+   if (first_line && input_format == DUMP_FORMAT_SUP) {
+   if ( xregcomp (,
+  "^([^ ]+) \\(([^)]*)\\)$",
+  REG_EXTENDED) )
+   INTERNAL_ERROR("compile time constant regex failed.");
+   }
+
+
+   /* Silently ignore blank lines */
+
+   if (line[0] == '\0') {
continue;
}

-   message_id = xstrndup (line + match[1].rm_so,
-  match[1].rm_eo - match[1].rm_so);
-   file_tags = xstrndup (line + match[2].rm_so,
- match[2].rm_eo - match[2].rm_so);
+   if (input_format == DUMP_FORMAT_SUP) {
+   rerr = xregexec (, line, 3, match, 0);
+   if (rerr == REG_NOMATCH)
+   {
+   fprintf (stderr, "Warning: Ignoring invalid input line: %s\n",
+ 

[PATCH v3 06/10] notmuch-restore: add 'notmuch format' support, auto detect

2012-01-14 Thread David Bremner
From: David Bremner brem...@debian.org

This format consists of whitespace-separated tokens, encoded by
util/hex-escape.c.

The format detection heuristic relies on the fact that '(' is not part
of the character set used by hex-escape. Since hex-escape is designed
to be OK for pathnames (and shells), this seems like a reasonable
assumption.

In principle the --format argument to notmuch-restore is not much
needed at this point, but it adds literally 5 lines of argument
description, so I left it in.
---
 dump-restore-private.h |5 +-
 notmuch-restore.c  |  111 ++-
 2 files changed, 92 insertions(+), 24 deletions(-)

diff --git a/dump-restore-private.h b/dump-restore-private.h
index 34a5022..67795e5 100644
--- a/dump-restore-private.h
+++ b/dump-restore-private.h
@@ -5,8 +5,9 @@
 #include command-line-arguments.h
 
 typedef enum dump_formats {
-DUMP_FORMAT_SUP,
-DUMP_FORMAT_NOTMUCH
+DUMP_FORMAT_AUTO,
+DUMP_FORMAT_NOTMUCH,
+DUMP_FORMAT_SUP
 } dump_format_t;
 
 #endif
diff --git a/notmuch-restore.c b/notmuch-restore.c
index 87d9772..3fdfecc 100644
--- a/notmuch-restore.c
+++ b/notmuch-restore.c
@@ -19,6 +19,7 @@
  */
 
 #include notmuch-client.h
+#include dump-restore-private.h
 
 int
 notmuch_restore_command (unused (void *ctx), int argc, char *argv[])
@@ -35,6 +36,7 @@ notmuch_restore_command (unused (void *ctx), int argc, char 
*argv[])
 regex_t regex;
 int rerr;
 int opt_index;
+int input_format = DUMP_FORMAT_AUTO;
 
 config = notmuch_config_open (ctx, NULL, NULL);
 if (config == NULL)
@@ -48,6 +50,11 @@ notmuch_restore_command (unused (void *ctx), int argc, char 
*argv[])
 synchronize_flags = notmuch_config_get_maildir_synchronize_flags (config);
 
 notmuch_opt_desc_t options[] = {
+   { NOTMUCH_OPT_KEYWORD, input_format, format, 'f',
+ (notmuch_keyword_t []){ { auto, DUMP_FORMAT_AUTO },
+ { notmuch, DUMP_FORMAT_NOTMUCH },
+ { sup, DUMP_FORMAT_SUP },
+ {0, 0} } },
{ NOTMUCH_OPT_POSITION, input_file_name, 0, 0, 0 },
{ NOTMUCH_OPT_BOOLEAN,  accumulate, accumulate, 'a', 0 },
{ 0, 0, 0, 0, 0 }
@@ -77,37 +84,85 @@ notmuch_restore_command (unused (void *ctx), int argc, char 
*argv[])
return 1;
 }
 
-/* Dump output is one line per message. We match a sequence of
- * non-space characters for the message-id, then one or more
- * spaces, then a list of space-separated tags as a sequence of
- * characters within literal '(' and ')'. */
-if ( xregcomp (regex,
-  ^([^ ]+) \\(([^)]*)\\)$,
-  REG_EXTENDED) )
-   INTERNAL_ERROR(compile time constant regex failed.);
+
+/* These are out here to re-use the buffers with hex_decode */
+
+char *message_id = NULL;
+size_t message_id_size = 0;
+char *tag = NULL;
+size_t tag_size = 0;
+notmuch_bool_t first_line = TRUE;
 
 while ((line_len = getline (line, line_size, input)) != -1) {
regmatch_t match[3];
-   char *message_id, *file_tags, *tag, *next;
+   char  *file_tags, *next;
notmuch_message_t *message = NULL;
+
notmuch_status_t status;
notmuch_tags_t *db_tags;
char *db_tags_str;
 
chomp_newline (line);
+   if (first_line  input_format == DUMP_FORMAT_AUTO) {
+   char *p;
 
-   rerr = xregexec (regex, line, 3, match, 0);
-   if (rerr == REG_NOMATCH)
-   {
-   fprintf (stderr, Warning: Ignoring invalid input line: %s\n,
-line);
+   for (p = line; *p; p++) {
+   if (*p == '(')
+   input_format = DUMP_FORMAT_SUP;
+   }
+
+   if (input_format == DUMP_FORMAT_AUTO)
+   input_format = DUMP_FORMAT_NOTMUCH;
+
+   }
+
+   /* sup dump output is one line per message. We match a
+* sequence of non-space characters for the message-id, then
+* one or more spaces, then a list of space-separated tags as
+* a sequence of characters within literal '(' and ')'. */
+   if (first_line  input_format == DUMP_FORMAT_SUP) {
+   if ( xregcomp (regex,
+  ^([^ ]+) \\(([^)]*)\\)$,
+  REG_EXTENDED) )
+   INTERNAL_ERROR(compile time constant regex failed.);
+   }
+
+
+   /* Silently ignore blank lines */
+
+   if (line[0] == '\0') {
continue;
}
 
-   message_id = xstrndup (line + match[1].rm_so,
-  match[1].rm_eo - match[1].rm_so);
-   file_tags = xstrndup (line + match[2].rm_so,
- match[2].rm_eo - match[2].rm_so);
+   if (input_format == DUMP_FORMAT_SUP) {
+   rerr = xregexec (regex, line, 3, match, 0);
+   if (rerr == REG_NOMATCH)
+   {
+   fprintf (stderr, Warning: Ignoring invalid input