On (10/10/13 23:03), Dmitri Pal wrote:
>Hello,
>
>The attached patches address ticket
>https://fedorahosted.org/sssd/ticket/2106
>The patches do not define new flags as suggested in the ticket.
>As we discussed with Nalin we try to detect the encoding based on the
>BOM and do the right thing without flags.
>If we see that this is not enough we will add flags but that would add a
>new function so such change would be destined for a bigger release.
>For now I think the use case is addressed. If we see the need to other
>use cases we will open other tickets.
>
>The patch depends on the previous set of patches that deal with C-Style
>comments.
>
>I also found some other unrelated issue that I logged as a separate ticket.
>https://fedorahosted.org/sssd/ticket/2119
>
>-- 
>Thank you,
>Dmitri Pal
>
>Sr. Engineering Manager for IdM portfolio
>Red Hat Inc.
>
>
>-------------------------------
>Looking to carve out IT costs?
>www.redhat.com/carveoutcosts/
>
>
>

>From ee1988301ba164a12263b7faf8c9bf686174deab Mon Sep 17 00:00:00 2001
>From: Dmitri Pal <d...@redhat.com>
>Date: Thu, 10 Oct 2013 22:47:23 -0400
>Subject: [PATCH 3/4] [INI] Convert files to UTF
>
>Patch adds functionality that detects BOM
>at the beginning of the file and uses
>it to convert to UTF8.
>If no BOM is detected file is assumed to be UTF8
>encoded.
>---
> ini/ini_fileobj.c |  317 +++++++++++++++++++++++++++++++++++++++++++++++++++--
> 1 files changed, 306 insertions(+), 11 deletions(-)
>
>diff --git a/ini/ini_fileobj.c b/ini/ini_fileobj.c
>index 7ac9dc6..000d36e 100644
>--- a/ini/ini_fileobj.c
>+++ b/ini/ini_fileobj.c
>@@ -21,14 +21,29 @@
> 
> #include "config.h"
> #include <errno.h>
>+#include <sys/types.h>
>+#include <sys/stat.h>
>+#include <fcntl.h>
> #include <string.h>
> #include <stdlib.h>
>+#include <iconv.h>
> #include "trace.h"
> #include "ini_defines.h"
> #include "ini_configobj.h"
> #include "ini_config_priv.h"
> #include "path_utils.h"
> 
>+#define ICONV_BUFFER    100
>+
>+#define BOM4_SIZE 4
>+#define BOM3_SIZE 3
>+#define BOM2_SIZE 2
>+
>+#define INDEX_UTF32BE 0
>+#define INDEX_UTF32LE 1
>+#define INDEX_UTF16BE 2
>+#define INDEX_UTF16LE 3
>+#define INDEX_UTF8    4
> 
> /* Close file but not destroy the object */
> void ini_config_file_close(struct ini_cfgfile *file_ctx)
>@@ -52,6 +67,7 @@ void ini_config_file_destroy(struct ini_cfgfile *file_ctx)
> 
>     if(file_ctx) {
>         free(file_ctx->filename);
>+        simplebuffer_free(file_ctx->file_data);
>         if(file_ctx->file) fclose(file_ctx->file);
>         free(file_ctx);
>     }
>@@ -59,37 +75,298 @@ void ini_config_file_destroy(struct ini_cfgfile *file_ctx)
>     TRACE_FLOW_EXIT();
> }
> 
>-/* Internal common initialization part */
>-static int common_file_init(struct ini_cfgfile *file_ctx)
>+/* How much I plan to read? */
>+size_t how_much_to_read(size_t left, size_t increment)
>+{
>+    if(left > increment) return increment;
>+    else return left;
>+}
>+
>+int check_bom(int ind, unsigned char *buffer, size_t len, size_t *bom_shift)
>+{
>+    TRACE_FLOW_ENTRY();
>+
>+    if (len > BOM4_SIZE) {
>+        if ((buffer[0] == 0x00) &&
>+            (buffer[1] == 0x00) &&
>+            (buffer[2] == 0xFE) &&
>+            (buffer[3] == 0xFF)) {
>+                TRACE_FLOW_RETURN(INDEX_UTF32BE);
>+                *bom_shift = BOM4_SIZE;
>+                return INDEX_UTF32BE;
>+        }
>+        else if ((buffer[0] == 0xFF) &&
>+                 (buffer[1] == 0xFE) &&
>+                 (buffer[2] == 0x00) &&
>+                 (buffer[3] == 0x00)) {
>+                TRACE_FLOW_RETURN(INDEX_UTF32LE);
>+                *bom_shift = BOM4_SIZE;
>+                return INDEX_UTF32LE;
>+        }
>+    }
>+
>+    if (len > BOM3_SIZE) {
>+        if ((buffer[0] == 0xEF) &&
>+            (buffer[1] == 0xBB) &&
>+            (buffer[2] == 0xBF)) {
>+                TRACE_FLOW_RETURN(INDEX_UTF8);
>+                *bom_shift = BOM3_SIZE;
>+                return INDEX_UTF8;
>+        }
>+    }
>+
>+    if (len > BOM2_SIZE) {
>+        if ((buffer[0] == 0xFE) &&
>+            (buffer[1] == 0xFF)) {
>+                TRACE_FLOW_RETURN(INDEX_UTF16BE);
>+                *bom_shift = BOM2_SIZE;
>+                return INDEX_UTF16BE;
>+        }
>+        else if ((buffer[0] == 0xFF) &&
>+                 (buffer[1] == 0xFE)) {
>+                TRACE_FLOW_RETURN(INDEX_UTF16LE);
>+                *bom_shift = BOM2_SIZE;
>+                return INDEX_UTF16LE;
>+        }
>+    }
>+
>+    TRACE_FLOW_RETURN(ind);
>+    return ind;
>+}
>+
>+/* Internal conversion part */
>+static int common_file_convert(int raw_file, struct ini_cfgfile *file_ctx)
> {
This function is quite long (174 LOC).
It would be better to split it into smaller functions.

>     int error = EOK;
>+    size_t read_cnt = 0;
>+    size_t total_read = 0;
>+    size_t to_read = 0;
>+    size_t in_buffer = 0;
>+    int ind = INDEX_UTF8;
>+    iconv_t conv = 0;
>+    size_t conv_res = 0;
>+    char read_buf[ICONV_BUFFER+1];
>+    char result_buf[ICONV_BUFFER];
>+    char *src, *dest;
>+    size_t to_convert = 0;
>+    size_t room_left = 0;
>+    size_t bom_shift = 0;
>+    const char *encodings[] = {  "UTF-32BE",
>+                                 "UTF-32LE",
>+                                 "UTF-16BE",
>+                                 "UTF-16LE",
>+                                 "UTF-8" };
> 
>     TRACE_FLOW_ENTRY();
> 
>+    do {
>+        to_read = how_much_to_read(file_ctx->file_stats.st_size - total_read,
>+                                   ICONV_BUFFER - in_buffer);
>+
>+        TRACE_INFO_NUMBER("About to read", to_read);
>+        errno = 0;
>+        read_cnt = read(raw_file, read_buf + in_buffer, to_read);
>+        if (read_cnt == -1) {
>+            error = errno;
>+            close(raw_file);
read can fail on second round of loop do-while
and conv handle will not be closed.

>+            TRACE_ERROR_NUMBER("Failed to read data from file", error);
>+            return error;
>+        }
>+
>+        if (read_cnt != to_read) {
>+            error = EIO;
>+            close(raw_file);
The same situation is here.

>+            TRACE_ERROR_NUMBER("Read less than required", error);
>+            return error;
>+        }
>+
>+        /* First time do some initialization */
>+        if(total_read == 0) {
           ^^^
          missing space
>+            TRACE_INFO_STRING("Reading first time.","Checking BOM");
>+
>+            ind = check_bom(ind, (unsigned char *)read_buf, read_cnt, 
>&bom_shift);
                                                                        
^^^^^^^^^^^
                                                               line length > 80
>+
>+            TRACE_INFO_STRING("Converting to", encodings[INDEX_UTF8]);
>+            TRACE_INFO_STRING("Converting from", encodings[ind]);
>+
>+            errno = 0;
>+            conv = iconv_open(encodings[INDEX_UTF8], encodings[ind]);
>+            if (conv == (iconv_t) -1) {
>+                error = errno;
>+                close(raw_file);
>+                TRACE_ERROR_NUMBER("Failed to create converter", error);
>+                return error;
>+            }
>+        }
          ^^^^^
          I would prefer to have initialisation >>> if(total_read == 0) <<<
          before the outermost do while, because it generate coverity warning.
                      Overwriting "conv" in "conv = iconv_open(encodings[4],
                      encodings[ind])" leaks the storage that "conv" points to.
          I know that it is a fall positive, but code will be simpler.

>+        total_read += read_cnt;
>+        TRACE_INFO_NUMBER("Total read", total_read);
>+
>+        do {
>+            /* Do conversion */
>+            errno = 0;
>+            src = read_buf + bom_shift;
>+            dest = result_buf;
>+            to_convert = read_cnt + in_buffer - bom_shift;
>+            bom_shift = 0;
>+            room_left = ICONV_BUFFER;
>+            conv_res = iconv(conv, &src, &to_convert, &dest, &room_left);
>+            if (conv == (iconv_t) -1) {
                  ^^^^
             conv_res should be checked and it has type size_t
>+                error = errno;
>+                switch(error) {
>+                case EILSEQ:
>+                    TRACE_ERROR_NUMBER("Invalid multibyte encoding", error);
>+                    close(raw_file);
>+                    iconv_close(conv);
>+                    return error;
>+                case EINVAL:
>+                    /* We need to just read more if we can */
>+                    if (total_read != file_ctx->file_stats.st_size) {
>+                        TRACE_INFO_STRING("Incomplete sequence.", "");
>+                        TRACE_INFO_NUMBER("File size.",
>+                                          file_ctx->file_stats.st_size);
>+                        memmove(read_buf, src, to_convert);
>+                        in_buffer = to_convert;
>+                    }
>+                    else {
>+                        /* Or return error if we can't */
>+                        TRACE_ERROR_NUMBER("Incomplete sequence", error);
>+                        close(raw_file);
>+                        iconv_close(conv);
>+                        return error;
>+                    }
                  ^^^^^^^^^
                missing break.
                Is it intentional?
http://www.freeipa.org/page/Coding_Style#Switch
/* FALLTHROUGH */
>+                case E2BIG:
>+                    TRACE_INFO_STRING("No room in the output buffer.", "");
>+                    error = simplebuffer_add_raw(file_ctx->file_data,
>+                                                 result_buf,
>+                                                 ICONV_BUFFER - room_left,
>+                                                 ICONV_BUFFER);
>+                    if (error) {
>+                        TRACE_ERROR_NUMBER("Failed to store converted bytes",
>+                                            error);
>+                        close(raw_file);
>+                        iconv_close(conv);
>+                        return error;
>+                    }
>+
>+                    room_left = ICONV_BUFFER;
>+                    dest = result_buf;
>+                    continue;
>+                default:
>+                    TRACE_ERROR_NUMBER("Unexpected internal error",
>+                                        error);
>+                    close(raw_file);
>+                    iconv_close(conv);
>+                    return ENOTSUP;
>+                }
>+            }
>+            /* The whole buffer was sucessfully converted */
>+            error = simplebuffer_add_raw(file_ctx->file_data,
>+                                         result_buf,
>+                                         ICONV_BUFFER - room_left,
>+                                         ICONV_BUFFER);
>+            if (error) {
>+                TRACE_ERROR_NUMBER("Failed to store converted bytes",
>+                                    error);
>+                close(raw_file);
>+                iconv_close(conv);
>+                return error;
>+            }
>+            TRACE_INFO_STRING("Saved procesed portion.",
>+                              (char 
>*)simplebuffer_get_vbuf(file_ctx->file_data));
>+
>+            in_buffer = 0;
>+            break;
>+        }
>+        while (1);
>+    }
>+    while(total_read < file_ctx->file_stats.st_size);
>+
>+    close(raw_file);
>+    iconv_close(conv);
>+
>     /* Open file */
>-    TRACE_INFO_STRING("File", file_ctx->filename);
>+    TRACE_INFO_STRING("File data",
>+                      (char *)simplebuffer_get_vbuf(file_ctx->file_data));
>+    TRACE_INFO_NUMBER("File len",
>+                      simplebuffer_get_len(file_ctx->file_data));
>+    TRACE_INFO_NUMBER("Size",
>+                      file_ctx->file_data->size);
>     errno = 0;
>-    file_ctx->file = fopen(file_ctx->filename, "r");
>+    file_ctx->file = fmemopen(simplebuffer_get_vbuf(file_ctx->file_data),
>+                              simplebuffer_get_len(file_ctx->file_data),
>+                              "r");
>     if (!(file_ctx->file)) {
>         error = errno;
>         TRACE_ERROR_NUMBER("Failed to open file", error);
>         return error;
>     }
> 
>-    file_ctx->stats_read = 0;
>+    TRACE_FLOW_EXIT();
>+    return EOK;
>+}
>+
>+
>+/* Internal common initialization part */
>+static int common_file_init(struct ini_cfgfile *file_ctx)
>+{
>+    int error = EOK;
>+    int raw_file = 0;
>+    int stat_ret = 0;
>+
>+    TRACE_FLOW_ENTRY();
>+
>+    TRACE_INFO_STRING("File", file_ctx->filename);
>+
>+    /* Open file in binary mode first */
>+    errno = 0;
>+    raw_file = open(file_ctx->filename, O_RDONLY);
>+    if (raw_file == -1) {
>+        error = errno;
>+        TRACE_ERROR_NUMBER("Failed to open file in binary mode", error);
>+        return error;
>+    }
>+
>+    /* Get the size of the file */
>+    errno = 0;
>+    stat_ret = fstat(raw_file, &(file_ctx->file_stats));
>+    if (stat_ret == -1) {
>+        error = errno;
>+        TRACE_ERROR_NUMBER("Failed to get file stats", error);
Resource leak
Handle variable "raw_file" going out of scope leaks the handle.
>+        return error;
>+    }
> 
>-    /* Collect stats */
>-    if (file_ctx->metadata_flags & INI_META_STATS) {
>+    /* Trick to overcome the fact that
>+     * fopen and fmemopen behave differently when file
>+     * is 0 length
>+     */
>+    if (file_ctx->file_stats.st_size) {
>+        error = common_file_convert(raw_file, file_ctx);
>+        if (error) {
>+            TRACE_ERROR_NUMBER("Failed to convert file",
>+                                error);
>+            close(raw_file);
>+            return error;
>+        }
>+    }
>+    else {
>         errno = 0;
>-        if (fstat(fileno(file_ctx->file),
>-                  &(file_ctx->file_stats)) < 0) {
>+        file_ctx->file = fdopen(raw_file, "r");
>+        if (!(file_ctx->file)) {
>             error = errno;
>-            TRACE_ERROR_NUMBER("Failed to get file stats.", error);
>+            TRACE_ERROR_NUMBER("Failed to fdopen file", error);
>             return error;
>         }
>+
>+    }
>+
>+    /* Collect stats */
>+    if (file_ctx->metadata_flags & INI_META_STATS) {
>         file_ctx->stats_read = 1;
>     }
>-    else memset(&(file_ctx->file_stats), 0, sizeof(struct stat));
>+    else {
>+        memset(&(file_ctx->file_stats), 0, sizeof(struct stat));
>+        file_ctx->stats_read = 0;
>+    }
> 
>     TRACE_FLOW_EXIT();
>     return EOK;
>@@ -119,6 +396,15 @@ int ini_config_file_open(const char *filename,
> 
>     new_ctx->filename = NULL;
>     new_ctx->file = NULL;
>+    new_ctx->file_data = NULL;
>+
>+    error = simplebuffer_alloc(&(new_ctx->file_data));
>+    if (error) {
>+        TRACE_ERROR_NUMBER("Failed to allocate buffer ctx.", error);
>+        ini_config_file_destroy(new_ctx);
>+        return error;
>+
>+    }
> 
>     /* Store flags */
>     new_ctx->metadata_flags = metadata_flags;
>@@ -176,6 +462,15 @@ int ini_config_file_reopen(struct ini_cfgfile 
>*file_ctx_in,
>     }
> 
>     new_ctx->file = NULL;
>+    new_ctx->file_data = NULL;
>+
>+    error = simplebuffer_alloc(&(new_ctx->file_data));
>+    if (error) {
>+        TRACE_ERROR_NUMBER("Failed to allocate buffer ctx.", error);
>+        ini_config_file_destroy(new_ctx);
It is better to initialize new_ctx with memset or initialize
new_ctx->filename to NULL, because ini_config_file_destroy can access to
uninitialized variable.

>+        return error;
>+
>+    }
> 
>     /* Store flags */
>     new_ctx->metadata_flags = file_ctx_in->metadata_flags;
>-- 
>1.7.1
>

>From e4fcd1ba0f86675e6fe6455c80d5215b821af973 Mon Sep 17 00:00:00 2001
>From: Dmitri Pal <d...@redhat.com>
>Date: Wed, 9 Oct 2013 23:29:22 -0400
>Subject: [PATCH 1/4] [INI] Expose buffer context as void
>
>In some cases the intenal buffer needs to be accessed as void.
>Add a function that would do that instead of type casting
>and dealing with 'const'.
>---
> basicobjects/simplebuffer.c |    7 +++++++
> basicobjects/simplebuffer.h |    4 ++++
> 2 files changed, 11 insertions(+), 0 deletions(-)
>
ACK

>From b48891b2753bd8d26f71f6cbb18b28bd22ece9cc Mon Sep 17 00:00:00 2001
>From: Dmitri Pal <d...@redhat.com>
>Date: Thu, 10 Oct 2013 22:51:22 -0400
>Subject: [PATCH 4/4] [INI] Updated unit test for UTF8 conversion
>
>Patch adds new test files and bom creation function.
>---
> ini/ini.d/real16be.conf |  Bin 0 -> 3288 bytes
> ini/ini.d/real16le.conf |  Bin 0 -> 3288 bytes
> ini/ini.d/real32be.conf |  Bin 0 -> 6576 bytes
> ini/ini.d/real32le.conf |  Bin 0 -> 6576 bytes
> ini/ini.d/real8.conf    |   70 +++++++++++++++++++++++++++++++++++++++++++++++
> ini/ini_parse_ut.c      |   35 +++++++++++++++++++++++
> 6 files changed, 105 insertions(+), 0 deletions(-)
> create mode 100644 ini/ini.d/real16be.conf
> create mode 100644 ini/ini.d/real16le.conf
> create mode 100644 ini/ini.d/real32be.conf
> create mode 100644 ini/ini.d/real32le.conf
> create mode 100644 ini/ini.d/real8.conf
>

The are some trailing whitespaces in real8.conf, but it seems like intentional.

ACK

>From f0c79c9091d9165d60aaa030d09fe63450215c5d Mon Sep 17 00:00:00 2001
>From: Dmitri Pal <d...@redhat.com>
>Date: Wed, 9 Oct 2013 23:31:49 -0400
>Subject: [PATCH 2/4] [INI] Extend internal file handle
>
>When we need to decode the file we need to keep it
>in memory so we need to have a buffer.
>Adding simple buffer into the internal structure.
>---
> ini/ini_config_priv.h |    3 +++
> 1 files changed, 3 insertions(+), 0 deletions(-)
>
>diff --git a/ini/ini_config_priv.h b/ini/ini_config_priv.h
>index 89ad8a9..e1292f7 100644
>--- a/ini/ini_config_priv.h
>+++ b/ini/ini_config_priv.h
>@@ -26,6 +26,7 @@
> #include <sys/stat.h>
> #include <unistd.h>
> #include "collection.h"
>+#include "simplebuffer.h"
> #include "ini_comment.h"
> 
> /* Configuration object */
>@@ -71,6 +72,8 @@ struct ini_cfgfile {
>     struct stat file_stats;
>     /* Were stats read ? */
>     int stats_read;
>+    /* Internal buffer */
>+    struct simplebuffer *file_data;
> };
> 
> /* Parsing error */

ACK

LS
_______________________________________________
sssd-devel mailing list
sssd-devel@lists.fedorahosted.org
https://lists.fedorahosted.org/mailman/listinfo/sssd-devel

Reply via email to