On (17/10/13 12:56), Lukas Slebodnik wrote:
>On (10/10/13 23:03), Dmitri Pal wrote:
>>Hello,
>>
>>The attached patches address ticket
>>https://fedorahosted.org/sssd/ticket/2106
>>The patches do not define new flags as suggested in the ticket.
>>As we discussed with Nalin we try to detect the encoding based on the
>>BOM and do the right thing without flags.
>>If we see that this is not enough we will add flags but that would add a
>>new function so such change would be destined for a bigger release.
>>For now I think the use case is addressed. If we see the need to other
>>use cases we will open other tickets.
>>
>>The patch depends on the previous set of patches that deal with C-Style
>>comments.
>>
>>I also found some other unrelated issue that I logged as a separate ticket.
>>https://fedorahosted.org/sssd/ticket/2119
>>
>>-- 
>>Thank you,
>>Dmitri Pal
>>
>>Sr. Engineering Manager for IdM portfolio
>>Red Hat Inc.
>>
>>
>>-------------------------------
>>Looking to carve out IT costs?
>>www.redhat.com/carveoutcosts/
>>
>>
>>
>
>>From ee1988301ba164a12263b7faf8c9bf686174deab Mon Sep 17 00:00:00 2001
>>From: Dmitri Pal <d...@redhat.com>
>>Date: Thu, 10 Oct 2013 22:47:23 -0400
>>Subject: [PATCH 3/4] [INI] Convert files to UTF
>>
>>Patch adds functionality that detects BOM
>>at the beginning of the file and uses
>>it to convert to UTF8.
>>If no BOM is detected file is assumed to be UTF8
>>encoded.
>>---
>> ini/ini_fileobj.c |  317 
>> +++++++++++++++++++++++++++++++++++++++++++++++++++--
>> 1 files changed, 306 insertions(+), 11 deletions(-)
>>
>>diff --git a/ini/ini_fileobj.c b/ini/ini_fileobj.c
>>index 7ac9dc6..000d36e 100644
>>--- a/ini/ini_fileobj.c
>>+++ b/ini/ini_fileobj.c
>>@@ -21,14 +21,29 @@
>> 
>> #include "config.h"
>> #include <errno.h>
>>+#include <sys/types.h>
>>+#include <sys/stat.h>
>>+#include <fcntl.h>
>> #include <string.h>
>> #include <stdlib.h>
>>+#include <iconv.h>
>> #include "trace.h"
>> #include "ini_defines.h"
>> #include "ini_configobj.h"
>> #include "ini_config_priv.h"
>> #include "path_utils.h"
>> 
>>+#define ICONV_BUFFER    100
>>+
>>+#define BOM4_SIZE 4
>>+#define BOM3_SIZE 3
>>+#define BOM2_SIZE 2
>>+
>>+#define INDEX_UTF32BE 0
>>+#define INDEX_UTF32LE 1
>>+#define INDEX_UTF16BE 2
>>+#define INDEX_UTF16LE 3
>>+#define INDEX_UTF8    4
I forgot to write one comment in the previous mail.
Could we use enum for INDEX_* instead of constants?

>> 
>> /* Close file but not destroy the object */
>> void ini_config_file_close(struct ini_cfgfile *file_ctx)
>>@@ -52,6 +67,7 @@ void ini_config_file_destroy(struct ini_cfgfile *file_ctx)
>> 
>>     if(file_ctx) {
>>         free(file_ctx->filename);
>>+        simplebuffer_free(file_ctx->file_data);
>>         if(file_ctx->file) fclose(file_ctx->file);
>>         free(file_ctx);
>>     }
>>@@ -59,37 +75,298 @@ void ini_config_file_destroy(struct ini_cfgfile 
>>*file_ctx)
>>     TRACE_FLOW_EXIT();
>> }
>> 
>>-/* Internal common initialization part */
>>-static int common_file_init(struct ini_cfgfile *file_ctx)
>>+/* How much I plan to read? */
>>+size_t how_much_to_read(size_t left, size_t increment)
>>+{
>>+    if(left > increment) return increment;
>>+    else return left;
>>+}
>>+
>>+int check_bom(int ind, unsigned char *buffer, size_t len, size_t *bom_shift)
>>+{
>>+    TRACE_FLOW_ENTRY();
>>+
>>+    if (len > BOM4_SIZE) {
>>+        if ((buffer[0] == 0x00) &&
>>+            (buffer[1] == 0x00) &&
>>+            (buffer[2] == 0xFE) &&
>>+            (buffer[3] == 0xFF)) {
>>+                TRACE_FLOW_RETURN(INDEX_UTF32BE);
>>+                *bom_shift = BOM4_SIZE;
>>+                return INDEX_UTF32BE;
>>+        }
>>+        else if ((buffer[0] == 0xFF) &&
>>+                 (buffer[1] == 0xFE) &&
>>+                 (buffer[2] == 0x00) &&
>>+                 (buffer[3] == 0x00)) {
>>+                TRACE_FLOW_RETURN(INDEX_UTF32LE);
>>+                *bom_shift = BOM4_SIZE;
>>+                return INDEX_UTF32LE;
>>+        }
>>+    }
>>+
>>+    if (len > BOM3_SIZE) {
>>+        if ((buffer[0] == 0xEF) &&
>>+            (buffer[1] == 0xBB) &&
>>+            (buffer[2] == 0xBF)) {
>>+                TRACE_FLOW_RETURN(INDEX_UTF8);
>>+                *bom_shift = BOM3_SIZE;
>>+                return INDEX_UTF8;
>>+        }
>>+    }
>>+
>>+    if (len > BOM2_SIZE) {
>>+        if ((buffer[0] == 0xFE) &&
>>+            (buffer[1] == 0xFF)) {
>>+                TRACE_FLOW_RETURN(INDEX_UTF16BE);
>>+                *bom_shift = BOM2_SIZE;
>>+                return INDEX_UTF16BE;
>>+        }
>>+        else if ((buffer[0] == 0xFF) &&
>>+                 (buffer[1] == 0xFE)) {
>>+                TRACE_FLOW_RETURN(INDEX_UTF16LE);
>>+                *bom_shift = BOM2_SIZE;
>>+                return INDEX_UTF16LE;
>>+        }
>>+    }
>>+
>>+    TRACE_FLOW_RETURN(ind);
>>+    return ind;
>>+}
>>+
>>+/* Internal conversion part */
>>+static int common_file_convert(int raw_file, struct ini_cfgfile *file_ctx)
>> {
>This function is quite long (174 LOC).
>It would be better to split it into smaller functions.
>
>>     int error = EOK;
>>+    size_t read_cnt = 0;
>>+    size_t total_read = 0;
>>+    size_t to_read = 0;
>>+    size_t in_buffer = 0;
>>+    int ind = INDEX_UTF8;
>>+    iconv_t conv = 0;
>>+    size_t conv_res = 0;
>>+    char read_buf[ICONV_BUFFER+1];
>>+    char result_buf[ICONV_BUFFER];
>>+    char *src, *dest;
>>+    size_t to_convert = 0;
>>+    size_t room_left = 0;
>>+    size_t bom_shift = 0;
>>+    const char *encodings[] = {  "UTF-32BE",
>>+                                 "UTF-32LE",
>>+                                 "UTF-16BE",
>>+                                 "UTF-16LE",
>>+                                 "UTF-8" };
>> 
>>     TRACE_FLOW_ENTRY();
>> 
>>+    do {
>>+        to_read = how_much_to_read(file_ctx->file_stats.st_size - total_read,
>>+                                   ICONV_BUFFER - in_buffer);
>>+
>>+        TRACE_INFO_NUMBER("About to read", to_read);
>>+        errno = 0;
>>+        read_cnt = read(raw_file, read_buf + in_buffer, to_read);
>>+        if (read_cnt == -1) {
>>+            error = errno;
>>+            close(raw_file);
>read can fail on second round of loop do-while
>and conv handle will not be closed.
>
>>+            TRACE_ERROR_NUMBER("Failed to read data from file", error);
>>+            return error;
>>+        }
>>+
>>+        if (read_cnt != to_read) {
>>+            error = EIO;
>>+            close(raw_file);
>The same situation is here.
>
>>+            TRACE_ERROR_NUMBER("Read less than required", error);
>>+            return error;
>>+        }
>>+
>>+        /* First time do some initialization */
>>+        if(total_read == 0) {
>           ^^^
>          missing space
>>+            TRACE_INFO_STRING("Reading first time.","Checking BOM");
>>+
>>+            ind = check_bom(ind, (unsigned char *)read_buf, read_cnt, 
>>&bom_shift);
>                                                                        
> ^^^^^^^^^^^
>                                                               line length > 80
>>+
>>+            TRACE_INFO_STRING("Converting to", encodings[INDEX_UTF8]);
>>+            TRACE_INFO_STRING("Converting from", encodings[ind]);
>>+
>>+            errno = 0;
>>+            conv = iconv_open(encodings[INDEX_UTF8], encodings[ind]);
>>+            if (conv == (iconv_t) -1) {
>>+                error = errno;
>>+                close(raw_file);
>>+                TRACE_ERROR_NUMBER("Failed to create converter", error);
>>+                return error;
>>+            }
>>+        }
>          ^^^^^
>          I would prefer to have initialisation >>> if(total_read == 0) <<<
>          before the outermost do while, because it generate coverity warning.
>                      Overwriting "conv" in "conv = iconv_open(encodings[4],
>                      encodings[ind])" leaks the storage that "conv" points to.
>          I know that it is a fall positive, but code will be simpler.
>
>>+        total_read += read_cnt;
>>+        TRACE_INFO_NUMBER("Total read", total_read);
>>+
>>+        do {
>>+            /* Do conversion */
>>+            errno = 0;
>>+            src = read_buf + bom_shift;
>>+            dest = result_buf;
>>+            to_convert = read_cnt + in_buffer - bom_shift;
>>+            bom_shift = 0;
>>+            room_left = ICONV_BUFFER;
>>+            conv_res = iconv(conv, &src, &to_convert, &dest, &room_left);
>>+            if (conv == (iconv_t) -1) {
>                  ^^^^
>             conv_res should be checked and it has type size_t
>>+                error = errno;
>>+                switch(error) {
>>+                case EILSEQ:
>>+                    TRACE_ERROR_NUMBER("Invalid multibyte encoding", error);
>>+                    close(raw_file);
>>+                    iconv_close(conv);
>>+                    return error;
>>+                case EINVAL:
>>+                    /* We need to just read more if we can */
>>+                    if (total_read != file_ctx->file_stats.st_size) {
>>+                        TRACE_INFO_STRING("Incomplete sequence.", "");
>>+                        TRACE_INFO_NUMBER("File size.",
>>+                                          file_ctx->file_stats.st_size);
>>+                        memmove(read_buf, src, to_convert);
>>+                        in_buffer = to_convert;
>>+                    }
>>+                    else {
>>+                        /* Or return error if we can't */
>>+                        TRACE_ERROR_NUMBER("Incomplete sequence", error);
>>+                        close(raw_file);
>>+                        iconv_close(conv);
>>+                        return error;
>>+                    }
>                  ^^^^^^^^^
>                missing break.
>                Is it intentional?
>http://www.freeipa.org/page/Coding_Style#Switch
>/* FALLTHROUGH */
>>+                case E2BIG:
>>+                    TRACE_INFO_STRING("No room in the output buffer.", "");
>>+                    error = simplebuffer_add_raw(file_ctx->file_data,
>>+                                                 result_buf,
>>+                                                 ICONV_BUFFER - room_left,
>>+                                                 ICONV_BUFFER);
>>+                    if (error) {
>>+                        TRACE_ERROR_NUMBER("Failed to store converted bytes",
>>+                                            error);
>>+                        close(raw_file);
>>+                        iconv_close(conv);
>>+                        return error;
>>+                    }
>>+
>>+                    room_left = ICONV_BUFFER;
>>+                    dest = result_buf;
>>+                    continue;
>>+                default:
>>+                    TRACE_ERROR_NUMBER("Unexpected internal error",
>>+                                        error);
>>+                    close(raw_file);
>>+                    iconv_close(conv);
>>+                    return ENOTSUP;
>>+                }
>>+            }
>>+            /* The whole buffer was sucessfully converted */
>>+            error = simplebuffer_add_raw(file_ctx->file_data,
>>+                                         result_buf,
>>+                                         ICONV_BUFFER - room_left,
>>+                                         ICONV_BUFFER);
>>+            if (error) {
>>+                TRACE_ERROR_NUMBER("Failed to store converted bytes",
>>+                                    error);
>>+                close(raw_file);
>>+                iconv_close(conv);
>>+                return error;
>>+            }
>>+            TRACE_INFO_STRING("Saved procesed portion.",
>>+                              (char 
>>*)simplebuffer_get_vbuf(file_ctx->file_data));
>>+
>>+            in_buffer = 0;
>>+            break;
>>+        }
>>+        while (1);
>>+    }
>>+    while(total_read < file_ctx->file_stats.st_size);
>>+
>>+    close(raw_file);
>>+    iconv_close(conv);
>>+
>>     /* Open file */
>>-    TRACE_INFO_STRING("File", file_ctx->filename);
>>+    TRACE_INFO_STRING("File data",
>>+                      (char *)simplebuffer_get_vbuf(file_ctx->file_data));
>>+    TRACE_INFO_NUMBER("File len",
>>+                      simplebuffer_get_len(file_ctx->file_data));
>>+    TRACE_INFO_NUMBER("Size",
>>+                      file_ctx->file_data->size);
>>     errno = 0;
>>-    file_ctx->file = fopen(file_ctx->filename, "r");
>>+    file_ctx->file = fmemopen(simplebuffer_get_vbuf(file_ctx->file_data),
>>+                              simplebuffer_get_len(file_ctx->file_data),
>>+                              "r");
>>     if (!(file_ctx->file)) {
>>         error = errno;
>>         TRACE_ERROR_NUMBER("Failed to open file", error);
>>         return error;
>>     }
>> 
>>-    file_ctx->stats_read = 0;
>>+    TRACE_FLOW_EXIT();
>>+    return EOK;
>>+}
>>+
>>+
>>+/* Internal common initialization part */
>>+static int common_file_init(struct ini_cfgfile *file_ctx)
>>+{
>>+    int error = EOK;
>>+    int raw_file = 0;
>>+    int stat_ret = 0;
>>+
>>+    TRACE_FLOW_ENTRY();
>>+
>>+    TRACE_INFO_STRING("File", file_ctx->filename);
>>+
>>+    /* Open file in binary mode first */
>>+    errno = 0;
>>+    raw_file = open(file_ctx->filename, O_RDONLY);
>>+    if (raw_file == -1) {
>>+        error = errno;
>>+        TRACE_ERROR_NUMBER("Failed to open file in binary mode", error);
>>+        return error;
>>+    }
>>+
>>+    /* Get the size of the file */
>>+    errno = 0;
>>+    stat_ret = fstat(raw_file, &(file_ctx->file_stats));
>>+    if (stat_ret == -1) {
>>+        error = errno;
>>+        TRACE_ERROR_NUMBER("Failed to get file stats", error);
>Resource leak
>Handle variable "raw_file" going out of scope leaks the handle.
>>+        return error;
>>+    }
>> 
>>-    /* Collect stats */
>>-    if (file_ctx->metadata_flags & INI_META_STATS) {
>>+    /* Trick to overcome the fact that
>>+     * fopen and fmemopen behave differently when file
>>+     * is 0 length
>>+     */
>>+    if (file_ctx->file_stats.st_size) {
>>+        error = common_file_convert(raw_file, file_ctx);
>>+        if (error) {
>>+            TRACE_ERROR_NUMBER("Failed to convert file",
>>+                                error);
>>+            close(raw_file);
>>+            return error;
>>+        }
>>+    }
>>+    else {
>>         errno = 0;
>>-        if (fstat(fileno(file_ctx->file),
>>-                  &(file_ctx->file_stats)) < 0) {
>>+        file_ctx->file = fdopen(raw_file, "r");
>>+        if (!(file_ctx->file)) {
>>             error = errno;
>>-            TRACE_ERROR_NUMBER("Failed to get file stats.", error);
>>+            TRACE_ERROR_NUMBER("Failed to fdopen file", error);
>>             return error;
>>         }
>>+
>>+    }
>>+
>>+    /* Collect stats */
>>+    if (file_ctx->metadata_flags & INI_META_STATS) {
>>         file_ctx->stats_read = 1;
>>     }
>>-    else memset(&(file_ctx->file_stats), 0, sizeof(struct stat));
>>+    else {
>>+        memset(&(file_ctx->file_stats), 0, sizeof(struct stat));
>>+        file_ctx->stats_read = 0;
>>+    }
>> 
>>     TRACE_FLOW_EXIT();
>>     return EOK;
>>@@ -119,6 +396,15 @@ int ini_config_file_open(const char *filename,
>> 
>>     new_ctx->filename = NULL;
>>     new_ctx->file = NULL;
>>+    new_ctx->file_data = NULL;
>>+
>>+    error = simplebuffer_alloc(&(new_ctx->file_data));
>>+    if (error) {
>>+        TRACE_ERROR_NUMBER("Failed to allocate buffer ctx.", error);
>>+        ini_config_file_destroy(new_ctx);
>>+        return error;
>>+
>>+    }
>> 
>>     /* Store flags */
>>     new_ctx->metadata_flags = metadata_flags;
>>@@ -176,6 +462,15 @@ int ini_config_file_reopen(struct ini_cfgfile 
>>*file_ctx_in,
>>     }
>> 
>>     new_ctx->file = NULL;
>>+    new_ctx->file_data = NULL;
>>+
>>+    error = simplebuffer_alloc(&(new_ctx->file_data));
>>+    if (error) {
>>+        TRACE_ERROR_NUMBER("Failed to allocate buffer ctx.", error);
>>+        ini_config_file_destroy(new_ctx);
>It is better to initialize new_ctx with memset or initialize
>new_ctx->filename to NULL, because ini_config_file_destroy can access to
>uninitialized variable.
>
>>+        return error;
>>+
>>+    }
>> 
>>     /* Store flags */
>>     new_ctx->metadata_flags = file_ctx_in->metadata_flags;
>>-- 
>>1.7.1
>>
>
>>From e4fcd1ba0f86675e6fe6455c80d5215b821af973 Mon Sep 17 00:00:00 2001
>>From: Dmitri Pal <d...@redhat.com>
>>Date: Wed, 9 Oct 2013 23:29:22 -0400
>>Subject: [PATCH 1/4] [INI] Expose buffer context as void
>>
>>In some cases the intenal buffer needs to be accessed as void.
>>Add a function that would do that instead of type casting
>>and dealing with 'const'.
>>---
>> basicobjects/simplebuffer.c |    7 +++++++
>> basicobjects/simplebuffer.h |    4 ++++
>> 2 files changed, 11 insertions(+), 0 deletions(-)
>>
>ACK
>
>>From b48891b2753bd8d26f71f6cbb18b28bd22ece9cc Mon Sep 17 00:00:00 2001
>>From: Dmitri Pal <d...@redhat.com>
>>Date: Thu, 10 Oct 2013 22:51:22 -0400
>>Subject: [PATCH 4/4] [INI] Updated unit test for UTF8 conversion
>>
>>Patch adds new test files and bom creation function.
>>---
>> ini/ini.d/real16be.conf |  Bin 0 -> 3288 bytes
>> ini/ini.d/real16le.conf |  Bin 0 -> 3288 bytes
>> ini/ini.d/real32be.conf |  Bin 0 -> 6576 bytes
>> ini/ini.d/real32le.conf |  Bin 0 -> 6576 bytes
>> ini/ini.d/real8.conf    |   70 
>> +++++++++++++++++++++++++++++++++++++++++++++++
>> ini/ini_parse_ut.c      |   35 +++++++++++++++++++++++
>> 6 files changed, 105 insertions(+), 0 deletions(-)
>> create mode 100644 ini/ini.d/real16be.conf
>> create mode 100644 ini/ini.d/real16le.conf
>> create mode 100644 ini/ini.d/real32be.conf
>> create mode 100644 ini/ini.d/real32le.conf
>> create mode 100644 ini/ini.d/real8.conf
>>
>
>The are some trailing whitespaces in real8.conf, but it seems like intentional.
>
>ACK
>
>>From f0c79c9091d9165d60aaa030d09fe63450215c5d Mon Sep 17 00:00:00 2001
>>From: Dmitri Pal <d...@redhat.com>
>>Date: Wed, 9 Oct 2013 23:31:49 -0400
>>Subject: [PATCH 2/4] [INI] Extend internal file handle
>>
>>When we need to decode the file we need to keep it
>>in memory so we need to have a buffer.
>>Adding simple buffer into the internal structure.
>>---
>> ini/ini_config_priv.h |    3 +++
>> 1 files changed, 3 insertions(+), 0 deletions(-)
>>
>>diff --git a/ini/ini_config_priv.h b/ini/ini_config_priv.h
>>index 89ad8a9..e1292f7 100644
>>--- a/ini/ini_config_priv.h
>>+++ b/ini/ini_config_priv.h
>>@@ -26,6 +26,7 @@
>> #include <sys/stat.h>
>> #include <unistd.h>
>> #include "collection.h"
>>+#include "simplebuffer.h"
>> #include "ini_comment.h"
>> 
>> /* Configuration object */
>>@@ -71,6 +72,8 @@ struct ini_cfgfile {
>>     struct stat file_stats;
>>     /* Were stats read ? */
>>     int stats_read;
>>+    /* Internal buffer */
>>+    struct simplebuffer *file_data;
>> };
>> 
>> /* Parsing error */
>
>ACK
>
>LS
>_______________________________________________
>sssd-devel mailing list
>sssd-devel@lists.fedorahosted.org
>https://lists.fedorahosted.org/mailman/listinfo/sssd-devel
_______________________________________________
sssd-devel mailing list
sssd-devel@lists.fedorahosted.org
https://lists.fedorahosted.org/mailman/listinfo/sssd-devel

Reply via email to