On (10/10/13 23:03), Dmitri Pal wrote: >Hello, > >The attached patches address ticket >https://fedorahosted.org/sssd/ticket/2106 >The patches do not define new flags as suggested in the ticket. >As we discussed with Nalin we try to detect the encoding based on the >BOM and do the right thing without flags. >If we see that this is not enough we will add flags but that would add a >new function so such change would be destined for a bigger release. >For now I think the use case is addressed. If we see the need to other >use cases we will open other tickets. > >The patch depends on the previous set of patches that deal with C-Style >comments. > >I also found some other unrelated issue that I logged as a separate ticket. >https://fedorahosted.org/sssd/ticket/2119 > >-- >Thank you, >Dmitri Pal > >Sr. Engineering Manager for IdM portfolio >Red Hat Inc. > > >------------------------------- >Looking to carve out IT costs? >www.redhat.com/carveoutcosts/ > > >
>From ee1988301ba164a12263b7faf8c9bf686174deab Mon Sep 17 00:00:00 2001 >From: Dmitri Pal <d...@redhat.com> >Date: Thu, 10 Oct 2013 22:47:23 -0400 >Subject: [PATCH 3/4] [INI] Convert files to UTF > >Patch adds functionality that detects BOM >at the beginning of the file and uses >it to convert to UTF8. >If no BOM is detected file is assumed to be UTF8 >encoded. >--- > ini/ini_fileobj.c | 317 +++++++++++++++++++++++++++++++++++++++++++++++++++-- > 1 files changed, 306 insertions(+), 11 deletions(-) > >diff --git a/ini/ini_fileobj.c b/ini/ini_fileobj.c >index 7ac9dc6..000d36e 100644 >--- a/ini/ini_fileobj.c >+++ b/ini/ini_fileobj.c >@@ -21,14 +21,29 @@ > > #include "config.h" > #include <errno.h> >+#include <sys/types.h> >+#include <sys/stat.h> >+#include <fcntl.h> > #include <string.h> > #include <stdlib.h> >+#include <iconv.h> > #include "trace.h" > #include "ini_defines.h" > #include "ini_configobj.h" > #include "ini_config_priv.h" > #include "path_utils.h" > >+#define ICONV_BUFFER 100 >+ >+#define BOM4_SIZE 4 >+#define BOM3_SIZE 3 >+#define BOM2_SIZE 2 >+ >+#define INDEX_UTF32BE 0 >+#define INDEX_UTF32LE 1 >+#define INDEX_UTF16BE 2 >+#define INDEX_UTF16LE 3 >+#define INDEX_UTF8 4 > > /* Close file but not destroy the object */ > void ini_config_file_close(struct ini_cfgfile *file_ctx) >@@ -52,6 +67,7 @@ void ini_config_file_destroy(struct ini_cfgfile *file_ctx) > > if(file_ctx) { > free(file_ctx->filename); >+ simplebuffer_free(file_ctx->file_data); > if(file_ctx->file) fclose(file_ctx->file); > free(file_ctx); > } >@@ -59,37 +75,298 @@ void ini_config_file_destroy(struct ini_cfgfile *file_ctx) > TRACE_FLOW_EXIT(); > } > >-/* Internal common initialization part */ >-static int common_file_init(struct ini_cfgfile *file_ctx) >+/* How much I plan to read? */ >+size_t how_much_to_read(size_t left, size_t increment) >+{ >+ if(left > increment) return increment; >+ else return left; >+} >+ >+int check_bom(int ind, unsigned char *buffer, size_t len, size_t *bom_shift) >+{ >+ TRACE_FLOW_ENTRY(); >+ >+ if (len > BOM4_SIZE) { >+ if ((buffer[0] == 0x00) && >+ (buffer[1] == 0x00) && >+ (buffer[2] == 0xFE) && >+ (buffer[3] == 0xFF)) { >+ TRACE_FLOW_RETURN(INDEX_UTF32BE); >+ *bom_shift = BOM4_SIZE; >+ return INDEX_UTF32BE; >+ } >+ else if ((buffer[0] == 0xFF) && >+ (buffer[1] == 0xFE) && >+ (buffer[2] == 0x00) && >+ (buffer[3] == 0x00)) { >+ TRACE_FLOW_RETURN(INDEX_UTF32LE); >+ *bom_shift = BOM4_SIZE; >+ return INDEX_UTF32LE; >+ } >+ } >+ >+ if (len > BOM3_SIZE) { >+ if ((buffer[0] == 0xEF) && >+ (buffer[1] == 0xBB) && >+ (buffer[2] == 0xBF)) { >+ TRACE_FLOW_RETURN(INDEX_UTF8); >+ *bom_shift = BOM3_SIZE; >+ return INDEX_UTF8; >+ } >+ } >+ >+ if (len > BOM2_SIZE) { >+ if ((buffer[0] == 0xFE) && >+ (buffer[1] == 0xFF)) { >+ TRACE_FLOW_RETURN(INDEX_UTF16BE); >+ *bom_shift = BOM2_SIZE; >+ return INDEX_UTF16BE; >+ } >+ else if ((buffer[0] == 0xFF) && >+ (buffer[1] == 0xFE)) { >+ TRACE_FLOW_RETURN(INDEX_UTF16LE); >+ *bom_shift = BOM2_SIZE; >+ return INDEX_UTF16LE; >+ } >+ } >+ >+ TRACE_FLOW_RETURN(ind); >+ return ind; >+} >+ >+/* Internal conversion part */ >+static int common_file_convert(int raw_file, struct ini_cfgfile *file_ctx) > { This function is quite long (174 LOC). It would be better to split it into smaller functions. > int error = EOK; >+ size_t read_cnt = 0; >+ size_t total_read = 0; >+ size_t to_read = 0; >+ size_t in_buffer = 0; >+ int ind = INDEX_UTF8; >+ iconv_t conv = 0; >+ size_t conv_res = 0; >+ char read_buf[ICONV_BUFFER+1]; >+ char result_buf[ICONV_BUFFER]; >+ char *src, *dest; >+ size_t to_convert = 0; >+ size_t room_left = 0; >+ size_t bom_shift = 0; >+ const char *encodings[] = { "UTF-32BE", >+ "UTF-32LE", >+ "UTF-16BE", >+ "UTF-16LE", >+ "UTF-8" }; > > TRACE_FLOW_ENTRY(); > >+ do { >+ to_read = how_much_to_read(file_ctx->file_stats.st_size - total_read, >+ ICONV_BUFFER - in_buffer); >+ >+ TRACE_INFO_NUMBER("About to read", to_read); >+ errno = 0; >+ read_cnt = read(raw_file, read_buf + in_buffer, to_read); >+ if (read_cnt == -1) { >+ error = errno; >+ close(raw_file); read can fail on second round of loop do-while and conv handle will not be closed. >+ TRACE_ERROR_NUMBER("Failed to read data from file", error); >+ return error; >+ } >+ >+ if (read_cnt != to_read) { >+ error = EIO; >+ close(raw_file); The same situation is here. >+ TRACE_ERROR_NUMBER("Read less than required", error); >+ return error; >+ } >+ >+ /* First time do some initialization */ >+ if(total_read == 0) { ^^^ missing space >+ TRACE_INFO_STRING("Reading first time.","Checking BOM"); >+ >+ ind = check_bom(ind, (unsigned char *)read_buf, read_cnt, >&bom_shift); ^^^^^^^^^^^ line length > 80 >+ >+ TRACE_INFO_STRING("Converting to", encodings[INDEX_UTF8]); >+ TRACE_INFO_STRING("Converting from", encodings[ind]); >+ >+ errno = 0; >+ conv = iconv_open(encodings[INDEX_UTF8], encodings[ind]); >+ if (conv == (iconv_t) -1) { >+ error = errno; >+ close(raw_file); >+ TRACE_ERROR_NUMBER("Failed to create converter", error); >+ return error; >+ } >+ } ^^^^^ I would prefer to have initialisation >>> if(total_read == 0) <<< before the outermost do while, because it generate coverity warning. Overwriting "conv" in "conv = iconv_open(encodings[4], encodings[ind])" leaks the storage that "conv" points to. I know that it is a fall positive, but code will be simpler. >+ total_read += read_cnt; >+ TRACE_INFO_NUMBER("Total read", total_read); >+ >+ do { >+ /* Do conversion */ >+ errno = 0; >+ src = read_buf + bom_shift; >+ dest = result_buf; >+ to_convert = read_cnt + in_buffer - bom_shift; >+ bom_shift = 0; >+ room_left = ICONV_BUFFER; >+ conv_res = iconv(conv, &src, &to_convert, &dest, &room_left); >+ if (conv == (iconv_t) -1) { ^^^^ conv_res should be checked and it has type size_t >+ error = errno; >+ switch(error) { >+ case EILSEQ: >+ TRACE_ERROR_NUMBER("Invalid multibyte encoding", error); >+ close(raw_file); >+ iconv_close(conv); >+ return error; >+ case EINVAL: >+ /* We need to just read more if we can */ >+ if (total_read != file_ctx->file_stats.st_size) { >+ TRACE_INFO_STRING("Incomplete sequence.", ""); >+ TRACE_INFO_NUMBER("File size.", >+ file_ctx->file_stats.st_size); >+ memmove(read_buf, src, to_convert); >+ in_buffer = to_convert; >+ } >+ else { >+ /* Or return error if we can't */ >+ TRACE_ERROR_NUMBER("Incomplete sequence", error); >+ close(raw_file); >+ iconv_close(conv); >+ return error; >+ } ^^^^^^^^^ missing break. Is it intentional? http://www.freeipa.org/page/Coding_Style#Switch /* FALLTHROUGH */ >+ case E2BIG: >+ TRACE_INFO_STRING("No room in the output buffer.", ""); >+ error = simplebuffer_add_raw(file_ctx->file_data, >+ result_buf, >+ ICONV_BUFFER - room_left, >+ ICONV_BUFFER); >+ if (error) { >+ TRACE_ERROR_NUMBER("Failed to store converted bytes", >+ error); >+ close(raw_file); >+ iconv_close(conv); >+ return error; >+ } >+ >+ room_left = ICONV_BUFFER; >+ dest = result_buf; >+ continue; >+ default: >+ TRACE_ERROR_NUMBER("Unexpected internal error", >+ error); >+ close(raw_file); >+ iconv_close(conv); >+ return ENOTSUP; >+ } >+ } >+ /* The whole buffer was sucessfully converted */ >+ error = simplebuffer_add_raw(file_ctx->file_data, >+ result_buf, >+ ICONV_BUFFER - room_left, >+ ICONV_BUFFER); >+ if (error) { >+ TRACE_ERROR_NUMBER("Failed to store converted bytes", >+ error); >+ close(raw_file); >+ iconv_close(conv); >+ return error; >+ } >+ TRACE_INFO_STRING("Saved procesed portion.", >+ (char >*)simplebuffer_get_vbuf(file_ctx->file_data)); >+ >+ in_buffer = 0; >+ break; >+ } >+ while (1); >+ } >+ while(total_read < file_ctx->file_stats.st_size); >+ >+ close(raw_file); >+ iconv_close(conv); >+ > /* Open file */ >- TRACE_INFO_STRING("File", file_ctx->filename); >+ TRACE_INFO_STRING("File data", >+ (char *)simplebuffer_get_vbuf(file_ctx->file_data)); >+ TRACE_INFO_NUMBER("File len", >+ simplebuffer_get_len(file_ctx->file_data)); >+ TRACE_INFO_NUMBER("Size", >+ file_ctx->file_data->size); > errno = 0; >- file_ctx->file = fopen(file_ctx->filename, "r"); >+ file_ctx->file = fmemopen(simplebuffer_get_vbuf(file_ctx->file_data), >+ simplebuffer_get_len(file_ctx->file_data), >+ "r"); > if (!(file_ctx->file)) { > error = errno; > TRACE_ERROR_NUMBER("Failed to open file", error); > return error; > } > >- file_ctx->stats_read = 0; >+ TRACE_FLOW_EXIT(); >+ return EOK; >+} >+ >+ >+/* Internal common initialization part */ >+static int common_file_init(struct ini_cfgfile *file_ctx) >+{ >+ int error = EOK; >+ int raw_file = 0; >+ int stat_ret = 0; >+ >+ TRACE_FLOW_ENTRY(); >+ >+ TRACE_INFO_STRING("File", file_ctx->filename); >+ >+ /* Open file in binary mode first */ >+ errno = 0; >+ raw_file = open(file_ctx->filename, O_RDONLY); >+ if (raw_file == -1) { >+ error = errno; >+ TRACE_ERROR_NUMBER("Failed to open file in binary mode", error); >+ return error; >+ } >+ >+ /* Get the size of the file */ >+ errno = 0; >+ stat_ret = fstat(raw_file, &(file_ctx->file_stats)); >+ if (stat_ret == -1) { >+ error = errno; >+ TRACE_ERROR_NUMBER("Failed to get file stats", error); Resource leak Handle variable "raw_file" going out of scope leaks the handle. >+ return error; >+ } > >- /* Collect stats */ >- if (file_ctx->metadata_flags & INI_META_STATS) { >+ /* Trick to overcome the fact that >+ * fopen and fmemopen behave differently when file >+ * is 0 length >+ */ >+ if (file_ctx->file_stats.st_size) { >+ error = common_file_convert(raw_file, file_ctx); >+ if (error) { >+ TRACE_ERROR_NUMBER("Failed to convert file", >+ error); >+ close(raw_file); >+ return error; >+ } >+ } >+ else { > errno = 0; >- if (fstat(fileno(file_ctx->file), >- &(file_ctx->file_stats)) < 0) { >+ file_ctx->file = fdopen(raw_file, "r"); >+ if (!(file_ctx->file)) { > error = errno; >- TRACE_ERROR_NUMBER("Failed to get file stats.", error); >+ TRACE_ERROR_NUMBER("Failed to fdopen file", error); > return error; > } >+ >+ } >+ >+ /* Collect stats */ >+ if (file_ctx->metadata_flags & INI_META_STATS) { > file_ctx->stats_read = 1; > } >- else memset(&(file_ctx->file_stats), 0, sizeof(struct stat)); >+ else { >+ memset(&(file_ctx->file_stats), 0, sizeof(struct stat)); >+ file_ctx->stats_read = 0; >+ } > > TRACE_FLOW_EXIT(); > return EOK; >@@ -119,6 +396,15 @@ int ini_config_file_open(const char *filename, > > new_ctx->filename = NULL; > new_ctx->file = NULL; >+ new_ctx->file_data = NULL; >+ >+ error = simplebuffer_alloc(&(new_ctx->file_data)); >+ if (error) { >+ TRACE_ERROR_NUMBER("Failed to allocate buffer ctx.", error); >+ ini_config_file_destroy(new_ctx); >+ return error; >+ >+ } > > /* Store flags */ > new_ctx->metadata_flags = metadata_flags; >@@ -176,6 +462,15 @@ int ini_config_file_reopen(struct ini_cfgfile >*file_ctx_in, > } > > new_ctx->file = NULL; >+ new_ctx->file_data = NULL; >+ >+ error = simplebuffer_alloc(&(new_ctx->file_data)); >+ if (error) { >+ TRACE_ERROR_NUMBER("Failed to allocate buffer ctx.", error); >+ ini_config_file_destroy(new_ctx); It is better to initialize new_ctx with memset or initialize new_ctx->filename to NULL, because ini_config_file_destroy can access to uninitialized variable. >+ return error; >+ >+ } > > /* Store flags */ > new_ctx->metadata_flags = file_ctx_in->metadata_flags; >-- >1.7.1 > >From e4fcd1ba0f86675e6fe6455c80d5215b821af973 Mon Sep 17 00:00:00 2001 >From: Dmitri Pal <d...@redhat.com> >Date: Wed, 9 Oct 2013 23:29:22 -0400 >Subject: [PATCH 1/4] [INI] Expose buffer context as void > >In some cases the intenal buffer needs to be accessed as void. >Add a function that would do that instead of type casting >and dealing with 'const'. >--- > basicobjects/simplebuffer.c | 7 +++++++ > basicobjects/simplebuffer.h | 4 ++++ > 2 files changed, 11 insertions(+), 0 deletions(-) > ACK >From b48891b2753bd8d26f71f6cbb18b28bd22ece9cc Mon Sep 17 00:00:00 2001 >From: Dmitri Pal <d...@redhat.com> >Date: Thu, 10 Oct 2013 22:51:22 -0400 >Subject: [PATCH 4/4] [INI] Updated unit test for UTF8 conversion > >Patch adds new test files and bom creation function. >--- > ini/ini.d/real16be.conf | Bin 0 -> 3288 bytes > ini/ini.d/real16le.conf | Bin 0 -> 3288 bytes > ini/ini.d/real32be.conf | Bin 0 -> 6576 bytes > ini/ini.d/real32le.conf | Bin 0 -> 6576 bytes > ini/ini.d/real8.conf | 70 +++++++++++++++++++++++++++++++++++++++++++++++ > ini/ini_parse_ut.c | 35 +++++++++++++++++++++++ > 6 files changed, 105 insertions(+), 0 deletions(-) > create mode 100644 ini/ini.d/real16be.conf > create mode 100644 ini/ini.d/real16le.conf > create mode 100644 ini/ini.d/real32be.conf > create mode 100644 ini/ini.d/real32le.conf > create mode 100644 ini/ini.d/real8.conf > The are some trailing whitespaces in real8.conf, but it seems like intentional. ACK >From f0c79c9091d9165d60aaa030d09fe63450215c5d Mon Sep 17 00:00:00 2001 >From: Dmitri Pal <d...@redhat.com> >Date: Wed, 9 Oct 2013 23:31:49 -0400 >Subject: [PATCH 2/4] [INI] Extend internal file handle > >When we need to decode the file we need to keep it >in memory so we need to have a buffer. >Adding simple buffer into the internal structure. >--- > ini/ini_config_priv.h | 3 +++ > 1 files changed, 3 insertions(+), 0 deletions(-) > >diff --git a/ini/ini_config_priv.h b/ini/ini_config_priv.h >index 89ad8a9..e1292f7 100644 >--- a/ini/ini_config_priv.h >+++ b/ini/ini_config_priv.h >@@ -26,6 +26,7 @@ > #include <sys/stat.h> > #include <unistd.h> > #include "collection.h" >+#include "simplebuffer.h" > #include "ini_comment.h" > > /* Configuration object */ >@@ -71,6 +72,8 @@ struct ini_cfgfile { > struct stat file_stats; > /* Were stats read ? */ > int stats_read; >+ /* Internal buffer */ >+ struct simplebuffer *file_data; > }; > > /* Parsing error */ ACK LS _______________________________________________ sssd-devel mailing list sssd-devel@lists.fedorahosted.org https://lists.fedorahosted.org/mailman/listinfo/sssd-devel