On (17/10/13 12:56), Lukas Slebodnik wrote: >On (10/10/13 23:03), Dmitri Pal wrote: >>Hello, >> >>The attached patches address ticket >>https://fedorahosted.org/sssd/ticket/2106 >>The patches do not define new flags as suggested in the ticket. >>As we discussed with Nalin we try to detect the encoding based on the >>BOM and do the right thing without flags. >>If we see that this is not enough we will add flags but that would add a >>new function so such change would be destined for a bigger release. >>For now I think the use case is addressed. If we see the need to other >>use cases we will open other tickets. >> >>The patch depends on the previous set of patches that deal with C-Style >>comments. >> >>I also found some other unrelated issue that I logged as a separate ticket. >>https://fedorahosted.org/sssd/ticket/2119 >> >>-- >>Thank you, >>Dmitri Pal >> >>Sr. Engineering Manager for IdM portfolio >>Red Hat Inc. >> >> >>------------------------------- >>Looking to carve out IT costs? >>www.redhat.com/carveoutcosts/ >> >> >> > >>From ee1988301ba164a12263b7faf8c9bf686174deab Mon Sep 17 00:00:00 2001 >>From: Dmitri Pal <d...@redhat.com> >>Date: Thu, 10 Oct 2013 22:47:23 -0400 >>Subject: [PATCH 3/4] [INI] Convert files to UTF >> >>Patch adds functionality that detects BOM >>at the beginning of the file and uses >>it to convert to UTF8. >>If no BOM is detected file is assumed to be UTF8 >>encoded. >>--- >> ini/ini_fileobj.c | 317 >> +++++++++++++++++++++++++++++++++++++++++++++++++++-- >> 1 files changed, 306 insertions(+), 11 deletions(-) >> >>diff --git a/ini/ini_fileobj.c b/ini/ini_fileobj.c >>index 7ac9dc6..000d36e 100644 >>--- a/ini/ini_fileobj.c >>+++ b/ini/ini_fileobj.c >>@@ -21,14 +21,29 @@ >> >> #include "config.h" >> #include <errno.h> >>+#include <sys/types.h> >>+#include <sys/stat.h> >>+#include <fcntl.h> >> #include <string.h> >> #include <stdlib.h> >>+#include <iconv.h> >> #include "trace.h" >> #include "ini_defines.h" >> #include "ini_configobj.h" >> #include "ini_config_priv.h" >> #include "path_utils.h" >> >>+#define ICONV_BUFFER 100 >>+ >>+#define BOM4_SIZE 4 >>+#define BOM3_SIZE 3 >>+#define BOM2_SIZE 2 >>+ >>+#define INDEX_UTF32BE 0 >>+#define INDEX_UTF32LE 1 >>+#define INDEX_UTF16BE 2 >>+#define INDEX_UTF16LE 3 >>+#define INDEX_UTF8 4 I forgot to write one comment in the previous mail. Could we use enum for INDEX_* instead of constants?
>> >> /* Close file but not destroy the object */ >> void ini_config_file_close(struct ini_cfgfile *file_ctx) >>@@ -52,6 +67,7 @@ void ini_config_file_destroy(struct ini_cfgfile *file_ctx) >> >> if(file_ctx) { >> free(file_ctx->filename); >>+ simplebuffer_free(file_ctx->file_data); >> if(file_ctx->file) fclose(file_ctx->file); >> free(file_ctx); >> } >>@@ -59,37 +75,298 @@ void ini_config_file_destroy(struct ini_cfgfile >>*file_ctx) >> TRACE_FLOW_EXIT(); >> } >> >>-/* Internal common initialization part */ >>-static int common_file_init(struct ini_cfgfile *file_ctx) >>+/* How much I plan to read? */ >>+size_t how_much_to_read(size_t left, size_t increment) >>+{ >>+ if(left > increment) return increment; >>+ else return left; >>+} >>+ >>+int check_bom(int ind, unsigned char *buffer, size_t len, size_t *bom_shift) >>+{ >>+ TRACE_FLOW_ENTRY(); >>+ >>+ if (len > BOM4_SIZE) { >>+ if ((buffer[0] == 0x00) && >>+ (buffer[1] == 0x00) && >>+ (buffer[2] == 0xFE) && >>+ (buffer[3] == 0xFF)) { >>+ TRACE_FLOW_RETURN(INDEX_UTF32BE); >>+ *bom_shift = BOM4_SIZE; >>+ return INDEX_UTF32BE; >>+ } >>+ else if ((buffer[0] == 0xFF) && >>+ (buffer[1] == 0xFE) && >>+ (buffer[2] == 0x00) && >>+ (buffer[3] == 0x00)) { >>+ TRACE_FLOW_RETURN(INDEX_UTF32LE); >>+ *bom_shift = BOM4_SIZE; >>+ return INDEX_UTF32LE; >>+ } >>+ } >>+ >>+ if (len > BOM3_SIZE) { >>+ if ((buffer[0] == 0xEF) && >>+ (buffer[1] == 0xBB) && >>+ (buffer[2] == 0xBF)) { >>+ TRACE_FLOW_RETURN(INDEX_UTF8); >>+ *bom_shift = BOM3_SIZE; >>+ return INDEX_UTF8; >>+ } >>+ } >>+ >>+ if (len > BOM2_SIZE) { >>+ if ((buffer[0] == 0xFE) && >>+ (buffer[1] == 0xFF)) { >>+ TRACE_FLOW_RETURN(INDEX_UTF16BE); >>+ *bom_shift = BOM2_SIZE; >>+ return INDEX_UTF16BE; >>+ } >>+ else if ((buffer[0] == 0xFF) && >>+ (buffer[1] == 0xFE)) { >>+ TRACE_FLOW_RETURN(INDEX_UTF16LE); >>+ *bom_shift = BOM2_SIZE; >>+ return INDEX_UTF16LE; >>+ } >>+ } >>+ >>+ TRACE_FLOW_RETURN(ind); >>+ return ind; >>+} >>+ >>+/* Internal conversion part */ >>+static int common_file_convert(int raw_file, struct ini_cfgfile *file_ctx) >> { >This function is quite long (174 LOC). >It would be better to split it into smaller functions. > >> int error = EOK; >>+ size_t read_cnt = 0; >>+ size_t total_read = 0; >>+ size_t to_read = 0; >>+ size_t in_buffer = 0; >>+ int ind = INDEX_UTF8; >>+ iconv_t conv = 0; >>+ size_t conv_res = 0; >>+ char read_buf[ICONV_BUFFER+1]; >>+ char result_buf[ICONV_BUFFER]; >>+ char *src, *dest; >>+ size_t to_convert = 0; >>+ size_t room_left = 0; >>+ size_t bom_shift = 0; >>+ const char *encodings[] = { "UTF-32BE", >>+ "UTF-32LE", >>+ "UTF-16BE", >>+ "UTF-16LE", >>+ "UTF-8" }; >> >> TRACE_FLOW_ENTRY(); >> >>+ do { >>+ to_read = how_much_to_read(file_ctx->file_stats.st_size - total_read, >>+ ICONV_BUFFER - in_buffer); >>+ >>+ TRACE_INFO_NUMBER("About to read", to_read); >>+ errno = 0; >>+ read_cnt = read(raw_file, read_buf + in_buffer, to_read); >>+ if (read_cnt == -1) { >>+ error = errno; >>+ close(raw_file); >read can fail on second round of loop do-while >and conv handle will not be closed. > >>+ TRACE_ERROR_NUMBER("Failed to read data from file", error); >>+ return error; >>+ } >>+ >>+ if (read_cnt != to_read) { >>+ error = EIO; >>+ close(raw_file); >The same situation is here. > >>+ TRACE_ERROR_NUMBER("Read less than required", error); >>+ return error; >>+ } >>+ >>+ /* First time do some initialization */ >>+ if(total_read == 0) { > ^^^ > missing space >>+ TRACE_INFO_STRING("Reading first time.","Checking BOM"); >>+ >>+ ind = check_bom(ind, (unsigned char *)read_buf, read_cnt, >>&bom_shift); > > ^^^^^^^^^^^ > line length > 80 >>+ >>+ TRACE_INFO_STRING("Converting to", encodings[INDEX_UTF8]); >>+ TRACE_INFO_STRING("Converting from", encodings[ind]); >>+ >>+ errno = 0; >>+ conv = iconv_open(encodings[INDEX_UTF8], encodings[ind]); >>+ if (conv == (iconv_t) -1) { >>+ error = errno; >>+ close(raw_file); >>+ TRACE_ERROR_NUMBER("Failed to create converter", error); >>+ return error; >>+ } >>+ } > ^^^^^ > I would prefer to have initialisation >>> if(total_read == 0) <<< > before the outermost do while, because it generate coverity warning. > Overwriting "conv" in "conv = iconv_open(encodings[4], > encodings[ind])" leaks the storage that "conv" points to. > I know that it is a fall positive, but code will be simpler. > >>+ total_read += read_cnt; >>+ TRACE_INFO_NUMBER("Total read", total_read); >>+ >>+ do { >>+ /* Do conversion */ >>+ errno = 0; >>+ src = read_buf + bom_shift; >>+ dest = result_buf; >>+ to_convert = read_cnt + in_buffer - bom_shift; >>+ bom_shift = 0; >>+ room_left = ICONV_BUFFER; >>+ conv_res = iconv(conv, &src, &to_convert, &dest, &room_left); >>+ if (conv == (iconv_t) -1) { > ^^^^ > conv_res should be checked and it has type size_t >>+ error = errno; >>+ switch(error) { >>+ case EILSEQ: >>+ TRACE_ERROR_NUMBER("Invalid multibyte encoding", error); >>+ close(raw_file); >>+ iconv_close(conv); >>+ return error; >>+ case EINVAL: >>+ /* We need to just read more if we can */ >>+ if (total_read != file_ctx->file_stats.st_size) { >>+ TRACE_INFO_STRING("Incomplete sequence.", ""); >>+ TRACE_INFO_NUMBER("File size.", >>+ file_ctx->file_stats.st_size); >>+ memmove(read_buf, src, to_convert); >>+ in_buffer = to_convert; >>+ } >>+ else { >>+ /* Or return error if we can't */ >>+ TRACE_ERROR_NUMBER("Incomplete sequence", error); >>+ close(raw_file); >>+ iconv_close(conv); >>+ return error; >>+ } > ^^^^^^^^^ > missing break. > Is it intentional? >http://www.freeipa.org/page/Coding_Style#Switch >/* FALLTHROUGH */ >>+ case E2BIG: >>+ TRACE_INFO_STRING("No room in the output buffer.", ""); >>+ error = simplebuffer_add_raw(file_ctx->file_data, >>+ result_buf, >>+ ICONV_BUFFER - room_left, >>+ ICONV_BUFFER); >>+ if (error) { >>+ TRACE_ERROR_NUMBER("Failed to store converted bytes", >>+ error); >>+ close(raw_file); >>+ iconv_close(conv); >>+ return error; >>+ } >>+ >>+ room_left = ICONV_BUFFER; >>+ dest = result_buf; >>+ continue; >>+ default: >>+ TRACE_ERROR_NUMBER("Unexpected internal error", >>+ error); >>+ close(raw_file); >>+ iconv_close(conv); >>+ return ENOTSUP; >>+ } >>+ } >>+ /* The whole buffer was sucessfully converted */ >>+ error = simplebuffer_add_raw(file_ctx->file_data, >>+ result_buf, >>+ ICONV_BUFFER - room_left, >>+ ICONV_BUFFER); >>+ if (error) { >>+ TRACE_ERROR_NUMBER("Failed to store converted bytes", >>+ error); >>+ close(raw_file); >>+ iconv_close(conv); >>+ return error; >>+ } >>+ TRACE_INFO_STRING("Saved procesed portion.", >>+ (char >>*)simplebuffer_get_vbuf(file_ctx->file_data)); >>+ >>+ in_buffer = 0; >>+ break; >>+ } >>+ while (1); >>+ } >>+ while(total_read < file_ctx->file_stats.st_size); >>+ >>+ close(raw_file); >>+ iconv_close(conv); >>+ >> /* Open file */ >>- TRACE_INFO_STRING("File", file_ctx->filename); >>+ TRACE_INFO_STRING("File data", >>+ (char *)simplebuffer_get_vbuf(file_ctx->file_data)); >>+ TRACE_INFO_NUMBER("File len", >>+ simplebuffer_get_len(file_ctx->file_data)); >>+ TRACE_INFO_NUMBER("Size", >>+ file_ctx->file_data->size); >> errno = 0; >>- file_ctx->file = fopen(file_ctx->filename, "r"); >>+ file_ctx->file = fmemopen(simplebuffer_get_vbuf(file_ctx->file_data), >>+ simplebuffer_get_len(file_ctx->file_data), >>+ "r"); >> if (!(file_ctx->file)) { >> error = errno; >> TRACE_ERROR_NUMBER("Failed to open file", error); >> return error; >> } >> >>- file_ctx->stats_read = 0; >>+ TRACE_FLOW_EXIT(); >>+ return EOK; >>+} >>+ >>+ >>+/* Internal common initialization part */ >>+static int common_file_init(struct ini_cfgfile *file_ctx) >>+{ >>+ int error = EOK; >>+ int raw_file = 0; >>+ int stat_ret = 0; >>+ >>+ TRACE_FLOW_ENTRY(); >>+ >>+ TRACE_INFO_STRING("File", file_ctx->filename); >>+ >>+ /* Open file in binary mode first */ >>+ errno = 0; >>+ raw_file = open(file_ctx->filename, O_RDONLY); >>+ if (raw_file == -1) { >>+ error = errno; >>+ TRACE_ERROR_NUMBER("Failed to open file in binary mode", error); >>+ return error; >>+ } >>+ >>+ /* Get the size of the file */ >>+ errno = 0; >>+ stat_ret = fstat(raw_file, &(file_ctx->file_stats)); >>+ if (stat_ret == -1) { >>+ error = errno; >>+ TRACE_ERROR_NUMBER("Failed to get file stats", error); >Resource leak >Handle variable "raw_file" going out of scope leaks the handle. >>+ return error; >>+ } >> >>- /* Collect stats */ >>- if (file_ctx->metadata_flags & INI_META_STATS) { >>+ /* Trick to overcome the fact that >>+ * fopen and fmemopen behave differently when file >>+ * is 0 length >>+ */ >>+ if (file_ctx->file_stats.st_size) { >>+ error = common_file_convert(raw_file, file_ctx); >>+ if (error) { >>+ TRACE_ERROR_NUMBER("Failed to convert file", >>+ error); >>+ close(raw_file); >>+ return error; >>+ } >>+ } >>+ else { >> errno = 0; >>- if (fstat(fileno(file_ctx->file), >>- &(file_ctx->file_stats)) < 0) { >>+ file_ctx->file = fdopen(raw_file, "r"); >>+ if (!(file_ctx->file)) { >> error = errno; >>- TRACE_ERROR_NUMBER("Failed to get file stats.", error); >>+ TRACE_ERROR_NUMBER("Failed to fdopen file", error); >> return error; >> } >>+ >>+ } >>+ >>+ /* Collect stats */ >>+ if (file_ctx->metadata_flags & INI_META_STATS) { >> file_ctx->stats_read = 1; >> } >>- else memset(&(file_ctx->file_stats), 0, sizeof(struct stat)); >>+ else { >>+ memset(&(file_ctx->file_stats), 0, sizeof(struct stat)); >>+ file_ctx->stats_read = 0; >>+ } >> >> TRACE_FLOW_EXIT(); >> return EOK; >>@@ -119,6 +396,15 @@ int ini_config_file_open(const char *filename, >> >> new_ctx->filename = NULL; >> new_ctx->file = NULL; >>+ new_ctx->file_data = NULL; >>+ >>+ error = simplebuffer_alloc(&(new_ctx->file_data)); >>+ if (error) { >>+ TRACE_ERROR_NUMBER("Failed to allocate buffer ctx.", error); >>+ ini_config_file_destroy(new_ctx); >>+ return error; >>+ >>+ } >> >> /* Store flags */ >> new_ctx->metadata_flags = metadata_flags; >>@@ -176,6 +462,15 @@ int ini_config_file_reopen(struct ini_cfgfile >>*file_ctx_in, >> } >> >> new_ctx->file = NULL; >>+ new_ctx->file_data = NULL; >>+ >>+ error = simplebuffer_alloc(&(new_ctx->file_data)); >>+ if (error) { >>+ TRACE_ERROR_NUMBER("Failed to allocate buffer ctx.", error); >>+ ini_config_file_destroy(new_ctx); >It is better to initialize new_ctx with memset or initialize >new_ctx->filename to NULL, because ini_config_file_destroy can access to >uninitialized variable. > >>+ return error; >>+ >>+ } >> >> /* Store flags */ >> new_ctx->metadata_flags = file_ctx_in->metadata_flags; >>-- >>1.7.1 >> > >>From e4fcd1ba0f86675e6fe6455c80d5215b821af973 Mon Sep 17 00:00:00 2001 >>From: Dmitri Pal <d...@redhat.com> >>Date: Wed, 9 Oct 2013 23:29:22 -0400 >>Subject: [PATCH 1/4] [INI] Expose buffer context as void >> >>In some cases the intenal buffer needs to be accessed as void. >>Add a function that would do that instead of type casting >>and dealing with 'const'. >>--- >> basicobjects/simplebuffer.c | 7 +++++++ >> basicobjects/simplebuffer.h | 4 ++++ >> 2 files changed, 11 insertions(+), 0 deletions(-) >> >ACK > >>From b48891b2753bd8d26f71f6cbb18b28bd22ece9cc Mon Sep 17 00:00:00 2001 >>From: Dmitri Pal <d...@redhat.com> >>Date: Thu, 10 Oct 2013 22:51:22 -0400 >>Subject: [PATCH 4/4] [INI] Updated unit test for UTF8 conversion >> >>Patch adds new test files and bom creation function. >>--- >> ini/ini.d/real16be.conf | Bin 0 -> 3288 bytes >> ini/ini.d/real16le.conf | Bin 0 -> 3288 bytes >> ini/ini.d/real32be.conf | Bin 0 -> 6576 bytes >> ini/ini.d/real32le.conf | Bin 0 -> 6576 bytes >> ini/ini.d/real8.conf | 70 >> +++++++++++++++++++++++++++++++++++++++++++++++ >> ini/ini_parse_ut.c | 35 +++++++++++++++++++++++ >> 6 files changed, 105 insertions(+), 0 deletions(-) >> create mode 100644 ini/ini.d/real16be.conf >> create mode 100644 ini/ini.d/real16le.conf >> create mode 100644 ini/ini.d/real32be.conf >> create mode 100644 ini/ini.d/real32le.conf >> create mode 100644 ini/ini.d/real8.conf >> > >The are some trailing whitespaces in real8.conf, but it seems like intentional. > >ACK > >>From f0c79c9091d9165d60aaa030d09fe63450215c5d Mon Sep 17 00:00:00 2001 >>From: Dmitri Pal <d...@redhat.com> >>Date: Wed, 9 Oct 2013 23:31:49 -0400 >>Subject: [PATCH 2/4] [INI] Extend internal file handle >> >>When we need to decode the file we need to keep it >>in memory so we need to have a buffer. >>Adding simple buffer into the internal structure. >>--- >> ini/ini_config_priv.h | 3 +++ >> 1 files changed, 3 insertions(+), 0 deletions(-) >> >>diff --git a/ini/ini_config_priv.h b/ini/ini_config_priv.h >>index 89ad8a9..e1292f7 100644 >>--- a/ini/ini_config_priv.h >>+++ b/ini/ini_config_priv.h >>@@ -26,6 +26,7 @@ >> #include <sys/stat.h> >> #include <unistd.h> >> #include "collection.h" >>+#include "simplebuffer.h" >> #include "ini_comment.h" >> >> /* Configuration object */ >>@@ -71,6 +72,8 @@ struct ini_cfgfile { >> struct stat file_stats; >> /* Were stats read ? */ >> int stats_read; >>+ /* Internal buffer */ >>+ struct simplebuffer *file_data; >> }; >> >> /* Parsing error */ > >ACK > >LS >_______________________________________________ >sssd-devel mailing list >sssd-devel@lists.fedorahosted.org >https://lists.fedorahosted.org/mailman/listinfo/sssd-devel _______________________________________________ sssd-devel mailing list sssd-devel@lists.fedorahosted.org https://lists.fedorahosted.org/mailman/listinfo/sssd-devel