Address -Wconversion for string/byte lengths. For text lengths, Unicode code point counts, and sometimes arbitrary byte lengths: add casts and address potential overflow issues with checks.
Project: http://git-wip-us.apache.org/repos/asf/lucy/repo Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/a4b0b3b2 Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/a4b0b3b2 Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/a4b0b3b2 Branch: refs/heads/master Commit: a4b0b3b252f4bf253039756bad02b2fe80077114 Parents: 5ba1525 Author: Marvin Humphrey <mar...@rectangular.com> Authored: Mon May 2 17:08:32 2016 -0700 Committer: Marvin Humphrey <mar...@rectangular.com> Committed: Wed May 4 19:21:37 2016 -0700 ---------------------------------------------------------------------- core/Lucy/Analysis/Normalizer.c | 18 +++++++++++++----- core/Lucy/Analysis/SnowballStemmer.c | 11 +++++++++-- core/Lucy/Analysis/StandardTokenizer.c | 10 ++++++---- core/Lucy/Highlight/Highlighter.c | 4 ++-- core/Lucy/Index/HighlightWriter.c | 8 ++++---- core/Lucy/Index/Posting/MatchPosting.c | 2 +- core/Lucy/Plan/TextType.c | 6 +++--- core/Lucy/Util/Json.c | 4 ++-- core/Lucy/Util/MemoryPool.c | 2 +- 9 files changed, 41 insertions(+), 24 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucy/blob/a4b0b3b2/core/Lucy/Analysis/Normalizer.c ---------------------------------------------------------------------- diff --git a/core/Lucy/Analysis/Normalizer.c b/core/Lucy/Analysis/Normalizer.c index 4258374..d569141 100644 --- a/core/Lucy/Analysis/Normalizer.c +++ b/core/Lucy/Analysis/Normalizer.c @@ -81,7 +81,7 @@ Normalizer_Transform_IMP(Normalizer *self, Inversion *inversion) { TokenIVARS *const token_ivars = Token_IVARS(token); ssize_t len = utf8proc_decompose((uint8_t*)token_ivars->text, - token_ivars->len, buffer, bufsize, + (ssize_t)token_ivars->len, buffer, bufsize, ivars->options); if (len > bufsize) { @@ -91,9 +91,13 @@ Normalizer_Transform_IMP(Normalizer *self, Inversion *inversion) { } // allocate additional INITIAL_BUFSIZE items bufsize = len + INITIAL_BUFSIZE; - buffer = (int32_t*)MALLOCATE((bufsize + 
1) * sizeof(int32_t)); + if ((size_t)bufsize >= SIZE_MAX / sizeof(int32_t) - sizeof(int32_t)) { + THROW(ERR, "Requested bufsize too large: %u64", + (uint64_t)bufsize); + } + buffer = (int32_t*)MALLOCATE(((size_t)bufsize + 1) * sizeof(int32_t)); len = utf8proc_decompose((uint8_t*)token_ivars->text, - token_ivars->len, buffer, bufsize, + (ssize_t)token_ivars->len, buffer, bufsize, ivars->options); } if (len < 0) { @@ -104,11 +108,15 @@ Normalizer_Transform_IMP(Normalizer *self, Inversion *inversion) { if (len >= 0) { if (len > (ssize_t)token_ivars->len) { + if (len >= INT32_MAX - 1) { + THROW(ERR, "Normalized result over 2 GB: %u64", + (uint64_t)len); + } FREEMEM(token_ivars->text); - token_ivars->text = (char*)MALLOCATE(len + 1); + token_ivars->text = (char*)MALLOCATE((size_t)len + 1); } memcpy(token_ivars->text, buffer, len + 1); - token_ivars->len = len; + token_ivars->len = (size_t)len; } } http://git-wip-us.apache.org/repos/asf/lucy/blob/a4b0b3b2/core/Lucy/Analysis/SnowballStemmer.c ---------------------------------------------------------------------- diff --git a/core/Lucy/Analysis/SnowballStemmer.c b/core/Lucy/Analysis/SnowballStemmer.c index 982e4c1..8e6870c 100644 --- a/core/Lucy/Analysis/SnowballStemmer.c +++ b/core/Lucy/Analysis/SnowballStemmer.c @@ -79,9 +79,16 @@ SnowStemmer_Transform_IMP(SnowballStemmer *self, Inversion *inversion) { TokenIVARS *const token_ivars = Token_IVARS(token); const sb_symbol *stemmed_text = sb_stemmer_stem(snowstemmer, (sb_symbol*)token_ivars->text, - token_ivars->len); - size_t len = sb_stemmer_length(snowstemmer); + (int)token_ivars->len); + int length = sb_stemmer_length(snowstemmer); + if (length < 0) { + THROW(ERR, "Unexpected value for sb_stemmer_length: %d", length); + } + size_t len = (size_t)length; if (len > token_ivars->len) { + if (len >= INT32_MAX - 1) { + THROW(ERR, "String over 2Gb: %u64", (uint64_t)len); + } FREEMEM(token_ivars->text); token_ivars->text = (char*)MALLOCATE(len + 1); } 
http://git-wip-us.apache.org/repos/asf/lucy/blob/a4b0b3b2/core/Lucy/Analysis/StandardTokenizer.c ---------------------------------------------------------------------- diff --git a/core/Lucy/Analysis/StandardTokenizer.c b/core/Lucy/Analysis/StandardTokenizer.c index 4a90700..012d428 100644 --- a/core/Lucy/Analysis/StandardTokenizer.c +++ b/core/Lucy/Analysis/StandardTokenizer.c @@ -147,9 +147,10 @@ S_parse_single(const char *text, size_t len, lucy_StringIter *iter, lucy_StringIter start = *iter; int wb = S_skip_extend_format(text, len, iter); - Token *token = Token_new(text + start.byte_pos, - iter->byte_pos - start.byte_pos, - start.char_pos, iter->char_pos, 1.0f, 1); + Token *token + = Token_new(text + start.byte_pos, iter->byte_pos - start.byte_pos, + (uint32_t)start.char_pos, (uint32_t)iter->char_pos, + 1.0f, 1); Inversion_Append(inversion, token); return wb; @@ -251,7 +252,8 @@ S_parse_word(const char *text, size_t len, lucy_StringIter *iter, Token *token; word_break: token = Token_new(text + start.byte_pos, end.byte_pos - start.byte_pos, - start.char_pos, end.char_pos, 1.0f, 1); + (uint32_t)start.char_pos, (uint32_t)end.char_pos, + 1.0f, 1); Inversion_Append(inversion, token); return wb; http://git-wip-us.apache.org/repos/asf/lucy/blob/a4b0b3b2/core/Lucy/Highlight/Highlighter.c ---------------------------------------------------------------------- diff --git a/core/Lucy/Highlight/Highlighter.c b/core/Lucy/Highlight/Highlighter.c index 875d019..6db17dd 100644 --- a/core/Lucy/Highlight/Highlighter.c +++ b/core/Lucy/Highlight/Highlighter.c @@ -389,10 +389,10 @@ Highlighter_Raw_Excerpt_IMP(Highlighter *self, String *field_val, // around the hottest point in the field, start the fragment at the // beginning. 
start = 0; - max_skip = best_location; + max_skip = (uint32_t)best_location; } else { - start = best_location - ivars->slop; + start = best_location - (int32_t)ivars->slop; max_skip = ivars->slop; StrIter_Advance(top, (size_t)start); } http://git-wip-us.apache.org/repos/asf/lucy/blob/a4b0b3b2/core/Lucy/Index/HighlightWriter.c ---------------------------------------------------------------------- diff --git a/core/Lucy/Index/HighlightWriter.c b/core/Lucy/Index/HighlightWriter.c index b580dbc..64b1a4c 100644 --- a/core/Lucy/Index/HighlightWriter.c +++ b/core/Lucy/Index/HighlightWriter.c @@ -156,9 +156,9 @@ HLWriter_TV_Buf_IMP(HighlightWriter *self, Inversion *inversion) { while ((tokens = Inversion_Next_Cluster(inversion, &freq)) != NULL) { Token *token = *tokens; char *const token_text = Token_Get_Text(token); - const int32_t token_len = Token_Get_Len(token); + const size_t token_len = Token_Get_Len(token); - int32_t overlap = StrHelp_overlap(last_text, token_text, + size_t overlap = StrHelp_overlap(last_text, token_text, last_len, token_len); char *ptr; char *orig; @@ -179,8 +179,8 @@ HLWriter_TV_Buf_IMP(HighlightWriter *self, Inversion *inversion) { num_postings += 1; // Append the string diff to the tv_buf. 
- NumUtil_encode_ci32(overlap, &ptr); - NumUtil_encode_ci32((token_len - overlap), &ptr); + NumUtil_encode_ci32((int32_t)overlap, &ptr); + NumUtil_encode_ci32((int32_t)(token_len - overlap), &ptr); memcpy(ptr, (token_text + overlap), (token_len - overlap)); ptr += token_len - overlap; http://git-wip-us.apache.org/repos/asf/lucy/blob/a4b0b3b2/core/Lucy/Index/Posting/MatchPosting.c ---------------------------------------------------------------------- diff --git a/core/Lucy/Index/Posting/MatchPosting.c b/core/Lucy/Index/Posting/MatchPosting.c index c1530ea..a428fac 100644 --- a/core/Lucy/Index/Posting/MatchPosting.c +++ b/core/Lucy/Index/Posting/MatchPosting.c @@ -133,7 +133,7 @@ MatchPost_Add_Inversion_To_Pool_IMP(MatchPosting *self, Inversion_Reset(inversion); while ((tokens = Inversion_Next_Cluster(inversion, &freq)) != NULL) { TokenIVARS *const token_ivars = Token_IVARS(*tokens); - uint32_t raw_post_bytes + size_t raw_post_bytes = MAX_RAW_POSTING_LEN(base_size, token_ivars->len); RawPosting *raw_posting = RawPost_new(MemPool_Grab(mem_pool, raw_post_bytes), doc_id, http://git-wip-us.apache.org/repos/asf/lucy/blob/a4b0b3b2/core/Lucy/Plan/TextType.c ---------------------------------------------------------------------- diff --git a/core/Lucy/Plan/TextType.c b/core/Lucy/Plan/TextType.c index 318fa47..5a3e323 100644 --- a/core/Lucy/Plan/TextType.c +++ b/core/Lucy/Plan/TextType.c @@ -129,13 +129,13 @@ TextTermStepper_Write_Delta_IMP(TextTermStepper *self, OutStream *outstream, } // Count how many bytes the strings share at the top. - const int32_t overlap = StrHelp_overlap(last_text, new_text, - last_size, new_size); + const size_t overlap = StrHelp_overlap(last_text, new_text, + last_size, new_size); const char *const diff_start_str = new_text + overlap; const size_t diff_len = new_size - overlap; // Write number of common bytes and common bytes. 
- OutStream_Write_CI32(outstream, overlap); + OutStream_Write_CI32(outstream, (int32_t)overlap); OutStream_Write_String(outstream, diff_start_str, diff_len); // Update value. http://git-wip-us.apache.org/repos/asf/lucy/blob/a4b0b3b2/core/Lucy/Util/Json.c ---------------------------------------------------------------------- diff --git a/core/Lucy/Util/Json.c b/core/Lucy/Util/Json.c index c74f64f..6c7473d 100644 --- a/core/Lucy/Util/Json.c +++ b/core/Lucy/Util/Json.c @@ -566,7 +566,7 @@ S_parse_string(const char **json_ptr, const char *limit) { } else { // Optimize common case where there are no escapes. - size_t len = end - top; + size_t len = (size_t)(end - top); if (!StrHelp_utf8_valid(top, len)) { String *mess = MAKE_MESS("Bad UTF-8 in JSON"); Err_set_error(Err_new(mess)); @@ -582,7 +582,7 @@ S_unescape_text(const char *top, const char *end) { // because only a \u escape can theoretically be too long and // StrHelp_encode_utf8_char guards against sequences over 4 bytes. // Therefore we can allocate once and not worry about reallocating. - size_t cap = end - top + 1; + size_t cap = (size_t)(end - top) + 1; char *target_buf = (char*)MALLOCATE(cap); size_t target_size = 0; for (const char *text = top; text < end; text++) { http://git-wip-us.apache.org/repos/asf/lucy/blob/a4b0b3b2/core/Lucy/Util/MemoryPool.c ---------------------------------------------------------------------- diff --git a/core/Lucy/Util/MemoryPool.c b/core/Lucy/Util/MemoryPool.c index d1d394b..ac89873 100644 --- a/core/Lucy/Util/MemoryPool.c +++ b/core/Lucy/Util/MemoryPool.c @@ -126,7 +126,7 @@ MemPool_Grab_IMP(MemoryPool *self, size_t amount) { void MemPool_Resize_IMP(MemoryPool *self, void *ptr, size_t new_amount) { MemoryPoolIVARS *const ivars = MemPool_IVARS(self); - const size_t last_amount = ivars->buf - ivars->last_buf; + const size_t last_amount = (size_t)(ivars->buf - ivars->last_buf); INCREASE_TO_WORD_MULTIPLE(new_amount); if (ptr != ivars->last_buf) {