Changeset: 9e4cf96b0ccc for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/9e4cf96b0ccc
Modified Files:
        monetdb5/modules/mal/txtsim.c
Branch: default
Log Message:

size_t is really not needed that much (most internal string functions return int);
also, the popcount returns an int.


diffs (112 lines):

diff --git a/monetdb5/modules/mal/txtsim.c b/monetdb5/modules/mal/txtsim.c
--- a/monetdb5/modules/mal/txtsim.c
+++ b/monetdb5/modules/mal/txtsim.c
@@ -414,14 +414,14 @@ typedef struct {
        BUN o;               /* position in the BAT */
        str val;             /* string value */
        int *cp_sequence;    /* string as array of Unicode codepoints */
-       size_t len;          /* string length in characters (multi-byte 
characters count as 1)*/
-       size_t cp_seq_len;   /* string length in bytes */
+       int len;          /* string length in characters (multi-byte characters 
count as 1)*/
+       int cp_seq_len;   /* string length in bytes */
        uint64_t abm;        /* 64bit alphabet bitmap */
-       size_t abm_popcount; /* hamming weight of abm */
+       int abm_popcount; /* hamming weight of abm */
 } str_item;
 
 static inline
-size_t _popcount64(uint64_t x) {
+int _popcount64(uint64_t x) {
        x = (x & 0x5555555555555555ULL) + ((x >> 1) & 0x5555555555555555ULL);
        x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL);
        x = (x & 0x0F0F0F0F0F0F0F0FULL) + ((x >> 4) & 0x0F0F0F0F0F0F0F0FULL);
@@ -429,7 +429,7 @@ size_t _popcount64(uint64_t x) {
 }
 
 static inline
-size_t popcount64(uint64_t x) {
+int popcount64(uint64_t x) {
        return _popcount64(x);
        /* __builtin_popcountll is the gcc builtin
         * It is fast as long as the hardware
@@ -453,7 +453,7 @@ str_2_codepointseq(str_item *s)
        if (s->cp_sequence == NULL)
                throw(MAL, "str_2_byteseq", SQLSTATE(HY013) MAL_MALLOC_FAIL);
 
-       for (size_t i = 0; i < s->len; i++) {
+       for (int i = 0; i < s->len; i++) {
                UTF8_GETCHAR(c, p);
                if (c == 0)
                        break;
@@ -465,8 +465,9 @@ illegal:
 }
 
 static void
-str_alphabet_bitmap(str_item *s) {
-       size_t i;
+str_alphabet_bitmap(str_item *s)
+{
+       int i;
 
        s->abm = 0ULL;
        for (i=0; i < s->len; i++) {
@@ -482,7 +483,7 @@ jarowinkler_lp(const str_item *a, const 
 
        /* calculate common string prefix up to prefixlen chars */
        l = 0;
-       for (size_t i = 0; i < MIN3(a->len, b->len, JARO_WINKLER_PREFIX_LEN); 
i++)
+       for (int i = 0; i < MIN3(a->len, b->len, JARO_WINKLER_PREFIX_LEN); i++)
                l += (a->cp_sequence[i] == b->cp_sequence[i]);
 
        return (double)l * JARO_WINKLER_SCALING_FACTOR;
@@ -671,11 +672,11 @@ TXTSIMminjarowinkler(bit *res, str *x, s
        do {                                                                    
        \
                for (n=0;n<lci.ncand;n++) {                     \
                        ssl[n].len = UTF8_strlen(ssl[n].val);\
-                       ssl[n].cp_seq_len = strlen(ssl[n].val);\
+                       ssl[n].cp_seq_len = str_strlen(ssl[n].val);\
                }                                                               
                \
                for (n=0;n<rci.ncand;n++) {                     \
                        ssr[n].len = UTF8_strlen(ssr[n].val);\
-                       ssr[n].cp_seq_len = strlen(ssr[n].val);\
+                       ssr[n].cp_seq_len = str_strlen(ssr[n].val);\
                }                                                               
                \
        } while (false)
 
@@ -743,10 +744,10 @@ TXTSIMminjarowinkler(bit *res, str *x, s
 
 static inline int
 maxlevenshtein_extcol_stritem(const str_item *si1, const str_item *si2, 
unsigned int *column, const int k) {
-       unsigned int x, y, lastdiag, olddiag;
-       int c1, c2;
+       unsigned int lastdiag, olddiag;
+       int c1, c2, x, y;
        unsigned int min;
-       size_t s1len = si1->len, s2len = si2->len;
+       int s1len = si1->len, s2len = si2->len;
        int *s1 = si1->cp_sequence, *s2 = si2->cp_sequence;
        /* first test if the strings are equal */
        if (s1len == s2len) {
@@ -896,10 +897,10 @@ fail:
 }
 
 static inline void
-jarowinklerrangebounds(size_t *lb, size_t *ub, const str_item *a, const double 
lp, const double threshold)
+jarowinklerrangebounds(int *lb, int *ub, const str_item *a, const double lp, 
const double threshold)
 {
-       *lb = (size_t)floor(3.0 * a->len * (threshold - lp) / (1.0 - lp) - (2.0 
* a->len));
-       *ub = (size_t)ceil(a->len / ((3.0 * (threshold - lp) / (1.0 - lp)) - 
2.0 ));
+       *lb = (int)floor(3.0 * a->len * (threshold - lp) / (1.0 - lp) - (2.0 * 
a->len));
+       *ub = (int)ceil(a->len / ((3.0 * (threshold - lp) / (1.0 - lp)) - 2.0 
));
 }
 
 /* version with given lp and m, and t=0*/
@@ -920,7 +921,7 @@ minjarowinklerjoin(BAT **r1, BAT **r2, B
 {
        strjoincommonvars;
        str_item shortest;
-       size_t lb, ub;
+       int lb, ub;
        const bool sliding_window_allowed = threshold > (2.01 + 
JARO_WINKLER_PREFIX_LEN * JARO_WINKLER_SCALING_FACTOR) / 3.0;
        int *s1flags=NULL, *s2flags=NULL;
        double s;
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to