Hi,
I've prepared a Quick & Dirty patch serie for some missing parts in
intarray contrib module. Here they go with some explanation...
On May 06 12:38, Volkan YAZICI wrote:
> [4]
> In the inner_int_contains() function of _int_tool.c, there's a while
> loop like
>
> [Code assumes that arrays are sorted.]
>
> na = ARRNELEMS(a);
> nb = ARRNELEMS(b);
> da = ARRPTR(a);
> db = ARRPTR(b);
>
> i = j = n = 0;
> while (i < na && j < nb)
> if (da[i] < db[j])
> i++;
> else if (da[i] == db[j])
> {
> n++;
> i++;
> j++;
> }
> else
> j++;
>
> return (n == nb) ? TRUE : FALSE;
>
> AFAICS, last "j++" should be replaced with "return FALSE". Because, "n"
> cannot be equal to "nb" no more, if "j" gets incremented without
> incrementing "n" (remember "j < nb" in the "while" condition).
intarray_contains.patch.0 is for above problem.
[5]
ISTM, inner_int_union() of _int_tool.c, makes some redundant visits to
array:
...
/* union */
i = j = 0;
while (i < na && j < nb)
if (da[i] < db[j])
*dr++ = da[i++];
else
*dr++ = db[j++];
while (i < na)
*dr++ = da[i++];
while (j < nb)
*dr++ = db[j++];
}
if (ARRNELEMS(r) > 1)
r = _int_unique(r);
IMHO, uniting unique values (instead of uniting and then removing
duplicates) should work faster. intarray_union.patch.0 is for this
problem. (Patched code, handles uniting for unique values.)
[6]
There's a seperate sorting algorithm isort() in _int_tool.c. Looks like
it executes some kind of shell sort on the array and returns true if
array has any duplicates. It's used for common sorting and deciding on
executing _int_unique() on the related array if isort() says it has
duplicates.
IIRC, our inner qsort() has a smaller O(N) degree when compared to above
sorting algorithm. Also for the code's sake it'd be better to switch
using qsort() in all sorting related stuff. For these reasons,
intarray_sort.patch.0 addresses this (probable) gotcha.
All 3 patches passed regression tests for intarray contrib module. But
these are just for addressing some gotchas I found while reading code.
Your comments for these problems(?) are welcome.
Regards.
Index: contrib/intarray/_int_tool.c
===================================================================
RCS file: /projects/cvsroot/pgsql/contrib/intarray/_int_tool.c,v
retrieving revision 1.6
diff -c -r1.6 _int_tool.c
*** contrib/intarray/_int_tool.c 19 Nov 2005 03:00:09 -0000 1.6
--- contrib/intarray/_int_tool.c 6 May 2006 17:51:35 -0000
***************
*** 34,40 ****
j++;
}
else
! j++;
return (n == nb) ? TRUE : FALSE;
}
--- 34,40 ----
j++;
}
else
! break;
return (n == nb) ? TRUE : FALSE;
}
Index: contrib/intarray/_int_tool.c
===================================================================
RCS file: /projects/cvsroot/pgsql/contrib/intarray/_int_tool.c,v
retrieving revision 1.6
diff -c -r1.6 _int_tool.c
*** contrib/intarray/_int_tool.c 19 Nov 2005 03:00:09 -0000 1.6
--- contrib/intarray/_int_tool.c 7 May 2006 09:59:09 -0000
***************
*** 183,221 ****
return;
}
-
- /* len >= 2 */
- bool
- isort(int4 *a, int len)
- {
- int4 tmp,
- index;
- int4 *cur,
- *end;
- bool r = FALSE;
-
- end = a + len;
- do
- {
- index = 0;
- cur = a + 1;
- while (cur < end)
- {
- if (*(cur - 1) > *cur)
- {
- tmp = *(cur - 1);
- *(cur - 1) = *cur;
- *cur = tmp;
- index = 1;
- }
- else if (!r && *(cur - 1) == *cur)
- r = TRUE;
- cur++;
- }
- } while (index);
- return r;
- }
-
ArrayType *
new_intArrayType(int num)
{
--- 183,188 ----
Index: contrib/intarray/_int.h
===================================================================
RCS file: /projects/cvsroot/pgsql/contrib/intarray/_int.h,v
retrieving revision 1.9
diff -c -r1.9 _int.h
*** contrib/intarray/_int.h 3 May 2006 16:31:07 -0000 1.9
--- contrib/intarray/_int.h 7 May 2006 09:59:10 -0000
***************
*** 38,56 ****
#define ARRISVOID(x) ((x) == NULL || ARRNELEMS(x) == 0)
- #define SORT(x) \
- do { \
- if ( ARRNELEMS( x ) > 1 ) \
- isort( ARRPTR( x ), ARRNELEMS( x ) ); \
- } while(0)
-
- #define PREPAREARR(x) \
- do { \
- if ( ARRNELEMS( x ) > 1 ) \
- if ( isort( ARRPTR( x ), ARRNELEMS( x ) ) ) \
- x = _int_unique( x ); \
- } while(0)
-
/* "wish" function */
#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
--- 38,43 ----
***************
*** 105,111 ****
/*
** useful function
*/
- bool isort(int4 *a, const int len);
ArrayType *new_intArrayType(int num);
ArrayType *copy_intArrayType(ArrayType *a);
ArrayType *resize_intArrayType(ArrayType *a, int num);
--- 92,97 ----
***************
*** 171,173 ****
--- 157,168 ----
if (ARRNELEMS(a) > 1)
\
qsort((void*)ARRPTR(a), ARRNELEMS(a),sizeof(int4),
\
(direction) ? compASC : compDESC )
+
+ #define SORT(x) QSORT(x, 1)
+
+ #define PREPAREARR(x) \
+ if (ARRNELEMS(x) > 1) \
+ { \
+ SORT(x); \
+ x = _int_unique( x ); \
+ }
Index: contrib/intarray/_int_tool.c
===================================================================
RCS file: /projects/cvsroot/pgsql/contrib/intarray/_int_tool.c,v
retrieving revision 1.6
diff -c -r1.6 _int_tool.c
*** contrib/intarray/_int_tool.c 19 Nov 2005 03:00:09 -0000 1.6
--- contrib/intarray/_int_tool.c 6 May 2006 17:49:13 -0000
***************
*** 94,103 ****
if (ARRISVOID(b))
r = copy_intArrayType(a);
! if (r)
! dr = ARRPTR(r);
else
{
na = ARRNELEMS(a);
nb = ARRNELEMS(b);
da = ARRPTR(a);
--- 94,108 ----
if (ARRISVOID(b))
r = copy_intArrayType(a);
! if (r) /* Only one of the arrays isn't void and it's assigned to r. */
! {
! if (ARRNELEMS(r) > 1)
! r = _int_unique(r); /*
Remove repetitive values. */
! }
else
{
+ int k;
+
na = ARRNELEMS(a);
nb = ARRNELEMS(b);
da = ARRPTR(a);
***************
*** 106,128 ****
r = new_intArrayType(na + nb);
dr = ARRPTR(r);
! /* union */
! i = j = 0;
! while (i < na && j < nb)
! if (da[i] < db[j])
! *dr++ = da[i++];
! else
! *dr++ = db[j++];
!
! while (i < na)
! *dr++ = da[i++];
! while (j < nb)
! *dr++ = db[j++];
!
}
! if (ARRNELEMS(r) > 1)
! r = _int_unique(r);
return r;
}
--- 111,164 ----
r = new_intArrayType(na + nb);
dr = ARRPTR(r);
! /* Move i to next new integer in the array. */
! #define NEXT_NEW(arr, len, i) \
! if (i < len) \
! { \
! while (++i < len) \
! if (arr[i] != arr[i-1]) \
! break; \
}
+
+ /*
+ * Form sorted union of arrays A and B.
+ * (Repetitive values will be discarded.)
+ */
+ for (i = j = k = 0; i < na || j < nb; k++)
+ {
+ if (i >= na) /* A is
exhausted. */
+ {
+ dr[k] = db[j];
+ NEXT_NEW(db, nb, j);
+ }
+ else if (j >= nb) /* B is
exhausted. */
+ {
+ dr[k] = da[i];
+ NEXT_NEW(da, na, i);
+ }
+ else
/* a[] and b[] still isn't exhausted. */
+ {
+ if (da[i] < db[j])
+ {
+ dr[k] = da[i];
+ NEXT_NEW(da, na, i);
+ }
+ else if (da[i] > db[j])
+ {
+ dr[k] = db[j];
+ NEXT_NEW(db, nb, j);
+ }
+ else
+ {
+ dr[k] = da[i];
+ NEXT_NEW(da, na, i);
+ NEXT_NEW(db, nb, j);
+ }
+ }
+ }
! r = resize_intArrayType(r, k);
! }
return r;
}
---------------------------(end of broadcast)---------------------------
TIP 5: don't forget to increase your free space map settings