Sisyphus, good question.
It turns out that none of those array pushes is responsible! If I comment them
all out, the segfault still happens. It seems to occur on function return, and
to be caused by something that happens in the body of the function.
I tried replacing the SvGROW with a regular malloc() and free(), with no effect
whatsoever.
Here's the entire current version of the function, in case you're interested.
Sorry it's a bit verbose.
The segfault occurs when the strings in the input SV*s (dx0, px0, dx1, px1) are
longish; otherwise the function returns without error. Also, as I said, it
returns without error when run in the debugger.
------------------------------------------------------------------
/*
 * _dx_merge -- combine two index records into a newly built one.
 *
 * Parameters:
 *   isr0SV, isr1SV  References to Perl arrays.  Six elements are read from
 *                   each: [0] df and [1] cf (integers), [2] dx and [3] px
 *                   (strings), [4] ld and [5] ds (integers).
 *   flags           Bit mask tested against INTERSECT, LEFT_NEGATION,
 *                   RIGHT_NEGATION, PHRASE and UNION to pick the handlers
 *                   used for left-only, right-only and shared documents.
 *   interval        Passed through unchanged to the "center" handler.
 *
 * Returns:
 *   A new reference (newRV_noinc) to a new array holding, in order: the
 *   resulting df (NDOCS), the resulting cf (NWORDS), the merged dx string,
 *   the merged px string, the last doc id that produced output (LASTDOC),
 *   and corpus_size - lastdoc (SHORT).
 *
 * Errors:
 *   croak()s if an input record carries a negative df/cf, or if the scratch
 *   buffers cannot be allocated.
 *
 * NOTE(review): the av_fetch() results below are dereferenced unchecked; a
 * record with fewer than six elements would crash here -- confirm callers
 * always pass well-formed records.
 */
SV* _dx_merge( SV* isr0SV, SV* isr1SV, int flags, int interval )
{
    assert( SvROK( isr0SV ) );
    assert( SvROK( isr1SV ) );

    // source isr left hand
    int df0 = (int) SvIV( (SV*) *av_fetch( (AV*)SvRV(isr0SV), 0, 0) );
    int cf0 = (int) SvIV( (SV*) *av_fetch( (AV*)SvRV(isr0SV), 1, 0) );
    char* dx0 = SvPV_nolen( (SV*) *av_fetch( (AV*)SvRV(isr0SV), 2, 0) );
    char* px0 = SvPV_nolen( (SV*) *av_fetch( (AV*)SvRV(isr0SV), 3, 0) );
    int ld0 = (int) SvIV( (SV*) *av_fetch( (AV*)SvRV(isr0SV), 4, 0) );
    int ds0 = (int) SvIV( (SV*) *av_fetch( (AV*)SvRV(isr0SV), 5, 0) );
    int tf0 = 0;
    int delta0 = 0;          // 0 means "no pending delta; read the next one"
    char* px0entry = NULL;   // expected to be set by NEXT_DX before first use

    // source isr right hand
    int df1 = (int) SvIV( (SV*) *av_fetch( (AV*)SvRV(isr1SV), 0, 0) );
    int cf1 = (int) SvIV( (SV*) *av_fetch( (AV*)SvRV(isr1SV), 1, 0) );
    char* dx1 = SvPV_nolen( (SV*) *av_fetch( (AV*)SvRV(isr1SV), 2, 0) );
    char* px1 = SvPV_nolen( (SV*) *av_fetch( (AV*)SvRV(isr1SV), 3, 0) );
    int ld1 = (int) SvIV( (SV*) *av_fetch( (AV*)SvRV(isr1SV), 4, 0) );
    int ds1 = (int) SvIV( (SV*) *av_fetch( (AV*)SvRV(isr1SV), 5, 0) );
    int tf1 = 0;
    int delta1 = 0;          // 0 means "no pending delta; read the next one"
    char* px1entry = NULL;   // expected to be set by NEXT_DX before first use

    // result isr and trappings
    int dx_len = 12 * ( df0 + df1 ); // max dx length df * 3 ints
    int px_len = 8 * ( cf0 + cf1 );  // max px length cf * 2 ints
    printf("Allocated %d bytes for dx, %d bytes for px\n", dx_len, px_len);
    assert( ld0 + ds0 == ld1 + ds1 );
    int corpus_size = ld0 + ds0; // corpus size in docs
    int tf = 0; // per-document term count
    int df = 0; // resulting count of documents (document frequency)
    int cf = 0; // resulting match count (corpus frequency)

    // A negative count would yield a negative buffer size and would keep the
    // count-down loops below from ever reaching zero, so reject it up front.
    if( df0 < 0 || df1 < 0 || cf0 < 0 || cf1 < 0 ) {
        croak("_dx_merge: negative df/cf in input record");
    }

    // Request at least one byte so that a NULL return always means failure
    // (malloc(0) is allowed to return NULL).
    char* dx_ret = malloc( dx_len > 0 ? (size_t) dx_len : 1 );
    char* px_ret = malloc( px_len > 0 ? (size_t) px_len : 1 );
    if( dx_ret == NULL || px_ret == NULL ) {
        // croak() does not return, so release whatever was obtained first.
        free(dx_ret);
        free(px_ret);
        croak("_dx_merge: out of memory allocating result buffers");
    }

    // Write cursors handed to the handlers by address; dx_ret/px_ret keep the
    // buffer starts for the final copy and free().
    char* dx_result = dx_ret;
    char* px_result = px_ret;
    // From here on the *_len variables are passed by address to the handlers
    // and are used as the output string lengths at the end.
    px_len = 0;
    dx_len = 0;
    int docid = 0;    // running sum of consumed deltas
    int lastdoc = 0;  // docid of the last entry for which a handler returned tf > 0

    // function pointers
    int (*left)  ( char**, int*, char**, int*, int, char*, int) = _px_identity;
    int (*right) ( char**, int*, char**, int*, int, char*, int) = _px_identity;
    int (*center)( char**, int*, char**, int*, int, char*, char*, int, int, int )
        = _center_merge;

    // assign left, right, and center functions
    // The tests are independent ifs, so when several flags are set the last
    // matching block in this order decides.
    if(flags & INTERSECT) {
        left   = _px_ignore;
        center = _center_merge;
        right  = _px_ignore;
    }
    if(flags & LEFT_NEGATION) {
        left   = _px_ignore;
        center = _center_ignore;
        right  = _px_identity;
    }
    if(flags & RIGHT_NEGATION) {
        left   = _px_identity;
        center = _center_ignore;
        right  = _px_ignore;
    }
    if(flags & PHRASE){
        left   = _px_ignore;
        center = _center_sequence;
        right  = _px_ignore;
    }
    if(flags & UNION) {
        left   = _px_identity;
        center = _center_merge;
        right  = _px_identity;
    }

    while(df0 && df1) {
        // read next deltas from dx strings
        if(delta0 == 0) {
            NEXT_DX(dx0, delta0, px0, px0entry, tf0);
        }
        if(delta1 == 0) {
            NEXT_DX(dx1, delta1, px1, px1entry, tf1);
        }

        // call appropriate complement or center function
        if(delta0 < delta1){
            // left document comes first
            docid += delta0;
            tf = left( &dx_result, &dx_len,
                       &px_result, &px_len,
                       docid-lastdoc, px0entry, tf0);
            delta1 -= delta0;   // keep the right delta relative to the new docid
            df0--;
            delta0 = 0;
        } else if(delta1 < delta0){
            // right document comes first
            docid += delta1;
            tf = right( &dx_result, &dx_len,
                        &px_result, &px_len,
                        docid-lastdoc, px1entry, tf1 );
            delta0 -= delta1;   // keep the left delta relative to the new docid
            df1--;
            delta1 = 0;
        } else { // deltas are equal
            docid += delta0;
            tf = center( &dx_result, &dx_len,
                         &px_result, &px_len,
                         docid-lastdoc,
                         px0entry, px1entry,
                         tf0, tf1,
                         interval );
            df0--;
            df1--;
            delta0 = 0;
            delta1 = 0;
        }

        cf += tf;
        if (tf > 0){
            df++;
            lastdoc = docid;
        }
    }

    // one list is exhausted.
    // continue until both lists are exhausted
    while (df0) {
        // a non-zero delta0 left over from the loop above is consumed as is
        if (delta0 == 0) {
            NEXT_DX(dx0, delta0, px0, px0entry, tf0);
        }
        docid += delta0;
        tf = left( &dx_result, &dx_len, &px_result, &px_len, docid-lastdoc,
                   px0entry, tf0);
        cf += tf;
        df0--;
        delta0 = 0;
        if (tf > 0){
            df++;
            lastdoc = docid;
        }
    }
    while (df1) {
        // a non-zero delta1 left over from the loop above is consumed as is
        if (delta1 == 0) {
            NEXT_DX(dx1, delta1, px1, px1entry, tf1);
        }
        docid += delta1;
        tf = right( &dx_result, &dx_len, &px_result, &px_len, docid-lastdoc,
                    px1entry, tf1);
        cf += tf;
        df1--;
        delta1 = 0;
        if (tf > 0) {
            df++;
            lastdoc = docid;
        }
    }

    AV* array = newAV();
    av_push(array, newSViv(df));                    // NDOCS
    av_push(array, newSViv(cf));                    // NWORDS
    av_push(array, newSVpvn(dx_ret, dx_len));       // DX
    av_push(array, newSVpvn(px_ret, px_len));       // PX
    av_push(array, newSViv(lastdoc));               // LASTDOC
    av_push(array, newSViv(corpus_size - lastdoc)); // SHORT

    // newSVpvn() copies the bytes into the new SV, so the scratch buffers can
    // be released now.
    free(dx_ret);
    free(px_ret);
    printf("returning %d byte dx, %d byte px\n", dx_len, px_len);
    return newRV_noinc((SV*) array);
}
--------------------------------------------------------
--------------------------------------------------------
ATTENTION: DO NOT read, copy or disseminate this communication unless you are
the intended addressee. This message and any file(s) or attachment(s)
transmitted with it are confidential, intended only for the named recipient,
and may contain information that is a trade secret, proprietary, protected by
the attorney work product doctrine, subject to the attorney-client privilege,
or is otherwise protected against unauthorized use or disclosure. This message
and any file(s) or attachment(s) transmitted with it are transmitted based on a
reasonable expectation of privacy consistent with ABA Formal Opinion No.
99-413. If you have received this communication in error, please e-mail the
sender and notify the sender immediately that you have received the
communication in error. Thank you.