Sisyphus, good question. 

It turns out that none of those array pushes are responsible! If I comment them 
all out, the segfault still happens. The crash seems to occur on function 
return, and to be caused by something that happens in the body of the 
function.

I tried replacing the SvGROW with a regular malloc() and free(), with no effect 
whatsoever.

Here's the entire current version of the function, in case you're interested. 
Sorry it's a bit verbose.

What happens is that when the strings in the input SV*s (dx0, px0, dx1, px1) 
are longish, the segfault occurs. Otherwise it returns without error. Also, as 
I said, running it in the debugger returns without error.



------------------------------------------------------------------


/*
 * _dx_merge - merge two isr records into a newly built isr record.
 *
 * isr0SV, isr1SV: references to arrays whose first six elements are read as
 *                 [0] document count (df), [1] match count (cf),
 *                 [2] dx string, [3] px string, [4] ld, [5] ds.
 * flags:          bitmask of INTERSECT / LEFT_NEGATION / RIGHT_NEGATION /
 *                 PHRASE / UNION.  The tests run in that order and each one
 *                 overwrites the handlers chosen before it, so when several
 *                 bits are set the last matching test wins.
 * interval:       forwarded untouched to the center handler.
 *
 * Returns a new reference to an array holding
 * [NDOCS, NWORDS, DX, PX, LASTDOC, SHORT].
 *
 * Croaks (instead of crashing) when an argument is not an array reference,
 * when one of the six elements is missing, when a count is negative or too
 * large to size a buffer, when memory cannot be allocated, or when the
 * handlers report more output than the buffers can hold.
 */
SV* _dx_merge( SV* isr0SV, SV* isr1SV, int flags, int interval )
{
  enum { ISR_FIELDS = 6 };
  SV* field0[ISR_FIELDS];
  SV* field1[ISR_FIELDS];
  int i;

  // SvROK alone does not prove the referent is an array; check the type too,
  // and do it with a real test so it still runs when assert() is compiled out.
  if ( !SvROK( isr0SV ) || SvTYPE( SvRV(isr0SV) ) != SVt_PVAV ||
       !SvROK( isr1SV ) || SvTYPE( SvRV(isr1SV) ) != SVt_PVAV ) {
    croak( "_dx_merge: both isr arguments must be array references" );
  }

  // av_fetch returns NULL for a slot that does not exist; never dereference
  // the result blindly.
  for (i = 0; i < ISR_FIELDS; i++) {
    SV** slot0 = av_fetch( (AV*)SvRV(isr0SV), i, 0 );
    SV** slot1 = av_fetch( (AV*)SvRV(isr1SV), i, 0 );
    if (slot0 == NULL || slot1 == NULL) {
      croak( "_dx_merge: isr array is missing element %d", i );
    }
    field0[i] = *slot0;
    field1[i] = *slot1;
  }

  // source isr left hand
  int   df0 = (int) SvIV( field0[0] );
  int   cf0 = (int) SvIV( field0[1] );
  char* dx0 = SvPV_nolen( field0[2] );
  char* px0 = SvPV_nolen( field0[3] );
  int   ld0 = (int) SvIV( field0[4] );
  int   ds0 = (int) SvIV( field0[5] );
  int   tf0    = 0; 
  int   delta0 = 0; 
  char* px0entry; 

  // source isr right hand
  int   df1 = (int) SvIV( field1[0] );
  int   cf1 = (int) SvIV( field1[1] );
  char* dx1 = SvPV_nolen( field1[2] );
  char* px1 = SvPV_nolen( field1[3] );
  int   ld1 = (int) SvIV( field1[4] );
  int   ds1 = (int) SvIV( field1[5] );
  int   tf1    = 0;
  int   delta1 = 0;
  char* px1entry;

  // The counts drive both the buffer sizes and the loop trip counts, so
  // reject values that would overflow the size arithmetic or never reach 0.
  if (df0 < 0 || df1 < 0 || cf0 < 0 || cf1 < 0) {
    croak( "_dx_merge: negative document or match count in isr" );
  }
  if (df0 > PERL_INT_MAX / 12 - df1 || cf0 > PERL_INT_MAX / 8 - cf1) {
    croak( "_dx_merge: isr counts too large to size result buffers" );
  }

  // result isr and trappings
  int dx_len = 12 * ( df0 + df1 ); // max dx length df * 3 ints
  int px_len =  8 * ( cf0 + cf1 ); // max px length cf * 2 ints

  // Remember the capacities: dx_len/px_len are reused below as running
  // output lengths.
  const int dx_cap = dx_len;
  const int px_cap = px_len;

printf("Allocated %d bytes for dx, %d bytes for px\n", dx_len, px_len);

  assert( ld0 + ds0 == ld1 + ds1 );
  int corpus_size = ld0 + ds0;     // corpus size in docs


  int tf = 0; // per-document term count
  int df = 0; // resulting count of documents (document frequency)
  int cf = 0; // resulting match count (corpus frequency)

  // Ask for at least one byte so that a NULL return always means failure
  // (malloc(0) is allowed to return NULL).
  char* dx_ret = malloc( dx_cap > 0 ? (size_t) dx_cap : 1 );
  if (dx_ret == NULL) {
    croak( "_dx_merge: out of memory allocating dx buffer" );
  }
  char* px_ret = malloc( px_cap > 0 ? (size_t) px_cap : 1 );
  if (px_ret == NULL) {
    free(dx_ret);
    croak( "_dx_merge: out of memory allocating px buffer" );
  }

  // write cursors handed to the handlers; dx_ret/px_ret keep the buffer starts
  char* dx_result = dx_ret;
  char* px_result = px_ret;
  
  px_len = 0;
  dx_len = 0;
  
  int docid = 0;   // absolute id of the document currently being processed
  int lastdoc = 0; // absolute id of the last document that produced output


  // function pointers
  // NOTE(review): the handlers get the cursors and running lengths but no
  // capacity, so the 12-/8-byte-per-entry bound above must match what they
  // and NEXT_DX really write -- confirm against their definitions.
  int (*left)  ( char**, int*, char**, int*, int, char*, int) = _px_identity;
  int (*right) ( char**, int*, char**, int*, int, char*, int) = _px_identity;
  int (*center)( char**, int*, char**, int*, int, char*, char*, int, int, int ) 
    = _center_merge;


  if(flags & INTERSECT) {
    left   = _px_ignore;
    center = _center_merge;
    right  = _px_ignore;
  }

  // assign left, right, and center functions
  if(flags & LEFT_NEGATION) {
    left   = _px_ignore;
    center = _center_ignore;
    right  = _px_identity;
  }

  if(flags & RIGHT_NEGATION) {
    left   = _px_identity;
    center = _center_ignore;
    right  = _px_ignore;
  }

  if(flags & PHRASE){
    left   = _px_ignore;
    center = _center_sequence;
    right  = _px_ignore;
  }

  if(flags & UNION) {
    left   = _px_identity;
    center = _center_merge;
    right  = _px_identity;
  }



  while(df0 && df1) {


    // read next deltas from dx strings
    // (a zero delta marks a side whose pending entry has been consumed)
    if(delta0 == 0) {
      NEXT_DX(dx0, delta0, px0, px0entry, tf0);
    }

    if(delta1 == 0) {
      NEXT_DX(dx1, delta1, px1, px1entry, tf1);
    }

    // call appropriate complement or center function
    if(delta0 < delta1){
      docid += delta0;
      tf = left( &dx_result, &dx_len, 
                 &px_result, &px_len, 
                 docid-lastdoc, px0entry, tf0);
      delta1 -= delta0; // keep the pending right entry relative to docid
      df0--;
      delta0 = 0;
    } else if(delta1 < delta0){
      docid += delta1;
      tf = right( &dx_result, &dx_len, 
                  &px_result, &px_len, 
                  docid-lastdoc, px1entry, tf1 );
      delta0 -= delta1; // keep the pending left entry relative to docid
      df1--;
      delta1 = 0;
    } else { // deltas are equal
      docid += delta0;
      

      tf = center( &dx_result, &dx_len, 
                   &px_result, &px_len, 
                   docid-lastdoc, 
                   px0entry, px1entry, 
                   tf0, tf1, 
                   interval );
      df0--;
      df1--;
      delta0 = 0;
      delta1 = 0;
    }

    cf += tf;
    if (tf > 0){
      df++;
      lastdoc = docid;
    }
  }

  // one list is exhausted.
  // continue until both lists are exhausted
  while (df0) {
    if (delta0 == 0) {
      NEXT_DX(dx0, delta0, px0, px0entry, tf0);
    } 
    docid += delta0;
    tf = left( &dx_result, &dx_len, &px_result, &px_len, docid-lastdoc, 
               px0entry, tf0);
    cf += tf;
    df0--;
    delta0 = 0;
    if (tf > 0){
      df++;
      lastdoc = docid;
    }
  }
  while (df1) {
    if (delta1 == 0) {
      NEXT_DX(dx1, delta1, px1, px1entry, tf1);
    }
    docid += delta1;
    tf = right( &dx_result, &dx_len, &px_result, &px_len, docid-lastdoc, 
                px1entry, tf1);
    cf += tf;
    df1--;
    delta1 = 0;
    if (tf > 0) {
      df++;
      lastdoc = docid;
    }
  }

  // dx_len/px_len are used as the result lengths below, so a value outside
  // the allocated capacity means the handlers ran past a buffer.  The heap
  // can no longer be trusted at that point: report it and deliberately do
  // not free the buffers.
  if (dx_len < 0 || dx_len > dx_cap || px_len < 0 || px_len > px_cap) {
    croak( "_dx_merge: result overran its buffers "
           "(dx %d of %d bytes, px %d of %d bytes)",
           dx_len, dx_cap, px_len, px_cap );
  }


  AV* array = newAV();
  av_push(array, newSViv(df));      // NDOCS
  av_push(array, newSViv(cf));      // NWORDS
  av_push(array, newSVpvn(dx_ret, dx_len));    // DX
  av_push(array, newSVpvn(px_ret, px_len));    // PX
  av_push(array, newSViv(lastdoc)); // LASTDOC
  av_push(array, newSViv(corpus_size - lastdoc)); // SHORT

  // newSVpvn copied the bytes, so the scratch buffers can go
  free(dx_ret);
  free(px_ret);
  
printf("returning %d byte dx, %d byte px\n", dx_len, px_len);
  return newRV_noinc((SV*) array);
}
 
--------------------------------------------------------

--------------------------------------------------------

 
 
ATTENTION: DO NOT read, copy or disseminate this communication unless you are 
the intended addressee. This message and any file(s) or attachment(s) 
transmitted with it are confidential, intended only for the named recipient, 
and may contain information that is a trade secret, proprietary, protected by 
the attorney work product doctrine, subject to the attorney-client privilege, 
or is otherwise protected against unauthorized use or disclosure. This message 
and any file(s) or attachment(s) transmitted with it are transmitted based on a 
reasonable expectation of privacy consistent with ABA Formal Opinion No. 
99-413. If you have received this communication in error, please e-mail the 
sender and notify the sender immediately that you have received the 
communication in error. Thank you.

Reply via email to