Re: [sqlite] Bug: Nested function call to replace() throws parser stack overflow exception

2017-08-08 Thread petern
Hey Brian.  I had some time this morning to look at it.  Try whacking this
new concat_replace into your SQLite build in either func.c or sqlite3.c
amalgam.  Also remember to hook it up with a new entry after say,
group_concat, as follows.

AGGREGATE(group_replace, 3, 0, 0,
groupReplaceStep,groupReplaceFinalize),

[This is a first pass so there could be an edge case with NULL/empty that I
missed... pls test it!  Honestly, the original replace() that I borrowed
from might also be a bit shaky with the NULL/empty edge cases.]

Here's a tester snippet you can play with:

sqlite> WITH T(pattern,replacement) AS (VALUES
('a','This'),('b','is'),('c','a'),('d','test')) SELECT group_replace('a b c
d',pattern,replacement) FROM T;
This is a test

---all C code below this point---

struct StrRepl {
  const unsigned char* zStr;
  u32 nStr;
  u8 replError; /* STRREPL_NOMEM or STRREPL_TOOBIG */
};
typedef struct StrRepl StrRepl;
#define STRREPL_NOMEM   1
#define STRREPL_TOOBIG  2

/*
 ** Aggregate group_replace(A,B,C) result string is derived from A by
replacing
 ** every exact match occurrence of B with C.  A is presumed constant over
the group
 ** while B and C may vary at each step.  Collating sequences are not used.
 */
static void groupReplaceStep(
  sqlite3_context *context,
  int argc,
  sqlite3_value **argv
  ) {

  const unsigned char *zStr; /* The input string A */
  const unsigned char *zPattern; /* The pattern string B */
  const unsigned char *zRepl; /* The replacement string C */
  unsigned char *zOut; /* The output */
  int nStr; /* Size of zStr */
  int nPattern; /* Size of zPattern */
  int nRepl; /* Size of zRep */
  i64 nOut; /* Maximum size of zOut */
  int loopLimit; /* Last zStr[] that might match zPattern[] */
  int i, j; /* Loop counters */

  assert(argc == 3);
  UNUSED_PARAMETER(argc);
  zStr = sqlite3_value_text(argv[0]);
  if (zStr == 0) return;
  nStr = sqlite3_value_bytes(argv[0]);
  assert(zStr == sqlite3_value_text(argv[0])); /* No encoding change */
  zPattern = sqlite3_value_text(argv[1]);
  if (zPattern == 0) {
assert(sqlite3_value_type(argv[1]) == SQLITE_NULL
  || sqlite3_context_db_handle(context)->mallocFailed);

return;
  }
  if (zPattern[0] == 0) {
assert(sqlite3_value_type(argv[1]) != SQLITE_NULL);
sqlite3_result_value(context, argv[0]);
return;
  }
  nPattern = sqlite3_value_bytes(argv[1]);
  assert(zPattern == sqlite3_value_text(argv[1])); /* No encoding change */
  zRepl = sqlite3_value_text(argv[2]);
  if (zRepl == 0) return;
  nRepl = sqlite3_value_bytes(argv[2]);
  assert(zRepl == sqlite3_value_text(argv[2]));

  StrRepl* pStrRepl = (StrRepl*) sqlite3_aggregate_context(context, sizeof
(StrRepl));
  if (pStrRepl) {
if (pStrRepl->replError) return;
if (!pStrRepl->zStr) {
  if (sqlite3_value_type(argv[0]) == SQLITE_NULL
|| sqlite3_value_type(argv[1]) == SQLITE_NULL) return;
  pStrRepl->nStr = sqlite3_value_bytes(argv[0]);
  pStrRepl->zStr = contextMalloc(context,pStrRepl->nStr);

memcpy((void*)pStrRepl->zStr,sqlite3_value_text(argv[0]),pStrRepl->nStr);
/* No encoding change */
}
zPattern = sqlite3_value_text(argv[1]);
nPattern = sqlite3_value_bytes(argv[1]);
assert(zPattern == sqlite3_value_text(argv[1])); /* No encoding change
*/
if (0 == pStrRepl->nStr || 0 == nPattern) return;
zRepl = sqlite3_value_text(argv[2]);
nRepl = sqlite3_value_bytes(argv[2]);
if (0 == zRepl) return;
nOut = pStrRepl->nStr + 1;
assert(nOut < SQLITE_MAX_LENGTH);
zOut = contextMalloc(context, (i64) nOut);
if (0 == zOut) return;
loopLimit = pStrRepl->nStr - nPattern;
for (i = j = 0; i <= loopLimit; i++) {
  if (pStrRepl->zStr[i] != zPattern[0] || memcmp(&(pStrRepl->zStr)[i],
zPattern, nPattern)) {
zOut[j++] = pStrRepl->zStr[i];
  } else {
u8 *zOld;
sqlite3 *db = sqlite3_context_db_handle(context);
nOut += nRepl - nPattern;
testcase(nOut - 1 == db->aLimit[SQLITE_LIMIT_LENGTH]);
testcase(nOut - 2 == db->aLimit[SQLITE_LIMIT_LENGTH]);
if (nOut - 1 > db->aLimit[SQLITE_LIMIT_LENGTH]) {
  pStrRepl->replError = STRREPL_TOOBIG;
  sqlite3_free(zOut);
  return;
}
zOld = zOut;
zOut = sqlite3_realloc64(zOut, (int) nOut);
if (zOut == 0) {
  pStrRepl->replError = STRREPL_NOMEM;
  sqlite3_free(zOld);
  return;
}
memcpy(&zOut[j], zRepl, nRepl);
j += nRepl;
i += nPattern - 1;
  }
}
assert(j + pStrRepl->nStr - i + 1 == nOut);
memcpy(&zOut[j], &pStrRepl->zStr[i], pStrRepl->nStr - i);
j += pStrRepl->nStr - i;
assert(j <= nOut);
zOut[j] = 0;
void* pFree = (void*)pStrRepl->zStr;
pStrRepl->zStr = zOut;
pStrRepl->nStr = nOut;
sqlite3_free(pFree);
  }
}

static void groupReplaceFinalize(sqlite3_context *context) {
  const char **pzVal;
  StrRepl* pStrRepl = sqlite3_aggregate_context(context, 0);
  if (p

Re: [sqlite] Bug: Nested function call to replace() throws parser stack overflow exception

2017-08-07 Thread Jens Alfke

> On Aug 7, 2017, at 2:02 AM, Brian Clifford  
> wrote:
> 
> I have looked into using a user defined function however it was very slow.

It’s quite expensive to call from unmanaged (native) code into managed 
(C#/Java) code.
If possible, write the user defined function in C or C++, even if the rest of 
your code is in C#.

—Jens
___
sqlite-users mailing list
sqlite-users@mailinglists.sqlite.org
http://mailinglists.sqlite.org/cgi-bin/mailman/listinfo/sqlite-users


Re: [sqlite] Bug: Nested function call to replace() throws parser stack overflow exception

2017-08-07 Thread Brian Clifford
Thank Simon,

I have looked into using a user defined function however it was very slow. I 
tried to create an expression index unfortunately I was not able as the 
function was seen as non deterministic. (I am using the C# interface and its 
not possible to define a user defined function as Determinstic from C# (Feature 
request!?))

Thanks
Brian

 

-- 




www.thinksmartbox.com

Facebook   Twitter 
  LinkedIn 
  YouTube 


Smartbox Assistive Technology, Ysobel House, Enigma Commercial Centre, Sandys 
Road, Malvern, WR14 1JJ

Tel: +44 (0) 1684 578868
___
sqlite-users mailing list
sqlite-users@mailinglists.sqlite.org
http://mailinglists.sqlite.org/cgi-bin/mailman/listinfo/sqlite-users


Re: [sqlite] Bug: Nested function call to replace() throws parser stack overflow exception

2017-08-03 Thread Brian Clifford
Thanks for looking into this, I see in the original example I passed in a
nested call of 28 replaces() this was the cutoff point where it would fail,
the following line has about 36 replaces() this fails for me:

replace(replace(replace(replace(replace(replace(replace(replace(replace(
replace(replace(replace(replace(replace(replace(replace(replace(replace(
replace(replace(replace(replace(replace(replace(replace(replace(replace(
replace(replace(replace(replace(replace(replace(replace(replace(replace(Transcript,
',',''),';',''),':',''),'[',''),']',''),'{',''),'}',''),'(',
''),')',''),'\',''),'/',''),'"',''),'_',''),'+',''),'=',''),
'£',''),'$',''),'€',''),'%',''),'^',''),'&',''),'*',''),'@',
''),'#',''),'”',''),'“',''),'…',''),'¬',''),'¦',''),'`',''),
'~',''),'|',''),'.',''),'!',''),'?',''),'
','')

I get the parser stack overflow exception at the point where I create the
index:

"CREATE INDEX RecordingsNoPunctuationPart1Idx ON Recordings({0});"

Or in FULL:

"CREATE INDEX RecordingsNoPunctuationPart1Idx ON Recordings(replace(replace(
replace(replace(replace(replace(replace(replace(replace(replace(replace(
replace(replace(replace(replace(replace(replace(replace(replace(replace(
replace(replace(replace(replace(replace(replace(replace(replace(replace(
replace(replace(replace(replace(replace(replace(replace(Transcript,
',',''),';',''),':',''),'[',''),']',''),'{',''),'}',''),'(',
''),')',''),'\',''),'/',''),'"',''),'_',''),'+',''),'=',''),
'£',''),'$',''),'€',''),'%',''),'^',''),'&',''),'*',''),'@',
''),'#',''),'”',''),'“',''),'…',''),'¬',''),'¦',''),'`',''),
'~',''),'|',''),'.',''),'!',''),'?',''),'
',''));"


*Brian Clifford*
Software Developer

On 3 August 2017 at 11:38, Richard Hipp  wrote:

> I am unable to reproduce the problem.  Do you have any additional
> hints on how to make this fail?
>
> The depth of the stack in the push-down automaton used by the parser
> is deliberately limited to 100 levels.  This is a feature, not a bug.
> See https://www.sqlite.org/compile.html#yystackdepth for additional
> information.
>
> On 8/3/17, Brian Clifford  wrote:
> > Hi,
> >
> > I've subscribed to the sqllite mailing list but am still awaiting
> > authorization.
> >
> > I'd like to report a bug I found with sqlite where I'm creating a
> function
> > index which consists of approx 36 nested calls to the replace() function
> > (to remove punctuation)
> >
> > I get a parser stackoverflow exception after about 28 nested calls.
> >
> > Below is the pseudo-code I use for creating my table and the function
> index
> > ---
> >
> > string _ReplacePunctuationFunctionPart =
> > "replace(replace(replace(replace(replace(replace(
> replace(replace(replace(replace(replace(replace(replace(replace(replace(
> replace(replace(replace(replace(replace(replace(replace(replace(replace(
> replace(replace(replace(Transcript,',',''),';',''),':'
> ,''),'[',''),']',''),'{',''),'}',''),'(',''),')',''),'\','')
> ,'/',''),'"',''),'_',''),'+',''),'=',''),'£',''),'$',''),'€'
> ,''),'%',''),'^',''),'&',''),'*',''),'@',''),'#',''),'”',''),'“',''),'
> > ','')"
> >
> >
> >  "CREATE TABLE \"Recordings\" (\"Id\" INTEGER PRIMARY KEY AUTOINCREMENT
> NOT
> > NULL, \"Transcript\" TEXT NOT NULL COLLATE NOCASE, \"Mp3FileName\" TEXT
> NOT
> > NULL UNIQUE, \"Label\" TEXT);";
> >  "CREATE UNIQUE INDEX RecordingsMp3Index ON Recordings(Mp3FileName);";
> >  "CREATE INDEX RecordingsTextIndex ON Recordings(Transcript);";
> >  string.Format("CREATE INDEX RecordingsNoPunctuationIdx ON
> > Recordings({0});", _ReplacePunctuationFunctionPart);
> >
> >   SELECT 
> >
> >  string.Format("SELECT Id, {0} as 'TranscriptNoPunctuation' FROM
> > Recordings;", _ReplacePunctuationFunctionPart);
> >
> > --
> >
> > regards,
> >
> > *Brian Clifford*
> > Software Developer
> >
> > --
> >
> >
> > 
> >
> > www.thinksmartbox.com
> >
> > Facebook   Twitter
> >   LinkedIn
> > 
> YouTube
> > 
> >
> > Smartbox Assistive Technology, Ysobel House, Enigma Commercial Centre,
> > Sandys
> > Road, Malvern, WR14 1JJ
> >
> > Tel: +44 (0) 1684 578868
> > ___
> > sqlite-users mailing list
> > sqlite-users@mailinglists.sqlite.org
> > http://mailinglists.sqlite.org/cgi-bin/mailman/listinfo/sqlite-users
> >
>
>
> --
> D. Richard Hipp
> d...@sqlite.org
>

-- 




www.thinksmartbox.com

Facebook   Twitter 
  LinkedIn 
  YouTube 


Smartbox Assistive Technology, Ysobel House, Enigma Commercial Centre, Sandys 
Road, Malvern, WR14 1JJ

Tel: +44 (0) 1684 578868
___
sqlite-users mailing list
sqlite-us

Re: [sqlite] Bug: Nested function call to replace() throws parser stack overflow exception

2017-08-03 Thread Simon Slavin


On 3 Aug 2017, at 3:33pm, petern  wrote:

> The deeply nested string replace pattern comes up frequently.  A
> suggestion.  Why not a built in aggregate replace function?

Two more suggestions:

Create a function which does exactly what you want.  Call it 
removePunctuation(p1).  Feed it a string and it removes everything which isn’t 
alphanumeric.  Or however it is you define punctuation.

Create a function called removeTheseCharacters(p1, p2).  Feed it p1 which is a 
string, and p2 which is another string make up of any number of characters.  
function returns a value which is p1 with all the characters in p2 removed.

Simon.
___
sqlite-users mailing list
sqlite-users@mailinglists.sqlite.org
http://mailinglists.sqlite.org/cgi-bin/mailman/listinfo/sqlite-users


Re: [sqlite] Bug: Nested function call to replace() throws parser stack overflow exception

2017-08-03 Thread petern
The deeply nested string replace pattern comes up frequently.  A
suggestion.  Why not a built in aggregate replace function?

As such, the example here would be more readable and not limited by stack
depth as the expression:

SELECT replace(Transcript,column1,column2) FROM (VALUES
(',',''),(';',''),...)




On Thu, Aug 3, 2017 at 3:38 AM, Richard Hipp  wrote:

> I am unable to reproduce the problem.  Do you have any additional
> hints on how to make this fail?
>
> The depth of the stack in the push-down automaton used by the parser
> is deliberately limited to 100 levels.  This is a feature, not a bug.
> See https://www.sqlite.org/compile.html#yystackdepth for additional
> information.
>
> On 8/3/17, Brian Clifford  wrote:
> > Hi,
> >
> > I've subscribed to the sqllite mailing list but am still awaiting
> > authorization.
> >
> > I'd like to report a bug I found with sqlite where I'm creating a
> function
> > index which consists of approx 36 nested calls to the replace() function
> > (to remove punctuation)
> >
> > I get a parser stackoverflow exception after about 28 nested calls.
> >
> > Below is the pseudo-code I use for creating my table and the function
> index
> > ---
> >
> > string _ReplacePunctuationFunctionPart =
> > "replace(replace(replace(replace(replace(replace(
> replace(replace(replace(replace(replace(replace(replace(replace(replace(
> replace(replace(replace(replace(replace(replace(replace(replace(replace(
> replace(replace(replace(Transcript,',',''),';',''),':'
> ,''),'[',''),']',''),'{',''),'}',''),'(',''),')',''),'\','')
> ,'/',''),'"',''),'_',''),'+',''),'=',''),'£',''),'$',''),'€'
> ,''),'%',''),'^',''),'&',''),'*',''),'@',''),'#',''),'”',''),'“',''),'
> > ','')"
> >
> >
> >  "CREATE TABLE \"Recordings\" (\"Id\" INTEGER PRIMARY KEY AUTOINCREMENT
> NOT
> > NULL, \"Transcript\" TEXT NOT NULL COLLATE NOCASE, \"Mp3FileName\" TEXT
> NOT
> > NULL UNIQUE, \"Label\" TEXT);";
> >  "CREATE UNIQUE INDEX RecordingsMp3Index ON Recordings(Mp3FileName);";
> >  "CREATE INDEX RecordingsTextIndex ON Recordings(Transcript);";
> >  string.Format("CREATE INDEX RecordingsNoPunctuationIdx ON
> > Recordings({0});", _ReplacePunctuationFunctionPart);
> >
> >   SELECT 
> >
> >  string.Format("SELECT Id, {0} as 'TranscriptNoPunctuation' FROM
> > Recordings;", _ReplacePunctuationFunctionPart);
> >
> > --
> >
> > regards,
> >
> > *Brian Clifford*
> > Software Developer
> >
> > --
> >
> >
> > 
> >
> > www.thinksmartbox.com
> >
> > Facebook   Twitter
> >   LinkedIn
> > 
> YouTube
> > 
> >
> > Smartbox Assistive Technology, Ysobel House, Enigma Commercial Centre,
> > Sandys
> > Road, Malvern, WR14 1JJ
> >
> > Tel: +44 (0) 1684 578868
> > ___
> > sqlite-users mailing list
> > sqlite-users@mailinglists.sqlite.org
> > http://mailinglists.sqlite.org/cgi-bin/mailman/listinfo/sqlite-users
> >
>
>
> --
> D. Richard Hipp
> d...@sqlite.org
> ___
> sqlite-users mailing list
> sqlite-users@mailinglists.sqlite.org
> http://mailinglists.sqlite.org/cgi-bin/mailman/listinfo/sqlite-users
>
___
sqlite-users mailing list
sqlite-users@mailinglists.sqlite.org
http://mailinglists.sqlite.org/cgi-bin/mailman/listinfo/sqlite-users


Re: [sqlite] Bug: Nested function call to replace() throws parser stack overflow exception

2017-08-03 Thread Richard Hipp
I am unable to reproduce the problem.  Do you have any additional
hints on how to make this fail?

The depth of the stack in the push-down automaton used by the parser
is deliberately limited to 100 levels.  This is a feature, not a bug.
See https://www.sqlite.org/compile.html#yystackdepth for additional
information.

On 8/3/17, Brian Clifford  wrote:
> Hi,
>
> I've subscribed to the sqllite mailing list but am still awaiting
> authorization.
>
> I'd like to report a bug I found with sqlite where I'm creating a function
> index which consists of approx 36 nested calls to the replace() function
> (to remove punctuation)
>
> I get a parser stackoverflow exception after about 28 nested calls.
>
> Below is the pseudo-code I use for creating my table and the function index
> ---
>
> string _ReplacePunctuationFunctionPart =
> "replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(Transcript,',',''),';',''),':',''),'[',''),']',''),'{',''),'}',''),'(',''),')',''),'\',''),'/',''),'"',''),'_',''),'+',''),'=',''),'£',''),'$',''),'€',''),'%',''),'^',''),'&',''),'*',''),'@',''),'#',''),'”',''),'“',''),'
> ','')"
>
>
>  "CREATE TABLE \"Recordings\" (\"Id\" INTEGER PRIMARY KEY AUTOINCREMENT NOT
> NULL, \"Transcript\" TEXT NOT NULL COLLATE NOCASE, \"Mp3FileName\" TEXT NOT
> NULL UNIQUE, \"Label\" TEXT);";
>  "CREATE UNIQUE INDEX RecordingsMp3Index ON Recordings(Mp3FileName);";
>  "CREATE INDEX RecordingsTextIndex ON Recordings(Transcript);";
>  string.Format("CREATE INDEX RecordingsNoPunctuationIdx ON
> Recordings({0});", _ReplacePunctuationFunctionPart);
>
>   SELECT 
>
>  string.Format("SELECT Id, {0} as 'TranscriptNoPunctuation' FROM
> Recordings;", _ReplacePunctuationFunctionPart);
>
> --
>
> regards,
>
> *Brian Clifford*
> Software Developer
>
> --
>
>
> 
>
> www.thinksmartbox.com
>
> Facebook   Twitter
>   LinkedIn
>   YouTube
> 
>
> Smartbox Assistive Technology, Ysobel House, Enigma Commercial Centre,
> Sandys
> Road, Malvern, WR14 1JJ
>
> Tel: +44 (0) 1684 578868
> ___
> sqlite-users mailing list
> sqlite-users@mailinglists.sqlite.org
> http://mailinglists.sqlite.org/cgi-bin/mailman/listinfo/sqlite-users
>


-- 
D. Richard Hipp
d...@sqlite.org
___
sqlite-users mailing list
sqlite-users@mailinglists.sqlite.org
http://mailinglists.sqlite.org/cgi-bin/mailman/listinfo/sqlite-users


[sqlite] Bug: Nested function call to replace() throws parser stack overflow exception

2017-08-03 Thread Brian Clifford
Hi,

I've subscribed to the sqllite mailing list but am still awaiting
authorization.

I'd like to report a bug I found with sqlite where I'm creating a function
index which consists of approx 36 nested calls to the replace() function
(to remove punctuation)

I get a parser stackoverflow exception after about 28 nested calls.

Below is the pseudo-code I use for creating my table and the function index
---

string _ReplacePunctuationFunctionPart =
"replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(Transcript,',',''),';',''),':',''),'[',''),']',''),'{',''),'}',''),'(',''),')',''),'\',''),'/',''),'"',''),'_',''),'+',''),'=',''),'£',''),'$',''),'€',''),'%',''),'^',''),'&',''),'*',''),'@',''),'#',''),'”',''),'“',''),'
','')"


 "CREATE TABLE \"Recordings\" (\"Id\" INTEGER PRIMARY KEY AUTOINCREMENT NOT
NULL, \"Transcript\" TEXT NOT NULL COLLATE NOCASE, \"Mp3FileName\" TEXT NOT
NULL UNIQUE, \"Label\" TEXT);";
 "CREATE UNIQUE INDEX RecordingsMp3Index ON Recordings(Mp3FileName);";
 "CREATE INDEX RecordingsTextIndex ON Recordings(Transcript);";
 string.Format("CREATE INDEX RecordingsNoPunctuationIdx ON
Recordings({0});", _ReplacePunctuationFunctionPart);

  SELECT 

 string.Format("SELECT Id, {0} as 'TranscriptNoPunctuation' FROM
Recordings;", _ReplacePunctuationFunctionPart);

--

regards,

*Brian Clifford*
Software Developer

-- 




www.thinksmartbox.com

Facebook   Twitter 
  LinkedIn 
  YouTube 


Smartbox Assistive Technology, Ysobel House, Enigma Commercial Centre, Sandys 
Road, Malvern, WR14 1JJ

Tel: +44 (0) 1684 578868
___
sqlite-users mailing list
sqlite-users@mailinglists.sqlite.org
http://mailinglists.sqlite.org/cgi-bin/mailman/listinfo/sqlite-users