[ https://issues.apache.org/jira/browse/TRAFODION-2662?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
David Wayne Birdsall reopened TRAFODION-2662: --------------------------------------------- Reopening in order to fix regression test compGeneral/TEST023. > Incremental UPDATE STATS fails on very large sample tables > ---------------------------------------------------------- > > Key: TRAFODION-2662 > URL: https://issues.apache.org/jira/browse/TRAFODION-2662 > Project: Apache Trafodion > Issue Type: Bug > Components: sql-cmp > Affects Versions: 2.2-incubating > Environment: Large clusters > Reporter: David Wayne Birdsall > Assignee: David Wayne Birdsall > Fix For: 2.2-incubating > > > A user attempted to use the incremental UPDATE STATISTICS feature on a 90 > billion row table. The persistent sample table was created using a sampling > rate of 5 per cent, and so it had about 4.5 billion rows. The incremental > UPDATE STATISTICS command failed with an error like the following: > >>Update statistics for table traf1 on existing columns incremental where C2 > >>>= '2017-06-10 00:00:00'; > *** ERROR[9219] Incremental UPDATE STATISTICS: An operation failed, possibly > due to an invalid WHERE clause. > *** ERROR[15001] A syntax error occurred at or before: > SELECT FROM TRAFODION.SEABASE.TRAF_SAMPLE_32_1498066565_999632 WHERE C2 >= > '2 > ^ (12 characters from start of SQL statement) > *** ERROR[8822] The statement was not prepared. > --- SQL operation failed with errors. > The script below reproduces the problem on a workstation. The script creates > a table, puts a few rows in it, creates a sample table, then doctors the > sample table metadata to make UPDATE STATISTICS think it has 4.5 billion > rows. The failure then occurs. > ?section setup > drop table if exists traf1; > CREATE TABLE traf1 > ( > C1 CHAR(32) CHARACTER SET ISO88591 COLLATE > DEFAULT NO DEFAULT NOT NULL NOT DROPPABLE NOT SERIALIZED > , C2 TIMESTAMP(6) NO DEFAULT NOT NULL NOT > DROPPABLE NOT SERIALIZED > , C3 CHAR(32) CHARACTER SET ISO88591 COLLATE > DEFAULT NO DEFAULT NOT NULL NOT DROPPABLE NOT SERIALIZED > , C4 VARCHAR(4096 BYTES) CHARACTER SET UTF8 > COLLATE DEFAULT NO DEFAULT NOT NULL NOT DROPPABLE NOT SERIALIZED > , C5 VARCHAR(1024 BYTES) CHARACTER SET UTF8 > COLLATE DEFAULT DEFAULT NULL NOT SERIALIZED > , C6 SMALLINT DEFAULT 1 NOT SERIALIZED > , PRIMARY KEY (C1 ASC, C2 DESC, C3 ASC) > ) > SALT USING 4 PARTITIONS > ON (C1) > ATTRIBUTES ALIGNED FORMAT > HBASE_OPTIONS > ( > DATA_BLOCK_ENCODING = 'FAST_DIFF', > BLOCKSIZE = '262144', > MEMSTORE_FLUSH_SIZE = '1073741824' > ) > ; > insert into traf1 values ('abcdef',current_timestamp - interval '20' day, > '0123456789abcdef','www.xyz.stuff.com','a bit of stuff',14); > insert into traf1 values ('abcdef01',current_timestamp - interval '20' day, > '0123456789abcdef01','www.xyz.morestuff.com','a bit of more stuff',12); > insert into traf1 values ('012345',current_timestamp - interval '20' day, > 'abc012','www.blahblah.com','a bit of it is fun',2); > insert into traf1 values ('01234567',current_timestamp - interval '20' day, > 'abc01234','www.blahblahblah.com','a bit of it is fun',31); > insert into traf1 values ('def01234',current_timestamp - interval '20' day, > '0123456789abcdef','www.xyz.stuff.com','a bit of stuff',14); > insert into traf1 values ('adbf01',current_timestamp - interval '20' day, > '0123456789abcdef01','www.xyz.morestuff.com','a bit of more stuff',12); > insert into traf1 values ('a0b0c0',current_timestamp - interval '20' day, > 'abc012','www.blahblah.com','a bit of it is fun',2); > insert into traf1 values ('021357',current_timestamp - interval '20' day, > 'abc01234','www.blahblahblah.com','a bit of it is fun',30); > insert into traf1 values ('abcdef',current_timestamp - interval '10' day, > '0123456789abcdef','www.xyz.stuff.com','a bit of stuff',14); > insert into traf1 values ('abcddf01',current_timestamp - interval '10' day, > '0123456789abcdef01','www.xyz.morestuff.com','a bit of more stuff',12); > insert into traf1 values ('01234345',current_timestamp - interval '10' day, > 'abc012','www.blahblah.com','a bit of it is fun',2); > insert into traf1 values ('034567',current_timestamp - interval '10' day, > 'abc01234','www.blahblahblah.com','a bit of it is fun',31); > insert into traf1 values ('def8201234',current_timestamp - interval '10' day, > '0123456789abcdef','www.xyz.stuff.com','a bit of stuff',18); > insert into traf1 values ('adbf9a01',current_timestamp - interval '10' day, > '0123456789abcdef01','www.C4edin.com','a bit of try twice',2); > insert into traf1 values ('a0bbc0c0',current_timestamp - interval '10' day, > 'abc012','www.blahblah.com','a bit of it is fun',2); > insert into traf1 values ('02111357',current_timestamp - interval '10' day, > 'abc01234','www.blahblahblah.com','a bit of it is fun',30); > insert into traf1 values > ('def01234',current_timestamp,'01234789abcdef','www.xyz.stuff.com','a bit of > stuff',14); > insert into traf1 values > ('adbf01',current_timestamp,'0123456789abef01','www.xyz.morestuff.com','a bit > of more stuff',12); > insert into traf1 values > ('a0b0c0',current_timestamp,'abc03452','www.blahblah.com','a bit of it is > fun',21); > insert into traf1 values > ('02431357',current_timestamp,'abcd001234','www.blahblahblah.com','a bit of > it is fun',39); > insert into traf1 values > ('defab01234',current_timestamp,'01a0234789abcdef','www.cnn.com','a bit of > stuff',14); > insert into traf1 values > ('adb9ef01',current_timestamp,'012333456789abef01','www.youtube.com','a bit > of more stuff',8); > insert into traf1 values > ('a2b0b0c0',current_timestamp,'ab3452','www.blahblah.com','a bit of it is > fun',21); > insert into traf1 values > ('0243731357',current_timestamp,'abcd00122a34','www.blahblahblah.com','a bit > of it is fun',39); > ?section repro > UPDATE STATISTICS FOR TABLE traf1 ON EVERY COLUMN SAMPLE RANDOM 50 PERCENT > PERSISTENT; > -- doctor REQUESTED_SAMPLE_ROWS, ACTUAL_SAMPLE_ROWS, SAMPLING_RATIO > -- from sb_persistent_samples to make UPDATE STATS think the sample table > -- has 4.5 billion rows > update sb_persistent_samples set REQUESTED_SAMPLE_ROWS = 4500000000, > ACTUAL_SAMPLE_ROWS = 4500000000 > where table_uid = (select object_uid from "_MD_".objects where object_name = > 'TRAF1'); > cqd ustat_log 'reproULOG.txt'; > update statistics log on; > Update statistics for table traf1 on existing columns incremental where C2 >= > '2017-06-10 00:00:00'; > exit; -- This message was sent by Atlassian JIRA (v6.4.14#64029)