[ https://issues.apache.org/jira/browse/ASTERIXDB-2766?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17164599#comment-17164599 ]
Michael J. Carey commented on ASTERIXDB-2766: --------------------------------------------- NOW THEN: The problem is that AsterixDB's fulltext indexes don't work on datasets with variable-length primary keys. (Sorry...) Thus, the CREATE INDEX for the fulltext index should have failed and told you that; instead, the system went ahead and tried to create the index, and the result doesn't work. I'll have someone (new-ish student contributors Rui Gao or Glenn Galviso) add that check! As a longer-term wish-list item, I am hoping Rui will fix that limitation - he is focused on text support in AsterixDB these days. > fulltext index issues a bug. > ---------------------------- > > Key: ASTERIXDB-2766 > URL: https://issues.apache.org/jira/browse/ASTERIXDB-2766 > Project: Apache AsterixDB > Issue Type: Bug > Components: HYR - Hyracks > Affects Versions: 0.9.4.1 > Environment: Windows/Linux > Reporter: Wenhai Li > Assignee: Ian Maxon > Priority: Major > > Recently, we wanted to use AsterixDB's fulltext index to search for related records > based on a token. We ran into the following issue. The problem is quite strange: > # If we DO NOT load records into the dataset, we do not see the error. > # Once we load records (even only one record), the following problem appears. > Did I generate the records incorrectly? 
> > Best, > > problem: > Caused by: java.lang.ArithmeticException: / by zero > at > org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.FixedSizeElementInvertedListCursor.setInvListInfo(FixedSizeElementInvertedListCursor.java:370) > ~[hyracks-storage-am-lsm-invertedindex-0.3.4-SNAPSHOT.jar:0.3.4-SNAPSHOT] > at > org.apache.hyracks.storage.am.lsm.invertedindex.api.InvertedListCursor.doOpen(InvertedListCursor.java:55) > ~[hyracks-storage-am-lsm-invertedindex-0.3.4-SNAPSHOT.jar:0.3.4-SNAPSHOT] > at > org.apache.hyracks.storage.common.EnforcedIndexCursor.open(EnforcedIndexCursor.java:54) > ~[hyracks-storage-common-0.3.4-SNAPSHOT.jar:0.3.4-SNAPSHOT] > at > org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.OnDiskInvertedIndex.openInvertedListCursor(OnDiskInvertedIndex.java:213) > ~[hyracks-storage-am-lsm-invertedindex-0.3.4-SNAPSHOT.jar:0.3.4-SNAPSHOT] > at > org.apache.hyracks.storage.am.lsm.invertedindex.search.TOccurrenceSearcher.search(TOccurrenceSearcher.java:56) > ~[hyracks-storage-am-lsm-invertedindex-0.3.4-SNAPSHOT.jar:0.3.4-SNAPSHOT] > at > org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.OnDiskInvertedIndex$OnDiskInvertedIndexAccessor.search(OnDiskInvertedIndex.java:498) > ~[hyracks-storage-am-lsm-invertedindex-0.3.4-SNAPSHOT.jar:0.3.4-SNAPSHOT] > at > org.apache.hyracks.storage.am.lsm.invertedindex.impls.LSMInvertedIndexSearchCursor.doHasNext(LSMInvertedIndexSearchCursor.java:162) > ~[hyracks-storage-am-lsm-invertedindex-0.3.4-SNAPSHOT.jar:0.3.4-SNAPSHOT] > at > org.apache.hyracks.storage.common.EnforcedIndexCursor.hasNext(EnforcedIndexCursor.java:69) > ~[hyracks-storage-common-0.3.4-SNAPSHOT.jar:0.3.4-SNAPSHOT] > at > org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.writeSearchResults(IndexSearchOperatorNodePushable.java:241) > ~[hyracks-storage-am-common-0.3.4-SNAPSHOT.jar:0.3.4-SNAPSHOT] > at > 
org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.nextFrame(IndexSearchOperatorNodePushable.java:290) > ~[hyracks-storage-am-common-0.3.4-SNAPSHOT.jar:0.3.4-SNAPSHOT] > > Schema: > USE Personicle; > DROP DATASET GeneralMeasurement IF EXISTS; > DROP TYPE GeneralMeasurementType IF EXISTS; > CREATE TYPE GeneralMeasurementType AS OPEN { > measureId: string, --primary key string for measurement, UUID > deviceId: string, > timestamp: bigint, > userName: string?, > beginAt: datetime?, > endAt: datetime?, > category: string, > attribute: string?, > activity: string?, > description: string? > }; > CREATE DATASET GeneralMeasurement(GeneralMeasurementType) PRIMARY KEY > measureId; > CREATE INDEX GeneralMeasurementDeviceIdIdx ON GeneralMeasurement(deviceId, > timestamp) type btree; > CREATE INDEX GeneralMeasurementAttributeIdx ON GeneralMeasurement(attribute) > type fulltext; > > USE Personicle; > load dataset GeneralMeasurement using localfs > (("path"="127.0.0.1:///f:/Work/Personicle/example/BigFoodLog.adm"),("input-format"="text-input-format"),("input-format"="text-input-format"),("format"="adm")); > > sample record: > {"attribute":"1acc5da443f34eb1870f22873b8b489f","category":"foodlog","comments":"爱谷鸿 > ate 254.10932950875716g 矿泉水","description":"爱谷鸿 ate 254.10932950875716g > 矿泉水","deviceId":"c5223137c9284b649e0bf6bd0c37fe2f","endAt":datetime("2017-10-21T17:54:45"),"foodName":"矿泉水","latitude":22.300131276012458,"longitude":113.67809523288565,"measureId":"f59149a0cd834192b72f77c478fa2b40","preference_star":9,"startAt":datetime("2017-10-21T17:54:35"),"timestamp":1508579675000,"total_calories":417.7053493274758,"userName":"爱谷鸿","weight":254.10932950875716} > > query: > > USE Personicle; > select * from GeneralMeasurement > where ftcontains(attribute, `word-tokens`("1acc5da443f34eb1870f22873b8b489f > 07ef77218382441fbabdbb3563681605"), {"mode":"any"}); -- This message was sent by Atlassian Jira (v8.3.4#803005)