[ https://issues.apache.org/jira/browse/SOLR-6085?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14207807#comment-14207807 ]
ASF subversion and git services commented on SOLR-6085: ------------------------------------------------------- Commit 1638711 from jan...@apache.org in branch 'dev/trunk' [ https://svn.apache.org/r1638711 ] SOLR-6085: Suggester crashes when prefixToken is longer than surface form > Suggester crashes when prefixToken is longer than surface form > -------------------------------------------------------------- > > Key: SOLR-6085 > URL: https://issues.apache.org/jira/browse/SOLR-6085 > Project: Solr > Issue Type: Bug > Components: SearchComponents - other > Affects Versions: 4.7.1, 4.8 > Reporter: Jorge Ferrández > Assignee: Jan Høydahl > Labels: suggester > Fix For: 4.7.3, 4.10.3, 5.0, Trunk > > Attachments: SOLR-6085.patch > > > AnalyzingInfixSuggester class fails when it is queried with a ß character > (eszett) used in German, but it doesn't happen for all data or for all words > containing this character. The exception reported is the following: > {code:java} > <response> > <lst name="responseHeader"> > <int name="status">500</int> > <int name="QTime">18</int> > </lst> > <lst name="error"> > <str name="msg">String index out of range: 5</str> > <str name="trace"> > java.lang.StringIndexOutOfBoundsException: String index out of range: 5 at > java.lang.String.substring(String.java:1907) at > org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester.addPrefixMatch(AnalyzingInfixSuggester.java:575) > at > org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester.highlight(AnalyzingInfixSuggester.java:525) > at > org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester.createResults(AnalyzingInfixSuggester.java:479) > at > org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester.lookup(AnalyzingInfixSuggester.java:437) > at > org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester.lookup(AnalyzingInfixSuggester.java:338) > at > org.apache.solr.spelling.suggest.SolrSuggester.getSuggestions(SolrSuggester.java:181) > at > 
org.apache.solr.handler.component.SuggestComponent.process(SuggestComponent.java:232) > at > org.apache.solr.handler.component.SearchHandler.handleRequestBody(SearchHandler.java:217) > at > org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:135) > at > org.apache.solr.core.RequestHandlers$LazyRequestHandlerWrapper.handleRequest(RequestHandlers.java:241) > at org.apache.solr.core.SolrCore.execute(SolrCore.java:1916) at > org.apache.solr.servlet.SolrDispatchFilter.execute(SolrDispatchFilter.java:780) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:427) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:217) > at > org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1419) > at > org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:455) at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:137) > at > org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:557) > at > org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:231) > at > org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1075) > at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:384) > at > org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:193) > at > org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1009) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:135) > at > org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:255) > at > org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:154) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:116) > at org.eclipse.jetty.server.Server.handle(Server.java:368) at > 
org.eclipse.jetty.server.AbstractHttpConnection.handleRequest(AbstractHttpConnection.java:489) > at > org.eclipse.jetty.server.BlockingHttpConnection.handleRequest(BlockingHttpConnection.java:53) > at > org.eclipse.jetty.server.AbstractHttpConnection.headerComplete(AbstractHttpConnection.java:942) > at > org.eclipse.jetty.server.AbstractHttpConnection$RequestHandler.headerComplete(AbstractHttpConnection.java:1004) > at org.eclipse.jetty.http.HttpParser.parseNext(HttpParser.java:640) at > org.eclipse.jetty.http.HttpParser.parseAvailable(HttpParser.java:235) at > org.eclipse.jetty.server.BlockingHttpConnection.handle(BlockingHttpConnection.java:72) > at > org.eclipse.jetty.server.bio.SocketConnector$ConnectorEndPoint.run(SocketConnector.java:264) > at > org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:608) > at > org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:543) > at java.lang.Thread.run(Thread.java:744) > </str> > <int name="code">500</int> > </lst> > </response> > {code} > With this query > http://localhost:8983/solr/suggest_de?suggest.q=gieß (for gießen, which is > actually in the data) > The problem seems to be that we use ASCIIFolding to unify ss and ß, which are > both valid alternatives in German. > Looking at the code we found that string limits are not properly checked for > the method involved in the exception: > {code:java} > protected void addPrefixMatch(StringBuilder sb, String surface, String > analyzed, String prefixToken) { > // TODO: apps can try to invert their analysis logic > // here, e.g. downcase the two before checking prefix: > sb.append("<b>"); > sb.append(surface.substring(0, prefixToken.length())); > sb.append("</b>"); > if (prefixToken.length() < surface.length()) { > sb.append(surface.substring(prefixToken.length())); > } > } > {code} > For example, when surface is "daß" and prefixToken is "dass", > surface.substring will fail. 
> A possible solution would be: > {code:java} > protected void addPrefixMatch(StringBuilder sb, String surface, String > analyzed, String prefixToken) { > // TODO: apps can try to invert their analysis logic > // here, e.g. downcase the two before checking prefix: > sb.append("<b>"); > if(prefixToken.length() > surface.length()){ > sb.append(surface); > } > else > { > sb.append(surface.substring(0, prefixToken.length())); > } > sb.append("</b>"); > if (prefixToken.length() < surface.length()) { > sb.append(surface.substring(prefixToken.length())); > } > } > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org