TomMD commented on a change in pull request #2306:
URL: https://github.com/apache/lucene-solr/pull/2306#discussion_r571474014



##########
File path: 
solr/contrib/scripting/src/java/org/apache/solr/scripting/xslt/XSLTLoader.java
##########
@@ -0,0 +1,534 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.scripting.xslt;
+
+import javax.xml.parsers.SAXParserFactory;
+import javax.xml.stream.FactoryConfigurationError;
+import javax.xml.stream.XMLInputFactory;
+import javax.xml.stream.XMLStreamConstants;
+import javax.xml.stream.XMLStreamException;
+import javax.xml.stream.XMLStreamReader;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerException;
+import javax.xml.transform.dom.DOMResult;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.sax.SAXSource;
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.lang.invoke.MethodHandles;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import com.google.common.collect.Lists;
+import org.apache.commons.io.IOUtils;
+import org.apache.solr.common.EmptyEntityResolver;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.SolrException.ErrorCode;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.params.ShardParams;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.params.UpdateParams;
+import org.apache.solr.common.util.ContentStream;
+import org.apache.solr.common.util.ContentStreamBase;
+import org.apache.solr.common.util.StrUtils;
+import org.apache.solr.common.util.XMLErrorLogger;
+import org.apache.solr.core.SolrConfig;
+import org.apache.solr.handler.RequestHandlerUtils;
+import org.apache.solr.handler.UpdateRequestHandler;
+import org.apache.solr.handler.loader.ContentStreamLoader;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.update.AddUpdateCommand;
+import org.apache.solr.update.CommitUpdateCommand;
+import org.apache.solr.update.DeleteUpdateCommand;
+import org.apache.solr.update.RollbackUpdateCommand;
+import org.apache.solr.update.processor.UpdateRequestProcessor;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.xml.sax.InputSource;
+import org.xml.sax.XMLReader;
+
+import static org.apache.solr.common.params.CommonParams.ID;
+import static org.apache.solr.common.params.CommonParams.NAME;
+
+
+public class XSLTLoader extends ContentStreamLoader {
+  private static final Logger log = 
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+  private static final AtomicBoolean WARNED_ABOUT_INDEX_TIME_BOOSTS = new 
AtomicBoolean();
+  static final XMLErrorLogger xmllog = new XMLErrorLogger(log);
+  
+  public static final String CONTEXT_TRANSFORMER_KEY = 
"xsltupdater.transformer";
+
+  private static final String XSLT_CACHE_PARAM = "xsltCacheLifetimeSeconds"; 
+
+  public static final int XSLT_CACHE_DEFAULT = 60;
+  
+  int xsltCacheLifetimeSeconds;
+  XMLInputFactory inputFactory;
+  SAXParserFactory saxFactory;
+
+  @Override
+  public XSLTLoader init(SolrParams args) {
+    // Init StAX parser:
+    inputFactory = XMLInputFactory.newInstance();
+    EmptyEntityResolver.configureXMLInputFactory(inputFactory);
+    inputFactory.setXMLReporter(xmllog);
+    try {
+      // The java 1.6 bundled stax parser (sjsxp) does not currently have a 
thread-safe
+      // XMLInputFactory, as that implementation tries to cache and reuse the
+      // XMLStreamReader.  Setting the parser-specific "reuse-instance" 
property to false
+      // prevents this.
+      // All other known open-source stax parsers (and the bea ref impl)
+      // have thread-safe factories.
+      inputFactory.setProperty("reuse-instance", Boolean.FALSE);
+    } catch (IllegalArgumentException ex) {
+      // Other implementations will likely throw this exception since 
"reuse-instance"
+      // isimplementation specific.
+      log.debug("Unable to set the 'reuse-instance' property for the input 
chain: {}", inputFactory);
+    }
+    
+    // Init SAX parser (for XSL):
+    saxFactory = SAXParserFactory.newInstance();
+    saxFactory.setNamespaceAware(true); // XSL needs this!
+    EmptyEntityResolver.configureSAXParserFactory(saxFactory);
+    
+    xsltCacheLifetimeSeconds = XSLT_CACHE_DEFAULT;
+    if(args != null) {
+      xsltCacheLifetimeSeconds = 
args.getInt(XSLT_CACHE_PARAM,XSLT_CACHE_DEFAULT);
+      log.debug("xsltCacheLifetimeSeconds={}", xsltCacheLifetimeSeconds);
+    }
+    return this;
+  }
+
+  @Override
+  public String getDefaultWT() {
+    return "xml";
+  }
+
+  @Override
+  public void load(SolrQueryRequest req, SolrQueryResponse rsp, ContentStream 
stream, UpdateRequestProcessor processor) throws Exception {
+    final String charset = 
ContentStreamBase.getCharsetFromContentType(stream.getContentType());
+    
+    InputStream is = null;
+    XMLStreamReader parser = null;
+
+    String tr = req.getParams().get(XSLTParams.TR,null);
+    if(tr!=null) {
+      if (req.getCore().getCoreDescriptor().isConfigSetTrusted() == false) {
+          throw new SolrException(ErrorCode.UNAUTHORIZED, "The configset for 
this collection was uploaded without any authentication in place,"
+                  + " and this operation is not available for collections with 
untrusted configsets. To use this feature, re-upload the configset"
+                  + " after enabling authentication and authorization.");
+      }
+
+      final Transformer t = getTransformer(tr,req);
+      final DOMResult result = new DOMResult();
+      
+      // first step: read XML and build DOM using Transformer (this is no 
overhead, as XSL always produces
+      // an internal result DOM tree, we just access it directly as input for 
StAX):
+      try {
+        is = stream.getStream();
+        final InputSource isrc = new InputSource(is);
+        isrc.setEncoding(charset);
+        final XMLReader xmlr = saxFactory.newSAXParser().getXMLReader();
+        xmlr.setErrorHandler(xmllog);
+        xmlr.setEntityResolver(EmptyEntityResolver.SAX_INSTANCE);
+        final SAXSource source = new SAXSource(xmlr, isrc);
+        t.transform(source, result);
+      } catch(TransformerException te) {
+        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, 
te.getMessage(), te);
+      } finally {
+        IOUtils.closeQuietly(is);
+      }
+      // second step: feed the intermediate DOM tree into StAX parser:
+      try {
+        parser = inputFactory.createXMLStreamReader(new 
DOMSource(result.getNode()));

Review comment:
       Oof, yeah @uschindler we feel your pain and are thinking about how to 
solve the problem.
   
   The bot is a bit like a dog with a bone. Once it finds an issue, you can be 
sure any commit that "moves" the issue in a way that changes the identifier 
(e.g., changing the function name) will make it appear again as a "new" issue.  
We are thinking about how to make more stable names so that the above dismissal 
would also cover this case.
   
   A new `ignore bug` command is available: if you comment with only `ignore 
bug`, Muse will consider the issue resolved for the purpose of the status bar 
below, and we'll use this data in the ML.  At this time I don't think GitHub 
has an API to automatically resolve the comment, but that is on our minds 
too.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org

Reply via email to