gerlowskija commented on code in PR #2395:
URL: https://github.com/apache/solr/pull/2395#discussion_r1560999237


##########
solr/core/src/java/org/apache/solr/update/processor/NumFieldLimitingUpdateRequestProcessor.java:
##########
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.update.processor;
+
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
+import java.util.Locale;
+import org.apache.solr.cloud.CloudDescriptor;
+import org.apache.solr.cloud.ZkController;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.cloud.Replica;
+import org.apache.solr.common.cloud.Slice;
+import org.apache.solr.common.util.StrUtils;
+import org.apache.solr.core.CoreDescriptor;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.update.AddUpdateCommand;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class NumFieldLimitingUpdateRequestProcessor extends 
UpdateRequestProcessor {
+  private static final Logger log = 
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  private SolrQueryRequest req;
+  private int fieldThreshold;
+  private int currentNumFields;
+  private boolean warnOnly;
+
+  public NumFieldLimitingUpdateRequestProcessor(
+      SolrQueryRequest req,
+      UpdateRequestProcessor next,
+      int fieldThreshold,
+      int currentNumFields,
+      boolean warnOnly) {
+    super(next);
+    this.req = req;
+    this.fieldThreshold = fieldThreshold;
+    this.currentNumFields = currentNumFields;
+    this.warnOnly = warnOnly;
+  }
+
+  public void processAdd(AddUpdateCommand cmd) throws IOException {
+    if (!isCloudMode() || /* implicit isCloudMode==true */ 
isLeaderThatOwnsTheDoc(cmd)) {
+      if (coreExceedsFieldLimit()) {
+        throwExceptionOrLog(cmd.getPrintableId());
+      } else {
+        if (log.isDebugEnabled()) {
+          log.debug(
+              "Allowing document {}, since current core is under the max-field 
limit (numFields={}, maxThreshold={})",

Review Comment:
   Done 👍 



##########
solr/core/src/java/org/apache/solr/update/processor/NumFieldsMonitor.java:
##########
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.update.processor;
+
+import org.apache.solr.core.AbstractSolrEventListener;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.util.RefCounted;
+
+public class NumFieldsMonitor extends AbstractSolrEventListener {
+
+  private int numFields;
+
+  public NumFieldsMonitor(SolrCore core) {
+    super(core);
+    this.numFields = -1;
+  }
+
+  @Override
+  public void postCommit() {
+    RefCounted<SolrIndexSearcher> indexSearcher = null;
+    try {
+      indexSearcher = getCore().getSearcher();
+      // Get the number of fields directly from the IndexReader instead of the 
Schema object to also
+      // include the

Review Comment:
   > formatting?
   
   Done 👍 
   
   > are there any other interesting places in Solr to use a NumFieldsMonitor?
   
   Good question.  It does seem like a kindof neat thing to have around, but I 
can't think of any other use-cases off the top of my head...



##########
solr/core/src/test/org/apache/solr/update/processor/NumFieldLimitingUpdateRequestProcessorFactoryTest.java:
##########
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.update.processor;
+
+import org.apache.solr.SolrTestCase;
+import org.apache.solr.common.util.NamedList;
+import org.hamcrest.Matchers;
+import org.junit.Before;
+import org.junit.Test;
+
+public class NumFieldLimitingUpdateRequestProcessorFactoryTest extends 
SolrTestCase {
+
+  private NumFieldLimitingUpdateRequestProcessorFactory factory = null;
+
+  @Before
+  public void initFactory() {
+    factory = new NumFieldLimitingUpdateRequestProcessorFactory();
+  }
+
+  @Test
+  public void testReportsErrorIfMaximumFieldsNotProvided() {
+    final var initArgs = new NamedList<>();
+    final IllegalArgumentException thrown =
+        expectThrows(
+            IllegalArgumentException.class,
+            () -> {
+              factory.init(initArgs);
+            });
+    assertThat(thrown.getMessage(), Matchers.containsString("maxFields 
parameter is required"));
+    assertThat(thrown.getMessage(), Matchers.containsString("no value was 
provided"));
+  }
+
+  @Test
+  public void testReportsErrorIfMaximumFieldsIsInvalid() {
+    final var initArgs = new NamedList<>();
+    initArgs.add("maxFields", "nonIntegerValue");
+    IllegalArgumentException thrown =
+        expectThrows(
+            IllegalArgumentException.class,

Review Comment:
   [0] I really hate the formatting that 'tidy' applies here.  Why make this 6 
lines?  What's wrong with:
   
   ```
   var thrown = expectThrows(IllegalArgumentException.class, () -> {
       factory.init(initArgs);
   });
   ```



##########
solr/solrj/src/java/org/apache/solr/common/util/StrUtils.java:
##########
@@ -388,4 +388,19 @@ public static String stringFromReader(Reader inReader) 
throws IOException {
       return stringWriter.toString();
     }
   }
+

Review Comment:
   We've long used Apache common-lang3's `StringUtils`, which offers this and 
all kinds of really nice String-utilities.
   
   But recently we decided to put any use of `StringUtils` (or maybe 
commons-lang altogether?) in our forbidden-api checking, and so we've been 
adding to the Solr `StrUtils` class slowly since then.  I'm sure there's a good 
reason for the forbidden-api, but not knowing the "why" I'm always irked when I 
hit this and have to write yet-another function that we could be getting "for 
free" from any number of libraries.
   
   And then precommit fails because we're doing a reference-equality check, so 
we have to put in a Suppression for that. Just so so so much fun...
   
   
![old-man-cloud](https://github.com/apache/solr/assets/9030708/eb7eb480-ef40-43c9-b517-b977becf38f7)
   



##########
solr/server/solr/configsets/_default/conf/solrconfig.xml:
##########
@@ -939,7 +939,7 @@
   </updateProcessor>
 
   <!-- The update.autoCreateFields property can be turned to false to disable 
schemaless mode -->
-  <updateRequestProcessorChain name="add-unknown-fields-to-the-schema" 
default="${update.autoCreateFields:true}"

Review Comment:
   No, fixed.



##########
solr/core/src/test/org/apache/solr/update/processor/NumFieldLimitingUpdateRequestProcessorIntegrationTest.java:
##########
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.update.processor;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.UUID;
+import java.util.concurrent.TimeUnit;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.cloud.SolrCloudTestCase;
+import org.apache.solr.common.SolrInputDocument;
+import org.hamcrest.Matchers;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class NumFieldLimitingUpdateRequestProcessorIntegrationTest extends 
SolrCloudTestCase {
+
+  private static long ADMIN_OPERATION_TIMEOUT = 15 * 1000;

Review Comment:
   Even better, this was an unused constant.  Removed.



##########
solr/core/src/java/org/apache/solr/update/processor/NumFieldLimitingUpdateRequestProcessorFactory.java:
##########
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.update.processor;
+
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.util.plugin.SolrCoreAware;
+
+/**
+ * This factory generates an UpdateRequestProcessor which fails update 
requests once a core has
+ * exceeded a configurable maximum number of fields. Meant as a safeguard to 
help users notice
+ * potentially-dangerous schema design before performance and stability 
problems start to occur.
+ *
+ * <p>The URP uses the core's {@link SolrIndexSearcher} to judge the current 
number of fields.
+ * Accordingly, it undercounts the number of fields in the core - missing all 
fields added since the
+ * previous searcher was opened. As such, the URP's request-blocking is "best 
effort" - it cannot be
+ * relied on as a precise limit on the number of fields.
+ *
+ * <p>Additionally, the field-counting includes all documents present in the 
index, including any
+ * deleted docs that haven't yet been purged via segment merging. Note that 
this can differ
+ * significantly from the number of fields defined in managed-schema.xml - 
especially when dynamic
+ * fields are enabled. The only way to reduce this field count is to delete 
documents and wait until
+ * the deleted documents have been removed by segment merges. Users may of 
course speed up this
+ * process by tweaking Solr's segment-merging, triggering an "optimize" 
operation, etc.
+ *
+ * <p>{@link NumFieldLimitingUpdateRequestProcessorFactory} accepts two 
configuration parameters:
+ *
+ * <ul>
+ *   <li><code>maxFields</code> - (required) The maximum number of fields 
before update requests
+ *       should be aborted. Once this limit has been exceeded, additional 
update requests will fail
+ *       until fields have been removed or the "maxFields" is increased.
+ *   <li><code>warnOnly</code> - (optional) If <code>true</code> then the URP 
logs verbose warnings
+ *       about the limit being exceeded but doesn't abort update requests. 
Defaults to <code>false
+ *       </code> if not specified
+ * </ul>
+ *
+ * @since 9.6.0
+ */
+public class NumFieldLimitingUpdateRequestProcessorFactory extends 
UpdateRequestProcessorFactory
+    implements SolrCoreAware {
+
+  private static final String MAXIMUM_FIELDS_PARAM = "maxFields";
+  private static final String WARN_ONLY_PARAM = "warnOnly";
+
+  private NumFieldsMonitor numFieldsMonitor;
+  private int maximumFields;
+  private boolean warnOnly;
+
+  @Override
+  public void inform(final SolrCore core) {
+    // register a commit callback for monitoring the number of fields in the 
schema
+    numFieldsMonitor = new NumFieldsMonitor(core);
+    core.getUpdateHandler().registerCommitCallback(numFieldsMonitor);
+    core.registerNewSearcherListener(numFieldsMonitor);
+  }
+
+  public void init(NamedList<?> args) {
+    warnOnly = args.indexOf(WARN_ONLY_PARAM, 0) > 0 ? 
args.getBooleanArg(WARN_ONLY_PARAM) : false;
+
+    if (args.indexOf(MAXIMUM_FIELDS_PARAM, 0) < 0) {
+      throw new IllegalArgumentException(
+          "The "
+              + MAXIMUM_FIELDS_PARAM
+              + " parameter is required for "
+              + getClass().getName()
+              + ", but no value was provided.");
+    }
+    final Object rawMaxFields = args.get(MAXIMUM_FIELDS_PARAM);

Review Comment:
   Agreed - it's really unfortunate.  I looked at a number of URP's and 
NamedListInitializedPlugin's, but couldn't find anything I liked.
   
   Maybe we could file a ticket for that separately; it'd be a really nice 
cleanup for all our NLIP's to have a set of utilities:
   
   ```
   int getRequiredNamedListValueAsInteger(NamedList nl, String paramName);
   Integer getNamedListValueAsInteger(NamedList nl, String paramName);
   boolean getRequiredNamedListValueAsBoolean(NamedList nl, String paramName);
   Boolean getNamedListValueAsBoolean(NamedList nl, String paramName);
   ```
   
   Of course, even better would be to come up with a way for some XML parsing 
library to serialize this all into a strongly typed POJO for us, so we don't 
have to write any map/NL inspection code at all.  We already do essentially the 
same thing for v2 requests - why not lean on Jackson or something similar to do 
that when reading XML config?
   
   But obviously either of those approaches is pretty beyond the scope of this 
PR 😦   



##########
solr/core/src/test-files/solr/configsets/cloud-minimal-field-limiting/conf/solrconfig.xml:
##########
@@ -0,0 +1,73 @@
+<?xml version="1.0" ?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- Minimal solrconfig.xml with /select, /admin and /update only -->
+
+<config>
+
+  <dataDir>${solr.data.dir:}</dataDir>
+
+  <directoryFactory name="DirectoryFactory"
+                    
class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>
+  <schemaFactory class="ClassicIndexSchemaFactory"/>
+
+  <luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion>
+
+  <updateHandler class="solr.DirectUpdateHandler2">
+    <commitWithin>
+      <softCommit>${solr.commitwithin.softcommit:true}</softCommit>
+    </commitWithin>
+    <updateLog class="${solr.ulog:solr.UpdateLog}"></updateLog>
+  </updateHandler>
+
+  <requestHandler name="/select" class="solr.SearchHandler">
+    <lst name="defaults">
+      <str name="echoParams">explicit</str>
+      <str name="indent">true</str>
+      <str name="df">text</str>
+    </lst>
+  </requestHandler>
+
+  <updateProcessor class="solr.NumFieldLimitingUpdateRequestProcessorFactory" 
name="max-fields">
+    <int name="maxFields">${solr.test.maxFields:1234}</int>
+  </updateProcessor>
+  <updateProcessor class="solr.AddSchemaFieldsUpdateProcessorFactory" 
name="add-schema-fields">
+    <lst name="typeMapping">
+      <str name="valueClass">java.lang.String</str>
+      <str name="fieldType">text_general</str>
+      <lst name="copyField">
+        <str name="dest">*_str</str>
+        <int name="maxChars">256</int>
+      </lst>
+      <!-- Use as default mapping instead of defaultFieldType -->
+      <bool name="default">true</bool>
+    </lst>
+  </updateProcessor>
+  <updateRequestProcessorChain name="add-unknown-fields-to-the-schema" 
default="true"
+           processor="max-fields">
+    <processor class="solr.LogUpdateProcessorFactory"/>
+    <processor class="solr.DistributedUpdateProcessorFactory"/>
+    <processor class="solr.RunUpdateProcessorFactory"/>
+  </updateRequestProcessorChain>
+
+  <indexConfig>
+    <mergeScheduler 
class="${solr.mscheduler:org.apache.lucene.index.ConcurrentMergeScheduler}"/>
+:  </indexConfig>

Review Comment:
   WTH?!
   
   I frequently insert `:` or `:wq` into files by accident when I make small 
edits using vim, but shouldn't this make the file invalid XML?  Maybe the 
parser is treating the ':' as the `<indexConfig>` tag's CDATA?  Is it valid for 
an XML tag to have both children and CDATA?
   
   EDIT: Nvm, [I guess it 
is](https://stackoverflow.com/questions/12130228/can-a-xml-element-contain-text-and-child-elements-at-the-same-time)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org
For additional commands, e-mail: issues-h...@solr.apache.org

Reply via email to