malliaridis commented on code in PR #2381:
URL: https://github.com/apache/solr/pull/2381#discussion_r1705590402


##########
solr/modules/hdfs/bin/prepare-snapshot-export.sh:
##########
@@ -0,0 +1,176 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -e
+
+usage() {
+ echo "This script is to support the specific use case of using Hadoop 
specific libraries for managing Solr snapshot."
+ echo "--prepare-snapshot-export <arg>   This command will prepare"
+ echo "                                copylistings for the specified"
+ echo "                                snapshot. This command should only"
+ echo "                                be used only if Solr is deployed"
+ echo "                                with Hadoop and collection index"
+ echo "                                files are stored on a shared"
+ echo "                                file-system e.g. HDFS"
+ echo "--export <arg>                    This command will create a backup"
+ echo "                                for the specified snapshot."
+}
+
+distcp_warning() {
+  echo "SOLR_USE_DISTCP environment variable is not set. \
+        Do you want to use hadoop distcp tool for exporting Solr collection 
snapshot ?"
+}
+
+use_bin_solr_warning() {
+  echo "Please use the bin/solr script to execute this command."
+}
+
+parse_options() {
+  OPTIND=3
+  while getopts ":c:d:s:" o ; do
+    case "${o}" in
+      d)
+        destPath=${OPTARG}
+        ;;
+      s)
+        sourcePath=${OPTARG}
+        ;;
+      c)
+        collectionName=${OPTARG}
+        ;;
+      *)
+        echo "Unknown option ${OPTARG}"
+        usage 1>&2
+        exit 1
+        ;;
+    esac
+  done
+}
+
+prepare_snapshot_export() {
+  #Make sure to cleanup the temporary files.
+  scratch=$(mktemp -d -t solrsnaps.XXXXXXXXXX)
+  function finish {
+    rm -rf "${scratch}"
+  }
+  trap finish EXIT
+
+  if hdfs dfs -test -d "${destPath}" ; then
+      run_solr_snapshot_tool --prepare-snapshot-export "$@" -t "${scratch}"
+
+      hdfs dfs -mkdir -p "${copyListingDirPath}" > /dev/null
+      find "${scratch}" -type f -printf "%f\n" | while read shardId; do
+        echo "Copying the copy-listing for $shardId"
+        hdfs dfs -copyFromLocal "${scratch}/${shardId}" 
"${copyListingDirPath}" > /dev/null
+      done
+  else
+    echo "Directory ${destPath} does not exist."
+    exit 1
+  fi
+}
+
+copy_snapshot_files() {
+  copylisting_dir_path="$1"
+
+  if hdfs dfs -test -d "${copylisting_dir_path}" ; then
+    for shardId in $(hdfs dfs -stat "%n" "${copylisting_dir_path}/*"); do
+      oPath="${destPath}/${snapshotName}/snapshot.${shardId}"
+      echo "Copying the index files for ${shardId} to ${oPath}"
+      ${distCpCmd} -f "${copylisting_dir_path}/${shardId}" "${oPath}" > 
/dev/null
+    done
+  else
+    echo "Directory ${copylisting_dir_path} does not exist."
+    exit 1
+  fi
+}
+
+collectionName=""
+destPath=""
+sourcePath=""
+cmd="$1"
+snapshotName="$2"
+copyListingDirPath=""
+distCpCmd="${SOLR_DISTCP_CMD:-hadoop distcp}"
+scriptDir=$(dirname "$0")
+solrLibPath="${SOLR_LIB_PATH:-${scriptDir}/../../../server/solr-webapp/webapp/WEB-INF/lib/*:${scriptDir}/../../../server/lib/ext/*:${scriptDir}/../lib/*}"
+case "${cmd}" in
+  --create)
+    use_bin_solr_warning
+    ;;
+  --delete)
+    use_bin_solr_warning
+    ;;
+  --list)
+    use_bin_solr_warning
+    ;;
+  --describe)
+    use_bin_solr_warning
+    ;;
+  --prepare-snapshot-export)
+    : "${SOLR_USE_DISTCP:? $(distcp_warning)}"
+
+    parse_options "$@"
+
+    : "${destPath:? Please specify destination directory using -d option}"
+
+    copyListingDirPath="${destPath}/copylistings"
+    prepare_snapshot_export "${@:2}"
+    echo "Done. GoodBye!"
+    ;;
+  --export)
+    if [ -z "${SOLR_USE_DISTCP}" ]; then
+      echo "Not using the distcp command"
+      use_bin_solr_warning
+      echo "Done. GoodBye!"
+      exit 0
+    fi
+
+    parse_options "$@"
+
+    : "${snapshotName:? Please specify the name of the snapshot}"
+    : "${destPath:? Please specify destination directory using -d option}"
+
+    if [ -n "${collectionName}" ] && [ -n "${sourcePath}" ]; then
+      echo "The -c and -s options can not be specified together"
+      exit 1
+    fi
+
+    if [ -z "${collectionName}" ] && [ -z "${sourcePath}" ]; then
+      echo "At least one of options (-c or -s) must be specified"
+      exit 1
+    fi
+
+    if [ -n "${collectionName}" ]; then
+      copyListingDirPath="${destPath}/${snapshotName}/copylistings"
+      prepare_snapshot_export "${@:2}"
+      copy_snapshot_files "${destPath}/${snapshotName}/copylistings"
+      hdfs dfs -rm -r -f -skipTrash "${destPath}/${snapshotName}/copylistings" 
> /dev/null
+    else
+      copy_snapshot_files "${sourcePath}/copylistings"
+      echo "Copying the collection meta-data to ${destPath}/${snapshotName}"
+      ${distCpCmd} "${sourcePath}/${snapshotName}/*" 
"${destPath}/${snapshotName}/" > /dev/null
+    fi
+
+    echo "Done. GoodBye!"
+    ;;
+  --help)

Review Comment:
   I would use `-h|--help)` for consistency here.



##########
solr/core/src/java/org/apache/solr/cli/SolrCLI.java:
##########
@@ -310,6 +310,12 @@ private static Tool newTool(String toolType) throws 
Exception {
     else if ("post".equals(toolType)) return new PostTool();
     else if ("postlogs".equals(toolType)) return new PostLogsTool();
     else if ("version".equals(toolType)) return new VersionTool();
+    else if ("snapshot-create".equals(toolType)) return new 
SnapshotCreateTool();
+    else if ("snapshot-delete".equals(toolType)) return new 
SnapshotDeleteTool();
+    else if ("snapshot-list".equals(toolType)) return new SnapshotListTool();
+    else if ("snapshot-describe".equals(toolType)) return new 
SnapshotDescribeTool();
+    else if ("snapshot-prepare-export".equals(toolType)) return new 
SnapshotPrepareExportTool();
+    else if ("snapshot-export".equals(toolType)) return new 
SnapshotExportTool();

Review Comment:
   Is there a specific reason you decided to use separate CLI classes for 
individual snapshot actions? Because this breaks "visually" the consistency 
(see `zk` command). I would personally prefer single command `snapshot` or 
`snapshots` and preferably a single entry point that handles all snapshot 
related actions and argument parsing. This way we would also avoid some code 
duplication.
   
   Individual classes may still be used for handling individual actions, but in 
a more simplified manner, as the arguments are already parsed and prepared for 
the action.
   
   And a more general "help" output with "--help" that first lists all options 
available for snapshoting, and eventually provides information for HDFS 
limitations / requirements for specific arguments could be printed.



##########
solr/core/src/java/org/apache/solr/cli/SolrCLI.java:
##########
@@ -310,6 +310,12 @@ private static Tool newTool(String toolType) throws 
Exception {
     else if ("post".equals(toolType)) return new PostTool();
     else if ("postlogs".equals(toolType)) return new PostLogsTool();
     else if ("version".equals(toolType)) return new VersionTool();
+    else if ("snapshot-create".equals(toolType)) return new 
SnapshotCreateTool();
+    else if ("snapshot-delete".equals(toolType)) return new 
SnapshotDeleteTool();
+    else if ("snapshot-list".equals(toolType)) return new SnapshotListTool();
+    else if ("snapshot-describe".equals(toolType)) return new 
SnapshotDescribeTool();
+    else if ("snapshot-prepare-export".equals(toolType)) return new 
SnapshotPrepareExportTool();
+    else if ("snapshot-export".equals(toolType)) return new 
SnapshotExportTool();

Review Comment:
   Additionally, with the `OptionGroup` it is possible to allow only one of 
many options (`--create`, `--list` etc.).
   
   And don't get me wrong, I like the short classes more than an overpowered 
class. So my preference is kind of mixed feelings here.



##########
solr/modules/hdfs/src/java/org/apache/solr/hdfs/snapshots/PrepareSnapshotExportCLI.java:
##########
@@ -0,0 +1,342 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.hdfs.snapshots;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.DefaultParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.hadoop.fs.Path;
+import org.apache.solr.cli.CLIO;
+import org.apache.solr.cli.SolrCLI;
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.impl.CloudSolrClient;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.client.solrj.response.CollectionAdminResponse;
+import org.apache.solr.common.cloud.DocCollection;
+import org.apache.solr.common.cloud.Replica;
+import org.apache.solr.common.cloud.Slice;
+import org.apache.solr.common.params.CollectionAdminParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.core.snapshots.CollectionSnapshotMetaData;
+import 
org.apache.solr.core.snapshots.CollectionSnapshotMetaData.CoreSnapshotMetaData;
+import org.apache.solr.core.snapshots.SolrSnapshotManager;
+
+/**
+ * This class provides the commandline to prepare a snapshot export with HDFS. 
Other snapshot
+ * related commands live in org.apache.solr.cli package.
+ */
+public class PrepareSnapshotExportCLI implements Closeable, CLIO {
+
+  private static final String COLLECTION = "c";
+  private static final String TEMP_DIR = "t";
+  private static final String DEST_DIR = "d";
+  private static final String HDFS_PATH_PREFIX = "p";
+  private static final String BACKUP_REPO_NAME = "r";
+  private static final String ASYNC_REQ_ID = "i";

Review Comment:
   Would also be nice to support the long form of the options, so that it is 
consistent with the other scripts.
   
   May also be a separate ticket though.



##########
solr/core/src/java/org/apache/solr/cli/SnapshotPrepareExportTool.java:
##########
@@ -0,0 +1,322 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.cli;
+
+import java.io.IOException;
+import java.io.PrintStream;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.Option;
+import org.apache.solr.client.solrj.SolrClient;
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.impl.CloudSolrClient;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.client.solrj.response.CollectionAdminResponse;
+import org.apache.solr.common.cloud.DocCollection;
+import org.apache.solr.common.cloud.Replica;
+import org.apache.solr.common.cloud.Slice;
+import org.apache.solr.common.params.CollectionAdminParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.core.snapshots.CollectionSnapshotMetaData;
+import org.apache.solr.core.snapshots.SolrSnapshotManager;
+
+/**
+ * Supports prepare-snapshot-export command in the bin/solr script. This 
command should only be used
+ * only if Solr is deployed with Hadoop and collection index files are stored 
on a shared
+ * file-system e.g. HDFS This class should probably be in the hdfs module.
+ */
+public class SnapshotPrepareExportTool extends ToolBase {
+
+  public SnapshotPrepareExportTool() {
+    this(CLIO.getOutStream());
+  }
+
+  public SnapshotPrepareExportTool(PrintStream stdout) {
+    super(stdout);
+  }
+
+  @Override
+  public String getName() {
+    return "snapshot-prepare-export";
+  }
+
+  @Override
+  public List<Option> getOptions() {

Review Comment:
   Options have differences to `PrepareSnapshotExportCLI`. Should they maybe be 
synced?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org
For additional commands, e-mail: issues-h...@solr.apache.org

Reply via email to