cshannon commented on code in PR #3876:
URL: https://github.com/apache/accumulo/pull/3876#discussion_r1367721118


##########
core/src/main/java/org/apache/accumulo/core/metadata/schema/RootTabletMetadata.java:
##########
@@ -59,11 +60,14 @@ public class RootTabletMetadata {
   };
 
   // JSON Mapping Version 1. Released with Accumulo version 2.1.0
-  private static final int VERSION = 1;
+  private static final int VERSION_1 = 1;
+  // JSON Mapping Version 1. Released with Accumulo version 3,1

Review Comment:
   ```suggestion
    // JSON Mapping Version 2. Released with Accumulo version 3.1
   ```



##########
server/manager/src/main/java/org/apache/accumulo/manager/upgrade/Upgrader11to12.java:
##########
@@ -0,0 +1,259 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.accumulo.manager.upgrade;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.apache.accumulo.core.metadata.RootTable.ZROOT_TABLET;
+import static org.apache.accumulo.server.AccumuloDataVersion.METADATA_FILE_JSON_ENCODING;
+
+import java.util.Map;
+import java.util.TreeMap;
+
+import org.apache.accumulo.core.client.Accumulo;
+import org.apache.accumulo.core.client.AccumuloClient;
+import org.apache.accumulo.core.client.BatchWriter;
+import org.apache.accumulo.core.client.MutationsRejectedException;
+import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.dataImpl.KeyExtent;
+import org.apache.accumulo.core.fate.zookeeper.ZooUtil;
+import org.apache.accumulo.core.metadata.StoredTabletFile;
+import org.apache.accumulo.core.metadata.schema.Ample;
+import org.apache.accumulo.core.metadata.schema.MetadataSchema;
+import org.apache.accumulo.core.metadata.schema.RootTabletMetadata;
+import org.apache.accumulo.core.metadata.schema.UpgraderDeprecatedConstants;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.core.util.ComparablePair;
+import org.apache.accumulo.server.ServerContext;
+import org.apache.hadoop.io.Text;
+import org.apache.zookeeper.KeeperException;
+import org.apache.zookeeper.data.Stat;
+import org.checkerframework.checker.nullness.qual.NonNull;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.gson.JsonSyntaxException;
+
+public class Upgrader11to12 implements Upgrader {
+
+  private static final Logger log = LoggerFactory.getLogger(Upgrader11to12.class);
+
+  @Override
+  public void upgradeZookeeper(@NonNull ServerContext context) {
+    log.debug("Upgrade ZooKeeper: upgrading to data version {}", METADATA_FILE_JSON_ENCODING);
+    var rootBase = ZooUtil.getRoot(context.getInstanceID()) + ZROOT_TABLET;
+
+    try {
+      var zrw = context.getZooReaderWriter();
+      Stat stat = new Stat();
+      byte[] rootData = zrw.getData(rootBase, stat);
+
+      String json = new String(rootData, UTF_8);
+      if (RootTabletMetadata.Data.needsConversion(json)) {
+        RootTabletMetadata rtm = RootTabletMetadata.upgrade(json);
+        zrw.overwritePersistentData(rootBase, rtm.toJson().getBytes(UTF_8), stat.getVersion());
+      }
+    } catch (InterruptedException ex) {
+      Thread.currentThread().interrupt();
+      throw new IllegalStateException(
+          "Could not read root metadata from ZooKeeper due to interrupt", ex);
+    } catch (KeeperException ex) {
+      throw new IllegalStateException(
+          "Could not read root metadata from ZooKeeper because of ZooKeeper exception", ex);
+    }
+  }
+
+  @Override
+  public void upgradeRoot(@NonNull ServerContext context) {
+    log.debug("Upgrade root: upgrading to data version {}", METADATA_FILE_JSON_ENCODING);
+
+    var rootName = Ample.DataLevel.METADATA.metaTable();
+    var filesToConvert = getFileReferences(context, rootName);
+    convertFileReferences(context, rootName, filesToConvert);
+    deleteObsoleteReferences(context, rootName);
+  }
+
+  @Override
+  public void upgradeMetadata(@NonNull ServerContext context) {
+    log.debug("Upgrade metadata: upgrading to data version {}", METADATA_FILE_JSON_ENCODING);
+
+    var metaName = Ample.DataLevel.USER.metaTable();
+    var filesToConvert = getFileReferences(context, metaName);
+    convertFileReferences(context, metaName, filesToConvert);
+    deleteObsoleteReferences(context, metaName);
+  }
+
+  private Map<ComparablePair<KeyExtent,String>,Value>
+      getFileReferences(@NonNull final ServerContext context, @NonNull final String tableName) {
+
+    log.trace("gather file references for table: {}", tableName);
+
+    Map<ComparablePair<KeyExtent,String>,Value> filesToConvert = new TreeMap<>();
+
+    try (Scanner scanner = context.createScanner(tableName, Authorizations.EMPTY)) {
+      scanner.fetchColumnFamily(MetadataSchema.TabletsSection.DataFileColumnFamily.STR_NAME);
+      scanner.forEach((k, v) -> {
+        KeyExtent ke = KeyExtent.fromMetaRow(k.getRow());
+        String file = k.getColumnQualifier().toString();
+        // filter out references that are in the correct format already.
+        if (fileNeedsConversion(file)) {
+          var prev = filesToConvert.put(new ComparablePair<>(ke, file), v);
+          if (prev != null) {
+            throw new IllegalStateException(
+                "upgrade for table: " + tableName + " aborted, would have missed: " + prev);
+          }
+        }
+      });
+    } catch (TableNotFoundException ex) {
+      throw new IllegalStateException("failed to read metadata table for upgrade", ex);
+    }
+    log.debug("Number of files to convert for table: {}, number of files: {}", tableName,
+        filesToConvert.size());
+    return filesToConvert;
+  }
+
+  private void convertFileReferences(final ServerContext context, final String tableName,
+      Map<ComparablePair<KeyExtent,String>,Value> filesToConvert) {
+
+    // not using ample to avoid StoredTabletFile because old file ref is incompatible
+    try (AccumuloClient c = Accumulo.newClient().from(context.getProperties()).build();

Review Comment:
   Right now the conversion happens in two steps: the metadata is first scanned to load the file references into a map, and then that map is iterated over and updated. One thing I am wondering is whether this is a problem if there are a lot of files in the system. I'm not sure what a practical upper bound would be since it depends on a lot of factors, but if it's, say, hundreds of thousands or millions, that would be a lot to load at once. We could try to break it up into batches (maybe a tablet at a time, or some limit on files at one time while keeping track of where we left off); see the sketch after this comment.
   
   I haven't really tried this before, but I'm also wondering if we could just modify the files while scanning, in the same loop, to avoid having to load everything into a map. The main thing I am not sure of is whether that would cause issues with the updates; we might need to use an isolated scanner, but that also buffers the entire row into memory.
   
   @keith-turner any thoughts on this?
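   Just to illustrate the batching idea, here is a rough, untested sketch (names like `BATCH_LIMIT` and `convertFileReferencesInBatches` are made up; the Accumulo calls mirror the ones already used in this upgrader). It caps how many references are held per pass and resumes the scan from the last row seen:
   ```java
// hypothetical sketch, not part of this PR
private static final int BATCH_LIMIT = 10_000;

private void convertFileReferencesInBatches(ServerContext context, String tableName) {
  Text resumeRow = null; // last row seen; the next pass resumes here (inclusive)
  boolean more = true;
  try (AccumuloClient client = Accumulo.newClient().from(context.getProperties()).build();
      BatchWriter writer = client.createBatchWriter(tableName)) {
    while (more) {
      Map<ComparablePair<KeyExtent,String>,Value> batch = new TreeMap<>();
      try (Scanner scanner = context.createScanner(tableName, Authorizations.EMPTY)) {
        // resuming inclusively is safe: already converted qualifiers fail fileNeedsConversion()
        scanner.setRange(resumeRow == null ? new Range() : new Range(resumeRow, true, null, true));
        scanner.fetchColumnFamily(MetadataSchema.TabletsSection.DataFileColumnFamily.STR_NAME);
        more = false;
        for (Map.Entry<Key,Value> entry : scanner) {
          if (batch.size() >= BATCH_LIMIT) {
            more = true; // stop here and pick up from resumeRow on the next pass
            break;
          }
          resumeRow = entry.getKey().getRow();
          String file = entry.getKey().getColumnQualifier().toString();
          if (fileNeedsConversion(file)) {
            batch.put(new ComparablePair<>(KeyExtent.fromMetaRow(resumeRow), file), entry.getValue());
          }
        }
      }
      // convert just this batch: add the new json qualifier and delete the old one
      for (Map.Entry<ComparablePair<KeyExtent,String>,Value> e : batch.entrySet()) {
        Text row = e.getKey().getFirst().toMetaRow();
        Text fileJson = new Text(StoredTabletFile.serialize(e.getKey().getSecond(), new Range()));
        Mutation add = new Mutation(row);
        add.at().family(MetadataSchema.TabletsSection.DataFileColumnFamily.STR_NAME)
            .qualifier(fileJson).put(e.getValue());
        Mutation delete = new Mutation(row);
        delete.at().family(MetadataSchema.TabletsSection.DataFileColumnFamily.STR_NAME)
            .qualifier(e.getKey().getSecond()).delete();
        writer.addMutation(add);
        writer.addMutation(delete);
      }
      writer.flush(); // make this batch durable before scanning for the next one
    }
  } catch (Exception ex) {
    throw new IllegalStateException("batched conversion failed for table: " + tableName, ex);
  }
}
   ```
   The trade-off is that rows at batch boundaries get re-read on the next pass, but nothing earlier is re-scanned, so the table is still read roughly once overall.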



##########
core/src/test/java/org/apache/accumulo/core/metadata/schema/RootTabletMetadataTest.java:
##########
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.accumulo.core.metadata.schema;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class RootTabletMetadataTest {
+  private static final Logger LOG = LoggerFactory.getLogger(RootTabletMetadataTest.class);
+
+  @Test
+  public void convertRoot1File() {
+    String root21ZkData =
+        "{\"version\":1,\"columnValues\":{\"file\":{\"hdfs://localhost:8020/accumulo/tables/+r/root_tablet/A000000v.rf\":\"1368,61\"},\"last\":{\"100025091780006\":\"localhost:9997\"},\"loc\":{\"100025091780006\":\"localhost:9997\"},\"srv\":{\"dir\":\"root_tablet\",\"flush\":\"3\",\"lock\":\"tservers/localhost:9997/zlock#9db8961a-4ee9-400e-8e80-3353148baadd#0000000000$100025091780006\",\"time\":\"L53\"},\"~tab\":{\"~pr\":\"\\u0000\"}}}";
+
+    RootTabletMetadata rtm = RootTabletMetadata.upgrade(root21ZkData);
+    LOG.debug("converted column values: {}", rtm.toTabletMetadata().getFiles());
+    assertEquals(1, rtm.toTabletMetadata().getFiles().size());
+
+    LOG.info("FILES: {}", rtm.toTabletMetadata().getFilesMap());
+  }
+
+  @Test
+  public void convertRoot2Files() {
+    String root212ZkData2Files =
+        "{\"version\":1,\"columnValues\":{\"file\":{\"hdfs://localhost:8020/accumulo/tables/+r/root_tablet/00000_00000.rf\":\"0,0\",\"hdfs://localhost:8020/accumulo/tables/+r/root_tablet/F000000c.rf\":\"926,18\"},\"last\":{\"10001a84d7d0005\":\"localhost:9997\"},\"loc\":{\"10001a84d7d0005\":\"localhost:9997\"},\"srv\":{\"dir\":\"root_tablet\",\"flush\":\"2\",\"lock\":\"tservers/localhost:9997/zlock#d21adaa4-0f97-4004-9ff8-cce9dbb6687f#0000000000$10001a84d7d0005\",\"time\":\"L6\"},\"~tab\":{\"~pr\":\"\\u0000\"}}}\n";
+
+    RootTabletMetadata rtm = RootTabletMetadata.upgrade(root212ZkData2Files);
+    LOG.debug("converted column values: {}", rtm.toTabletMetadata());
+    assertEquals(2, rtm.toTabletMetadata().getFiles().size());

Review Comment:
   Besides just checking the size, we should also verify correctness. It would be good to verify that the files match the previous path after the upgrade and also that the ranges are infinite, etc.
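   For example, something like this (untested; accessor names such as `getPath()` and `hasRange()` are assumptions, and it needs static imports of `assertTrue`/`assertFalse`):
   ```java
// hypothetical extra assertions; adjust accessor names to the real StoredTabletFile API
rtm.toTabletMetadata().getFiles().forEach(storedFile -> {
  // the converted reference should still point at the original root_tablet path
  assertTrue(storedFile.getPath().toString()
      .startsWith("hdfs://localhost:8020/accumulo/tables/+r/root_tablet/"));
  // upgraded references should carry an infinite range
  assertFalse(storedFile.hasRange());
});
   ```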



##########
server/manager/src/main/java/org/apache/accumulo/manager/upgrade/Upgrader11to12.java:
##########
@@ -0,0 +1,259 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.accumulo.manager.upgrade;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.apache.accumulo.core.metadata.RootTable.ZROOT_TABLET;
+import static org.apache.accumulo.server.AccumuloDataVersion.METADATA_FILE_JSON_ENCODING;
+
+import java.util.Map;
+import java.util.TreeMap;
+
+import org.apache.accumulo.core.client.Accumulo;
+import org.apache.accumulo.core.client.AccumuloClient;
+import org.apache.accumulo.core.client.BatchWriter;
+import org.apache.accumulo.core.client.MutationsRejectedException;
+import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.dataImpl.KeyExtent;
+import org.apache.accumulo.core.fate.zookeeper.ZooUtil;
+import org.apache.accumulo.core.metadata.StoredTabletFile;
+import org.apache.accumulo.core.metadata.schema.Ample;
+import org.apache.accumulo.core.metadata.schema.MetadataSchema;
+import org.apache.accumulo.core.metadata.schema.RootTabletMetadata;
+import org.apache.accumulo.core.metadata.schema.UpgraderDeprecatedConstants;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.core.util.ComparablePair;
+import org.apache.accumulo.server.ServerContext;
+import org.apache.hadoop.io.Text;
+import org.apache.zookeeper.KeeperException;
+import org.apache.zookeeper.data.Stat;
+import org.checkerframework.checker.nullness.qual.NonNull;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.gson.JsonSyntaxException;
+
+public class Upgrader11to12 implements Upgrader {
+
+  private static final Logger log = LoggerFactory.getLogger(Upgrader11to12.class);
+
+  @Override
+  public void upgradeZookeeper(@NonNull ServerContext context) {
+    log.debug("Upgrade ZooKeeper: upgrading to data version {}", METADATA_FILE_JSON_ENCODING);
+    var rootBase = ZooUtil.getRoot(context.getInstanceID()) + ZROOT_TABLET;
+
+    try {
+      var zrw = context.getZooReaderWriter();
+      Stat stat = new Stat();
+      byte[] rootData = zrw.getData(rootBase, stat);
+
+      String json = new String(rootData, UTF_8);
+      if (RootTabletMetadata.Data.needsConversion(json)) {
+        RootTabletMetadata rtm = RootTabletMetadata.upgrade(json);
+        zrw.overwritePersistentData(rootBase, rtm.toJson().getBytes(UTF_8), stat.getVersion());
+      }
+    } catch (InterruptedException ex) {
+      Thread.currentThread().interrupt();
+      throw new IllegalStateException(
+          "Could not read root metadata from ZooKeeper due to interrupt", ex);
+    } catch (KeeperException ex) {
+      throw new IllegalStateException(
+          "Could not read root metadata from ZooKeeper because of ZooKeeper exception", ex);
+    }
+  }
+
+  @Override
+  public void upgradeRoot(@NonNull ServerContext context) {
+    log.debug("Upgrade root: upgrading to data version {}", METADATA_FILE_JSON_ENCODING);
+
+    var rootName = Ample.DataLevel.METADATA.metaTable();
+    var filesToConvert = getFileReferences(context, rootName);
+    convertFileReferences(context, rootName, filesToConvert);
+    deleteObsoleteReferences(context, rootName);
+  }
+
+  @Override
+  public void upgradeMetadata(@NonNull ServerContext context) {
+    log.debug("Upgrade metadata: upgrading to data version {}", METADATA_FILE_JSON_ENCODING);
+
+    var metaName = Ample.DataLevel.USER.metaTable();
+    var filesToConvert = getFileReferences(context, metaName);
+    convertFileReferences(context, metaName, filesToConvert);
+    deleteObsoleteReferences(context, metaName);
+  }
+
+  private Map<ComparablePair<KeyExtent,String>,Value>
+      getFileReferences(@NonNull final ServerContext context, @NonNull final String tableName) {
+
+    log.trace("gather file references for table: {}", tableName);
+
+    Map<ComparablePair<KeyExtent,String>,Value> filesToConvert = new TreeMap<>();
+
+    try (Scanner scanner = context.createScanner(tableName, Authorizations.EMPTY)) {
+      scanner.fetchColumnFamily(MetadataSchema.TabletsSection.DataFileColumnFamily.STR_NAME);
+      scanner.forEach((k, v) -> {
+        KeyExtent ke = KeyExtent.fromMetaRow(k.getRow());
+        String file = k.getColumnQualifier().toString();
+        // filter out references that are in the correct format already.
+        if (fileNeedsConversion(file)) {
+          var prev = filesToConvert.put(new ComparablePair<>(ke, file), v);
+          if (prev != null) {
+            throw new IllegalStateException(
+                "upgrade for table: " + tableName + " aborted, would have missed: " + prev);
+          }
+        }
+      });
+    } catch (TableNotFoundException ex) {
+      throw new IllegalStateException("failed to read metadata table for upgrade", ex);
+    }
+    log.debug("Number of files to convert for table: {}, number of files: {}", tableName,
+        filesToConvert.size());
+    return filesToConvert;
+  }
+
+  private void convertFileReferences(final ServerContext context, final String tableName,
+      Map<ComparablePair<KeyExtent,String>,Value> filesToConvert) {
+
+    // not using ample to avoid StoredTabletFile because old file ref is incompatible
+    try (AccumuloClient c = Accumulo.newClient().from(context.getProperties()).build();
+        BatchWriter batchWriter = c.createBatchWriter(tableName)) {
+      filesToConvert.forEach((refPair, value) -> {
+        try {
+          log.trace("update file reference for table: {}. row: {} to: {} value: {}", tableName,
+              refPair.getFirst().toMetaRow(), refPair.getSecond(), value);
+
+          var row = refPair.getFirst().toMetaRow();
+          var fileJson = new Text(StoredTabletFile.serialize(refPair.getSecond(), new Range()));

Review Comment:
   ```suggestion
          var fileJson = StoredTabletFile.of(new Path(refPair.getSecond())).getMetadataText();
   ```
   Both ways are equivalent so this is optional but this is a little nicer not 
having to create the empty range.



##########
core/src/main/java/org/apache/accumulo/core/metadata/schema/RootTabletMetadata.java:
##########
@@ -73,10 +77,27 @@ private static class Data {
      */
     private final TreeMap<String,TreeMap<String,String>> columnValues;
 
-    public Data(int version, TreeMap<String,TreeMap<String,String>> columnValues) {
+    private Data(int version, TreeMap<String,TreeMap<String,String>> columnValues) {
       this.version = version;
       this.columnValues = columnValues;
     }
+
+    public int getVersion() {
+      return version;
+    }
+
+    /**
+     * For external use, return a copy so the original remains immutable.
+     */
+    public TreeMap<String,TreeMap<String,String>> getColumnValues() {
+      return new TreeMap<>(columnValues);

Review Comment:
   Instead of making a copy and using TreeMap, I would recommend returning an immutable map, and then we don't need to worry about copying data (assuming the values are supposed to be immutable). Since it's a SortedMap you could look at using Guava's `ImmutableSortedMap`. It might be a little tricky because the map contains another map, so you may need to make them both immutable depending on how it's used. You could also just wrap the map(s) using `Collections.unmodifiableSortedMap`.
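   For instance, a small untested sketch of the wrapping option (the getter's return type widens from `TreeMap` to `SortedMap`, Gson can still populate the underlying `TreeMap` field, and it needs `java.util.Collections`/`java.util.SortedMap` imports):
   ```java
// untested sketch: keep the TreeMap field for Gson, but never hand it out mutable.
// The outer map is rebuilt shallowly; the inner maps are shared but wrapped.
public SortedMap<String,SortedMap<String,String>> getColumnValues() {
  TreeMap<String,SortedMap<String,String>> view = new TreeMap<>();
  columnValues.forEach((fam, quals) -> view.put(fam, Collections.unmodifiableSortedMap(quals)));
  return Collections.unmodifiableSortedMap(view);
}
   ```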



##########
server/manager/src/main/java/org/apache/accumulo/manager/upgrade/Upgrader11to12.java:
##########
@@ -0,0 +1,259 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.accumulo.manager.upgrade;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.apache.accumulo.core.metadata.RootTable.ZROOT_TABLET;
+import static org.apache.accumulo.server.AccumuloDataVersion.METADATA_FILE_JSON_ENCODING;
+
+import java.util.Map;
+import java.util.TreeMap;
+
+import org.apache.accumulo.core.client.Accumulo;
+import org.apache.accumulo.core.client.AccumuloClient;
+import org.apache.accumulo.core.client.BatchWriter;
+import org.apache.accumulo.core.client.MutationsRejectedException;
+import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.dataImpl.KeyExtent;
+import org.apache.accumulo.core.fate.zookeeper.ZooUtil;
+import org.apache.accumulo.core.metadata.StoredTabletFile;
+import org.apache.accumulo.core.metadata.schema.Ample;
+import org.apache.accumulo.core.metadata.schema.MetadataSchema;
+import org.apache.accumulo.core.metadata.schema.RootTabletMetadata;
+import org.apache.accumulo.core.metadata.schema.UpgraderDeprecatedConstants;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.core.util.ComparablePair;
+import org.apache.accumulo.server.ServerContext;
+import org.apache.hadoop.io.Text;
+import org.apache.zookeeper.KeeperException;
+import org.apache.zookeeper.data.Stat;
+import org.checkerframework.checker.nullness.qual.NonNull;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.gson.JsonSyntaxException;
+
+public class Upgrader11to12 implements Upgrader {
+
+  private static final Logger log = LoggerFactory.getLogger(Upgrader11to12.class);
+
+  @Override
+  public void upgradeZookeeper(@NonNull ServerContext context) {
+    log.debug("Upgrade ZooKeeper: upgrading to data version {}", METADATA_FILE_JSON_ENCODING);
+    var rootBase = ZooUtil.getRoot(context.getInstanceID()) + ZROOT_TABLET;
+
+    try {
+      var zrw = context.getZooReaderWriter();
+      Stat stat = new Stat();
+      byte[] rootData = zrw.getData(rootBase, stat);
+
+      String json = new String(rootData, UTF_8);
+      if (RootTabletMetadata.Data.needsConversion(json)) {
+        RootTabletMetadata rtm = RootTabletMetadata.upgrade(json);
+        zrw.overwritePersistentData(rootBase, rtm.toJson().getBytes(UTF_8), stat.getVersion());
+      }
+    } catch (InterruptedException ex) {
+      Thread.currentThread().interrupt();
+      throw new IllegalStateException(
+          "Could not read root metadata from ZooKeeper due to interrupt", ex);
+    } catch (KeeperException ex) {
+      throw new IllegalStateException(
+          "Could not read root metadata from ZooKeeper because of ZooKeeper exception", ex);
+    }
+  }
+
+  @Override
+  public void upgradeRoot(@NonNull ServerContext context) {
+    log.debug("Upgrade root: upgrading to data version {}", METADATA_FILE_JSON_ENCODING);
+
+    var rootName = Ample.DataLevel.METADATA.metaTable();
+    var filesToConvert = getFileReferences(context, rootName);
+    convertFileReferences(context, rootName, filesToConvert);
+    deleteObsoleteReferences(context, rootName);
+  }
+
+  @Override
+  public void upgradeMetadata(@NonNull ServerContext context) {
+    log.debug("Upgrade metadata: upgrading to data version {}", METADATA_FILE_JSON_ENCODING);
+
+    var metaName = Ample.DataLevel.USER.metaTable();
+    var filesToConvert = getFileReferences(context, metaName);
+    convertFileReferences(context, metaName, filesToConvert);
+    deleteObsoleteReferences(context, metaName);
+  }
+
+  private Map<ComparablePair<KeyExtent,String>,Value>
+      getFileReferences(@NonNull final ServerContext context, @NonNull final String tableName) {
+
+    log.trace("gather file references for table: {}", tableName);
+
+    Map<ComparablePair<KeyExtent,String>,Value> filesToConvert = new TreeMap<>();
+
+    try (Scanner scanner = context.createScanner(tableName, Authorizations.EMPTY)) {
+      scanner.fetchColumnFamily(MetadataSchema.TabletsSection.DataFileColumnFamily.STR_NAME);
+      scanner.forEach((k, v) -> {
+        KeyExtent ke = KeyExtent.fromMetaRow(k.getRow());
+        String file = k.getColumnQualifier().toString();
+        // filter out references that are in the correct format already.
+        if (fileNeedsConversion(file)) {
+          var prev = filesToConvert.put(new ComparablePair<>(ke, file), v);
+          if (prev != null) {
+            throw new IllegalStateException(
+                "upgrade for table: " + tableName + " aborted, would have missed: " + prev);
+          }
+        }
+      });
+    } catch (TableNotFoundException ex) {
+      throw new IllegalStateException("failed to read metadata table for upgrade", ex);
+    }
+    log.debug("Number of files to convert for table: {}, number of files: {}", tableName,
+        filesToConvert.size());
+    return filesToConvert;
+  }
+
+  private void convertFileReferences(final ServerContext context, final String tableName,
+      Map<ComparablePair<KeyExtent,String>,Value> filesToConvert) {
+
+    // not using ample to avoid StoredTabletFile because old file ref is incompatible
+    try (AccumuloClient c = Accumulo.newClient().from(context.getProperties()).build();
+        BatchWriter batchWriter = c.createBatchWriter(tableName)) {
+      filesToConvert.forEach((refPair, value) -> {
+        try {
+          log.trace("update file reference for table: {}. row: {} to: {} value: {}", tableName,
+              refPair.getFirst().toMetaRow(), refPair.getSecond(), value);
+
+          var row = refPair.getFirst().toMetaRow();
+          var fileJson = new Text(StoredTabletFile.serialize(refPair.getSecond(), new Range()));
+
+          Mutation m = new Mutation(row);
+          m.at().family(MetadataSchema.TabletsSection.DataFileColumnFamily.STR_NAME)
+              .qualifier(fileJson).put(value);
+
+          log.trace("table: {}, adding: {}", tableName, m.prettyPrint());
+          batchWriter.addMutation(m);
+
+          Mutation delete = new Mutation(row);
+          delete.at().family(MetadataSchema.TabletsSection.DataFileColumnFamily.STR_NAME)
+              .qualifier(refPair.getSecond()).delete();
+
+          log.trace("table {}: deleting: {}", tableName, delete.prettyPrint());
+          batchWriter.addMutation(delete);
+        } catch (MutationsRejectedException ex) {
+          throw new IllegalStateException("Failed to update file entries for table: " + tableName,
+              ex);
+        }
+      });
+      batchWriter.flush();
+    } catch (Exception ex) {
+      throw new IllegalStateException(ex);
+    }
+  }
+
+  /**
+   * Removes chopped and external compaction references that have obsolete encoding that is
+   * incompatible with StoredTabletFile json encoding. These references should be rare. If they are
+   * present, some operations likely terminated abnormally under the old version before shutdown.
+   * The deletions are logged so those operations may be re-run if desired.
+   */
+  private void deleteObsoleteReferences(ServerContext context, String tableName) {
+    log.debug("processing obsolete references for table: {}", tableName);
+    try (AccumuloClient c = Accumulo.newClient().from(context.getProperties()).build();
+        BatchWriter batchWriter = c.createBatchWriter(tableName)) {
+
+      try (Scanner scanner = context.createScanner(tableName, Authorizations.EMPTY)) {
+        scanner.fetchColumnFamily(UpgraderDeprecatedConstants.ChoppedColumnFamily.STR_NAME);
+        scanner
+            .fetchColumnFamily(MetadataSchema.TabletsSection.ExternalCompactionColumnFamily.NAME);
+        scanner.forEach((k, v) -> {
+          Mutation delete;
+          var family = k.getColumnFamily();
+          if (family.equals(UpgraderDeprecatedConstants.ChoppedColumnFamily.NAME)) {
+            delete = buildChoppedDeleteMutation(k, tableName);
+          } else if (family
+              .equals(MetadataSchema.TabletsSection.ExternalCompactionColumnFamily.NAME)) {
+            delete = buildExternalCompactionDelete(k, tableName);
+          } else {
+            throw new IllegalStateException(
+                "unexpected column Family: '{}' seen processing obsolete references");
+          }
+          try {
+            batchWriter.addMutation(delete);
+          } catch (MutationsRejectedException ex) {
+            log.warn("Failed to delete obsolete reference for table: " + tableName + ". Ref: "
+                + delete.prettyPrint()
+                + ". Will try to continue. Ref may need to be manually removed");
+            log.warn("Constraint violations: {}", ex.getConstraintViolationSummaries());
+          }
+        });
+      }
+    } catch (MutationsRejectedException ex) {
+      log.warn("Failed to delete obsolete reference for table: " + tableName + " on close");
+      log.warn("Constraint violations: {}", ex.getConstraintViolationSummaries());
+      throw new IllegalStateException(ex);
+    } catch (Exception ex) {
+      throw new IllegalStateException(
+          "Processing obsolete referenced for table: " + tableName + " failed. Upgrade aborting",
+          ex);
+    }
+  }
+
+  private Mutation buildChoppedDeleteMutation(final Key k, final String tableName) {
+    Mutation delete = new Mutation(k.getRow()).at()
+        .family(UpgraderDeprecatedConstants.ChoppedColumnFamily.STR_NAME)
+        .qualifier(UpgraderDeprecatedConstants.ChoppedColumnFamily.STR_NAME).delete();
+    log.warn(
+        "Deleting chopped reference from:{}. Previous split or delete may not have completed cleanly. Ref: {}",
+        tableName, delete.prettyPrint());
+    return delete;
+  }
+
+  private Mutation buildExternalCompactionDelete(Key k, String tableName) {
+    Mutation delete = new Mutation(k.getRow()).at()
+        .family(MetadataSchema.TabletsSection.ExternalCompactionColumnFamily.NAME)
+        .qualifier(k.getColumnQualifier()).delete();
+    log.warn(
+        "Deleting external compaction reference from:{}. Previous compaction may not have completed. Ref: {}",
+        tableName, delete.prettyPrint());
+    return delete;
+  }
+
+  /**
+   * Quick sanity check to see if value has been converted by checking the candidate can be parsed
+   * into a StoredTabletFile. If parsing does not throw a parsing exception. then assume it has been
+   * converted. If validate cannot parse the candidate a JSON, then it is assumed that it has not
+   * been converted. Other parsing errors will propagate as IllegalArgumentExceptions.
+   *
+   * @param candidate a possible file: reference.
+   * @return false if a valid StoredTabletFile, true if it cannot be parsed as JSON. Otherwise,
+   *         propagate an IllegalArgumentException
+   */
+  private boolean fileNeedsConversion(@NonNull final String candidate) {
+    try {
+      StoredTabletFile.validate(candidate);

Review Comment:
   Some trace logging might be good here when validating just in case we need 
to troubleshoot something weird going on with the metadata not converting, etc.
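   Something along these lines is what I had in mind (untested sketch based on the documented true/false contract; the log wording is just illustrative):
   ```java
// illustrative only: trace both outcomes so unconverted metadata is easier to troubleshoot
private boolean fileNeedsConversion(@NonNull final String candidate) {
  try {
    StoredTabletFile.validate(candidate);
    log.trace("file reference '{}' already parses as StoredTabletFile json, no conversion needed",
        candidate);
    return false;
  } catch (JsonSyntaxException ex) {
    log.trace("file reference '{}' is not valid json, will be converted", candidate);
    return true;
  }
}
   ```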



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]
