keith-turner commented on code in PR #3886:
URL: https://github.com/apache/accumulo/pull/3886#discussion_r1380462022
##########
core/src/main/java/org/apache/accumulo/core/tabletserver/log/LogEntry.java:
##########
@@ -19,67 +19,124 @@
package org.apache.accumulo.core.tabletserver.log;
import java.util.Map.Entry;
+import java.util.Objects;
+import java.util.UUID;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.dataImpl.KeyExtent;
-import
org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.LogColumnFamily;
import org.apache.hadoop.io.Text;
+import com.google.common.net.HostAndPort;
+
public class LogEntry {
- private final KeyExtent extent;
- public final long timestamp;
- public final String filename;
- public LogEntry(KeyExtent extent, long timestamp, String filename) {
- // note the prevEndRow in the extent does not matter, and is not used by
LogEntry
- this.extent = extent;
+ private final long timestamp;
+ private final String filePath;
+
+ public LogEntry(long timestamp, String filePath) {
+ validateFilePath(filePath);
this.timestamp = timestamp;
- this.filename = filename;
+ this.filePath = filePath;
+ }
+
+ public long getTimestamp() {
+ return this.timestamp;
}
- // make copy, but with a different filename
- public LogEntry switchFile(String filename) {
- return new LogEntry(extent, timestamp, filename);
+ public String getFilePath() {
+ return this.filePath;
+ }
+
+ /**
+ * Validates the expected format of the file path. We expect the path to
contain a tserver
+ * (host:port) followed by a UUID as the file name. For example,
+ * localhost:1234/927ba659-d109-4bce-b0a5-bcbbcb9942a2 is a valid file path.
+ *
+ * @param filePath path to validate
+ * @throws IllegalArgumentException if the filepath is invalid
+ */
+ private static void validateFilePath(String filePath) {
+ String[] parts = filePath.split("/");
+
+ if (parts.length < 2) {
+ throw new IllegalArgumentException(
+ "Invalid filePath format. The path should at least contain
tserver/UUID.");
+ }
+
+ String tserverPart = parts[parts.length - 2];
+ String uuidPart = parts[parts.length - 1];
+
+ try {
+ var ignored = HostAndPort.fromString(tserverPart);
+ } catch (IllegalArgumentException e) {
+ throw new IllegalArgumentException(
+ "Invalid tserver format in filePath. Expected format: host:port.
Found '" + tserverPart
+ + "'");
+ }
+
+ try {
+ var ignored = UUID.fromString(uuidPart);
+ } catch (IllegalArgumentException e) {
+ throw new IllegalArgumentException("Expected valid UUID. Found '" +
uuidPart + "'");
+ }
+ }
+
+ /**
+ * Make a copy of this LogEntry but replace the file path.
+ *
+ * @param filePath path to use
+ */
+ public LogEntry switchFile(String filePath) {
Review Comment:
If this class only has a file name and not a fully qualified path, then this
method has no reason to exist and could be removed. The purpose of this
method is to support changing volumes in a fully qualified path. We may be
able to remove this method and the code that calls it; that would be another
follow-on issue. I will create an issue for this and the timestamp.
##########
core/src/main/java/org/apache/accumulo/core/tabletserver/log/LogEntry.java:
##########
@@ -19,67 +19,124 @@
package org.apache.accumulo.core.tabletserver.log;
import java.util.Map.Entry;
+import java.util.Objects;
+import java.util.UUID;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.dataImpl.KeyExtent;
-import
org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.LogColumnFamily;
import org.apache.hadoop.io.Text;
+import com.google.common.net.HostAndPort;
+
public class LogEntry {
- private final KeyExtent extent;
- public final long timestamp;
- public final String filename;
- public LogEntry(KeyExtent extent, long timestamp, String filename) {
- // note the prevEndRow in the extent does not matter, and is not used by
LogEntry
- this.extent = extent;
+ private final long timestamp;
+ private final String filePath;
+
+ public LogEntry(long timestamp, String filePath) {
+ validateFilePath(filePath);
this.timestamp = timestamp;
- this.filename = filename;
+ this.filePath = filePath;
+ }
+
+ public long getTimestamp() {
+ return this.timestamp;
}
- // make copy, but with a different filename
- public LogEntry switchFile(String filename) {
- return new LogEntry(extent, timestamp, filename);
+ public String getFilePath() {
+ return this.filePath;
+ }
+
+ /**
+ * Validates the expected format of the file path. We expect the path to
contain a tserver
+ * (host:port) followed by a UUID as the file name. For example,
+ * localhost:1234/927ba659-d109-4bce-b0a5-bcbbcb9942a2 is a valid file path.
+ *
+ * @param filePath path to validate
+ * @throws IllegalArgumentException if the filepath is invalid
+ */
+ private static void validateFilePath(String filePath) {
+ String[] parts = filePath.split("/");
+
+ if (parts.length < 2) {
Review Comment:
This could be made more strict.
```suggestion
if(parts.length == 3) {
// TODO verify that parts[0] is "-" (the "-/" prefix)
} else if (parts.length != 2) {
```
##########
core/src/main/java/org/apache/accumulo/core/tabletserver/log/LogEntry.java:
##########
@@ -19,67 +19,124 @@
package org.apache.accumulo.core.tabletserver.log;
import java.util.Map.Entry;
+import java.util.Objects;
+import java.util.UUID;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.dataImpl.KeyExtent;
-import
org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.LogColumnFamily;
import org.apache.hadoop.io.Text;
+import com.google.common.net.HostAndPort;
+
public class LogEntry {
- private final KeyExtent extent;
- public final long timestamp;
- public final String filename;
- public LogEntry(KeyExtent extent, long timestamp, String filename) {
- // note the prevEndRow in the extent does not matter, and is not used by
LogEntry
- this.extent = extent;
+ private final long timestamp;
+ private final String filePath;
+
+ public LogEntry(long timestamp, String filePath) {
+ validateFilePath(filePath);
this.timestamp = timestamp;
- this.filename = filename;
+ this.filePath = filePath;
+ }
+
+ public long getTimestamp() {
+ return this.timestamp;
}
- // make copy, but with a different filename
- public LogEntry switchFile(String filename) {
- return new LogEntry(extent, timestamp, filename);
+ public String getFilePath() {
+ return this.filePath;
+ }
+
+ /**
+ * Validates the expected format of the file path. We expect the path to
contain a tserver
+ * (host:port) followed by a UUID as the file name. For example,
+ * localhost:1234/927ba659-d109-4bce-b0a5-bcbbcb9942a2 is a valid file path.
+ *
+ * @param filePath path to validate
+ * @throws IllegalArgumentException if the filepath is invalid
+ */
+ private static void validateFilePath(String filePath) {
+ String[] parts = filePath.split("/");
+
+ if (parts.length < 2) {
+ throw new IllegalArgumentException(
+ "Invalid filePath format. The path should at least contain
tserver/UUID.");
+ }
+
+ String tserverPart = parts[parts.length - 2];
+ String uuidPart = parts[parts.length - 1];
+
+ try {
+ var ignored = HostAndPort.fromString(tserverPart);
+ } catch (IllegalArgumentException e) {
+ throw new IllegalArgumentException(
+ "Invalid tserver format in filePath. Expected format: host:port.
Found '" + tserverPart
+ + "'");
+ }
+
+ try {
+ var ignored = UUID.fromString(uuidPart);
+ } catch (IllegalArgumentException e) {
+ throw new IllegalArgumentException("Expected valid UUID. Found '" +
uuidPart + "'");
+ }
+ }
+
+ /**
+ * Make a copy of this LogEntry but replace the file path.
+ *
+ * @param filePath path to use
+ */
+ public LogEntry switchFile(String filePath) {
+ return new LogEntry(timestamp, filePath);
}
@Override
public String toString() {
- return extent.toMetaRow() + " " + filename;
+ return filePath;
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (this == other) {
+ return true;
+ }
+ if (!(other instanceof LogEntry)) {
+ return false;
+ }
+ LogEntry logEntry = (LogEntry) other;
+ return this.timestamp == logEntry.timestamp &&
this.filePath.equals(logEntry.filePath);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(timestamp, filePath);
}
public static LogEntry fromMetaWalEntry(Entry<Key,Value> entry) {
final Key key = entry.getKey();
final Value value = entry.getValue();
- KeyExtent extent = KeyExtent.fromMetaRow(key.getRow());
- // qualifier.split("/")[0] used to store the server, but this is no longer
used, and the
- // qualifier can be ignored
- // the following line handles old-style log entry values that specify log
sets
+
+ // the older format seems to split on "|", and then on ";".
+ // We're only interested in the last part after splitting on ";", which
seems to be the filePath
String[] parts = value.toString().split("\\|")[0].split(";");
- String filename = parts[parts.length - 1];
- long timestamp = key.getTimestamp();
- return new LogEntry(extent, timestamp, filename);
- }
+ String filePath = parts[parts.length - 1];
- public Text getRow() {
- return extent.toMetaRow();
- }
+ validateFilePath(filePath);
- public Text getColumnFamily() {
- return LogColumnFamily.NAME;
+ return new LogEntry(key.getTimestamp(), filePath);
}
public String getUniqueID() {
- String[] parts = filename.split("/");
+ String[] parts = filePath.split("/");
return parts[parts.length - 1];
}
public Text getColumnQualifier() {
- return new Text("-/" + filename);
+ return new Text("-/" + filePath);
}
public Value getValue() {
- return new Value(filename);
+ return new Value(filePath);
Review Comment:
Something else mentioned in #3878 was to stop storing the value, as it's
redundant with the qualifier. That does not need to be done in this PR, but we
need to leave #3878 open or create a new issue when closing this PR out.
##########
core/src/main/java/org/apache/accumulo/core/tabletserver/log/LogEntry.java:
##########
@@ -19,67 +19,124 @@
package org.apache.accumulo.core.tabletserver.log;
import java.util.Map.Entry;
+import java.util.Objects;
+import java.util.UUID;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.dataImpl.KeyExtent;
-import
org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.LogColumnFamily;
import org.apache.hadoop.io.Text;
+import com.google.common.net.HostAndPort;
+
public class LogEntry {
- private final KeyExtent extent;
- public final long timestamp;
- public final String filename;
- public LogEntry(KeyExtent extent, long timestamp, String filename) {
- // note the prevEndRow in the extent does not matter, and is not used by
LogEntry
- this.extent = extent;
+ private final long timestamp;
+ private final String filePath;
+
+ public LogEntry(long timestamp, String filePath) {
+ validateFilePath(filePath);
this.timestamp = timestamp;
- this.filename = filename;
+ this.filePath = filePath;
+ }
+
+ public long getTimestamp() {
+ return this.timestamp;
}
- // make copy, but with a different filename
- public LogEntry switchFile(String filename) {
- return new LogEntry(extent, timestamp, filename);
+ public String getFilePath() {
+ return this.filePath;
+ }
+
+ /**
+ * Validates the expected format of the file path. We expect the path to
contain a tserver
+ * (host:port) followed by a UUID as the file name. For example,
+ * localhost:1234/927ba659-d109-4bce-b0a5-bcbbcb9942a2 is a valid file path.
+ *
+ * @param filePath path to validate
+ * @throws IllegalArgumentException if the filepath is invalid
+ */
+ private static void validateFilePath(String filePath) {
+ String[] parts = filePath.split("/");
+
+ if (parts.length < 2) {
+ throw new IllegalArgumentException(
+ "Invalid filePath format. The path should at least contain
tserver/UUID.");
+ }
+
+ String tserverPart = parts[parts.length - 2];
+ String uuidPart = parts[parts.length - 1];
+
+ try {
+ var ignored = HostAndPort.fromString(tserverPart);
+ } catch (IllegalArgumentException e) {
+ throw new IllegalArgumentException(
+ "Invalid tserver format in filePath. Expected format: host:port.
Found '" + tserverPart
+ + "'");
+ }
+
+ try {
+ var ignored = UUID.fromString(uuidPart);
+ } catch (IllegalArgumentException e) {
+ throw new IllegalArgumentException("Expected valid UUID. Found '" +
uuidPart + "'");
+ }
+ }
+
+ /**
+ * Make a copy of this LogEntry but replace the file path.
+ *
+ * @param filePath path to use
+ */
+ public LogEntry switchFile(String filePath) {
+ return new LogEntry(timestamp, filePath);
}
@Override
public String toString() {
- return extent.toMetaRow() + " " + filename;
+ return filePath;
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (this == other) {
+ return true;
+ }
+ if (!(other instanceof LogEntry)) {
+ return false;
+ }
+ LogEntry logEntry = (LogEntry) other;
+ return this.timestamp == logEntry.timestamp &&
this.filePath.equals(logEntry.filePath);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(timestamp, filePath);
}
public static LogEntry fromMetaWalEntry(Entry<Key,Value> entry) {
final Key key = entry.getKey();
final Value value = entry.getValue();
- KeyExtent extent = KeyExtent.fromMetaRow(key.getRow());
- // qualifier.split("/")[0] used to store the server, but this is no longer
used, and the
- // qualifier can be ignored
- // the following line handles old-style log entry values that specify log
sets
+
+ // the older format seems to split on "|", and then on ";".
+ // We're only interested in the last part after splitting on ";", which
seems to be the filePath
String[] parts = value.toString().split("\\|")[0].split(";");
- String filename = parts[parts.length - 1];
- long timestamp = key.getTimestamp();
- return new LogEntry(extent, timestamp, filename);
- }
+ String filePath = parts[parts.length - 1];
Review Comment:
I think we can drop the split preceding this line and the comments around it.
We should not see this really old data format anymore. If we did happen to see
it, then the validation should fail if `|` or `;` is present.
```suggestion
String filePath = value.toString();
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]