adelapena commented on code in PR #2957:
URL: https://github.com/apache/cassandra/pull/2957#discussion_r1417494908
##########
test/unit/org/apache/cassandra/tools/cqlsh/CqlshTest.java:
##########
@@ -44,4 +53,95 @@ public void testKeyspaceRequired()
assertThat(tool.getCleanedStderr(),
CoreMatchers.containsStringIgnoringCase("No keyspace has been specified"));
assertEquals(2, tool.getExitCode());
}
+
+ @Test
+ public void testCopyFloatVectorFromFile() throws IOException
+ {
+ assertCopyOfVectorLiteralsFromFileSucceeds("float", 6, new Object[][] {
+ row(1, vector(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f)),
+ row(2, vector(-0.1f, -0.2f, -0.3f, -0.4f, -0.5f, -0.6f)),
+ row(3, vector(0.9f, 0.8f, 0.7f, 0.6f, 0.5f, 0.4f))
+ });
+
+ assertCopyOfVectorLiteralsFromFileSucceeds("float", 3, new Object[][] {
+ row(1, vector(0.1f, 0.2f, 0.3f)),
+ row(2, vector(-0.4f, -0.5f, -0.6f)),
+ row(3, vector(0.7f, 0.8f, 0.9f))
+ });
+ }
+
+ @Test
+ public void testCopyIntVectorFromFile() throws IOException
+ {
+ assertCopyOfVectorLiteralsFromFileSucceeds("int", 6, new Object[][] {
+ row(1, vector(1, 2, 3, 4, 5, 6)),
+ row(2, vector(-1, -2, -3, -4, -5, -6)),
+ row(3, vector(9, 8, 7, 6, 5, 4))
+ });
+
+ assertCopyOfVectorLiteralsFromFileSucceeds("int", 3, new Object[][] {
+ row(1, vector(1, 2, 3)),
+ row(2, vector(-4, -5, -6)),
+ row(3, vector(7, 8, 9))
+ });
+ }
+
+ private void assertCopyOfVectorLiteralsFromFileSucceeds(String vectorType,
int vectorSize, Object[][] rows) throws IOException
+ {
+ // given a table with a vector column and a file containing vector
literals
+ createTable(KEYSPACE, format("CREATE TABLE %%s (id int PRIMARY KEY,
embedding_vector vector<%s, %d>)", vectorType, vectorSize));
+ assertTrue("table should be initially empty", execute("SELECT * FROM
%s").isEmpty());
+
+ Path csv = prepareCSVFile(rows);
+
+ // when running COPY via cqlsh
+ ToolRunner.ToolResult result = ToolRunner.invokeCqlsh(format("COPY
%s.%s FROM '%s'", KEYSPACE, currentTable(), csv.toAbsolutePath()));
+ UntypedResultSet importedRows = execute("SELECT * FROM %s");
+
+ // then all rows should be imported
+ result.asserts().success();
+ assertRowsIgnoringOrder(importedRows, rows);
Review Comment:
Nit: we can check this immediately after invoking cqlsh, before running the
`SELECT` query:
```suggestion
// when running COPY via cqlsh
result = ToolRunner.invokeCqlsh(format("COPY %s.%s FROM '%s'",
KEYSPACE, currentTable(), csv.toAbsolutePath()));
result.asserts().success();
// then all rows should be imported
UntypedResultSet importedRows = execute("SELECT * FROM %s");
assertRowsIgnoringOrder(importedRows, rows);
```
##########
test/unit/org/apache/cassandra/tools/cqlsh/CqlshTest.java:
##########
@@ -44,4 +53,95 @@ public void testKeyspaceRequired()
assertThat(tool.getCleanedStderr(),
CoreMatchers.containsStringIgnoringCase("No keyspace has been specified"));
assertEquals(2, tool.getExitCode());
}
+
+ @Test
+ public void testCopyFloatVectorFromFile() throws IOException
+ {
+ assertCopyOfVectorLiteralsFromFileSucceeds("float", 6, new Object[][] {
+ row(1, vector(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f)),
+ row(2, vector(-0.1f, -0.2f, -0.3f, -0.4f, -0.5f, -0.6f)),
+ row(3, vector(0.9f, 0.8f, 0.7f, 0.6f, 0.5f, 0.4f))
+ });
+
+ assertCopyOfVectorLiteralsFromFileSucceeds("float", 3, new Object[][] {
+ row(1, vector(0.1f, 0.2f, 0.3f)),
+ row(2, vector(-0.4f, -0.5f, -0.6f)),
+ row(3, vector(0.7f, 0.8f, 0.9f))
+ });
+ }
+
+ @Test
+ public void testCopyIntVectorFromFile() throws IOException
+ {
+ assertCopyOfVectorLiteralsFromFileSucceeds("int", 6, new Object[][] {
+ row(1, vector(1, 2, 3, 4, 5, 6)),
+ row(2, vector(-1, -2, -3, -4, -5, -6)),
+ row(3, vector(9, 8, 7, 6, 5, 4))
+ });
+
+ assertCopyOfVectorLiteralsFromFileSucceeds("int", 3, new Object[][] {
+ row(1, vector(1, 2, 3)),
+ row(2, vector(-4, -5, -6)),
+ row(3, vector(7, 8, 9))
+ });
+ }
+
+ private void assertCopyOfVectorLiteralsFromFileSucceeds(String vectorType,
int vectorSize, Object[][] rows) throws IOException
+ {
+ // given a table with a vector column and a file containing vector
literals
+ createTable(KEYSPACE, format("CREATE TABLE %%s (id int PRIMARY KEY,
embedding_vector vector<%s, %d>)", vectorType, vectorSize));
+ assertTrue("table should be initially empty", execute("SELECT * FROM
%s").isEmpty());
+
+ Path csv = prepareCSVFile(rows);
+
+ // when running COPY via cqlsh
+ ToolRunner.ToolResult result = ToolRunner.invokeCqlsh(format("COPY
%s.%s FROM '%s'", KEYSPACE, currentTable(), csv.toAbsolutePath()));
+ UntypedResultSet importedRows = execute("SELECT * FROM %s");
+
+ // then all rows should be imported
+ result.asserts().success();
+ assertRowsIgnoringOrder(importedRows, rows);
+ }
+
+ private Path prepareCSVFile(Object[][] rows) throws IOException
Review Comment:
Nit: this method can be `static`. Also, I would probably put it after
`testCopyOnlyThoseRowsThatMatchVectorTypeSize`.
##########
test/unit/org/apache/cassandra/tools/cqlsh/CqlshTest.java:
##########
@@ -44,4 +53,95 @@ public void testKeyspaceRequired()
assertThat(tool.getCleanedStderr(),
CoreMatchers.containsStringIgnoringCase("No keyspace has been specified"));
assertEquals(2, tool.getExitCode());
}
+
+ @Test
+ public void testCopyFloatVectorFromFile() throws IOException
+ {
+ assertCopyOfVectorLiteralsFromFileSucceeds("float", 6, new Object[][] {
+ row(1, vector(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f)),
+ row(2, vector(-0.1f, -0.2f, -0.3f, -0.4f, -0.5f, -0.6f)),
+ row(3, vector(0.9f, 0.8f, 0.7f, 0.6f, 0.5f, 0.4f))
+ });
+
+ assertCopyOfVectorLiteralsFromFileSucceeds("float", 3, new Object[][] {
+ row(1, vector(0.1f, 0.2f, 0.3f)),
+ row(2, vector(-0.4f, -0.5f, -0.6f)),
+ row(3, vector(0.7f, 0.8f, 0.9f))
+ });
+ }
+
+ @Test
+ public void testCopyIntVectorFromFile() throws IOException
+ {
+ assertCopyOfVectorLiteralsFromFileSucceeds("int", 6, new Object[][] {
+ row(1, vector(1, 2, 3, 4, 5, 6)),
+ row(2, vector(-1, -2, -3, -4, -5, -6)),
+ row(3, vector(9, 8, 7, 6, 5, 4))
+ });
+
+ assertCopyOfVectorLiteralsFromFileSucceeds("int", 3, new Object[][] {
+ row(1, vector(1, 2, 3)),
+ row(2, vector(-4, -5, -6)),
+ row(3, vector(7, 8, 9))
+ });
+ }
+
+ private void assertCopyOfVectorLiteralsFromFileSucceeds(String vectorType,
int vectorSize, Object[][] rows) throws IOException
+ {
+ // given a table with a vector column and a file containing vector
literals
+ createTable(KEYSPACE, format("CREATE TABLE %%s (id int PRIMARY KEY,
embedding_vector vector<%s, %d>)", vectorType, vectorSize));
+ assertTrue("table should be initially empty", execute("SELECT * FROM
%s").isEmpty());
+
+ Path csv = prepareCSVFile(rows);
Review Comment:
We are testing `COPY FROM` but not `COPY TO`. This could easily be achieved
by inserting the rows and copying them to CSV, then verifying that the written
file is equal to the expected CSV file synthetically generated by
`prepareCSVFile`:
```suggestion
// write the rows into the table
for (Object[] row : rows)
execute("INSERT INTO %s (id, embedding_vector) VALUES (?, ?)",
row);
// export the rows to CSV
Path csv = Files.createTempFile("test_copy_to_vector", ".csv");
csv.toFile().deleteOnExit();
ToolRunner.ToolResult result = ToolRunner.invokeCqlsh(format("COPY
%s.%s TO '%s'", KEYSPACE, currentTable(), csv.toAbsolutePath()));
result.asserts().success();
// verify that the exported CSV contains the expected rows
Assertions.assertThat(csv).hasSameTextualContentAs(prepareCSVFile(rows));
// truncate the table
execute("TRUNCATE %s");
assertTrue("table should be initially empty", execute("SELECT * FROM
%s").isEmpty());
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]