Repository: metamodel Updated Branches: refs/heads/master 161b64285 -> a6093c167
METAMODEL-244: Addition for Excel Closes #99 Project: http://git-wip-us.apache.org/repos/asf/metamodel/repo Commit: http://git-wip-us.apache.org/repos/asf/metamodel/commit/a6093c16 Tree: http://git-wip-us.apache.org/repos/asf/metamodel/tree/a6093c16 Diff: http://git-wip-us.apache.org/repos/asf/metamodel/diff/a6093c16 Branch: refs/heads/master Commit: a6093c167f0fa7f9a6fc23aa8cb2ee0439c3d972 Parents: 161b642 Author: Kasper Sørensen <i.am.kasper.soren...@gmail.com> Authored: Thu May 12 20:15:33 2016 -0700 Committer: Kasper Sørensen <i.am.kasper.soren...@gmail.com> Committed: Thu May 12 20:15:33 2016 -0700 ---------------------------------------------------------------------- .../couchdb/CouchDbDataContextTest.java | 3 +- .../excel/DefaultSpreadsheetReaderDelegate.java | 45 +++++++++------ .../metamodel/excel/ExcelConfiguration.java | 31 ++++++++-- .../metamodel/excel/ExcelDataContext.java | 4 +- .../excel/XlsxSpreadsheetReaderDelegate.java | 60 +++++++++----------- .../metamodel/excel/ExcelDataContextTest.java | 4 +- 6 files changed, 85 insertions(+), 62 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/metamodel/blob/a6093c16/couchdb/src/test/java/org/apache/metamodel/couchdb/CouchDbDataContextTest.java ---------------------------------------------------------------------- diff --git a/couchdb/src/test/java/org/apache/metamodel/couchdb/CouchDbDataContextTest.java b/couchdb/src/test/java/org/apache/metamodel/couchdb/CouchDbDataContextTest.java index 9e1f5fe..c2ec998 100644 --- a/couchdb/src/test/java/org/apache/metamodel/couchdb/CouchDbDataContextTest.java +++ b/couchdb/src/test/java/org/apache/metamodel/couchdb/CouchDbDataContextTest.java @@ -53,7 +53,8 @@ public class CouchDbDataContextTest extends CouchDbTestCase { super.setUp(); if (isConfigured()) { - httpClient = new StdHttpClient.Builder().host(getHostname()).build(); + final int timeout = 8 * 1000; // 8 seconds should be more than enough + httpClient = new StdHttpClient.Builder().socketTimeout(timeout).host(getHostname()).build(); // set up a simple database couchDbInstance = new StdCouchDbInstance(httpClient); http://git-wip-us.apache.org/repos/asf/metamodel/blob/a6093c16/excel/src/main/java/org/apache/metamodel/excel/DefaultSpreadsheetReaderDelegate.java ---------------------------------------------------------------------- diff --git a/excel/src/main/java/org/apache/metamodel/excel/DefaultSpreadsheetReaderDelegate.java b/excel/src/main/java/org/apache/metamodel/excel/DefaultSpreadsheetReaderDelegate.java index 1b8b534..009fad4 100644 --- a/excel/src/main/java/org/apache/metamodel/excel/DefaultSpreadsheetReaderDelegate.java +++ b/excel/src/main/java/org/apache/metamodel/excel/DefaultSpreadsheetReaderDelegate.java @@ -30,7 +30,10 @@ import org.apache.metamodel.schema.MutableSchema; import org.apache.metamodel.schema.MutableTable; import org.apache.metamodel.schema.Schema; import org.apache.metamodel.schema.Table; -import org.apache.metamodel.util.AlphabeticSequence; +import org.apache.metamodel.schema.naming.ColumnNamingContext; +import org.apache.metamodel.schema.naming.ColumnNamingContextImpl; +import org.apache.metamodel.schema.naming.ColumnNamingSession; +import org.apache.metamodel.schema.naming.ColumnNamingStrategy; import org.apache.metamodel.util.FileHelper; import org.apache.metamodel.util.Resource; import org.apache.poi.ss.usermodel.Cell; @@ -131,19 +134,22 @@ final class DefaultSpreadsheetReaderDelegate implements SpreadsheetReaderDelegat row = rowIterator.next(); } - // build columns by using alphabetic sequences - // (A,B,C...) - AlphabeticSequence sequence = new AlphabeticSequence(); + // build columns without any intrinsic column names + final ColumnNamingStrategy columnNamingStrategy = _configuration.getColumnNamingStrategy(); + try (final ColumnNamingSession columnNamingSession = columnNamingStrategy.startColumnNamingSession()) { + final int offset = getColumnOffset(row); + for (int i = 0; i < offset; i++) { + columnNamingSession.getNextColumnName(new ColumnNamingContextImpl(i)); + } - final int offset = getColumnOffset(row); - for (int i = 0; i < offset; i++) { - sequence.next(); + for (int j = offset; j < row.getLastCellNum(); j++) { + final ColumnNamingContext namingContext = new ColumnNamingContextImpl(table, null, j); + final Column column = new MutableColumn(columnNamingSession.getNextColumnName(namingContext), + ColumnType.STRING, table, j, true); + table.addColumn(column); + } } - for (int j = offset; j < row.getLastCellNum(); j++) { - Column column = new MutableColumn(sequence.next(), ColumnType.STRING, table, j, true); - table.addColumn(column); - } } else { boolean hasColumns = true; @@ -183,14 +189,17 @@ final class DefaultSpreadsheetReaderDelegate implements SpreadsheetReaderDelegat final int offset = getColumnOffset(row); // build columns based on cell values. - for (int j = offset; j < rowLength; j++) { - Cell cell = row.getCell(j); - String columnName = ExcelUtils.getCellValue(wb, cell); - if (columnName == null || "".equals(columnName)) { - columnName = "[Column " + (j + 1) + "]"; + try (final ColumnNamingSession columnNamingSession = _configuration.getColumnNamingStrategy() + .startColumnNamingSession()) { + for (int j = offset; j < rowLength; j++) { + final Cell cell = row.getCell(j); + final String intrinsicColumnName = ExcelUtils.getCellValue(wb, cell); + final ColumnNamingContext columnNamingContext = new ColumnNamingContextImpl(table, intrinsicColumnName, + j); + final String columnName = columnNamingSession.getNextColumnName(columnNamingContext); + final Column column = new MutableColumn(columnName, ColumnType.VARCHAR, table, j, true); + table.addColumn(column); } - Column column = new MutableColumn(columnName, ColumnType.VARCHAR, table, j, true); - table.addColumn(column); } } http://git-wip-us.apache.org/repos/asf/metamodel/blob/a6093c16/excel/src/main/java/org/apache/metamodel/excel/ExcelConfiguration.java ---------------------------------------------------------------------- diff --git a/excel/src/main/java/org/apache/metamodel/excel/ExcelConfiguration.java b/excel/src/main/java/org/apache/metamodel/excel/ExcelConfiguration.java index 9220ea3..4779bb1 100644 --- a/excel/src/main/java/org/apache/metamodel/excel/ExcelConfiguration.java +++ b/excel/src/main/java/org/apache/metamodel/excel/ExcelConfiguration.java @@ -21,6 +21,8 @@ package org.apache.metamodel.excel; import java.io.Serializable; import java.util.List; +import org.apache.metamodel.schema.naming.ColumnNamingStrategies; +import org.apache.metamodel.schema.naming.ColumnNamingStrategy; import org.apache.metamodel.util.BaseObject; /** @@ -37,6 +39,7 @@ public final class ExcelConfiguration extends BaseObject implements public static final int DEFAULT_COLUMN_NAME_LINE = 1; private final int columnNameLineNumber; + private final ColumnNamingStrategy columnNamingStrategy; private final boolean skipEmptyLines; private final boolean skipEmptyColumns; @@ -44,12 +47,28 @@ public final class ExcelConfiguration extends BaseObject implements this(DEFAULT_COLUMN_NAME_LINE, true, false); } - public ExcelConfiguration(int columnNameLineNumber, boolean skipEmptyLines, - boolean skipEmptyColumns) { - this.columnNameLineNumber = columnNameLineNumber; - this.skipEmptyLines = skipEmptyLines; - this.skipEmptyColumns = skipEmptyColumns; - } + public ExcelConfiguration(int columnNameLineNumber, boolean skipEmptyLines, boolean skipEmptyColumns) { + this(columnNameLineNumber, null, skipEmptyLines, skipEmptyColumns); + } + + public ExcelConfiguration(int columnNameLineNumber, ColumnNamingStrategy columnNamingStrategy, + boolean skipEmptyLines, boolean skipEmptyColumns) { + this.columnNameLineNumber = columnNameLineNumber; + this.skipEmptyLines = skipEmptyLines; + this.skipEmptyColumns = skipEmptyColumns; + this.columnNamingStrategy = columnNamingStrategy; + } + + /** + * Gets a {@link ColumnNamingStrategy} to use if needed. + * @return + */ + public ColumnNamingStrategy getColumnNamingStrategy() { + if (columnNamingStrategy == null) { + return ColumnNamingStrategies.defaultStrategy(); + } + return columnNamingStrategy; + } /** * The line number (1 based) from which to get the names of the columns. http://git-wip-us.apache.org/repos/asf/metamodel/blob/a6093c16/excel/src/main/java/org/apache/metamodel/excel/ExcelDataContext.java ---------------------------------------------------------------------- diff --git a/excel/src/main/java/org/apache/metamodel/excel/ExcelDataContext.java b/excel/src/main/java/org/apache/metamodel/excel/ExcelDataContext.java index 28c1f8e..0ce1c64 100644 --- a/excel/src/main/java/org/apache/metamodel/excel/ExcelDataContext.java +++ b/excel/src/main/java/org/apache/metamodel/excel/ExcelDataContext.java @@ -165,8 +165,8 @@ public final class ExcelDataContext extends QueryPostprocessDataContext implemen return new MutableSchema(getMainSchemaName()); } try { - SpreadsheetReaderDelegate delegate = getSpreadsheetReaderDelegate(); - Schema schema = delegate.createSchema(getMainSchemaName()); + final SpreadsheetReaderDelegate delegate = getSpreadsheetReaderDelegate(); + final Schema schema = delegate.createSchema(getMainSchemaName()); assert getMainSchemaName().equals(schema.getName()); return schema; } catch (Exception e) { http://git-wip-us.apache.org/repos/asf/metamodel/blob/a6093c16/excel/src/main/java/org/apache/metamodel/excel/XlsxSpreadsheetReaderDelegate.java ---------------------------------------------------------------------- diff --git a/excel/src/main/java/org/apache/metamodel/excel/XlsxSpreadsheetReaderDelegate.java b/excel/src/main/java/org/apache/metamodel/excel/XlsxSpreadsheetReaderDelegate.java index ab34ef6..94a9ff7 100644 --- a/excel/src/main/java/org/apache/metamodel/excel/XlsxSpreadsheetReaderDelegate.java +++ b/excel/src/main/java/org/apache/metamodel/excel/XlsxSpreadsheetReaderDelegate.java @@ -39,7 +39,9 @@ import org.apache.metamodel.schema.MutableSchema; import org.apache.metamodel.schema.MutableTable; import org.apache.metamodel.schema.Schema; import org.apache.metamodel.schema.Table; -import org.apache.metamodel.util.AlphabeticSequence; +import org.apache.metamodel.schema.naming.ColumnNamingContextImpl; +import org.apache.metamodel.schema.naming.ColumnNamingSession; +import org.apache.metamodel.schema.naming.ColumnNamingStrategy; import org.apache.metamodel.util.FileHelper; import org.apache.metamodel.util.FileResource; import org.apache.metamodel.util.Resource; @@ -141,8 +143,8 @@ final class XlsxSpreadsheetReaderDelegate implements SpreadsheetReaderDelegate { for (Column column : columns) { selectItems.add(new SelectItem(column)); } - final XlsxRowPublisherAction publishAction = new XlsxRowPublisherAction(_configuration, columns, - relationshipId, xssfReader); + final XlsxRowPublisherAction publishAction = new XlsxRowPublisherAction(_configuration, columns, relationshipId, + xssfReader); return new RowPublisherDataSet(selectItems.toArray(new SelectItem[selectItems.size()]), maxRows, publishAction, new Closeable() { @@ -161,26 +163,31 @@ final class XlsxSpreadsheetReaderDelegate implements SpreadsheetReaderDelegate { @Override public boolean row(int rowNumber, List<String> values, List<Style> styles) { final int columnNameLineNumber = _configuration.getColumnNameLineNumber(); - if (columnNameLineNumber == ExcelConfiguration.NO_COLUMN_NAME_LINE) { - AlphabeticSequence alphabeticSequence = new AlphabeticSequence(); - List<String> generatedColumnNames = new ArrayList<String>(values.size()); - for (String originalColumnName : values) { - String columnName = alphabeticSequence.next(); - if (originalColumnName == null) { - columnName = null; - } - generatedColumnNames.add(columnName); - } - buildColumns(table, generatedColumnNames); - return false; - } else { + final boolean hasColumnNameLine = columnNameLineNumber != ExcelConfiguration.NO_COLUMN_NAME_LINE; + + if (hasColumnNameLine) { final int zeroBasedLineNumber = columnNameLineNumber - 1; - if (rowNumber >= zeroBasedLineNumber) { - buildColumns(table, values); - return false; + if (rowNumber < zeroBasedLineNumber) { + // jump to read the next line + return true; + } + } + + final ColumnNamingStrategy columnNamingStrategy = _configuration.getColumnNamingStrategy(); + try (ColumnNamingSession session = columnNamingStrategy.startColumnNamingSession()) { + for (int i = 0; i < values.size(); i++) { + final String intrinsicColumnName = hasColumnNameLine ? values.get(i) : null; + final String columnName = session.getNextColumnName(new ColumnNamingContextImpl(table, + intrinsicColumnName, i)); + + if (!(_configuration.isSkipEmptyColumns() && values.get(i) == null)) { + table.addColumn(new MutableColumn(columnName, ColumnType.STRING, table, i, true)); + } } } - return true; + + // now we're done, no more reading + return false; } }; final XlsxSheetToRowsHandler handler = new XlsxSheetToRowsHandler(rowCallback, xssfReader, _configuration); @@ -196,19 +203,6 @@ final class XlsxSpreadsheetReaderDelegate implements SpreadsheetReaderDelegate { } } - protected void buildColumns(final MutableTable table, final List<String> columnNames) { - int columnNumber = 0; - for (String columnName : columnNames) { - if (columnName != null || !_configuration.isSkipEmptyColumns()) { - if (columnName == null) { - columnName = "[Column " + (columnNumber + 1) + "]"; - } - table.addColumn(new MutableColumn(columnName, ColumnType.STRING, table, columnNumber, true)); - } - columnNumber++; - } - } - private void buildTables(final XSSFReader xssfReader, final XlsxWorkbookToTablesHandler workbookToTables) throws Exception { final InputStream workbookData = xssfReader.getWorkbookData(); http://git-wip-us.apache.org/repos/asf/metamodel/blob/a6093c16/excel/src/test/java/org/apache/metamodel/excel/ExcelDataContextTest.java ---------------------------------------------------------------------- diff --git a/excel/src/test/java/org/apache/metamodel/excel/ExcelDataContextTest.java b/excel/src/test/java/org/apache/metamodel/excel/ExcelDataContextTest.java index 81155c0..3b69290 100644 --- a/excel/src/test/java/org/apache/metamodel/excel/ExcelDataContextTest.java +++ b/excel/src/test/java/org/apache/metamodel/excel/ExcelDataContextTest.java @@ -276,7 +276,7 @@ public class ExcelDataContextTest extends TestCase { assertNotNull(table); - assertEquals("[[Column 1], hello]", Arrays.toString(table.getColumnNames())); + assertEquals("[A, hello]", Arrays.toString(table.getColumnNames())); Query q = dc.query().from(table).select(table.getColumns()).toQuery(); DataSet ds = dc.executeQuery(q); @@ -427,7 +427,7 @@ public class ExcelDataContextTest extends TestCase { Table table = schema.getTables()[0]; assertEquals("[Column[name=a,columnNumber=0,type=VARCHAR,nullable=true,nativeType=null,columnSize=null], " + "Column[name=b,columnNumber=1,type=VARCHAR,nullable=true,nativeType=null,columnSize=null], " - + "Column[name=[Column 3],columnNumber=2,type=VARCHAR,nullable=true,nativeType=null,columnSize=null], " + + "Column[name=A,columnNumber=2,type=VARCHAR,nullable=true,nativeType=null,columnSize=null], " + "Column[name=d,columnNumber=3,type=VARCHAR,nullable=true,nativeType=null,columnSize=null]]", Arrays.toString(table.getColumns()));