This is an automated email from the ASF dual-hosted git repository. jsorel pushed a commit to branch geoapi-4.0 in repository https://gitbox.apache.org/repos/asf/sis.git
The following commit(s) were added to refs/heads/geoapi-4.0 by this push: new f2347a9fbe feat(shapefile): add dbf read support f2347a9fbe is described below commit f2347a9fbe11d77cda16fcc95a86ddbb40680942 Author: jsorel <johann.so...@geomatys.com> AuthorDate: Tue Oct 31 15:45:09 2023 +0100 feat(shapefile): add dbf read support --- .../main/module-info.java | 1 + .../apache/sis/storage/shapefile/dbf/DBFField.java | 120 ++++++++++++ .../sis/storage/shapefile/dbf/DBFFieldEncoder.java | 206 +++++++++++++++++++++ .../sis/storage/shapefile/dbf/DBFHeader.java | 78 ++++++++ .../sis/storage/shapefile/dbf/DBFReader.java | 87 +++++++++ .../sis/storage/shapefile/dbf/DBFRecord.java} | 21 ++- .../sis/storage/shapefile/dbf/DBFIOTest.java | 104 +++++++++++ 7 files changed, 607 insertions(+), 10 deletions(-) diff --git a/incubator/src/org.apache.sis.storage.shapefile/main/module-info.java b/incubator/src/org.apache.sis.storage.shapefile/main/module-info.java index 2aad9ecdde..d9cd721f2f 100644 --- a/incubator/src/org.apache.sis.storage.shapefile/main/module-info.java +++ b/incubator/src/org.apache.sis.storage.shapefile/main/module-info.java @@ -29,4 +29,5 @@ module org.apache.sis.storage.shapefile { exports org.apache.sis.storage.shapefile; exports org.apache.sis.storage.shapefile.cpg; exports org.apache.sis.storage.shapefile.shp; + exports org.apache.sis.storage.shapefile.dbf; } diff --git a/incubator/src/org.apache.sis.storage.shapefile/main/org/apache/sis/storage/shapefile/dbf/DBFField.java b/incubator/src/org.apache.sis.storage.shapefile/main/org/apache/sis/storage/shapefile/dbf/DBFField.java new file mode 100644 index 0000000000..ca630ec6cb --- /dev/null +++ b/incubator/src/org.apache.sis.storage.shapefile/main/org/apache/sis/storage/shapefile/dbf/DBFField.java @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sis.storage.shapefile.dbf;
+
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.util.Date;
+import org.apache.sis.io.stream.ChannelDataInput;
+import org.apache.sis.util.ArraysExt;
+
+
+/**
+ * Description of one column of a DBF table, decoded from a 32-byte field
+ * descriptor (11 name bytes, 1 type byte, 4 address bytes, 1 length byte,
+ * 1 decimal-count byte and 14 skipped bytes).
+ *
+ * @author Johann Sorel (Geomatys)
+ */
+public final class DBFField {
+
+    /**
+     * Binary, String : b or B
+     */
+    public static final int TYPE_BINARY = 'b';
+    /**
+     * Characters : c or C
+     */
+    public static final int TYPE_CHAR = 'c';
+    /**
+     * Date : d or D
+     */
+    public static final int TYPE_DATE = 'd';
+    /**
+     * Numeric : n or N
+     */
+    public static final int TYPE_NUMBER = 'n';
+    /**
+     * Logical : l or L
+     */
+    public static final int TYPE_LOGIC = 'l';
+    /**
+     * Memo, String : m or M
+     */
+    public static final int TYPE_MEMO = 'm';
+    /**
+     * TimeStamp : 8 bytes, two longs, first for date, second for time.
+     * The date is the number of days since 01/01/4713 BC.
+     * Time is hours * 3600000L + minutes * 60000L + Seconds * 1000L
+     */
+    public static final int TYPE_TIMESTAMP = '@';
+    /**
+     * Long : i or I on 4 bytes, first bit is the sign, 0 = negative
+     */
+    public static final int TYPE_LONG = 'i';
+    /**
+     * Autoincrement : same as Long
+     */
+    public static final int TYPE_INC = '+';
+    /**
+     * Floats : f or F
+     */
+    public static final int TYPE_FLOAT = 'f';
+    /**
+     * Double : o or O, real double on 8bytes, not string encoded
+     */
+    public static final int TYPE_DOUBLE = 'o';
+    /**
+     * OLE : g or G
+     */
+    public static final int TYPE_OLE = 'g';
+
+    /** Column name, without the NUL padding of the 11-byte name slot. */
+    public String fieldName;
+    /** Type code, always stored lower-cased; compare with the {@code TYPE_*} constants. */
+    public int fieldType;
+    /** Raw 4-byte address value found in the descriptor. */
+    public int fieldAddress;
+    /** Number of bytes used by one value of this column in a record. */
+    public int fieldLength;
+    /** Decimal count found in the descriptor. */
+    public int fieldLDecimals;
+
+    private DBFFieldEncoder encoder;
+
+    /**
+     * Reads one field descriptor from the current position of the channel
+     * and creates the matching value encoder.
+     *
+     * @param channel input positioned on the first byte of the descriptor
+     * @param charset charset used for the field name and for character values
+     * @throws IOException if the channel cannot be read
+     */
+    public void read(ChannelDataInput channel, Charset charset) throws IOException {
+        final byte[] rawName = channel.readBytes(11);
+        // The name is NUL-terminated inside its fixed 11-byte slot.
+        int nameSize = 0;
+        while (nameSize < rawName.length && rawName[nameSize] != 0) {
+            nameSize++;
+        }
+        // Decode with the table charset rather than the platform default;
+        // a null charset keeps the previous platform-default decoding.
+        fieldName = (charset != null)
+                ? new String(rawName, 0, nameSize, charset)
+                : new String(rawName, 0, nameSize);
+        // Character.toLowerCase is locale-independent, unlike String.toLowerCase()
+        // which maps 'I' to a dotless i under a Turkish default locale.
+        fieldType = Character.toLowerCase((char) channel.readUnsignedByte());
+        fieldAddress = channel.readInt();
+        fieldLength = channel.readUnsignedByte();
+        fieldLDecimals = channel.readUnsignedByte();
+        channel.skipBytes(14);
+
+        encoder = DBFFieldEncoder.getEncoder(fieldType, fieldLength, fieldLDecimals, charset);
+    }
+
+    /**
+     * Returns the encoder created by {@link #read}.
+     *
+     * @return value encoder, or {@code null} if {@code read} has not been called
+     */
+    public DBFFieldEncoder getEncoder() {
+        return encoder;
+    }
+
+    @Override
+    public String toString() {
+        return "DBFField{" +
+                "fieldName='" + fieldName + '\'' +
+                ", fieldType=" + fieldType +
+                ", fieldAddress=" + fieldAddress +
+                ", fieldLength=" + fieldLength +
+                ", fieldLDecimals=" + fieldLDecimals +
+                '}';
+    }
+}
diff --git a/incubator/src/org.apache.sis.storage.shapefile/main/org/apache/sis/storage/shapefile/dbf/DBFFieldEncoder.java b/incubator/src/org.apache.sis.storage.shapefile/main/org/apache/sis/storage/shapefile/dbf/DBFFieldEncoder.java
new file mode 100644
index 0000000000..63d0b3237e
--- /dev/null
+++
b/incubator/src/org.apache.sis.storage.shapefile/main/org/apache/sis/storage/shapefile/dbf/DBFFieldEncoder.java
@@ -0,0 +1,206 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sis.storage.shapefile.dbf;
+
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.time.LocalDate;
+import org.apache.sis.io.stream.ChannelDataInput;
+import org.apache.sis.io.stream.ChannelDataOutput;
+import static org.apache.sis.storage.shapefile.dbf.DBFField.*;
+
+/**
+ * Converts the fixed-width bytes of one DBF column to and from Java values.
+ * Instances are obtained from {@link #getEncoder}; writing is not implemented yet.
+ *
+ * @author Johann Sorel (Geomatys)
+ */
+public abstract class DBFFieldEncoder {
+
+    /**
+     * Creates the encoder matching a field descriptor.
+     * Numeric fields map to {@code Double} when a decimal count is declared,
+     * otherwise to {@code Long} when wider than 9 characters, else {@code Integer}.
+     *
+     * @param fieldType     lower-case type code, one of the {@code DBFField.TYPE_*} constants
+     * @param fieldLength   number of bytes of one value
+     * @param fieldDecimals declared decimal count
+     * @param charset       charset used to decode character fields
+     * @return encoder for the given field
+     * @throws UnsupportedOperationException if the type is known but not handled yet
+     * @throws IllegalArgumentException if the type code is unknown
+     */
+    public static DBFFieldEncoder getEncoder(int fieldType, int fieldLength, int fieldDecimals, Charset charset) {
+        switch (fieldType) {
+            case TYPE_BINARY : return new Binary(fieldLength, fieldDecimals);
+            case TYPE_CHAR   : return new Char(fieldLength, fieldDecimals, charset);
+            case TYPE_DATE   : return new Date(fieldLength, fieldDecimals);
+            case TYPE_NUMBER : {
+                if (fieldDecimals != 0) return new Decimal(fieldLength, fieldDecimals);
+                return fieldLength > 9 ? new LongInt(fieldLength) : new ShortInt(fieldLength);
+            }
+            case TYPE_LOGIC  : return new Logic(fieldLength);
+            case TYPE_MEMO :
+            case TYPE_TIMESTAMP :
+            case TYPE_LONG :
+            case TYPE_INC :
+            case TYPE_FLOAT :
+            case TYPE_DOUBLE :
+            case TYPE_OLE :
+                throw new UnsupportedOperationException(
+                        "Field type '" + (char) fieldType + "' is not supported yet");
+            default: throw new IllegalArgumentException("Unknown field type "+fieldType);
+        }
+    }
+
+    protected final Class<?> valueClass;
+    protected final int fieldLength;
+    protected final int fieldLDecimals;
+
+    /**
+     * @param valueClass     Java type of the decoded values
+     * @param fieldLength    number of bytes of one value
+     * @param fieldLDecimals declared decimal count
+     */
+    public DBFFieldEncoder(Class<?> valueClass, int fieldLength, int fieldLDecimals) {
+        this.valueClass = valueClass;
+        this.fieldLength = fieldLength;
+        this.fieldLDecimals = fieldLDecimals;
+    }
+
+    /**
+     * @return Java type of the values produced by {@link #read}
+     */
+    public Class<?> getValueClass() {
+        return valueClass;
+    }
+
+    /**
+     * Reads one value, consuming {@code fieldLength} bytes.
+     *
+     * @param channel input positioned on the first byte of the value
+     * @return decoded value
+     * @throws IOException if the channel cannot be read
+     */
+    public abstract Object read(ChannelDataInput channel) throws IOException;
+
+    /**
+     * Writes one value. Not implemented by any encoder yet.
+     *
+     * @param channel output to write to
+     * @param value   value to encode
+     * @throws IOException if the channel cannot be written
+     */
+    public abstract void write(ChannelDataOutput channel, Object value) throws IOException;
+
+    /**
+     * Reads {@code fieldLength} bytes and returns them as trimmed text.
+     * Numbers, dates and logical flags only use ASCII characters, so the
+     * decoding must not depend on the platform default charset.
+     */
+    final String readText(ChannelDataInput channel) throws IOException {
+        return new String(channel.readBytes(fieldLength), StandardCharsets.US_ASCII).trim();
+    }
+
+
+    private static final class Binary extends DBFFieldEncoder {
+
+        public Binary(int fieldLength, int fieldDecimals) {
+            super(Long.class, fieldLength, fieldDecimals);
+        }
+
+        @Override
+        public Object read(ChannelDataInput channel) throws IOException {
+            throw new UnsupportedOperationException("Not supported yet.");
+        }
+
+        @Override
+        public void write(ChannelDataOutput channel, Object value) throws IOException {
+            throw new UnsupportedOperationException("Not supported yet.");
+        }
+    }
+
+    private static final class Char extends DBFFieldEncoder {
+
+        private final Charset charset;
+
+        public Char(int fieldLength, int fieldDecimals, Charset charset) {
+            super(String.class, fieldLength, fieldDecimals);
+            this.charset = charset;
+        }
+
+        @Override
+        public Object read(ChannelDataInput channel) throws IOException {
+            return new String(channel.readBytes(fieldLength), charset).trim();
+        }
+
+        @Override
+        public void write(ChannelDataOutput channel, Object value) throws IOException {
+            throw new UnsupportedOperationException("Not supported yet.");
+        }
+    }
+
+    private static final class Date extends DBFFieldEncoder {
+
+        public Date(int fieldLength, int fieldDecimals) {
+            super(LocalDate.class, fieldLength, fieldDecimals);
+        }
+
+        /**
+         * Parses a {@code YYYYMMDD} value; a blank value is returned as {@code null}.
+         */
+        @Override
+        public Object read(ChannelDataInput channel) throws IOException {
+            final String str = readText(channel);
+            if (str.isEmpty()) return null;
+            final int year = Integer.parseUnsignedInt(str,0,4,10);
+            final int month = Integer.parseUnsignedInt(str,4,6,10);
+            final int day = Integer.parseUnsignedInt(str,6,8,10);
+            return LocalDate.of(year, month, day);
+        }
+
+        @Override
+        public void write(ChannelDataOutput channel, Object value) throws IOException {
+            throw new UnsupportedOperationException("Not supported yet.");
+        }
+    }
+
+    private static final class ShortInt extends DBFFieldEncoder {
+
+        public ShortInt(int fieldLength) {
+            super(Integer.class, fieldLength, 0);
+        }
+
+        @Override
+        public Object read(ChannelDataInput channel) throws IOException {
+            final String str = readText(channel);
+            if (str.isEmpty()) return 0;
+            else return Integer.parseInt(str);
+        }
+
+        @Override
+        public void write(ChannelDataOutput channel, Object value) throws IOException {
+            throw new UnsupportedOperationException("Not supported yet.");
+        }
+    }
+
+    private static final class LongInt extends DBFFieldEncoder {
+
+        public LongInt(int fieldLength) {
+            super(Long.class, fieldLength, 0);
+        }
+
+        @Override
+        public Object read(ChannelDataInput channel) throws IOException {
+            final String str = readText(channel);
+            if (str.isEmpty()) return 0L;
+            else return Long.parseLong(str);
+        }
+
+        @Override
+        public void write(ChannelDataOutput channel, Object value) throws IOException {
+            throw new UnsupportedOperationException("Not supported yet.");
+        }
+    }
+
+    private static final class Decimal extends DBFFieldEncoder {
+
+        public Decimal(int fieldLength, int fieldDecimals) {
+            super(Double.class, fieldLength, fieldDecimals);
+        }
+
+        @Override
+        public Object read(ChannelDataInput channel) throws IOException {
+            final String str = readText(channel);
+            // Must be a Double to honour getValueClass(); 0L would be boxed as a Long.
+            if (str.isEmpty()) return 0.0;
+            else return Double.parseDouble(str);
+        }
+
+        @Override
+        public void write(ChannelDataOutput channel, Object value) throws IOException {
+            throw new UnsupportedOperationException("Not supported yet.");
+        }
+    }
+
+    private static final class Logic extends DBFFieldEncoder {
+
+        public Logic(int fieldLength) {
+            super(Boolean.class, fieldLength, 0);
+        }
+
+        /**
+         * Decodes a logical flag: T, t, Y, y are true; F, f, N, n are false;
+         * a blank value or '?' (not initialised) is returned as {@code null}.
+         */
+        @Override
+        public Object read(ChannelDataInput channel) throws IOException {
+            final String str = readText(channel);
+            if (str.isEmpty()) return null;
+            switch (str.charAt(0)) {
+                case 'T': case 't': case 'Y': case 'y': return Boolean.TRUE;
+                case 'F': case 'f': case 'N': case 'n': return Boolean.FALSE;
+                case '?': return null;
+                default: throw new IOException("Unexpected logical value " + str);
+            }
+        }
+
+        @Override
+        public void write(ChannelDataOutput channel, Object value) throws IOException {
+            throw new UnsupportedOperationException("Not supported yet.");
+        }
+    }
+
+}
diff --git a/incubator/src/org.apache.sis.storage.shapefile/main/org/apache/sis/storage/shapefile/dbf/DBFHeader.java b/incubator/src/org.apache.sis.storage.shapefile/main/org/apache/sis/storage/shapefile/dbf/DBFHeader.java
new file mode 100644
index 0000000000..bbe3c8b475
--- /dev/null
+++ b/incubator/src/org.apache.sis.storage.shapefile/main/org/apache/sis/storage/shapefile/dbf/DBFHeader.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sis.storage.shapefile.dbf;
+
+import java.io.IOException;
+import java.nio.ByteOrder;
+import java.nio.charset.Charset;
+
+import org.apache.sis.io.stream.ChannelDataInput;
+
+/**
+ * Header of a DBF file: last-update date, record count, header and record
+ * sizes, followed by the field descriptors.
+ *
+ * @author Johann Sorel (Geomatys)
+ */
+public final class DBFHeader {
+
+    /** Size in bytes of the fixed header part and of each field descriptor. */
+    private static final int FIELD_SIZE = 32;
+
+    public int year;
+    public int month;
+    public int day;
+    public int nbRecord;
+    public int headerSize;
+    public int recordSize;
+    public DBFField[] fields;
+
+    /**
+     * Reads the header from the current position of the channel.
+     * The channel byte order is switched to little endian as a side effect.
+     *
+     * @param channel input positioned on the first byte of the file
+     * @param charset charset forwarded to each field descriptor
+     * @throws IOException if the magic byte or the header size is invalid,
+     *         or if the channel cannot be read
+     */
+    public void read(ChannelDataInput channel, Charset charset) throws IOException {
+        channel.buffer.order(ByteOrder.LITTLE_ENDIAN);
+        if (channel.readByte() != 0x03) {
+            throw new IOException("Invalid dBase III magic number");
+        }
+        year = channel.readUnsignedByte();
+        month = channel.readUnsignedByte();
+        day = channel.readUnsignedByte();
+        nbRecord = channel.readInt();
+        headerSize = channel.readUnsignedShort();
+        recordSize = channel.readUnsignedShort();
+        channel.skipBytes(20);
+
+        // The header holds 32 fixed bytes, N descriptors of 32 bytes and a
+        // 1-byte terminator. Smaller sizes would give a negative field count.
+        if (headerSize < FIELD_SIZE + 1) {
+            throw new IOException("Invalid DBF header size " + headerSize);
+        }
+        fields = new DBFField[(headerSize - FIELD_SIZE - 1) / FIELD_SIZE];
+
+        for (int i = 0; i < fields.length; i++) {
+            fields[i] = new DBFField();
+            fields[i].read(channel, charset);
+        }
+        // Skip the terminator byte that follows the descriptors.
+        channel.skipBytes(1);
+    }
+
+    @Override
+    public String toString() {
+        final StringBuilder sb = new StringBuilder("DBFHeader{");
+        sb.append("year=").append(year);
+        sb.append(",month=").append(month);
+        sb.append(",day=").append(day);
+        sb.append(",nbRecord=").append(nbRecord);
+        sb.append(",headerSize=").append(headerSize);
+        sb.append(",recordSize=").append(recordSize);
+        sb.append("}\n");
+        // fields is null until read() has been called.
+        if (fields != null) {
+            for (DBFField field : fields) {
+                sb.append("- ").append(field.toString());
+                sb.append("\n");
+            }
+        }
+        return sb.toString();
+    }
+}
diff --git a/incubator/src/org.apache.sis.storage.shapefile/main/org/apache/sis/storage/shapefile/dbf/DBFReader.java b/incubator/src/org.apache.sis.storage.shapefile/main/org/apache/sis/storage/shapefile/dbf/DBFReader.java
new file mode 100644
index 0000000000..f7893632a8
--- /dev/null
+++ b/incubator/src/org.apache.sis.storage.shapefile/main/org/apache/sis/storage/shapefile/dbf/DBFReader.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sis.storage.shapefile.dbf;
+
+import java.io.IOException;
+import java.nio.charset.Charset;
+
+import org.apache.sis.io.stream.ChannelDataInput;
+
+/**
+ * Seekable dbf file reader.
+ *
+ * @author Johann Sorel (Geomatys)
+ */
+public final class DBFReader implements AutoCloseable {
+
+    /** Marker byte of a live record (space). */
+    private static final int TAG_PRESENT = 0x20;
+    /** Marker byte of a deleted record (asterisk). */
+    private static final int TAG_DELETED = 0x2a;
+
+    private final ChannelDataInput channel;
+    private final DBFHeader header;
+    /** Number of records returned or skipped so far by {@link #next()}. */
+    private int nbRead = 0;
+
+    /**
+     * Creates a reader and immediately reads the file header.
+     *
+     * @param channel input positioned at the beginning of the DBF file
+     * @param charset charset used to decode character fields
+     * @throws IOException if the header is invalid or cannot be read
+     */
+    public DBFReader(ChannelDataInput channel, Charset charset) throws IOException {
+        this.channel = channel;
+        this.header = new DBFHeader();
+        this.header.read(channel, charset);
+    }
+
+    /**
+     * @return header read at construction time
+     */
+    public DBFHeader getHeader() {
+        return header;
+    }
+
+    /**
+     * Moves the channel to an absolute position.
+     * The count of records already read is not adjusted by this method.
+     *
+     * @param position absolute position in the channel
+     * @throws IOException if the seek fails
+     */
+    public void moveToOffset(long position) throws IOException {
+        channel.seek(position);
+    }
+
+    /**
+     * Reads the next record.
+     *
+     * @return record, DBFRecord.DELETED if this record has been deleted,
+     *         or {@code null} once all records declared in the header have been read
+     * @throws IOException if a decoding error occurs
+     */
+    public DBFRecord next() throws IOException {
+        if (nbRead >= header.nbRecord) {
+            //reached records end
+            return null;
+        }
+        nbRead++;
+
+        final int marker = channel.readUnsignedByte();
+        if (marker == TAG_DELETED) {
+            // recordSize already counts the marker byte consumed above, so
+            // skipping recordSize would eat the first byte of the next record.
+            channel.skipBytes(Math.max(0, header.recordSize - 1));
+            return DBFRecord.DELETED;
+        } else if (marker != TAG_PRESENT) {
+            throw new IOException("Unexpected record marker " + marker);
+        }
+
+        final DBFRecord record = new DBFRecord();
+        record.fields = new Object[header.fields.length];
+        for (int i = 0; i < header.fields.length; i++) {
+            record.fields[i] = header.fields[i].getEncoder().read(channel);
+        }
+        return record;
+    }
+
+    /**
+     * Closes the underlying channel.
+     *
+     * @throws IOException if the channel cannot be closed
+     */
+    @Override
+    public void close() throws IOException {
+        channel.channel.close();
+    }
+
+}
diff --git a/incubator/src/org.apache.sis.storage.shapefile/main/module-info.java b/incubator/src/org.apache.sis.storage.shapefile/main/org/apache/sis/storage/shapefile/dbf/DBFRecord.java
similarity index 69%
copy from incubator/src/org.apache.sis.storage.shapefile/main/module-info.java
copy to incubator/src/org.apache.sis.storage.shapefile/main/org/apache/sis/storage/shapefile/dbf/DBFRecord.java
index 2aad9ecdde..e21562d338 100644
---
a/incubator/src/org.apache.sis.storage.shapefile/main/module-info.java
+++ b/incubator/src/org.apache.sis.storage.shapefile/main/org/apache/sis/storage/shapefile/dbf/DBFRecord.java
@@ -14,19 +14,20 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+package org.apache.sis.storage.shapefile.dbf;
 
 /**
- * Shapefile store.
+ * A DBF record is an array of field values.
  *
- * @author Travis L. Pinney
- * @author Marc Le Bihan
+ * @author Johann Sorel (Geomatys)
  */
-module org.apache.sis.storage.shapefile {
-    requires java.sql;
-    requires esri.geometry.api;
-    requires transitive org.apache.sis.storage;
+public final class DBFRecord {
+
+    // Shared sentinel returned by DBFReader.next() for a record flagged as deleted.
+    // Its fields array is never assigned, so it stays null.
+    public static final DBFRecord DELETED = new DBFRecord();
+
+    // Decoded values, one per header field and in the same order,
+    // as filled by DBFReader.next().
+    public Object[] fields;
+
+    // Creates an empty record; the caller assigns the fields array.
+    public DBFRecord() {
+    }
 
-    exports org.apache.sis.storage.shapefile;
-    exports org.apache.sis.storage.shapefile.cpg;
-    exports org.apache.sis.storage.shapefile.shp;
 }
diff --git a/incubator/src/org.apache.sis.storage.shapefile/test/org/apache/sis/storage/shapefile/dbf/DBFIOTest.java b/incubator/src/org.apache.sis.storage.shapefile/test/org/apache/sis/storage/shapefile/dbf/DBFIOTest.java
new file mode 100644
index 0000000000..46fa04af28
--- /dev/null
+++ b/incubator/src/org.apache.sis.storage.shapefile/test/org/apache/sis/storage/shapefile/dbf/DBFIOTest.java
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sis.storage.shapefile.dbf;
+
+import java.io.IOException;
+import java.net.URL;
+import java.nio.charset.StandardCharsets;
+import java.time.LocalDate;
+import org.apache.sis.io.stream.ChannelDataInput;
+import org.apache.sis.storage.DataStoreException;
+import org.apache.sis.storage.StorageConnector;
+import org.junit.Test;
+import static org.junit.jupiter.api.Assertions.*;
+
+/**
+ * Reads the {@code point.dbf} test resource and checks the decoded header,
+ * field descriptors and records.
+ *
+ * @author Johann Sorel (Geomatys)
+ */
+public class DBFIOTest {
+
+    /**
+     * Opens a test resource as a channel, closing every other view of the connector.
+     */
+    private ChannelDataInput openRead(String path) throws DataStoreException {
+        final URL resource = DBFIOTest.class.getResource(path);
+        final StorageConnector connector = new StorageConnector(resource);
+        final ChannelDataInput input = connector.getStorageAs(ChannelDataInput.class);
+        connector.closeAllExcept(input);
+        return input;
+    }
+
+    /**
+     * Checks one field descriptor; the address is expected to be 0 for every field.
+     */
+    private static void assertField(DBFField field, String name, int type, int length, int decimals) {
+        assertEquals(name, field.fieldName);
+        assertEquals(type, field.fieldType);
+        assertEquals(0, field.fieldAddress);
+        assertEquals(length, field.fieldLength);
+        assertEquals(decimals, field.fieldLDecimals);
+    }
+
+    /**
+     * Checks the leading values of a record against the expected ones, in order.
+     */
+    private static void assertRecord(DBFRecord record, Object... expected) {
+        for (int i = 0; i < expected.length; i++) {
+            assertEquals(expected[i], record.fields[i]);
+        }
+    }
+
+    @Test
+    public void readTest() throws DataStoreException, IOException {
+        final ChannelDataInput cdi = openRead("/org/apache/sis/storage/shapefile/point.dbf");
+
+        try (DBFReader reader = new DBFReader(cdi, StandardCharsets.UTF_8)) {
+            final DBFHeader header = reader.getHeader();
+            assertEquals(123, header.year);
+            assertEquals(10, header.month);
+            assertEquals(27, header.day);
+            assertEquals(2, header.nbRecord);
+            assertEquals(193, header.headerSize);
+            assertEquals(120, header.recordSize);
+
+            final DBFField[] fields = header.fields;
+            assertEquals(5, fields.length);
+            assertField(fields[0], "id",      DBFField.TYPE_NUMBER, 10, 0);
+            assertField(fields[1], "text",    DBFField.TYPE_CHAR,   80, 0);
+            assertField(fields[2], "integer", DBFField.TYPE_NUMBER, 10, 0);
+            assertField(fields[3], "float",   DBFField.TYPE_NUMBER, 11, 6);
+            assertField(fields[4], "date",    DBFField.TYPE_DATE,    8, 0);
+
+            assertRecord(reader.next(), 1L, "text1", 10L, 20.0, LocalDate.of(2023, 10, 27));
+            assertRecord(reader.next(), 2L, "text2", 40L, 60.0, LocalDate.of(2023, 10, 28));
+
+            //no more records
+            assertNull(reader.next());
+        }
+
+    }
+
+}