This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-4011
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 627b94175624693dc84296d84a2bd24d499ddc5d
Author: tallison <[email protected]>
AuthorDate: Mon Apr 10 15:58:49 2023 -0400

    TIKA-4011 -- add detection for onix message files
---
 CHANGES.txt                                        |  2 ++
 .../org/apache/tika/mime/tika-mimetypes.xml        | 18 ++++++++++++++++
 .../java/org/apache/tika/mime/TestMimeTypes.java   |  6 ++++++
 .../resources/test-documents/testONIXMessage.xml   | 21 +++++++++++++++++++
 .../test-documents/testONIXMessageShort.xml        | 24 ++++++++++++++++++++++
 5 files changed, 71 insertions(+)

diff --git a/CHANGES.txt b/CHANGES.txt
index eeab24304..bcdea6c7a 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -2,6 +2,8 @@ Release 2.7.1 - ???
 
    * Add magic detection for canon raw file types: crw, cr2 and cr3 (TIKA-3991).
 
+   * Add detection for ONIX message files (TIKA-4011).
+
    * Add detection and a parser for ActiveMime files (TIKA-3987).
 
    * Users may now avoid the ZeroByteFileException via a
diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index fdb855f1c..088047914 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -6700,6 +6700,24 @@
     <sub-class-of type="application/xml"/>
   </mime-type>
 
+  <mime-type type="application/onix-message+xml">
+    <_comment>ONline Information eXchange (ONIX) for books</_comment>
+    <!-- see https://www.loc.gov/preservation/digital/formats/fdd/fdd000488.shtml -->
+    <root-XML localName="ONIXMessage" namespaceURI="http://ns.editeur.org/onix/3.0/reference"/>
+    <!-- do we want a separate mime for this? -->
+    <root-XML localName="ONIXmessage" namespaceURI="http://ns.editeur.org/onix/3.0/short"/>
+    <root-XML localName="ONIXMessage"/>
+    <sub-class-of type="application/xml"/>
+  </mime-type>
+
+  <mime-type type="application/onix-message-short+xml">
+    <_comment>ONline Information eXchange (ONIX) for books</_comment>
+    <!-- see https://www.loc.gov/preservation/digital/formats/fdd/fdd000488.shtml -->
+    <root-XML localName="ONIXMessage" namespaceURI="http://ns.editeur.org/onix/3.0/reference"/>
+    <root-XML localName="ONIXMessage"/>
+    <sub-class-of type="application/xml"/>
+  </mime-type>
+
   <mime-type type="text/x-actionscript">
     <_comment>ActionScript source code</_comment>
     <glob pattern="*.as"/>
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java
index 8a33c4fe6..784b860a3 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java
@@ -1312,6 +1312,12 @@ public class TestMimeTypes {
         assertTypeDetection("testPGPEncrypted.gpg", "application/pgp-encrypted");
     }
 
+    @Test
+    public void testONIX() throws Exception {
+        assertTypeByData("application/onix-message+xml", "testONIXMessage.xml");
+        assertTypeByData("application/onix-message+xml", "testONIXMessageShort.xml");
+    }
+
     private void assertText(byte[] prefix) throws IOException {
         assertMagic("text/plain", prefix);
     }
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/test-documents/testONIXMessage.xml b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/test-documents/testONIXMessage.xml
new file mode 100644
index 000000000..e2e098f7d
--- /dev/null
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/test-documents/testONIXMessage.xml
@@ -0,0 +1,21 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<ONIXMessage release="3.0" xmlns="http://ns.editeur.org/onix/3.0/reference">
+  <Header>
+    <Sender>
+      <SenderName>Global Bookinfo</SenderName>
+      <ContactName>Someone Or Other, blah</ContactName>
+      <EmailAddress>[email protected]</EmailAddress>
+    </Sender>
+    <Addressee>
+      <AddresseeName>BooksBooksBooks.com</AddresseeName>
+    </Addressee>
+    <MessageNumber>231</MessageNumber>
+    <SentDateTime>20100510T1115-0400</SentDateTime>
+    <MessageNote>Sample message</MessageNote>
+  </Header>
+  <!-- product record 1 of 1 in message -->
+  <Product>
+    <RecordReference>com.globalbookinfo.onix.01734529</RecordReference>
+    <NotificationType>03</NotificationType>
+  </Product>
+</ONIXMessage>
\ No newline at end of file
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/test-documents/testONIXMessageShort.xml b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/test-documents/testONIXMessageShort.xml
new file mode 100644
index 000000000..01a5cd8f8
--- /dev/null
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/test-documents/testONIXMessageShort.xml
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<ONIXmessage release="3.0" xmlns="http://ns.editeur.org/onix/3.0/short">
+  <header>
+    <sender>
+      <x298>Global Bookinfo</x298>
+      <x299>Jane King, +1 555 321 7654</x299>
+      <j272>[email protected]</j272>
+    </sender>
+    <addressee>
+      <x300>BooksBooksBooks.com</x300>
+    </addressee>
+    <m180>231</m180>
+    <x307>20100510T1115-0400</x307>
+    <m183>Sample message</m183>
+  </header>
+  <!-- product record 1 of 1 in message -->
+  <product>
+    <a001>com.globalbookinfo.onix.01734529</a001>
+    <a002>03</a002>
+    <a194>04</a194>
+    <recordsourceidentifier>
+    </recordsourceidentifier>
+  </product>
+</ONIXmessage>
\ No newline at end of file

Reply via email to