Created December 21, 2023 15:54
-
-
Save atomotic/90a7e8c30faeeba37c7cc6459c5d947b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
➜ file 89595bd2-8076-4da0-8880-518c291e7904 | |
89595bd2-8076-4da0-8880-518c291e7904: EPUB document | |
➜ tika -m -j 89595bd2-8076-4da0-8880-518c291e7904 | |
Exception in thread "main" org.apache.tika.exception.TikaException: TIKA-237: Illegal SAXException from org.apache.tika.parser.epub.EpubParser@3a320ade | |
at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:310) | |
at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:298) | |
at org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:203) | |
at org.apache.tika.cli.TikaCLI$OutputType.process(TikaCLI.java:1071) | |
at org.apache.tika.cli.TikaCLI.process(TikaCLI.java:493) | |
at org.apache.tika.cli.TikaCLI.main(TikaCLI.java:256) | |
Caused by: org.xml.sax.SAXParseException; lineNumber: 1; columnNumber: 1; Content is not allowed in prolog. | |
at org.apache.xerces.util.ErrorHandlerWrapper.createSAXParseException(Unknown Source) | |
at org.apache.xerces.util.ErrorHandlerWrapper.fatalError(Unknown Source) | |
at org.apache.xerces.impl.XMLErrorReporter.reportError(Unknown Source) | |
at org.apache.xerces.impl.XMLErrorReporter.reportError(Unknown Source) | |
at org.apache.xerces.impl.XMLErrorReporter.reportError(Unknown Source) | |
at org.apache.xerces.impl.XMLScanner.reportFatalError(Unknown Source) | |
at org.apache.xerces.impl.XMLDocumentScannerImpl$PrologDispatcher.dispatch(Unknown Source) | |
at org.apache.xerces.impl.XMLDocumentFragmentScannerImpl.scanDocument(Unknown Source) | |
at org.apache.xerces.parsers.XML11Configuration.parse(Unknown Source) | |
at org.apache.xerces.parsers.XML11Configuration.parse(Unknown Source) | |
at org.apache.xerces.parsers.XMLParser.parse(Unknown Source) | |
at org.apache.xerces.parsers.AbstractSAXParser.parse(Unknown Source) | |
at org.apache.xerces.jaxp.SAXParserImpl$JAXPSAXParser.parse(Unknown Source) | |
at org.apache.xerces.jaxp.SAXParserImpl.parse(Unknown Source) | |
at java.xml/javax.xml.parsers.SAXParser.parse(SAXParser.java:197) | |
at org.apache.tika.utils.XMLReaderUtils.parseSAX(XMLReaderUtils.java:513) | |
at org.apache.tika.parser.epub.EpubContentParser.parse(EpubContentParser.java:50) | |
at org.apache.tika.parser.epub.EpubParser.bufferedParseZipFile(EpubParser.java:289) | |
at org.apache.tika.parser.epub.EpubParser.bufferedParse(EpubParser.java:175) | |
at org.apache.tika.parser.epub.EpubParser.parse(EpubParser.java:124) | |
at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:298) | |
... 5 more | |
➜ sf 89595bd2-8076-4da0-8880-518c291e7904 | |
--- | |
siegfried : 1.9.6 | |
scandate : 2023-12-21T16:52:55+01:00 | |
signature : default.sig | |
created : 2022-11-06T17:44:52+01:00 | |
identifiers : | |
- name : 'pronom' | |
details : 'DROID_SignatureFile_V109.xml; container-signature-20221102.xml' | |
--- | |
filename : '89595bd2-8076-4da0-8880-518c291e7904' | |
filesize : 1373574 | |
modified : 2023-07-04T21:39:45+02:00 | |
errors : | |
matches : | |
- ns : 'pronom' | |
id : 'fmt/483' | |
format : 'ePub format' | |
version : | |
mime : 'application/epub+zip' | |
basis : 'container name mimetype with byte match at 0, 20' | |
warning : 'extension mismatch' | |
➜ unzip -l 89595bd2-8076-4da0-8880-518c291e7904 | |
Archive: 89595bd2-8076-4da0-8880-518c291e7904 | |
Length Date Time Name | |
--------- ---------- ----- ---- | |
20 00-00-1980 00:00 mimetype | |
258 00-00-1980 00:00 META-INF/container.xml | |
4945 00-00-1980 00:00 OEBPS/content.opf | |
544 00-00-1980 00:00 OEBPS/page-template.xpgt | |
944 00-00-1980 00:00 OEBPS/9788834025161.css | |
3536 00-00-1980 00:00 OEBPS/toc.ncx | |
384 00-00-1980 00:00 OEBPS/01_cover.html | |
1104 00-00-1980 00:00 OEBPS/02_front.html | |
5696 00-00-1980 00:00 OEBPS/25_toc.html | |
688 00-00-1980 00:00 OEBPS/03_front1.html | |
400 00-00-1980 00:00 OEBPS/04_half.html | |
400 00-00-1980 00:00 OEBPS/06_titlepage.html | |
768 00-00-1980 00:00 OEBPS/07_preface.html | |
26688 00-00-1980 00:00 OEBPS/09_chapter2.html | |
672 00-00-1980 00:00 OEBPS/05_half1.html | |
24784 00-00-1980 00:00 OEBPS/16_chapter9.html | |
26592 00-00-1980 00:00 OEBPS/17_chapter10.html | |
30176 00-00-1980 00:00 OEBPS/18_chapter11.html | |
29680 00-00-1980 00:00 OEBPS/19_chapter12.html | |
30640 00-00-1980 00:00 OEBPS/20_chapter13.html | |
28928 00-00-1980 00:00 OEBPS/21_chapter14.html | |
29056 00-00-1980 00:00 OEBPS/22_chapter15.html | |
25904 00-00-1980 00:00 OEBPS/23_chapter16.html | |
26192 00-00-1980 00:00 OEBPS/08_chapter1.html | |
25472 00-00-1980 00:00 OEBPS/14_chapter7.html | |
27120 00-00-1980 00:00 OEBPS/13_chapter6.html | |
25344 00-00-1980 00:00 OEBPS/10_chapter3.html | |
28064 00-00-1980 00:00 OEBPS/12_chapter5.html | |
27632 00-00-1980 00:00 OEBPS/11_chapter4.html | |
28784 00-00-1980 00:00 OEBPS/15_chapter8.html | |
106772 00-00-1980 00:00 OEBPS/images/cover.jpg | |
37376 00-00-1980 00:00 OEBPS/images/half.jpg | |
46000 00-00-1980 00:00 OEBPS/images/titlepage.jpg | |
68384 00-00-1980 00:00 OEBPS/images/pg33.jpg | |
12720 00-00-1980 00:00 OEBPS/images/circle.jpg | |
57216 00-00-1980 00:00 OEBPS/images/pg36.jpg | |
51232 00-00-1980 00:00 OEBPS/images/pg104.jpg | |
79072 00-00-1980 00:00 OEBPS/images/pg194.jpg | |
82192 00-00-1980 00:00 OEBPS/images/pg216.jpg | |
64704 00-00-1980 00:00 OEBPS/images/pg232.jpg | |
75200 00-00-1980 00:00 OEBPS/images/pg251.jpg | |
82000 00-00-1980 00:00 OEBPS/images/pg252.jpg | |
36304 00-00-1980 00:00 OEBPS/images/pg254.jpg | |
93616 00-00-1980 00:00 OEBPS/images/pg261.jpg | |
40736 00-00-1980 00:00 OEBPS/images/pg281.jpg | |
39589 00-00-1980 00:00 META-INF/encryption.xml | |
--------- ------- | |
1434528 46 files | |
➜ drm-epub |
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.