Package org.apache.any23.extractor

Examples of org.apache.any23.extractor.ExtractionException


        processFile(FILE);
    }

    private void processFile(String resource) throws IOException, ExtractionException, TripleHandlerException {
        final ExtractionParameters extractionParameters = ExtractionParameters.newDefault();
        final ExtractionContext extractionContext = new ExtractionContext(
                extractor.getDescription().getExtractorName(),
                RDFUtils.uri("file://" + resource)
        );
        final InputStream is = this.getClass().getResourceAsStream(resource);
        final CompositeTripleHandler compositeTripleHandler = new CompositeTripleHandler();
View Full Code Here


        try {
            final URI documentURI = context.getDocumentURI();
            final Workbook workbook = createWorkbook(documentURI, in);
            processWorkbook(documentURI, workbook, er);
        } catch (Exception e) {
            throw new ExtractionException("An error occurred while extracting MS Excel content.", e);
        }
    }
View Full Code Here

        StringWriter buffer = new StringWriter();
        try {
            getXSLT().applyTo(in, buffer);
        } catch (XSLTStylesheetException xslte) {
            throw new ExtractionException("An error occurred during the XSLT application.", xslte);
        }

        try {
            RDFParser parser
                    = RDFParserFactory.getInstance().getRDFXMLParser(
                        verifyDataType, stopAtFirstError, extractionContext, out
                    );
            parser.parse(
                    new StringReader(buffer.getBuffer().toString()),
                    extractionContext.getDocumentURI().stringValue()
            );
        } catch (RDFHandlerException ex) {
            throw new IllegalStateException(
                    "Should not happen, RDFHandlerAdapter does not throw RDFHandlerException", ex
            );
        } catch (RDFParseException ex) {
            throw new ExtractionException(
                    "Invalid RDF/XML produced by RDFa transform.", ex, out
            );
        }
    }
View Full Code Here

             ExtractionResult out
     ) throws IOException, ExtractionException {
         try {
             parser.processDocument( new URL(extractionContext.getDocumentURI().toString() ), in, out );
         } catch (RDFa11ParserException rpe) {
             throw new ExtractionException("Error while performing extraction.", rpe);
         }
     }
View Full Code Here

                        extractionRule.property,
                        ValueFactoryImpl.getInstance().createLiteral(content)
                );
            }
        } catch (BoilerpipeProcessingException bpe) {
            throw new ExtractionException("Error while applying text processor " + ArticleExtractor.class, bpe);
        }
    }
View Full Code Here

            final RDFParser parser = getParser(extractionContext, extractionResult);
            parser.parse(in, extractionContext.getDocumentURI().stringValue());
        } catch (RDFHandlerException ex) {
            throw new IllegalStateException("Unexpected exception.", ex);
        } catch (RDFParseException ex) {
            throw new ExtractionException("Error while parsing RDF document.", ex, extractionResult);
        }
    }
View Full Code Here

        try {
            final URI documentURI = context.getDocumentURI();
            final Workbook workbook = createWorkbook(documentURI, in);
            processWorkbook(documentURI, workbook, er);
        } catch (Exception e) {
            throw new ExtractionException("An error occurred while extracting MS Excel content.", e);
        }
    }
View Full Code Here

        final ExtractionResult er = new ExtractionResultImpl(extractionContext, extractor, th);
        er.notifyIssue(IssueReport.IssueLevel.Fatal  , "Fake fatal error.", 1, 2);
        er.notifyIssue(IssueReport.IssueLevel.Error  , "Fake error."      , 3, 4);
        er.notifyIssue(IssueReport.IssueLevel.Warning, "Fake warning."    , 5, 6);

        ExtractionException ee = new ExtractionException("Fake message.", new RuntimeException("Fake cause"), er);
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        ee.printStackTrace(new PrintWriter(baos));
        final String bufferContent = baos.toString();
        Assert.assertTrue("Unexpected message content.", bufferContent.contains(FAKE_EXTRACTOR_NAME));
        Assert.assertTrue("Unexpected message content.", bufferContent.contains("http://fake.document.uri"));
        Assert.assertTrue("Unexpected message content.", bufferContent.contains(
            ExtractionContext.ROOT_EXTRACTION_RESULT_ID
View Full Code Here

                            documentURI,
                            mappings,
                            out
                    );
                } catch (MalformedURLException e) {
                    throw new ExtractionException(
                            "Error while processing on subject '" + subject +
                                    "' the itemProp: '" + itemProp + "' "
                    );
                }
            }
View Full Code Here

    public ExtractionReport extract(ExtractionParameters eps, String documentURI, TripleHandler outputHandler)
    throws IOException, ExtractionException {
        try {
            return extract(eps, createDocumentSource(documentURI), outputHandler);
        } catch (URISyntaxException ex) {
            throw new ExtractionException("Error while extracting data from document URI.", ex);
        }
    }
View Full Code Here

TOP

Related Classes of org.apache.any23.extractor.ExtractionException

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.