Package org.modeshape.jcr.text

Examples of org.modeshape.jcr.text.TextExtractorOutput


        File tempFile = File.createTempFile("tika_extraction_", ".txt");
        try {
            IoUtil.write(rndString, tempFile);

            extractor.setWriteLimit(stringLength);
            TextExtractorOutput output = new TextExtractorOutput();
            extractor.extractFrom(new InMemoryTestBinary(new FileInputStream(tempFile)), output, new TextExtractorContext(DETECTOR));

            assertEquals(rndString, output.getText());
        } finally {
            FileUtil.delete(tempFile);
        }
    }
View Full Code Here


    }

    private void extractTermsFrom( String resourcePath ) throws Exception {
        InputStream stream = getClass().getClassLoader().getResourceAsStream(resourcePath);
        assertThat(stream, is(notNullValue()));
        TextExtractorOutput output = new TextExtractorOutput();
        extractor.extractFrom(new InMemoryTestBinary(stream), output, new TextExtractorContext(DETECTOR));
        output.toString();
        addWords(extracted, output.getText());
    }
View Full Code Here

        if (!extractionEnabled()) {
            return null;
        }
        try {
            String mimeType = inMemoryBinaryValue.getMimeType();
            TextExtractorOutput output = new TextExtractorOutput();
            // Run through the extractors and have them extract the text - the first one which accepts the mime-type will win
            for (TextExtractor extractor : extractors) {
                if (!extractor.supportsMimeType(mimeType)) {
                    continue;
                }
                extractor.extractFrom(inMemoryBinaryValue, output, context);
                break;
            }

            return output.getText();
        } catch (Exception e) {
            LOGGER.error(e, JcrI18n.errorExtractingTextFromBinary, inMemoryBinaryValue.getHexHash(), e.getLocalizedMessage());
        }
        return null;
    }
View Full Code Here

                if (store.getExtractedText(binaryValue) != null) {
                    return;
                }

                String mimeType = binaryValue.getMimeType();
                TextExtractorOutput output = new TextExtractorOutput();
                // Run through the extractors and have them extract the text - the first one which accepts the mime-type will win
                for (TextExtractor extractor : extractors) {
                    if (!extractor.supportsMimeType(mimeType)) {
                        continue;
                    }
                    extractor.extractFrom(binaryValue, output, context);
                    break;
                }

                String extractedText = output.getText();
                if (extractedText != null && !StringUtil.isBlank(extractedText)) {
                    store.storeExtractedText(binaryValue, extractedText);
                }
            }  catch (InterruptedException ie) {
                Thread.interrupted();
View Full Code Here

TOP

Related Classes of org.modeshape.jcr.text.TextExtractorOutput

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.