Package org.apache.stanbol.enhancer.servicesapi

Examples of org.apache.stanbol.enhancer.servicesapi.ContentItem


        engine = null;
    }
   
    @Test
    public void testEngineDe() throws IOException, EngineException {
        ContentItem ci = ciFactory.createContentItem(new StringSource(de_text));
        Assert.assertNotNull(ci);
        AnalysedText at = atFactory.createAnalysedText(ci, ci.getBlob());
        Assert.assertNotNull(at);
        ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("de")));
        Assert.assertEquals("de", EnhancementEngineHelper.getLanguage(ci));
       
        //Add some Tokens with POS annotations to test the usage of
        //existing POS annotations by the lemmatizer
        Token verbrachten = at.addToken(de_verbStart,de_verbStart+de_verb.length());
View Full Code Here


    @Before
    public void setupTest() throws IOException {
        //create a contentItem for the plain text used for testing
        InputStream is = FstLinkingEngineTest.class.getClassLoader().getResourceAsStream(TEST_TEXT_FILE);
        Assert.assertNotNull("Unable to load '"+TEST_TEXT_FILE+"' via classpath",is);
        ContentItem ci = cif.createContentItem(new StreamSource(is,"text/plain"));
        AnalysedText at = atf.createAnalysedText(ci, ci.getBlob());
        is.close();
        //parse the prepared NLP results and add it to the ContentItem
        is = FstLinkingEngineTest.class.getClassLoader().getResourceAsStream(TEST_TEXT_NLP_FILE);
        Assert.assertNotNull("Unable to load '"+TEST_TEXT_NLP_FILE+"' via classpath",is);
        AnalyzedTextParser.getDefaultInstance().parse(is, Charset.forName("UTF-8"), at);
        is.close();
        //set the language of the contentItem
        ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE,
            EN_LANGUAGE));
        //set the contentItem and also the content
        this.ci = ci;
        this.content = at.getText().toString();
    }
View Full Code Here

      return ciFactory.createContentItem(cs);
    }

    @Test
    public void testEnhancementInterfaces() throws Exception {
        ContentItem ci = createContentItem(SINGLE_SENTENCE_SOURCE);
        UriRef ciUri = new UriRef(ci.getUri().getUnicodeString());
        RdfEntityFactory factory = RdfEntityFactory.createInstance(ci.getMetadata());
        long start = System.currentTimeMillis();
        //create an Text Annotation representing an extracted Person
        TextAnnotation personAnnotation = factory.getProxy(
                createEnhancementURI(), TextAnnotation.class);
        personAnnotation.setCreator(TEST_ENHANCEMENT_ENGINE_URI);
        personAnnotation.setCreated(new Date());
        personAnnotation.setExtractedFrom(ciUri);
        personAnnotation.getDcType().add(new UriRef("http://www.example.org/cv/annotatation-types/text#Person"));
        personAnnotation.setConfidence(0.8);
        personAnnotation.setSelectedText("Patrick Marshall");
        personAnnotation.setStart(SINGLE_SENTENCE.indexOf(personAnnotation.getSelectedText()));
        personAnnotation.setEnd(personAnnotation.getStart()+personAnnotation.getSelectedText().length());
        personAnnotation.setSelectionContext(SINGLE_SENTENCE);

        //create an Text Annotation representing an extracted Location
        TextAnnotation locationAnnotation = factory.getProxy(
                createEnhancementURI(),    TextAnnotation.class);
        locationAnnotation.setCreator(TEST_ENHANCEMENT_ENGINE_URI);
        locationAnnotation.setCreated(new Date());
        locationAnnotation.setExtractedFrom(ciUri);
        locationAnnotation.getDcType().add(new UriRef("http://www.example.org/cv/annotatation-types/text#Location"));
        locationAnnotation.setConfidence(0.78);
        locationAnnotation.setSelectedText("New Zealand");
        locationAnnotation.setStart(SINGLE_SENTENCE.indexOf(locationAnnotation.getSelectedText()));
        locationAnnotation.setEnd(locationAnnotation.getStart()+locationAnnotation.getSelectedText().length());
        locationAnnotation.setSelectionContext(SINGLE_SENTENCE);

        //create an Text Annotation representing an extracted Organisation
        TextAnnotation orgAnnotation = factory.getProxy(
                createEnhancementURI(),    TextAnnotation.class);
        orgAnnotation.setCreator(TEST_ENHANCEMENT_ENGINE_URI);
        orgAnnotation.setCreated(new Date());
        orgAnnotation.setExtractedFrom(ciUri);
        orgAnnotation.getDcType().add(new UriRef("http://www.example.org/cv/annotatation-types/text#Organisation"));
        orgAnnotation.setConfidence(0.78);
        orgAnnotation.setSelectedText("University of Otago");
        orgAnnotation.setStart(SINGLE_SENTENCE.indexOf(orgAnnotation.getSelectedText()));
        orgAnnotation.setEnd(orgAnnotation.getStart()+orgAnnotation.getSelectedText().length());
        orgAnnotation.setSelectionContext(SINGLE_SENTENCE);

        // create an Entity Annotation for the person TextAnnotation
        EntityAnnotation patrickMarshall = factory.getProxy(
                createEnhancementURI(), EntityAnnotation.class);
        patrickMarshall.setCreator(TEST_ENHANCEMENT_ENGINE_URI);
        patrickMarshall.setCreated(new Date());
        patrickMarshall.setExtractedFrom(ciUri);
        patrickMarshall.getDcType().add(new UriRef("http://www.example.org/cv/annotatation-types/entity#Entity"));
        patrickMarshall.setConfidence(0.56);
        patrickMarshall.getRelations().add(personAnnotation);
        patrickMarshall.setEntityLabel("Patrick Marshall");
        patrickMarshall.setEntityReference(new UriRef("http://rdf.freebase.com/rdf/en/patrick_marshall"));
        patrickMarshall.getEntityTypes().addAll(Arrays.asList(
                        new UriRef("http://rdf.freebase.com/ns/people.person"),
                        new UriRef("http://rdf.freebase.com/ns/common.topic"),
                        new UriRef("http://rdf.freebase.com/ns/education.academic")));
        // and an other for New Zealand
        EntityAnnotation newZealand = factory.getProxy(
                createEnhancementURI(), EntityAnnotation.class);
        newZealand.setCreator(TEST_ENHANCEMENT_ENGINE_URI);
        newZealand.setCreated(new Date());
        newZealand.setExtractedFrom(ciUri);
        newZealand.getDcType().add(new UriRef("http://www.example.org/cv/annotatation-types/entity#Entity"));
        newZealand.setConfidence(0.98);
        newZealand.getRelations().add(locationAnnotation);
        newZealand.setEntityLabel("New Zealand");
        newZealand.setEntityReference(new UriRef("http://rdf.freebase.com/rdf/en/new_zealand"));
        newZealand.getEntityTypes().addAll(Arrays.asList(
                new UriRef("http://rdf.freebase.com/ns/location.location"),
                new UriRef("http://rdf.freebase.com/ns/common.topic"),
                new UriRef("http://rdf.freebase.com/ns/location.country")));

        // and an other option for New Zealand
        EntityAnnotation airNewZealand = factory.getProxy(
                createEnhancementURI(), EntityAnnotation.class);
        airNewZealand.setCreator(TEST_ENHANCEMENT_ENGINE_URI);
        airNewZealand.setCreated(new Date());
        airNewZealand.setExtractedFrom(ciUri);
        airNewZealand.getDcType().add(new UriRef("http://www.example.org/cv/annotatation-types/entity#Entity"));
        airNewZealand.setConfidence(0.36);
        airNewZealand.getRelations().add(locationAnnotation);
        airNewZealand.setEntityLabel("New Zealand");
        airNewZealand.setEntityReference(new UriRef("http://rdf.freebase.com/rdf/en/air_new_zealand"));
        airNewZealand.getEntityTypes().addAll(Arrays.asList(
                new UriRef("http://rdf.freebase.com/ns/business.sponsor"),
                new UriRef("http://rdf.freebase.com/ns/common.topic"),
                new UriRef("http://rdf.freebase.com/ns/travel.transport_operator"),
                new UriRef("http://rdf.freebase.com/ns/aviation.airline"),
                new UriRef("http://rdf.freebase.com/ns/aviation.aircraft_owner"),
                new UriRef("http://rdf.freebase.com/ns/business.employer"),
                new UriRef("http://rdf.freebase.com/ns/freebase.apps.hosts.com.appspot.acre.juggle.juggle"),
                new UriRef("http://rdf.freebase.com/ns/business.company")));
        System.out.println("creation time "+(System.currentTimeMillis()-start)+"ms");

        //now test the enhancement
        int numberOfTextAnnotations = checkAllTextAnnotations(ci.getMetadata());
        assertEquals(3, numberOfTextAnnotations);

        int numberOfEntityAnnotations = checkAllEntityAnnotations(ci.getMetadata());
        assertEquals(3, numberOfEntityAnnotations);
    }
View Full Code Here

    }

    @Test
    public void testHtml() throws EngineException, IOException {
        log.info(">>> testHtml <<<");
        ContentItem ci = createContentItem("test.html", "text/html; charset=UTF-8");
        assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
        engine.computeEnhancements(ci);
        Entry<UriRef,Blob> contentPart = ContentItemHelper.getBlob(ci,
            singleton("text/plain"));
        assertNotNull(contentPart);
View Full Code Here

    }
    @Test
    public void testPdf() throws EngineException, IOException {
        log.info(">>> testPdf <<<");
        //PDF created by Apple Pages
        ContentItem ci = createContentItem("test.pdf", "application/pdf");
        assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
        engine.computeEnhancements(ci);
        Entry<UriRef,Blob> contentPart = ContentItemHelper.getBlob(ci,
            singleton("text/plain"));
        assertNotNull(contentPart);
View Full Code Here

            "</body></html>");
    }
    @Test
    public void testMsWord() throws EngineException, IOException {
        log.info(">>> testMsWord <<<");
        ContentItem ci = createContentItem("test.doc", "application/msword");
        assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
        engine.computeEnhancements(ci);
        Entry<UriRef,Blob> contentPart = ContentItemHelper.getBlob(ci,
            singleton("text/plain"));
        assertNotNull(contentPart);
View Full Code Here

            "The Stanbol enhancer can detect famous cities",
            "</body></html>");    }
    @Test
    public void testRtf() throws EngineException, IOException {
        log.info(">>> testRtf <<<");
        ContentItem ci = createContentItem("test.rtf", "application/rtf");
        assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
        engine.computeEnhancements(ci);
        Entry<UriRef,Blob> contentPart = ContentItemHelper.getBlob(ci,
            singleton("text/plain"));
        assertNotNull(contentPart);
View Full Code Here

            "</body></html>");
    }
    @Test
    public void testOdt() throws EngineException, IOException {
        log.info(">>> testOdt <<<");
        ContentItem ci = createContentItem("test.odt", "application/vnd.oasis.opendocument.text");
        assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
        engine.computeEnhancements(ci);
        Entry<UriRef,Blob> contentPart = ContentItemHelper.getBlob(ci,
            singleton("text/plain"));
        assertNotNull(contentPart);
View Full Code Here

            "</body></html>");
    }
    @Test
    public void testEMail() throws EngineException, IOException, ParseException {
        log.info(">>> testEMail <<<");
        ContentItem ci = createContentItem("test.email.txt", "message/rfc822");
        assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
        engine.computeEnhancements(ci);
        Entry<UriRef,Blob> contentPart = ContentItemHelper.getBlob(ci,
            singleton("text/plain"));
        assertNotNull(contentPart);
View Full Code Here

       
    }
    @Test
    public void testMp3() throws EngineException, IOException, ParseException {
        log.info(">>> testMp3 <<<");
        ContentItem ci = createContentItem("testMP3id3v24.mp3", "audio/mpeg");
        assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
        engine.computeEnhancements(ci);
        Entry<UriRef,Blob> contentPart = ContentItemHelper.getBlob(ci,
            singleton("text/plain"));
        assertNotNull(contentPart);
View Full Code Here

TOP

Related Classes of org.apache.stanbol.enhancer.servicesapi.ContentItem

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.