Package org.apache.clerezza.rdf.core

Examples of org.apache.clerezza.rdf.core.LiteralFactory


     *
     * @return the URI of the new enhancement instance
     */
    protected static UriRef createEnhancement(MGraph metadata,
            EnhancementEngine engine, UriRef contentItemId){
        LiteralFactory literalFactory = LiteralFactory.getInstance();

        UriRef enhancement = new UriRef("urn:enhancement-"
                + EnhancementEngineHelper.randomUUID());
        //add the Enhancement Type
        metadata.add(new TripleImpl(enhancement, Properties.RDF_TYPE,
                TechnicalClasses.ENHANCER_ENHANCEMENT));
        //add the extracted from content item
        metadata.add(new TripleImpl(enhancement,
                Properties.ENHANCER_EXTRACTED_FROM, contentItemId));
        // creation date
        metadata.add(new TripleImpl(enhancement, Properties.DC_CREATED,
                literalFactory.createTypedLiteral(new Date())));

        // the engines that extracted the data
        // TODO: add some kind of versioning info for the extractor?
        // TODO: use a public dereferencing URI instead? that would allow for
        // explicit versioning too
        /* NOTE (Rupert Westenthaler 2010-05-26):
         * The Idea is to use the  ComponentContext in the activate() method of
         * an Enhancer to get the bundle name/version and use that as an
         * URI for the creator.
         * We would need to add getEnhancerID() method to the enhancer interface
         * to access this information
          */
        metadata.add(new TripleImpl(enhancement, Properties.DC_CREATOR,
                literalFactory.createTypedLiteral(engine.getClass().getName())));
        return enhancement;
    }
View Full Code Here


     * @param enhancement the enhancement
     * @param engine the engine
     */
    public static void addContributingEngine(MGraph metadata, UriRef enhancement,
                                             EnhancementEngine engine){
        LiteralFactory literalFactory = LiteralFactory.getInstance();
        // TODO: use a public dereferencing URI instead?
        metadata.add(new TripleImpl(enhancement, Properties.DC_CONTRIBUTOR,
            literalFactory.createTypedLiteral(engine.getClass().getName())));
        //set the modification date to the current date.
        set(metadata,enhancement,Properties.DC_MODIFIED,new Date(),literalFactory);
    }
View Full Code Here

     * @see EnhancementEngineHelper#createTextEnhancement(ContentItem, EnhancementEngine)
     */
    @Deprecated
    public static UriRef createNewExtraction(ContentItem ci,
            EnhancementEngine engine) {
        LiteralFactory literalFactory = LiteralFactory.getInstance();

        MGraph metadata = ci.getMetadata();
        UriRef extraction = new UriRef("urn:extraction-"
                + EnhancementEngineHelper.randomUUID());

        metadata.add(new TripleImpl(extraction, Properties.RDF_TYPE,
                TechnicalClasses.ENHANCER_EXTRACTION));

        // relate the extraction to the content item
        metadata.add(new TripleImpl(extraction,
                Properties.ENHANCER_RELATED_CONTENT_ITEM, new UriRef(ci.getUri().getUnicodeString())));

        // creation date
        metadata.add(new TripleImpl(extraction, Properties.DC_CREATED,
                literalFactory.createTypedLiteral(new Date())));

        // the engines that extracted the data
        // TODO: add some kind of versioning info for the extractor?
        // TODO: use a public dereferencing URI instead? that would allow for
        // explicit versioning too
        metadata.add(new TripleImpl(extraction, Properties.DC_CREATOR,
                literalFactory.createTypedLiteral(engine.getClass().getName())));

        return extraction;
    }
View Full Code Here

        confidence
    }

    private void initOccurrences() {
        MGraph graph = contentItem.getMetadata();
        LiteralFactory lf = LiteralFactory.getInstance();
        Map<UriRef,Collection<NonLiteral>> suggestionMap = new HashMap<UriRef,Collection<NonLiteral>>();
        // 1) get Entity Annotations
        Map<NonLiteral,Map<EAProps,Object>> entitySuggestionMap = new HashMap<NonLiteral,Map<EAProps,Object>>();
        Iterator<Triple> entityAnnotations = graph.filter(null, RDF.type, ENHANCER_ENTITYANNOTATION);
        while(entityAnnotations.hasNext()){
View Full Code Here

    /**
     * @return an RDF/JSON descriptions of places for the word map widget
     */
    public String getPlacesAsJSON() throws ParseException, UnsupportedEncodingException {
        MGraph g = new IndexedMGraph();
        LiteralFactory lf = LiteralFactory.getInstance();
        MGraph metadata = contentItem.getMetadata();
        for (EntityExtractionSummary p : getPlaceOccurrences()) {
            EntitySuggestion bestGuess = p.getBestGuess();
            if (bestGuess == null) {
                continue;
            }
            UriRef uri = new UriRef(bestGuess.getUri());
            Iterator<Triple> latitudes = metadata.filter(uri, GEO_LAT, null);
            if (latitudes.hasNext()) {
                g.add(latitudes.next());
            }
            Iterator<Triple> longitutes = metadata.filter(uri, GEO_LONG, null);
            if (longitutes.hasNext()) {
                g.add(longitutes.next());
                g.add(new TripleImpl(uri, Properties.RDFS_LABEL, lf.createTypedLiteral(bestGuess.getLabel())));
            }
        }
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        serializer.serialize(out, g, SupportedFormat.RDF_JSON);
       
View Full Code Here

     *
     * @param occs a Collection of entity information
     * @param ci the content item
     */
    public void createEnhancements(Collection<CalaisEntityOccurrence> occs, ContentItem ci) {
        LiteralFactory literalFactory = LiteralFactory.getInstance();
        final Language language; // used for plain literals representing parts fo the content
        String langString = EnhancementEngineHelper.getLanguage(ci);
        if(langString != null && !langString.isEmpty()){
            language = new Language(langString);
        } else {
            language = null;
        }
        //TODO create TextEnhancement (form, start, end, type?) and EntityAnnotation (id, name, type)
        HashMap<Resource, UriRef> entityAnnotationMap = new HashMap<Resource, UriRef>();
        for (CalaisEntityOccurrence occ : occs) {
            UriRef textAnnotation = EnhancementEngineHelper.createTextEnhancement(
                    ci, this);
            MGraph model = ci.getMetadata();
            model.add(new TripleImpl(textAnnotation, DC_TYPE, occ.type));
            // for autotagger use the name instead of the matched term (that might be a pronoun!)
            if (onlyNERMode) {
                model.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT,new PlainLiteralImpl(occ.name,language)));
            }
            else {
                model.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(occ.exact,language)));
            }
            model.add(new TripleImpl(textAnnotation, ENHANCER_START, literalFactory.createTypedLiteral(occ.offset)));
            model.add(new TripleImpl(textAnnotation, ENHANCER_END, literalFactory.createTypedLiteral(occ.offset + occ.length)));
            model.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(occ.context,language)));
            //use the relevance as confidence
            if(occ.relevance != null && Double.valueOf(0).compareTo(occ.relevance) <= 0 ){
                //we do not know if the relevance is available (may be NULL)
                //or the relevance feature is activated (may be -1)
                model.add(new TripleImpl(textAnnotation, ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(occ.relevance)));
            }
            //create EntityAnnotation only once but add a reference to the textAnnotation
            if (entityAnnotationMap.containsKey(occ.id)) {
                model.add(new TripleImpl(entityAnnotationMap.get(occ.id), DC_RELATION, textAnnotation));
            } else {
View Full Code Here

        }
        Iterator<Span> spans = at.getEnclosed(EnumSet.of(SpanTypeEnum.Sentence,SpanTypeEnum.Chunk));
        Sentence context = null;
        MGraph metadata = ci.getMetadata();
        Language lang = new Language(language);
        LiteralFactory lf = LiteralFactory.getInstance();
        ci.getLock().writeLock().lock();
        try { //write TextAnnotations for Named Entities
            while(spans.hasNext()){
                Span span = spans.next();
                switch (span.getType()) {
                    case Sentence:
                        context = (Sentence)context;
                        break;
                    default:
                        Value<NerTag> nerAnno = span.getAnnotation(NER_ANNOTATION);
                        if(nerAnno != null){
                            UriRef ta = EnhancementEngineHelper.createTextEnhancement(ci, this);
                            //add span related data
                            metadata.add(new TripleImpl(ta, ENHANCER_SELECTED_TEXT,
                                new PlainLiteralImpl(span.getSpan(), lang)));
                            metadata.add(new TripleImpl(ta, ENHANCER_START,
                                lf.createTypedLiteral(span.getStart())));
                            metadata.add(new TripleImpl(ta, ENHANCER_END,
                                lf.createTypedLiteral(span.getEnd())));
                            metadata.add(new TripleImpl(ta, ENHANCER_SELECTION_CONTEXT,
                                new PlainLiteralImpl(context == null ?
                                        getDefaultSelectionContext(at.getSpan(), span.getSpan(), span.getStart()) :
                                            context.getSpan(), lang)));
                            //add the NER type
                            if(nerAnno.value().getType() != null){
                                metadata.add(new TripleImpl(ta,DC_TYPE,nerAnno.value().getType()));
                            }
                            if(nerAnno.probability() >= 0) {
                                metadata.add(new TripleImpl(ta, ENHANCER_CONFIDENCE,
                                    lf.createTypedLiteral(nerAnno.probability())));
                            }
                        }
                        break;
                }
            }
View Full Code Here

        if(log.isDebugEnabled()){
            log.debug("findNamedEntities model={},  language={}, text=",
                    new Object[]{ nameFinderModel, language,
                                  StringUtils.abbreviate(at != null ? at.getSpan() : text, 100) });
        }
        LiteralFactory literalFactory = LiteralFactory.getInstance();
        MGraph g = ci.getMetadata();
        Map<String,List<NameOccurrence>> entityNames;
        if(at != null){
            entityNames = extractNameOccurrences(nameFinderModel, at, lang);
        } else {
            entityNames = extractNameOccurrences(nameFinderModel, text,lang);
        }
        //lock the ContentItem while writing the RDF data for found Named Entities
        ci.getLock().writeLock().lock();
        try {
            Map<String,UriRef> previousAnnotations = new LinkedHashMap<String,UriRef>();
            for (Map.Entry<String,List<NameOccurrence>> nameInContext : entityNames.entrySet()) {
   
                String name = nameInContext.getKey();
                List<NameOccurrence> occurrences = nameInContext.getValue();
   
                UriRef firstOccurrenceAnnotation = null;
   
                for (NameOccurrence occurrence : occurrences) {
                    UriRef textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
                    g.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT,
                        new PlainLiteralImpl(name, language)));
                    g.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_CONTEXT,
                        new PlainLiteralImpl(occurrence.context, language)));
                    if(occurrence.type != null){
                        g.add(new TripleImpl(textAnnotation, DC_TYPE, occurrence.type));
                    }
                    g.add(new TripleImpl(textAnnotation, ENHANCER_CONFIDENCE, literalFactory
                            .createTypedLiteral(occurrence.confidence)));
                    if (occurrence.start != null && occurrence.end != null) {
                        g.add(new TripleImpl(textAnnotation, ENHANCER_START, literalFactory
                                .createTypedLiteral(occurrence.start)));
                        g.add(new TripleImpl(textAnnotation, ENHANCER_END, literalFactory
                                .createTypedLiteral(occurrence.end)));
                    }
   
                    // add the subsumption relationship among occurrences of the same
                    // name
View Full Code Here

            }
        } else { // null indicates to use the Entityhub to lookup Entities
            site = null;
        }
        MGraph graph = ci.getMetadata();
        LiteralFactory literalFactory = LiteralFactory.getInstance();
        // Retrieve the existing text annotations (requires read lock)
        Map<NamedEntity,List<UriRef>> textAnnotations = new HashMap<NamedEntity,List<UriRef>>();
        // the language extracted for the parsed content or NULL if not
        // available
        String contentLangauge;
View Full Code Here

    @Override
    public void computeEnhancements(ContentItem ci) throws EngineException {
        UriRef contentItemId = ci.getUri();
        MGraph graph = ci.getMetadata();
        LiteralFactory literalFactory = LiteralFactory.getInstance();
        //get all the textAnnotations
        /*
         * this Map holds the name as key and all the text annotations of
         * dc:type dbpedia:Place that select this name as value
         * this map is used to avoid multiple lookups for text annotations
View Full Code Here

TOP

Related Classes of org.apache.clerezza.rdf.core.LiteralFactory

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.