Package com.digitalpebble.behemoth

Examples of com.digitalpebble.behemoth.Annotation


        // to form a new content string separated by spaces

        // iterate on the annotations
        Iterator<Annotation> iterator = doc.getAnnotations().iterator();
        while (iterator.hasNext()) {
            Annotation current = iterator.next();
            // check whether it belongs to a type we'd like to send to SOLR
            Map<String, String> featureField = fieldMapping.get(current
                    .getType());
            if (featureField == null)
                continue;
            // iterate on the expected features
            for (String targetFeature : featureField.keySet()) {
                String SOLRFieldName = featureField.get(targetFeature);
                String value = null;
                // special case for covering text
                if ("*".equals(targetFeature)) {
                    value = doc.getText().substring((int) current.getStart(),
                            (int) current.getEnd());
                }
                // get the value for the feature
                else {
                    value = current.getFeatures().get(targetFeature);
                }
                LOG.debug("Adding field : " + SOLRFieldName + "\t" + value);
                // skip if no value has been found
                if (value != null)
                    inputDoc.addField(SOLRFieldName, value);
View Full Code Here


        // iterate on the annotations
        if (includeAnnotations) {
            Iterator<Annotation> iterator = doc.getAnnotations().iterator();
            while (iterator.hasNext()) {
                Annotation current = iterator.next();
                // check whether it belongs to a type we'd like to send to SOLR
                Map<String, String> featureField = fieldMapping.get(current
                        .getType());
                // special case of all annotations
                if (featureField == null && !includeAllAnnotations) {
                    continue;
                }
                if (!includeAllAnnotations) {
                    // iterate on the expected features
                    for (String targetFeature : featureField.keySet()) {
                        String SOLRFieldName = featureField.get(targetFeature);
                        String value = null;
                        // special case for covering text
                        if ("*".equals(targetFeature)) {
                            value = doc.getText().substring(
                                    (int) current.getStart(),
                                    (int) current.getEnd());
                        }
                        // get the value for the feature
                        else {
                            value = current.getFeatures().get(targetFeature);
                        }
                        LOG.debug("Adding field : " + SOLRFieldName + "\t"
                                + value);
                        // skip if no value has been found
                        if (value != null)
                            inputDoc.addField(SOLRFieldName, value);
                    }
                } else {
                    for (Entry<String, String> e : current.getFeatures()
                            .entrySet()) {
                        inputDoc.addField(annotationPrefix + current.getType()
                                + "." + e.getKey(), e.getValue());
                    }
                }
            }
        }
View Full Code Here

            long start, long end) {
        // BRUTAL
        List<Annotation> output = new ArrayList<Annotation>();
        Iterator<Annotation> iterator = input.iterator();
        while (iterator.hasNext()) {
            Annotation annot = iterator.next();
            if (annot.getStart() >= start && annot.getEnd() <= end)
                output.add(annot);
        }
        return output;
    }
View Full Code Here

    public static List<Annotation> filter(List<Annotation> input, String type,
            String feature, String value) {
        List<Annotation> output = new ArrayList<Annotation>();
        Iterator<Annotation> iterator = input.iterator();
        main: while (iterator.hasNext()) {
            Annotation annot = iterator.next();
            // TODO check that types are not null
            boolean hastypematch = annot.getType().matches(type);
            if (!hastypematch)
                continue;
            // check the features
            // TODO no feature? no worries
            if (feature == null) {
                output.add(annot);
                continue;
            }

            // find all the keys matching the regex
            Iterator<String> keyIter = annot.getFeatures().keySet().iterator();
            while (keyIter.hasNext()) {
                String key = keyIter.next();
                boolean keyMatch = key.matches(feature);
                if (!keyMatch)
                    continue;
                // has a value been specified?
                if (value == null | value.length() == 0) {
                    output.add(annot);
                    continue main;
                }
                // need to check whether the values match
                String val = annot.getFeatures().get(key);
                boolean valueMatch = val.matches(value);
                if (!valueMatch)
                    continue;
                {
                    output.add(annot);
View Full Code Here

    public int compare(Object o1, Object o2) {
        if (!(o1 instanceof Annotation) || !(o2 instanceof Annotation))
            return 0;

        Annotation a1 = (Annotation) o1;
        Annotation a2 = (Annotation) o2;

        Long l1 = a1.getStart();
        Long l2 = a2.getStart();
        if (l1 != null)
            return l1.compareTo(l2);
        else
            return -1;
    }
View Full Code Here

    public void startElement(String uri, String localName, String qName,
            Attributes atts) throws SAXException {
        int startOffset = textBuffer.length();

        Annotation annot = new Annotation();
        annot.setStart(startOffset);
        // use the localname as a type
        annot.setType(localName);
        // convert the attributes into features
        for (int i = 0; i < atts.getLength(); i++) {
            String key = atts.getLocalName(i);
            String value = atts.getValue(i);
            annot.getFeatures().put(key, value);
        }
        this.startedAnnotations.addLast(annot);
    }
View Full Code Here

        // try to get the corresponding annotation
        // we start from the last temporary
        // and go up the stack
        Iterator<Annotation> iter = startedAnnotations.iterator();
        Annotation startedAnnot = null;
        while (iter.hasNext()) {
            Annotation temp = iter.next();
            if (temp.getType().equals(localName)) {
                startedAnnot = temp;
                break;
            }
        }
        // found something?
View Full Code Here

            throws IOException, InterruptedException {
        StringTuple document = new StringTuple();
        Iterator<Annotation> iter = value.getAnnotations().iterator();

        while (iter.hasNext()) {
            Annotation annot = iter.next();
            // check the type
            if (!annot.getType().equals(tokenType))
                continue;
            java.util.Map<String, String> features = annot.getFeatures();
            if (features == null)
                continue;

            String featureValue = null;

            // no feature? use the underlying text
            if (tokenFeature.equals("")) {
                featureValue = value.getText().substring(
                        (int) annot.getStart(), (int) annot.getEnd());
            } else
                featureValue = features.get(tokenFeature);
            if (featureValue == null)
                continue;
            document.add(featureValue);
View Full Code Here

TOP

Related Classes of com.digitalpebble.behemoth.Annotation

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.