Package gate

Examples of gate.FeatureMap


    // extract the document content as a string
    String text = document.getContent().toString();
    ArrayList<Element<Object>> elements = new ArrayList<Element<Object>>();
    Alphabet labelAlphabet = tagger.getYAlphabet();
    FeatureMap features = Factory.newFeatureMap();
    long start = 0;
    boolean wasSpace = true;
    ElementSequence<Element<Object>> sequence;
    SparseVector[] x;
    Object[] y;
View Full Code Here


          y, document, inputAS);
      elements = new ArrayList<Element<Object>>();

      // extract the labels for the tokens in the sentence
      labels = tagger.label(extractor.process(sequence).x);
      FeatureMap features = Factory.newFeatureMap();
      long start, end;
     
      if (labels.length == tokens.size()) {
       
        for (int i = 0; i < tokens.size(); i++) {
View Full Code Here

          y, document, inputAS);
      elements = new ArrayList<Element<Object>>();

      // extract the labels for the tokens in the sentence
      labels = tagger.label(extractor.process(sequence).x);
      FeatureMap features = Factory.newFeatureMap();
      long start, end;

      if (labels.length == tokens.size()) {

        for (int i = 0; i < tokens.size(); i++) {
View Full Code Here

    sequence = new ElementSequence(elements, xAlphabet, yAlphabet, x, y,
        document, inputAS);

    // extract the labels for the tokens in the sentence
    labels = tagger.label(extractor.process(sequence).x);
    FeatureMap features = Factory.newFeatureMap();
    long start = tokens.get(0).getStartNode().getOffset();
    long end;
   
    if (labels.length == tokens.size()) {
View Full Code Here

        // // delete the input doc
        // tempInputFile.delete();
        // throw e;
        // }

        FeatureMap params = Factory.newFeatureMap();
        params.put(Document.DOCUMENT_STRING_CONTENT_PARAMETER_NAME, new String(
                inputDoc.getContent()));
        String ct = inputDoc.getContentType();
        if (ct != null)
            params.put(Document.DOCUMENT_MIME_TYPE_PARAMETER_NAME, ct);

        gate.Document gatedocument;
        try {
            gatedocument = (Document) Factory.createResource(
                    "gate.corpora.DocumentImpl", params);
View Full Code Here

            text = inputDoc.getText();

        gatedocument = Factory.newDocument(text);

        // then the metadata as document features
        FeatureMap docFeatures = gatedocument.getFeatures();
        String docUrl = inputDoc.getUrl();
        if (docUrl != null)
            docFeatures.put("gate.SourceURL", docUrl);
        if (inputDoc.getMetadata() != null) {
            Iterator<Entry<Writable, Writable>> iter = inputDoc.getMetadata()
                    .entrySet().iterator();
            while (iter.hasNext()) {
                Entry<Writable, Writable> entry = iter.next();
                String skey = entry.getKey().toString().trim();
                String svalue = null;
                if (entry.getValue() != null)
                    svalue = entry.getValue().toString().trim();
                docFeatures.put(skey, svalue);
            }
        }

        // finally the annotations as original markups
        // TODO change the name of the annotation set via config
        AnnotationSet outputAS = gatedocument
                .getAnnotations("Original markups");
        for (Annotation annot : inputDoc.getAnnotations()) {
            // add to outputAS as a GATE annotation
            FeatureMap features = Factory.newFeatureMap();
            features.putAll(annot.getFeatures());
            outputAS.add(annot.getStart(), annot.getEnd(), annot.getType(),
                    features);
        }
        return gatedocument;
    }
View Full Code Here

    }

    /**
     * Creates a GATE document from the content of a Behemoth document.
     *
     * <p>The value returned by {@code inputDoc.getContent()} is wrapped in a
     * {@code String} and passed as the document's string content. The content
     * type, when non-null, is passed as the MIME type, and the markup-aware
     * parameter is set to {@code Boolean.TRUE}.
     *
     * @param inputDoc the Behemoth document whose content and content type
     *                 are read
     * @return the resource created by {@code Factory.createResource} for
     *         {@code "gate.corpora.DocumentImpl"}, cast to {@code Document}
     * @throws ResourceInstantiationException declared by this method;
     *         presumably raised by {@code Factory.createResource} - confirm
     * @throws IOException declared by this method; no statement visible here
     *         obviously throws it - TODO confirm it is still needed
     */
    private gate.Document generateGATEDocFromBinary(BehemothDocument inputDoc)
            throws ResourceInstantiationException, IOException {

        FeatureMap params = Factory.newFeatureMap();
        // TODO make sure that we pass the right charset
        // NOTE(review): new String(...) is called without an explicit charset;
        // if getContent() returns byte[] this decodes with the platform
        // default - confirm against BehemothDocument.
        params.put(Document.DOCUMENT_STRING_CONTENT_PARAMETER_NAME, new String(
                inputDoc.getContent()));
        // the MIME type parameter is only set when a content type is available
        String ct = inputDoc.getContentType();
        if (ct != null)
            params.put(Document.DOCUMENT_MIME_TYPE_PARAMETER_NAME, ct);

        // unlike the text-based path, markup awareness is requested here
        params.put(Document.DOCUMENT_MARKUP_AWARE_PARAMETER_NAME, Boolean.TRUE);

        gate.Document gatedocument = (Document) Factory.createResource(
                "gate.corpora.DocumentImpl", params);

        return gatedocument;
View Full Code Here

            } catch (Exception e) {
                LOG.error("Can't generate GATE doc from binary content", e);
            }
        }

        FeatureMap params = Factory.newFeatureMap();
        params.put(Document.DOCUMENT_STRING_CONTENT_PARAMETER_NAME,
                inputDoc.getText());
        // no need to generate markup from it
        params.put(Document.DOCUMENT_MARKUP_AWARE_PARAMETER_NAME, Boolean.FALSE);

        gatedocument = (Document) Factory.createResource(
                "gate.corpora.DocumentImpl", params);
        gatedocument.setSourceUrl(null);

        // then the metadata as document features
        FeatureMap docFeatures = gatedocument.getFeatures();
        String docUrl = inputDoc.getUrl();
        if (docUrl != null)
            docFeatures.put("gate.SourceURL", docUrl);
        if (inputDoc.getMetadata() != null) {
            Iterator<Entry<Writable, Writable>> iter = inputDoc.getMetadata()
                    .entrySet().iterator();
            while (iter.hasNext()) {
                Entry<Writable, Writable> entry = iter.next();
                String skey = entry.getKey().toString().trim();
                String svalue = null;
                if (entry.getValue() != null)
                    svalue = entry.getValue().toString().trim();
                docFeatures.put(skey, svalue);
            }
        }

        // finally the annotations to the GATE document
        String GATEAnnotationSetName = getConf().get(
                "gate.annotationset.input", "Original markups");

        AnnotationSet outputAS = gatedocument
                .getAnnotations(GATEAnnotationSetName);
        for (Annotation annot : inputDoc.getAnnotations()) {
            // add to outputAS as a GATE annotation
            FeatureMap features = Factory.newFeatureMap();
            features.putAll(annot.getFeatures());
            outputAS.add(annot.getStart(), annot.getEnd(), annot.getType(),
                    features);
        }
        return gatedocument;
    }
View Full Code Here

TOP

Related Classes of gate.FeatureMap

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.