Package com.digitalpebble.classification

Examples of com.digitalpebble.classification.Document


            boolean create, String vector_location) throws IOException {
        File vectorFile = new File(vector_location);
        PrintWriter out = new PrintWriter(new FileWriter(vectorFile));
        for (int i = 0; i < documents.length; i++) {
            Document doc = documents[i];
            int label = doc.getLabel();
            // get a vector from the document
            // need a metric (e.g. relative frequency / binary)
            // and a lexicon
            // the vector is represented as a string directly
            Vector vector = doc.getFeatureVector(lexicon);
            out.print(label + " " + Utils.getVectorString(vector) + "\n");
        }
        out.close();
        return vectorFile;
    }
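The excerpt above starts mid-signature, so the enclosing method and class are not shown. Below is a minimal self-contained sketch of the same pattern, not the library's own code: it keeps only the calls visible in the excerpt (getLabel(), getFeatureVector(Lexicon), Utils.getVectorString(Vector)) and uses try-with-resources so the writer is always closed. The method name writeVectors, its parameter names, and the assumption that lexicon is an instance of a Lexicon class are illustrative.

    // Sketch only: writes one "<label> <feature vector>" line per document.
    // Needs java.io.File, java.io.FileWriter, java.io.IOException, java.io.PrintWriter.
    static File writeVectors(Document[] documents, Lexicon lexicon, String vectorLocation)
            throws IOException {
        File vectorFile = new File(vectorLocation);
        try (PrintWriter out = new PrintWriter(new FileWriter(vectorFile))) {
            for (Document doc : documents) {
                // build the feature vector from the lexicon, then serialise it as a string
                Vector vector = doc.getFeatureVector(lexicon);
                out.println(doc.getLabel() + " " + Utils.getVectorString(vector));
            }
        }
        return vectorFile;
    }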


        out = new PrintWriter(new FileWriter(vectorFile));
        // get an iterator on the Corpus
        // and retrieve the documents one by one
        Iterator<Document> docIterator = corpus.iterator();
        while (docIterator.hasNext()) {
            Document doc = docIterator.next();
            int label = doc.getLabel();
            // get a vector from the document
            // need a metric (e.g. relative frequency / binary)
            // and a lexicon
            // the vector is represented as a string directly
            Vector vector = null;
            if (attributeMapping == null)
                vector = doc.getFeatureVector(lexicon);
            else
                vector = doc.getFeatureVector(lexicon, attributeMapping);
            out.print(label + " " + Utils.getVectorString(vector) + "\n");
        }
        out.close();
        return vectorFile;
    }

        // load classifier
        TextClassifier classifier = TextClassifier
            .getClassifier(new File(resourceDir));
        // create a document from a String
        String[] tokens = Tokenizer.tokenize(text.toString(), true);
        Document doc = classifier.createDocument(tokens);
        // classify
        double[] scores = classifier.classify(doc);
        // get best label
        String label = classifier.getBestLabel(scores);
        System.out.println("Classified as : "+label);
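The classification excerpt above can be assembled into a complete program. The sketch below is an assumption-laden example, not code from the project: the wildcard import follows the package named at the top of this page, but Tokenizer may live in a sub-package, and resourceDir is assumed to point at a directory produced by a previous training step.

    import java.io.File;
    // Assumed import: TextClassifier, Document and Tokenizer are used exactly as in
    // the excerpt above, but their exact sub-packages may differ.
    import com.digitalpebble.classification.*;

    public class ClassifyOne {
        public static void main(String[] args) throws Exception {
            String resourceDir = args[0]; // directory holding the trained model and lexicon
            String text = args[1];        // raw text to classify

            // load the classifier resources from disk
            TextClassifier classifier = TextClassifier.getClassifier(new File(resourceDir));

            // tokenize the text and wrap the tokens in a Document
            String[] tokens = Tokenizer.tokenize(text, true);
            Document doc = classifier.createDocument(tokens);

            // score the document for every label and report the best one
            double[] scores = classifier.classify(doc);
            System.out.println("Classified as : " + classifier.getBestLabel(scores));
        }
    }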

        // get an iterator on the Corpus
        // and retrieve the documents one by one
        Iterator<Document> docIterator = corpus.iterator();
        while (docIterator.hasNext()) {
            Document doc = docIterator.next();
            int label = doc.getLabel();
            // get a vector from the document
            // need a metric (e.g. relative frequency / binary)
            // and a lexicon
            // the vector is represented as a string directly
            Vector vector = null;
            if (attributeMapping == null)
                vector = doc.getFeatureVector(lexicon);
            else
                vector = doc.getFeatureVector(lexicon, attributeMapping);

            StringBuffer buffer = new StringBuffer("{");

            buffer.append("0 ").append(lexicon.getLabel(label));
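The StringBuffer here is the start of a sparse ARFF row in which attribute 0 holds the class label; the excerpt ends before the row is finished. The continuation below is a guess for illustration only: the +1 index offset, the formatting, and the reuse of a PrintWriter named out are assumptions, not the excerpted code.

            // Illustrative continuation (assumption): emit "index value" pairs for the
            // non-zero features, shifting indices by one because attribute 0 is the class.
            int[] indices = vector.getIndices();
            double[] values = vector.getValues();
            for (int i = 0; i < indices.length; i++) {
                if (values[i] == 0) continue;              // sparse row: skip zeros
                buffer.append(", ").append(indices[i] + 1)
                      .append(" ").append(values[i]);
            }
            buffer.append("}");
            out.println(buffer.toString());                // assumes the PrintWriter used earlier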

        int latestRank = 0;
       
        // fill the matrix
        Iterator<Document> docIter = corpus.iterator();
        while(docIter.hasNext()){
          Document d = docIter.next();
          // Vector vector = d.getFeatureVector(lexicon);
          // get a vector based on the number of occurrences, i.e. on the raw counts from the document
          Vector vector = d.getFeatureVector(lexicon,Parameters.WeightingMethod.OCCURRENCES);
          int[] indices = vector.getIndices();
          double[] values = vector.getValues();
          int classNum = d.getLabel();
         
          for (int i=0;i<indices.length;i++){
            int index = indices[i];
            double value = values[i];
            if (value==0) continue;         
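The loop above is cut off right after skipping zero values, so the actual matrix update is not shown. As an illustration only, a per-class term count matrix could be filled as follows; the helper name addToMatrix and the counts array are hypothetical.

      // Hypothetical helper: accumulate a document's term counts into a
      // [class][term] matrix. counts must be sized [number of classes][lexicon size].
      static void addToMatrix(double[][] counts, int classNum, int[] indices, double[] values) {
          for (int i = 0; i < indices.length; i++) {
              double value = values[i];
              if (value == 0) continue;                // sparse vector: explicit zeros are skipped
              counts[classNum][indices[i]] += value;   // add this document's count for the term
          }
      }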

      int latestRank = 0;
     
      // fill the matrix
      Iterator<Document> docIter = corpus.iterator();
      while(docIter.hasNext()){
        Document d = docIter.next();
        Vector vector = d.getFeatureVector(lexicon);
        // alternatively, get a vector based on the number of occurrences, i.e. on the raw counts from the document
        // Vector vector = d.getFeatureVector(lexicon,Parameters.WeightingMethod.OCCURRENCES);
        int[] indices = vector.getIndices();
        double[] values = vector.getValues();
        int classNum = d.getLabel();
       
        for (int i=0;i<indices.length;i++){
          int index = indices[i];
          double value = values[i];
          if (value==0) continue;         

