Package org.apache.mahout.classifier.bayes

Examples of org.apache.mahout.classifier.bayes.ClassifierContext


      //<start id="mahout.bayes.setup"/>
      // Build a fresh classifier context in local variables first, so the
      // shared field is only replaced once everything below has completed.
      BayesParameters p = new BayesParameters();
      // "basePath" is set to the canonical path of modelDir.
      p.set("basePath", modelDir.getCanonicalPath());
      Datastore ds = new InMemoryBayesDatastore(p);
      Algorithm a  = new BayesAlgorithm();
      // This local ctx shadows the field this.ctx until the swap below.
      ClassifierContext ctx = new ClassifierContext(a,ds);
      // NOTE(review): presumably this is where the model is actually loaded
      // from basePath; confirm against ClassifierContext.initialize().
      ctx.initialize();
      //<end id="mahout.bayes.setup"/>
      // Publish the new context to the field while holding the swapContext lock.
      synchronized (swapContext) {
          this.ctx = ctx; // swap upon successful load.
      }
      // NOTE(review): enabled is written outside the synchronized block;
      // confirm readers of this flag do not rely on the swapContext lock.
      enabled = true;
View Full Code Here


     
      BayesParameters p = new BayesParameters();
      p.set("basePath", modelDir.getCanonicalPath());
      Datastore ds = new InMemoryBayesDatastore(p);
      Algorithm a  = new BayesAlgorithm();
      ClassifierContext ctx = new ClassifierContext(a,ds);
      ctx.initialize();
     
      //TODO: make the analyzer configurable
      StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
      TokenStream ts = analyzer.tokenStream(null, new InputStreamReader(new FileInputStream(inputFile), "UTF-8"));
    
      ArrayList<String> tokens = new ArrayList<String>(1000);
      while (ts.incrementToken()) {
        tokens.add(ts.getAttribute(CharTermAttribute.class).toString());
      }
      String[] document = tokens.toArray(new String[tokens.size()]);
     
      ClassifierResult[] cr = ctx.classifyDocument(document, "unknown", 5);
     
      for (ClassifierResult r: cr) {
        System.err.println(r.getLabel() + "\t" + r.getScore());
      }
    } catch (OptionException e) {
View Full Code Here

      }
     
    } else {
      throw new IllegalArgumentException("Unrecognized dataSource type: " + dataSource);
    }
    // Build the classifier from the algorithm and datastore assigned earlier
    // in this method (outside this excerpt) and initialize it before use.
    ClassifierContext classifier = new ClassifierContext(algorithm, datastore);
    classifier.initialize();
    // Category passed to classifyDocument as its second argument; "unknown"
    // unless overridden via defaultCatOpt.
    String defaultCat = "unknown";
    if (cmdLine.hasOption(defaultCatOpt)) {
      defaultCat = (String) cmdLine.getValue(defaultCatOpt);
    }
    // Path of the document to classify, taken from classifyOpt.
    File docPath = new File((String) cmdLine.getValue(classifyOpt));
    // Input encoding: UTF-8 unless overridden via encodingOpt.
    String encoding = "UTF-8";
    if (cmdLine.hasOption(encodingOpt)) {
      encoding = (String) cmdLine.getValue(encodingOpt);
    }
    // Analyzer: instantiate the class named by analyzerOpt when given. The
    // null check below covers both "option absent" and a null result from
    // instantiateAs, falling back to a StandardAnalyzer.
    Analyzer analyzer = null;
    if (cmdLine.hasOption(analyzerOpt)) {
      analyzer = ClassUtils.instantiateAs((String) cmdLine.getValue(analyzerOpt), Analyzer.class);
    }
    if (analyzer == null) {
      analyzer = new StandardAnalyzer(Version.LUCENE_31);
    }
   
    log.info("Converting input document to proper format");

    // Read the file with the chosen charset and run it through the analyzer.
    // NOTE(review): the reader returned by Files.newReader is not closed in
    // this excerpt; confirm whether readerToDocument closes it.
    String[] document =
        BayesFileFormatter.readerToDocument(analyzer,Files.newReader(docPath, Charset.forName(encoding)));
    // Re-join the tokens into one string, each token followed by a single
    // space, because NGrams is constructed from a String rather than an array.
    StringBuilder line = new StringBuilder();
    for (String token : document) {
      line.append(token).append(' ');
    }
   
    // gramSize is defined outside this excerpt.
    List<String> doc = new NGrams(line.toString(), gramSize).generateNGramsWithoutLabel();
   
    log.info("Done converting");
    log.info("Classifying document: {}", docPath);
    // The two-argument classifyDocument call yields a single ClassifierResult,
    // which is logged via its string form.
    ClassifierResult category = classifier.classifyDocument(doc.toArray(new String[doc.size()]), defaultCat);
    log.info("Category for {} is {}", docPath, category);
   
  }
View Full Code Here

        }
       
      } else {
        throw new IllegalArgumentException("Unrecognized dataSource type: " + params.get("dataSource"));
      }
      // Create the classifier from the algorithm and datastore assigned
      // earlier (outside this excerpt) and initialize it before use.
      classifier = new ClassifierContext(algorithm, datastore);
      classifier.initialize();
     
      // Keep the configured default category and n-gram size from params in
      // the variables defaultCategory and gramSize (declared outside this excerpt).
      defaultCategory = params.get("defaultCat");
      gramSize = params.getGramSize();
    } catch (IOException ex) {
View Full Code Here

TOP

Related Classes of org.apache.mahout.classifier.bayes.ClassifierContext

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.