Package org.apache.mahout.common

Examples of org.apache.mahout.common.Classifier


    FileSystem fs = FileSystem.get(conf);

    log.info("Loading model from: {}", modelPaths);

    Model model;
    Classifier classifier;

    String classifierType = (String) cmdLine.getValue(typeOpt);

    if (classifierType.equalsIgnoreCase("bayes")) {
      log.info("Testing Bayes Classifier");
      model = new BayesModel();
      classifier = new BayesClassifier();
    } else if (classifierType.equalsIgnoreCase("cbayes")) {
      log.info("Testing Complementary Bayes Classifier");
      model = new CBayesModel();
      classifier = new CBayesClassifier();
    } else {
      throw new IllegalArgumentException("Unrecognized classifier type: " + classifierType);
    }

    SequenceFileModelReader.loadModel(model, fs, modelPaths, conf);

    log.info("Done loading model: # labels: {}", model.getLabels().size());

    log.info("Done generating Model");

    String defaultCat = "unknown";
    if (cmdLine.hasOption(defaultCatOpt)) {
      defaultCat = (String) cmdLine.getValue(defaultCatOpt);
    }

    String encoding = "UTF-8";
    if (cmdLine.hasOption(encodingOpt)) {
      encoding = (String) cmdLine.getValue(encodingOpt);
    }
    //Analyzer analyzer = null;
    //if (cmdLine.hasOption(analyzerOpt)) {
      //String className = (String) cmdLine.getValue(analyzerOpt);
      //Class clazz = Class.forName(className);
      //analyzer = (Analyzer) clazz.newInstance();
    //}
    //if (analyzer == null) {
    //  analyzer = new StandardAnalyzer();
    //}
    int gramSize = 1;
    if (cmdLine.hasOption(gramSizeOpt)) {
      gramSize = Integer.parseInt((String) cmdLine
          .getValue(gramSizeOpt));

    }

    String testDirPath = (String) cmdLine.getValue(dirOpt);
    File dir = new File(testDirPath);
    File[] subdirs = dir.listFiles();

    ResultAnalyzer resultAnalyzer = new ResultAnalyzer(model.getLabels(), defaultCat);

    if (subdirs != null) {
      for (File subdir : subdirs) {

        String correctLabel = subdir.getName().split(".txt")[0];
        BufferedReader fileReader = new BufferedReader(new InputStreamReader(
            new FileInputStream(subdir.getPath()), encoding));
        try {
          String line;
          while ((line = fileReader.readLine()) != null) {
 
            Map<String, List<String>> document = Model.generateNGrams(line, gramSize);
            for (Map.Entry<String, List<String>> stringListEntry : document.entrySet()) {
              List<String> strings = stringListEntry.getValue();
              ClassifierResult classifiedLabel = classifier.classify(model,
                  strings.toArray(new String[strings.size()]),
                  defaultCat);
              resultAnalyzer.addInstance(correctLabel, classifiedLabel);
            }
          }
View Full Code Here


      log.info("Testing Complementary Bayes Classifier");
      model = new CBayesModel();
    } else {
      throw new IllegalArgumentException("Unrecognized classifier type: " + classifierType);
    }
    Classifier classifier = new BayesClassifier();

    SequenceFileModelReader.loadModel(model, fs, modelPaths, conf);

    log.info("Done loading model: # labels: {}", model.getLabels().size());

    log.info("Done generating Model");


    String defaultCat = "unknown";
    if (cmdLine.hasOption(defaultCatOpt)) {
      defaultCat = (String) cmdLine.getValue(defaultCatOpt);
    }
    File docPath = new File((String) cmdLine.getValue(classifyOpt));
    String encoding = "UTF-8";
    if (cmdLine.hasOption(encodingOpt)) {
      encoding = (String) cmdLine.getValue(encodingOpt);
    }
    Analyzer analyzer = null;
    if (cmdLine.hasOption(analyzerOpt)) {
      String className = (String) cmdLine.getValue(analyzerOpt);
      analyzer = Class.forName(className).asSubclass(Analyzer.class).newInstance();
    }
    if (analyzer == null) {
      analyzer = new StandardAnalyzer();
    }

    int gramSize = 1;
    if (cmdLine.hasOption(gramSizeOpt)) {
      gramSize = Integer.parseInt((String) cmdLine
          .getValue(gramSizeOpt));

    }

    log.info("Converting input document to proper format");
    String[] document = BayesFileFormatter.readerToDocument(analyzer, new InputStreamReader(new FileInputStream(docPath), Charset.forName(encoding)));
    StringBuilder line = new StringBuilder();
    for(String token : document)
    {
      line.append(token).append(' ');
    }
    List<String> doc = Model.generateNGramsWithoutLabel(line.toString(), gramSize) ;
    log.info("Done converting");
    log.info("Classifying document: {}", docPath);
    ClassifierResult category = classifier.classify(model, doc.toArray(new String[doc.size()]), defaultCat);
    log.info("Category for {} is {}", docPath, category);

  }
View Full Code Here

TOP

Related Classes of org.apache.mahout.common.Classifier

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.