Package cz.zcu.fav.liks.ml.featuresel.data.connections

Examples of cz.zcu.fav.liks.ml.featuresel.data.connections.CsvConn


import cz.zcu.fav.liks.ml.featuresel.validation.CrossValidator;

public class Experiment4 {
  public static void main(String[] args) {
    Logger.getRootLogger().setLevel(Level.OFF);
    IDataConn dataConn = new CsvConn("c:/data/data.csv");

    try {

      CrossValidator cv = new CrossValidator();
      List<Feature<Review>> features = new ArrayList<Feature<Review>>();
      features.add(new LemmaFeature(10));

      for (int i = 1; i <= 20; i++) {

        List<Review> data = dataConn.getReviewsNo(1000 * i);
        System.out.println("Datasize " + data.size());

        print("Naive bayes");
        SupervisedTrainer trainer = new NaiveBayesGaussianTrainer();
        cv.multiValidate(features, trainer, 10, data, null, 0);

        print("Maximum Entropy");
        trainer = new MaxEntLBFGSTrainer();
        cv.multiValidate(features, trainer, 10, data, null, 0);

        print("SVM");
        trainer = new MySVMTrainer();
        cv.multiValidate(features, trainer, 10, data, null, 0);

      }
    } catch (SQLException e) {
      e.printStackTrace();
    } catch (InvalidAttributeValueException e) {
      e.printStackTrace();
    } finally {
      dataConn.close();
    }

  }
View Full Code Here


import cz.zcu.fav.liks.ml.featuresel.data.connections.IDataConn;

public class Experiment1 {
  public static void main(String[] args) {
    Logger.getRootLogger().setLevel(Level.OFF);
    IDataConn dataConn = new CsvConn("c:/data/data.csv");

    try {
      List<Review> data = dataConn.getReviews();

      int reviews = 0;
      int statements = 0;
      int[] ratings = new int[11];

      for (Review r : data) {
        reviews++;
        ratings[(int) (r.getRating() / 10)]++;
        List<Statement> statements2 = r.getStatements();
        for (int i = 0; i < statements2.size(); i++) {
          statements++;
        }
      }

      System.out.format("Number of reviews: %d%n", reviews);
      System.out.format("Number of statements: %d - %.4f statement for review%n", statements, (double) statements
          / reviews);
      for (int i = 0; i < ratings.length; i++) {
        System.out.format("Rating %d%% - %d reviews (%.2f%%)%n", i * 10, ratings[i],
            ((double) ratings[i] / reviews) * 100);
      }

    } catch (SQLException e) {
      e.printStackTrace();
    } finally {
      dataConn.close();
    }

  }
View Full Code Here

        filter = new RelevancyScore();
      } else
        throw new ParseException("Incorrect metric speficied");

      List<Review> data = null;
      IDataConn dataConn = new CsvConn(file);
      if (cmd.hasOption("d")) {
        try {
          String size = cmd.getOptionValue("d");
          data = dataConn.getReviewsNo(Integer.parseInt(size));
        } catch (NumberFormatException e) {
          throw new ParseException("Incorrect data size specified");
        }
      } else {
        data = dataConn.getReviews();
      }

      CrossValidator cv = new CrossValidator();

      // příznaky
      List<Feature<Review>> features = new ArrayList<Feature<Review>>();
      // features.add(new VerbReviewFeature(10));
      // features.add(new AdverbReviewFeature(10));
      // features.add(new BigramFeature(10));
      // features.add(new WordPairFeature(10));
      // features.add(new LemmaFeature(10));
      features.add(new ThresholdFeature(0));
      features.add(new StatementCountFeature());
      // features.add(new StopwordReviewFeature(10));

      SupervisedTrainer trainer = new MaxEntLBFGSTrainer();
      double weight = 0.1;

      for (int i = 0; i < 10; i++)
        cv.multiValidate(features, trainer, 10, data, filter, weight * i);

      dataConn.close();

    } catch (ParseException e) {
      System.err.println("Parsing failed.  Reason: " + e.getMessage());
    } catch (SQLException e) {
      e.printStackTrace();
View Full Code Here

import cz.zcu.fav.liks.ml.featuresel.validation.CrossValidator;

public class Experiment6 {
  public static void main(String[] args) {
    Logger.getRootLogger().setLevel(Level.OFF);
    IDataConn dataConn = new CsvConn("c:/data/data.csv");

    try {

      CrossValidator cv = new CrossValidator();
      List<Feature<Review>> features = new ArrayList<Feature<Review>>();
      features.add(new ThresholdFeature(0));
      features.add(new StatementCountFeature());

      SupervisedTrainer trainer = new MaxEntLBFGSTrainer();
      List<Review> data = dataConn.getReviews();

      print("no selector");
      cv.multiValidate(features, trainer, 10, data, null, 0);

      print("RS(0,1)");
      cv.multiValidate(features, trainer, 10, data, new RelevancyScore(), 0.1);

      print("RS(0,5)");
      cv.multiValidate(features, trainer, 10, data, new RelevancyScore(), 0.5);

      print("RS(0,6)");
      cv.multiValidate(features, trainer, 10, data, new RelevancyScore(), 0.6);

      print("RS(0,7)");
      cv.multiValidate(features, trainer, 10, data, new RelevancyScore(), 0.7);

      print("MI(0,6)");
      cv.multiValidate(features, trainer, 10, data, new MutualInformation(), 0.6);

      print("MI(0,7)");
      cv.multiValidate(features, trainer, 10, data, new MutualInformation(), 0.7);

      print("MI(0,8)");
      cv.multiValidate(features, trainer, 10, data, new MutualInformation(), 0.8);

      print("IG(0,6)");
      cv.multiValidate(features, trainer, 10, data, new InformationGain(), 0.6);

      print("IG(0,7)");
      cv.multiValidate(features, trainer, 10, data, new InformationGain(), 0.7);

      print("IG(0,8)");
      cv.multiValidate(features, trainer, 10, data, new InformationGain(), 0.8);

      print("IG(0,9)");
      cv.multiValidate(features, trainer, 10, data, new InformationGain(), 0.9);

      print("OR(0,9)");
      cv.multiValidate(features, trainer, 10, data, new OddsRatio(), 0.9);

    } catch (SQLException e) {
      e.printStackTrace();
    } catch (IllegalArgumentException e) {
      e.printStackTrace();
    } finally {
      dataConn.close();
    }

  }
View Full Code Here

public class Experiment3 {
  public static void main(String[] args) {

    Logger.getRootLogger().setLevel(Level.OFF);
    IDataConn dataConn = new CsvConn("c:/data/data.csv");

    try {

      CrossValidator cv = new CrossValidator();
      List<Feature<Review>> features = new ArrayList<Feature<Review>>();
      SupervisedTrainer trainer = new MaxEntLBFGSTrainer();

      List<Review> data = dataConn.getReviewsNo(20000);

      System.out.println("Datasize " + data.size());

      print("Empty Feature");
      features.add(new EmptyFeature());
      cv.multiValidate(features, trainer, 10, data, null, 0);

      print("Statement Count");
      features.add(new StatementCountFeature());
      cv.multiValidate(features, trainer, 10, data, null, 0);

      print("Threshold 0");
      features.clear();
      features.add(new ThresholdFeature(0));
      cv.multiValidate(features, trainer, 10, data, null, 0);

      print("Lemma");
      features.clear();
      features.add(new LemmaFeature(0));
      cv.multiValidate(features, trainer, 10, data, null, 0);

      print("POS tag");
      features.clear();
      features.add(new POSTagFeature(0, 5));
      cv.multiValidate(features, trainer, 10, data, null, 0);

      print("Adjective");
      features.clear();
      features.add(new AdjectiveFeature(0));
      cv.multiValidate(features, trainer, 10, data, null, 0);

      print("Verb");
      features.clear();
      features.add(new VerbFeature(0));
      cv.multiValidate(features, trainer, 10, data, null, 0);

      print("Bigram");
      features.clear();
      features.add(new BigramFeature(0));
      cv.multiValidate(features, trainer, 10, data, null, 0);

      print("Pairs");
      features.clear();
      features.add(new WordPairFeature(10));
      cv.multiValidate(features, trainer, 10, data, null, 0);

      print("Specific bigram");
      features.clear();
      features.add(new SpecificBigramFeature(0));
      cv.multiValidate(features, trainer, 10, data, null, 0);

    } catch (SQLException e) {
      e.printStackTrace();
    } catch (InvalidAttributeValueException e) {
      e.printStackTrace();
    } finally {
      dataConn.close();
    }

  }
View Full Code Here

import cz.zcu.fav.liks.ml.featuresel.validation.CrossValidator;

public class Experiment2 {
  public static void main(String[] args) {
    Logger.getRootLogger().setLevel(Level.OFF);
    IDataConn dataConn = new CsvConn("c:/data/data.csv");

    try {

      CrossValidator cv = new CrossValidator();
      List<Feature<Review>> features = new ArrayList<Feature<Review>>();
      SupervisedTrainer trainer = new MaxEntLBFGSTrainer();

      for (int i = 1; i < 20; i++) {

        List<Review> data = dataConn.getReviewsNo(1000 * i);
        System.out.println("Datasize " + data.size());

        print("Raw text");
        features.add(new ThresholdFeature(0));
        cv.multiValidate(features, trainer, 10, data, null, 0);

        print("Threshold 10");
        features.clear();
        features.add(new ThresholdFeature(10));
        cv.multiValidate(features, trainer, 10, data, null, 0);

        print("Stopword");
        features.clear();
        features.add(new StopwordFeature(0));
        cv.multiValidate(features, trainer, 10, data, null, 0);

        print("Stopword threshold 10");
        features.clear();
        features.add(new StopwordFeature(10));
        cv.multiValidate(features, trainer, 10, data, null, 0);

        print("Lemma");
        features.clear();
        features.add(new LemmaFeature(0));
        cv.multiValidate(features, trainer, 10, data, null, 0);

        print("Lemma threshold 10");
        features.clear();
        features.add(new LemmaFeature(10));
        cv.multiValidate(features, trainer, 10, data, null, 0);

      }
    } catch (SQLException e) {
      e.printStackTrace();
    } catch (InvalidAttributeValueException e) {
      e.printStackTrace();
    } finally {
      dataConn.close();
    }

  }
View Full Code Here

public class Experiment3a {
  public static void main(String[] args) {

    Logger.getRootLogger().setLevel(Level.OFF);
    IDataConn dataConn = new CsvConn("c:/data/data.csv");

    try {

      CrossValidator cv = new CrossValidator();
      List<Feature<Review>> features = new ArrayList<Feature<Review>>();
      SupervisedTrainer trainer = new MaxEntLBFGSTrainer();

      List<Review> data = dataConn.getReviewsNo(20000);

      System.out.println("Datasize " + data.size());

      print("lemma + pos");
      features.add(new LemmaFeature(0));
      features.add(new POSTagFeature(0, 4));
      cv.multiValidate(features, trainer, 10, data, null, 0);

      print("lemma + pos + statementcount");
      features.clear();
      features.add(new LemmaFeature(0));
      features.add(new POSTagFeature(0, 4));
      features.add(new StatementCountFeature());
      cv.multiValidate(features, trainer, 10, data, null, 0);

      print("words + statementcount");
      features.clear();
      features.add(new ThresholdFeature(0));
      features.add(new StatementCountFeature());
      cv.multiValidate(features, trainer, 10, data, null, 0);

      print("words + pos");
      features.clear();
      features.add(new ThresholdFeature(0));
      features.add(new POSTagFeature(0, 4));
      cv.multiValidate(features, trainer, 10, data, null, 0);

      print("words + pos + statementcount");
      features.clear();
      features.add(new ThresholdFeature(0));
      features.add(new POSTagFeature(0, 4));
      features.add(new StatementCountFeature());
      cv.multiValidate(features, trainer, 10, data, null, 0);

      print("words + bigrams");
      features.clear();
      features.add(new ThresholdFeature(0));
      features.add(new BigramFeature(0));
      cv.multiValidate(features, trainer, 10, data, null, 0);

      print("words + bigrams + statementcount");
      features.clear();
      features.add(new ThresholdFeature(0));
      features.add(new BigramFeature(0));
      features.add(new StatementCountFeature());
      cv.multiValidate(features, trainer, 10, data, null, 0);

      print("words + pairs");
      features.clear();
      features.add(new ThresholdFeature(0));
      features.add(new WordPairFeature(10));
      cv.multiValidate(features, trainer, 10, data, null, 0);

      print("Adjective + verb");
      features.clear();
      features.add(new AdjectiveFeature(0));
      features.add(new VerbFeature(0));
      cv.multiValidate(features, trainer, 10, data, null, 0);

      print("Adjective + verb + statementcount");
      features.clear();
      features.add(new AdjectiveFeature(0));
      features.add(new VerbFeature(0));
      features.add(new StatementCountFeature());
      cv.multiValidate(features, trainer, 10, data, null, 0);

      print("SpecificBigram + verb + adjectice + statementcount");
      features.clear();
      features.add(new SpecificBigramFeature(0));
      features.add(new VerbFeature(0));
      features.add(new AdjectiveFeature(0));
      features.add(new StatementCountFeature());
      cv.multiValidate(features, trainer, 10, data, null, 0);

    } catch (SQLException e) {
      e.printStackTrace();
    } catch (InvalidAttributeValueException e) {
      e.printStackTrace();
    } finally {
      dataConn.close();
    }

  }
View Full Code Here

import cz.zcu.fav.liks.ml.featuresel.validation.CrossValidator;

public class Experiment2a {
  public static void main(String[] args) {
    Logger.getRootLogger().setLevel(Level.OFF);
    IDataConn dataConn = new CsvConn("c:/data/data.csv");

    try {

      CrossValidator cv = new CrossValidator();
      List<Feature<Review>> features = new ArrayList<Feature<Review>>();
      SupervisedTrainer trainer = new MaxEntLBFGSTrainer();

      List<Review> data = dataConn.getReviewsNo(10000);

      print("Raw text");
      features.add(new ThresholdFeature(0));
      cv.multiValidate(features, trainer, 10, data, null, 0);
      System.out.println(features.get(0));

      print("Stopword");
      features.clear();
      features.add(new StopwordFeature(0));
      cv.multiValidate(features, trainer, 10, data, null, 0);
      System.out.println(features.get(0));

      print("Lemma");
      features.clear();
      features.add(new LemmaFeature(0));
      cv.multiValidate(features, trainer, 10, data, null, 0);
      System.out.println(features.get(0));

    } catch (SQLException e) {
      e.printStackTrace();
    } catch (InvalidAttributeValueException e) {
      e.printStackTrace();
    } finally {
      dataConn.close();
    }

  }
View Full Code Here

TOP

Related Classes of cz.zcu.fav.liks.ml.featuresel.data.connections.CsvConn

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.