Examples of Corpus


Examples of basic.Corpus

  public static void main(String[] args) {
    String folder = "/home/wesley/var/pln/textcat/";

    System.out.println("Lendo arquivos...");
    Corpus corpus = new Corpus();
//    corpus.addFileToTest (new File(folder + "Bosque_CF_8.0.ad-categorias-teste.csv"));
//    corpus.addFileToTrain(new File(folder + "Bosque_CF_8.0.ad-categorias-treino.csv"));
//    corpus.addFileToTest (new File(folder + "transform1-teste-menor.tsv"));
//    corpus.addFileToTrain(new File(folder + "transform1-treino-menor.tsv"));
//    corpus.addFileToTest (new File(folder + "transform1-teste.tsv"));
//    corpus.addFileToTrain(new File(folder + "transform1-treino.tsv"));
//    corpus.readFile(new File(folder + "transform1-treino.tsv"));
//    corpus.readFile(new File(folder + "transform1-teste.tsv"));
//    corpus.readFile(new File(folder + "Bosque_CF_8.0.ad-categorias-teste.csv"));
//    corpus.readFile(new File(folder + "Bosque_CF_8.0.ad-categorias-treino.csv"));
//    corpus.readFile(new File(folder + "projetos9horas.csv"));
    corpus.readFile(new File(folder + "fazido9horas_ava.csv"));
//    corpus.readFile(new File(folder + "fazido9horas_ava-menor.csv"));

    ClassifierNB engine = new ClassifierNB();

    System.out.println("Treinando...");
    engine.train(corpus.getTrain());

    System.out.println("Avaliando...");
    engine.eval(corpus.getTest());

  }
View Full Code Here

Examples of basic.Corpus

  public static void main(String[] args) {
    String folder = "/home/wesley/var/pln/textcat/";

    System.out.println("Lendo arquivos...");
    Corpus corpus = new Corpus();
//    corpus.readFile(new File(folder + "fazido9horas.csv"));
    corpus.readFile(new File(folder + "base-10-07-12.csv"));
   
    // corpus.readFile(new File(folder + "projetos9horas.csv"));

    ClassifierNBToFile engine = new ClassifierNBToFile();

    System.out.println("Treinando...");
    List<Document> docsToTrain = corpus.getCategories("ambiente",
        "aumento", "cultura", "data", "dev", "direito", "edu",
        "espaco_pub", "habita", "lixo", "muda_nome", "outros",
        "regulamentacao", "saude", "seguranca", "transito");
    engine.train(docsToTrain);

    System.out.println("Etiquetando...");
    List<Document> docsToTagger = corpus.getCategories("TBD");
    engine.tagger(docsToTagger);

    for (Document d : docsToTrain) {
      System.out.print(d.getCategory() + " \t");
      System.out.println(d);
View Full Code Here

Examples of ch.akuhn.hapax.corpus.Corpus

public class LogLikelihoodExample {

    public static void main(String... args) {
       
        Corpus c1 = new CorpusBuilderHelper(new SimpleCorpus()).importAllFiles(new File("../Fame"), ".java");
        Corpus c2 = new CorpusBuilderHelper(new SimpleCorpus()).importAllFiles(new File("../CELLS"), ".java");
       
        System.out.println(c1);
        System.out.println(c2);
       
        SortedSet<LogLikelihood> list = new TreeSet<LogLikelihood>();
        for (String each: (union(c1.terms(), c2.terms())).elements()) {
            list.add(new LogLikelihood(c1.terms(), c2.terms(), each));
        }
       
        Out.puts(list);
       
    }
View Full Code Here

Examples of gannuNLP.corpus.Corpus

        System.out.println("Dictionary uploaded!");
        System.out.println("Loading samples from SemCor files!");
        f=new File(args[2]);
        if(f.exists())
        {
          Corpus c=new Corpus(args[2],db, true);
          DataLoader.addSourceList(sourceList,c.getName());
          c.WriteSuperLemmas("./data/"+dict.getName()+"/");         
          System.out.println("Finished!");
        }
        else
        {
          System.out.println("Corpus not found!");
View Full Code Here

Examples of gannuNLP.corpus.Corpus

    this.name="CorpusMFS";
  }
  @Override
  public void init(Input document) throws Exception {
    if (CorpusMFS.corpus==null)
      CorpusMFS.corpus=new Corpus(this.getValue("corpus"),this.dict,Boolean.parseBoolean(this.getValue("includeNoTags")));
    if(this.getValue("osd")==null)
    {
      this.osd=true;
    }
    else
    {
      this.osd=Boolean.parseBoolean(this.getValue("osd"));
    }   
    this.current=new Corpus(CorpusMFS.corpus);   
    if(this.getValue("setup")!=null&&this.getValue("setup").equals("filterMFS"))
    {     
      this.current=new Corpus(CorpusMFS.corpus);
      String parameters="";
      if(this.getValue("threshold")==null)
      {
        parameters+="threshold:0.2;";
      }
View Full Code Here

Examples of gannuNLP.corpus.Corpus

    super("AddCorpusRelatedLemmas");
  }

  @Override
  public void init()throws Exception {
    AddCorpusRelatedLemmas.corpus=new Corpus(this.getValue("corpus"),this.dict,true);
    this.kw=new KeywordsByTFIDF();   
  }
View Full Code Here

Examples of gannuNLP.corpus.Corpus

   * This method removes all the duplicated words from all the bag of words of a target lemma.
   * @param lemma The target lemma.
   */
  public void modifyBow(Lemma lemma) throws Exception {
    ContainsLemmaFilter filter=new ContainsLemmaFilter("");   
    Corpus aux=new Corpus(corpus);
    filter.filter(aux, lemma);
    ArrayList<WSM> wsm=new ArrayList<WSM>();
    for(Input document:aux.getDocuments())
    {
      wsm.addAll(this.kw.extractKeywords(document, 10, true));
    }
    for(Sense s:lemma.getSenses())
    {
View Full Code Here

Examples of gate.Corpus

      }
      // otherwise, the document is already adopted by this datastore so we
      // silently ignore this.
    } else if(lr instanceof CorpusImpl) {
      // only a transient, empty corpus can be adopted!!!
      Corpus corpus = (Corpus)lr;
      if(corpus.getDataStore() != null) {
        throw new PersistenceException(
          "Cannot adopt corpus "+corpus.getName()+
          " which belongs to datastore "+corpus.getDataStore().getName());
      }
      if(corpus.size() != 0) {
        throw new PersistenceException(
          "Cannot adopt corpus "+corpus.getName()+
          " which is non empty, number of documents contained: "+
          corpus.size());
      }
      // since this is a valid corpus, we adopt it by returning new
      // DocumentSubsetCorpus which has the original corpus as a parent
      FeatureMap parms = Factory.newFeatureMap();
      parms.put("jdbcCorpus", ourCorpus);
      try {
        Resource newCorpus = Factory.createResource(
          "at.ofai.gate.virtualcorpus.JDBCSubsetCorpus", parms,
          corpus.getFeatures(), corpus.getName());
        lr = (LanguageResource)newCorpus;
      } catch (ResourceInstantiationException ex) {
        throw new PersistenceException(
          "Could not adopt corpus "+corpus.getName(),ex);
      }
    } else {
      throw new PersistenceException("Cannot adopt LR: "+lr.getName());
    }
    return lr;
View Full Code Here

Examples of gate.Corpus

      }
      // otherwise, the document is already adopted by this datastore so we
      // silently ignore this.
    } else if(lr instanceof CorpusImpl) {
      // only a transient, empty corpus can be adopted!!!
      Corpus corpus = (Corpus)lr;
      if(corpus.getDataStore() != null) {
        throw new PersistenceException(
          "Cannot adopt corpus "+corpus.getName()+
          " which belongs to datastore "+corpus.getDataStore().getName());
      }
      if(corpus.size() != 0) {
        throw new PersistenceException(
          "Cannot adopt corpus "+corpus.getName()+
          " which is non empty, number of documents contained: "+
          corpus.size());
      }
      // since this is a valid corpus, we adopt it by returning new
      // DocumentSubsetCorpus which has the original corpus as a parent
      FeatureMap parms = Factory.newFeatureMap();
      parms.put("directoryCorpus", ourCorpus);
      try {
        Resource newCorpus = Factory.createResource("at.ofai.gate.virtualcorpus.DirectorySubsetCorpus", parms, corpus.getFeatures(), corpus.getName());
        lr = (LanguageResource)newCorpus;
      } catch (ResourceInstantiationException ex) {
        throw new PersistenceException("Could not adopt corpus "+corpus.getName(),ex);
      }
    } else {
      throw new PersistenceException("Cannot adopt LR: "+lr.getName());
    }
    return lr;
View Full Code Here

Examples of gate.Corpus

  public void extractData(String sequenceType, String elementType,
      String labelType) throws ExecutionException,
      InvalidOffsetException, ResourceInstantiationException {

    // run the application on the document
    Corpus corpus = Factory.newCorpus("Edlin Corpus");
    corpus.add(document);
    pipeline.setCorpus(corpus);
    pipeline.execute();
    Factory.deleteResource(corpus);
    Factory.deleteResource(pipeline);
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., owned by Oracle Inc. Contact coftware#gmail.com.