Package opennlp.maxent

Examples of opennlp.maxent.GISModel


        System.out.println("\tBAD");
      }*/
      List<String> features = fe.getFeatures(entity);
      String type = entity.getType();
      if(modelsByType.containsKey(type)) {
        GISModel model = modelsByType.get(type);
        if(model.getNumOutcomes() == 2) {
          double prob = model.eval(features.toArray(new String[0]))[model.getIndex("T")];
          //System.out.println(entity.getConfidence() + "\t->\t" + prob);
         
          double conf = entity.getConfidence();
         
          double prob2 = prob;
View Full Code Here


    modelsByType = new HashMap<String,GISModel>();
    for(int i=0;i<maxents.size();i++) {
      Element maxent = maxents.get(i);
      String type = maxent.getAttributeValue("type");
      StringGISModelReader sgmr = new StringGISModelReader(maxent.getValue());
      GISModel gm = sgmr.getModel();
      modelsByType.put(type, gm);
    }   
  }
View Full Code Here

              if(!feature.startsWith("anchor")) newFeatures.add(feature);             
            }
          }
          featArray = newFeatures.toArray(new String[0]);
        }
        GISModel gm = gmByPrev.get(tag);
        if(gm == null) continue;
        Map<String, Double> modelResults = runGIS(gm, featArray);
        results.put(tag, modelResults);
      }
    }
View Full Code Here

    FeatureExtractor extractor = new FeatureExtractor(tokSeq, domain);
    List<Token> tokens = tokSeq.getTokens();
    String prevTag = "O";
    for(int i=0;i<tokens.size();i++) {
      String tag = tokens.get(i).getBioTag();
      GISModel gm = gmByPrev.get(prevTag);
      if(gm == null) continue;
      Map<String,Double> scoresForPrev = featureCVScores.get(prevTag);
      if(scoresForPrev == null) {
        scoresForPrev = new HashMap<String,Double>();
        featureCVScores.put(prevTag, scoresForPrev);
      }
     
      prevTag = tag;
      int outcomeIndex = gm.getIndex(tag);
      if(outcomeIndex == -1) continue;
      List<String> features = extractor.getFeatures(i);
      if(features.size() == 0) continue;
      String [] featuresArray = features.toArray(new String[0]);
      String [] newFeaturesArray = features.toArray(new String[0]);
      double [] baseProbs = gm.eval(featuresArray);
      for(int j=0;j<features.size();j++) {
        newFeaturesArray[j] = "IGNORETHIS";
        double [] newProbs = gm.eval(newFeaturesArray);
        double gain = infoLoss(newProbs, outcomeIndex) - infoLoss(baseProbs, outcomeIndex);
        if(Double.isNaN(gain)) gain = 0.0;
        String feature = features.get(j);
        double oldScore = 0.0;
        if(scoresForPrev.containsKey(feature)) oldScore = scoresForPrev.get(feature);
View Full Code Here

    tagSet = new HashSet<String>();
    for(int i=0;i<maxents.size();i++) {
      Element maxent = maxents.get(i);
      String prev = maxent.getAttributeValue("prev");
      StringGISModelReader sgmr = new StringGISModelReader(maxent.getValue());
      GISModel gm = sgmr.getModel();
      gmByPrev.put(prev, gm);
      tagSet.add(prev);
      for(int j=0;j<gm.getNumOutcomes();j++) {
        tagSet.add(gm.getOutcome(j));
      }
    }
    Element rescorerElem = memmRoot.getFirstChildElement("rescorer");
    if(rescorerElem != null) {
      rescorer = new RescoreMEMMOut();
View Full Code Here

      if(goodPredName) {
        newPredNames.add(predName);
        newParams.add(context);
      }
    }
    return new GISModel(newParams.toArray(new Context[0]), newPredNames.toArray(new String[0]), (String [])modelData[2], (Integer)modelData[3], (Double)modelData[4]);
  }
View Full Code Here

    }
    for(BagEvent be : testBagEvents) {
      testEvents.add(new Event(be.getClassLabel(), be.getFeatures().getSet().toArray(new String[0])));
    }
    DataIndexer di = new TwoPassDataIndexer(new EventCollectorAsStream(new SimpleEventCollector(trainEvents)), 1);
    GISModel gm = GIS.trainModel(100, di);
   
    //ClassificationEvaluator ce = new ClassificationEvaluator();
   
    for(Event event : testEvents) {
      double [] results = gm.eval(event.getContext());
      String result = results[gm.getIndex("TRUE")] > 0.5 ? "TRUE" : "FALSE";
      //String result = gm.getBestOutcome(results);
      //System.out.println(event.getOutcome() + "\t" + result + "\t" + results[gm.getIndex(event.getOutcome())] + "\t" + StringTools.arrayToList(event.getContext()));
      ce.logEvent(event.getOutcome(), result);
    }
    System.out.println(ce.getAccuracy());
View Full Code Here

      if(isChem.get(s)) outcome = "C";
      events.add(new Event(outcome, getFeatures(s)));
    }
    SimpleEventCollector sec = new SimpleEventCollector(events);
    DataIndexer di = new TwoPassDataIndexer(new EventCollectorAsStream(sec), 1);
    GISModel gis = GIS.trainModel(100, di);
   
    System.out.println(gis.getIndex("FOO"));
    if(true) return;
   
   
    long time = System.currentTimeMillis();
    StringGISModelWriter sgmw = new StringGISModelWriter(gis);
    sgmw.persist();
    String modelStr = sgmw.toString();

    //System.out.println(modelStr);
   
    StringGISModelReader sgmr = new StringGISModelReader(modelStr);
    gis = sgmr.getModel();
    System.out.println("Round trip in " + (System.currentTimeMillis() - time));
    System.out.println(modelStr.length());
   
    //if(true) return;
   
    GISModel gis2 = null;
   
    if(false) {
      List<Event> events2 = new ArrayList<Event>();
      for(String s : batch2) {
        double prob = gis.eval(getFeatures(s))[gis.getIndex("C")];
        String outcome = "E";
        if(isChem.get(s)) outcome = "C";
        events.add(new Event(outcome, probToFeatures(prob)));
      }
      sec = new SimpleEventCollector(events);
      di = new TwoPassDataIndexer(new EventCollectorAsStream(sec), 2);
      gis2 = GIS.trainModel(1000, di);     
    }
   
    double score = 0.0;
    List<Double> trialScores = new ArrayList<Double>();
    for(double i=-1;i<1;i+=0.02) trialScores.add(Math.pow(10, i));
    Map<Double,Double> scores = new HashMap<Double,Double>();
    for(double d : trialScores) scores.put(d, 0.0);
    for(String s : batch3) {
      String outcome = "E";
      if(isChem.get(s)) outcome = "C";
      //System.out.println(s + "\t" + outcome + "\t" + gis.getBestOutcome(gis.eval(getFeatures(s))));
      double prob;
      if(gis2 == null) {
        prob = gis.eval(getFeatures(s))[gis.getIndex(outcome)];       
      } else {
        prob = gis2.eval(probToFeatures(gis.eval(getFeatures(s))[gis.getIndex("C")]))[gis.getIndex(outcome)];       
      }

      //System.out.print(prob + "\t");
      //prob = logitToProb(probToLogit(prob));
     
View Full Code Here

      Event event = new Event(type, f.toArray(new String[0]));
      events.add(event);
    }
    if(events.size() == 1) events.add(events.get(0));
    DataIndexer di = new TwoPassDataIndexer(new EventCollectorAsStream(new SimpleEventCollector(events)), 3);
    GISModel gm = GIS.trainModel(100, di);
   
    ClassificationEvaluator ce = new ClassificationEvaluator();

    List<Double> chemList = new ArrayList<Double>();
    List<Double> engList = new ArrayList<Double>();

    for(String word : testSet) {
      Collection<String> f = overallFeatures.get(word);
      String type = chemFeatures.containsKey(word) ? "CHEM" : "NONCHEM";
     
      double [] results = gm.eval(f.toArray(new String[0]));
      System.out.println(word + "\t" + gm.getAllOutcomes(results));
      ce.logEvent(type, gm.getBestOutcome(results));
      if(!gm.getBestOutcome(results).equals(type)) System.out.println("*");
      if(type.equals("CHEM")) {
        chemList.add(results[gm.getIndex("CHEM")]);
      } else {
        engList.add(results[gm.getIndex("CHEM")]);
      }

    }
    ce.pprintPrecisionRecallEval();
    ce.pprintConfusionMatrix();
   
    for(String word : unknownFeatures.keySet()) {
      Collection<String> f = unknownFeatures.get(word);
      double [] results = gm.eval(f.toArray(new String[0]));
      System.out.println(word + "\t" + results[gm.getIndex("CHEM")] + "\t" + NGramBuilder.getInstance().testWordProb(word) + "\t" + NGram.getInstance().testWordSuffixProb(word));     
    }
   
    /*Bag<String> chemWords = getBag(root.getFirstChildElement("chemical"));
    Bag<String> nonChemWords = getBag(root.getFirstChildElement("nonchemical"));
    Bag<String> words = new Bag<String>();
View Full Code Here

      //trainData = new FeatureSelector().selectFeatures(trainData, 200.0);
      List<Event> testData = events.subList(events.size()/2, events.size());
     
      if(trainData.size() == 1) trainData.add(trainData.get(0));
      DataIndexer di = new TwoPassDataIndexer(new EventCollectorAsStream(new SimpleEventCollector(trainData)), 1);
      GISModel gm = GIS.trainModel(100, di);
     
      ClassificationEvaluator ce = new ClassificationEvaluator();
     
      for(Event event : testData) {
        double [] results = gm.eval(event.getContext());
        String result = results[gm.getIndex("TRUE")] > 0.5 ? "TRUE" : "FALSE";
        //String result = gm.getBestOutcome(results);
        //System.out.println(event.getOutcome() + "\t" + result + "\t" + results[gm.getIndex(event.getOutcome())] + "\t" + StringTools.arrayToList(event.getContext()));
        ce.logEvent(event.getOutcome(), result);
      }
      System.out.println("seed: " + seed);
View Full Code Here

TOP

Related Classes of opennlp.maxent.GISModel

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.