Package org.fnlp.ml.types

Examples of org.fnlp.ml.types.InstanceSet


    Linear[] models = new Linear[postagAlphabet.size()];
    int fsize = features.size();

    for (int i = 0; i < postagAlphabet.size(); i++) {
      String pos = postagAlphabet.lookupString(i);
      InstanceSet instset = readInstanceSet(pos);
      LabelAlphabet alphabet = factory.buildLabelAlphabet(pos);
      int ysize = alphabet.size();
      System.out.printf("Training with data: %s\n", pos);
      System.out.printf("Number of labels: %d\n", ysize);
      LinearMax solver = new LinearMax(generator, ysize);
View Full Code Here


   * @return 样本集
   * @throws Exception
   */
  private InstanceSet readInstanceSet(String pos) throws IOException {

    InstanceSet instset = new InstanceSet();

    LabelAlphabet labelAlphabet = factory.buildLabelAlphabet(pos);

    BufferedReader in = new BufferedReader(new InputStreamReader(
        new FileInputStream(fp), charset));

    String line = null;
    while ((line = in.readLine()) != null) {
      line = line.trim();
      if (line.matches("^$"))
        continue;
      if (line.startsWith(pos + " ")) {
        List<String> tokens = Arrays.asList(line.split("\\s+"));

        int[] data = new int[tokens.size() - 2];
        for (int i = 0; i < data.length; i++) {
          data[i] = Integer.parseInt(tokens.get(i + 2));
        }
        Instance inst = new Instance(data);
        inst.setTarget(labelAlphabet.lookupIndex(tokens.get(1)));

        instset.add(inst);
      }
    }

    in.close();

    labelAlphabet.setStopIncrement(true);
    instset.setAlphabetFactory(factory);

    return instset;
  }
View Full Code Here

    System.out.print("生成训练数据 ...");

    FNLPReader reader = new FNLPReader(file);
    FNLPReader preReader = new FNLPReader(file);
    InstanceSet instset = new InstanceSet();
   
    LabelAlphabet la = factory.DefaultLabelAlphabet();
    IFeatureAlphabet fa = factory.DefaultFeatureAlphabet();
    int count = 0;
   
    //preReader为了把ysize定下来
    la.lookupIndex("S");
    while(preReader.hasNext()){
      Sentence sent = (Sentence) preReader.next();
      Target targets = (Target)sent.getTarget();
      for(int i=0; i<sent.length(); i++){
        String label;
        if(targets.getHead(i) != -1){
          if(targets.getHead(i) < i){
            label = "L" + targets.getDepClass(i);
          }
          //else if(targets.getHead(i) > i){
          else{
            label = "R" + targets.getDepClass(i);
          }
          la.lookupIndex(label);
        }
      }
    }
    int ysize = la.size();
    la.setStopIncrement(true);
       
    while (reader.hasNext()) {
      Sentence sent = (Sentence) reader.next();
      //  int[] heads = (int[]) instance.getTarget();
      String depClass = null;
      Target targets = (Target)sent.getTarget();
      JointParsingState state = new JointParsingState(sent);
     
      while (!state.isFinalState()) {
        // 左右焦点词在句子中的位置
        int[] lr = state.getFocusIndices();

        ArrayList<String> features = state.getFeatures();
        JointParsingState.Action action = getAction(lr[0], lr[1],
            targets);
        switch (action) {
        case LEFT:
          depClass = targets.getDepClass(lr[1]);
          break;
        case RIGHT:
          depClass = targets.getDepClass(lr[0]);
          break;
        default:

        }
        state.next(action,depClass);
        if (action == JointParsingState.Action.LEFT)
          targets.setHeads(lr[1],-1);
        if (action == JointParsingState.Action.RIGHT)
          targets.setHeads(lr[0],-1);
        String label = "";
        switch (action) {
        case LEFT:
          label += "L"+sent.getDepClass(lr[1]);   
          break;
        case RIGHT:
          label+="R"+sent.getDepClass(lr[0]);
          break;
        default:
          label = "S";         
        }
        int id = la.lookupIndex(label);       
        Instance inst = new Instance();
        inst.setTarget(id);
        int[] idx = JointParser.addFeature(fa, features, ysize);
        inst.setData(idx);
        instset.add(inst);
      }
      count++;
//      System.out.println(count);
    }
   
    instset.setAlphabetFactory(factory);
    System.out.printf("共生成实例:%d个\n", count);
    return instset;
  }
View Full Code Here

   * @throws IOException
   * @throws Exception
   */
  public void train(String dataFile, int maxite, float c) throws IOException {
   
    InstanceSet instset =  buildInstanceList(dataFile);
    IFeatureAlphabet features = factory.DefaultFeatureAlphabet();

    SFGenerator generator = new SFGenerator();
    int fsize = features.size();
   
View Full Code Here

   
  }
 
  public static InstanceSet readTrees(String path, String suffix,
      Charset charset) throws IOException {
    InstanceSet dataSet = new InstanceSet();
    List<File> fileList = findFiles(path, -1, -1, suffix);
    for (File file : fileList) {
      System.out.println(file.toString());
//      if(file.toString().contains("0030")){
//        System.out.println(file.toString());
//      }
      TreeReaderIterator ite = new TreeReaderIterator(file, charset);
      while (ite.hasNext())
        dataSet.add(new Instance(ite.next()));
    }
    return dataSet;
  }
View Full Code Here

  }
 
  public static InstanceSet readNewTrees(String path,
      String suffix, Charset charset)throws IOException {
    List<File> fileList = findFiles(path, -1, -1, suffix);
    InstanceSet dataSet = new InstanceSet();
    for (File file : fileList) {
      System.out.println(file.toString());
//      if(file.toString().contains("0030")){
//        System.out.println(file.toString());
//      }
      TreeReaderIterator ite = new TreeReaderIterator(file, charset);
      while (ite.hasNext())  {
        System.out.print(".");
        Tree<Node> inst = ite.next();
        List<Tree<Node>> newTreeList = getNewTree(inst);
        for(int i=0;i<newTreeList.size();i++){
          dataSet.add(new Instance(newTreeList.get(i)));
        }
      }
      System.out.print("\n");
    }
    return dataSet;
View Full Code Here

  }

  public static InstanceSet readTrees(String path, int from, int to,
      String suffix, Charset charset) throws IOException {
    List<File> fileList = findFiles(path, from, to, suffix);
    InstanceSet dataSet = new InstanceSet();
    for (File file : fileList) {
      TreeReaderIterator ite = new TreeReaderIterator(file, charset);
      while (ite.hasNext())  {
        dataSet.add(new Instance(ite.next()));
      }
    }
    return dataSet;
  }
View Full Code Here

      in.unread(ch);
    }

  }
  public static void main(String[] args) throws IOException{
    InstanceSet ins = MyTreebankReader.readNewTrees("./data/ctb/data","mz",Charset.forName("UTF8"));
    System.out.print(ins.size());
    for(int i=0;i<ins.size();i++){
      Tree<Node> tr = (Tree<Node>)(ins.get(i).getData());
//      System.out.println(  tr.getLabel().getTag());
      Iterator it = tr.iterator();
      while(it.hasNext()){
        Tree<Node> te = (Tree<Node>) it.next();
  //      if(te.isLeaf())
View Full Code Here

*/
public class FCTB2CONLL {

  public static void main(String[] args) throws IOException{
    DependentTreeProducter rp = new DependentTreeProducter();
    InstanceSet ins = MyTreebankReader.readTrees("../data/ctb/data", null,Charset.forName("UTF8"));
//    InstanceSet ins = MyTreebankReader.readNewTrees("./data/ctb/data", null,Charset.forName("UTF8"));
   
    rp.write(ins, "../data/ctb/result.txt", "../data/headrules.txt");
    System.out.print("Done!");
  }
View Full Code Here

public class FCTB2CONLLTest {

  public static void main(String[] args) throws IOException{
    DependentTreeProducter rp = new DependentTreeProducter();
    rp.debug = true;
    InstanceSet ins = MyTreebankReader.readTrees("./data/ctb/test.txt", null,Charset.forName("UTF8"));
   
    rp.write(ins, "./data/ctb/result.txt", "./data/headrules.txt");
    System.out.print("Done!");
  }
View Full Code Here

TOP

Related Classes of org.fnlp.ml.types.InstanceSet

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.