// Announce the input file on stderr so stdout stays clean for tokenizer output.
// NOTE(review): `path` is declared outside this fragment — presumably a File; confirm.
System.err.println("Reading from: " + path.getPath());
try {
// Open the input with an explicit UTF-8 decoder (avoids platform-default charset).
// NOTE(review): `br` is not closed anywhere in this visible span — verify it is
// closed (ideally via try-with-resources) further down, or this leaks the stream.
BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(path), "UTF-8"));
// Build an Arabic tokenizer configured with caller-supplied options.
// NOTE(review): `tokOptions` is defined outside this fragment — confirm its contents.
TokenizerFactory<CoreLabel> tf = ArabicTokenizer.factory();
tf.setOptions(tokOptions);
// Second tokenization path: a lexical mapper configured (via its varargs options)
// to strip segmentation and morphological markers from UTF-8 text.
Mapper lexMapper = new DefaultLexicalMapper();
lexMapper.setup(null, "StripSegMarkersInUTF8", "StripMorphMarkersInUTF8");
// Process the file line by line, tracking the 0-based line number.
int lineId = 0;
for(String line; (line = br.readLine()) != null; lineId++) {
line = line.trim();
// Tokenize with the tokenizer
List<CoreLabel> tokenizedLine = tf.getTokenizer(new StringReader(line)).tokenize();
// Emit the tokenizer's whitespace-joined output on stdout.
System.out.println(Sentence.listToString(tokenizedLine));
// Tokenize with the mapper
StringBuilder sb = new StringBuilder();
// NOTE(review): on an empty/blank line, split("\\s+") returns one empty token,
// so `toks` is never empty — confirm downstream comparison tolerates this.
String[] toks = line.split("\\s+");
for (String tok : toks) {
// Map each whitespace-delimited token independently; null = no context token.
String mappedTok = lexMapper.map(null, tok);
sb.append(mappedTok).append(" ");
}
// Re-split the mapped output into tokens (trim drops the trailing space added above).
List<String> mappedToks = Arrays.asList(sb.toString().trim().split("\\s+"));
// Evaluate the output