Package edu.stanford.nlp.international.morph

Examples of edu.stanford.nlp.international.morph.MorphoFeatureSpecification


    // Command-line driver excerpt: expects exactly two arguments,
    // args[0] = input file name and args[1] = comma-separated feature names.
    if(args.length != 2) {
      System.err.printf("Usage: java %s filename feats%n", ArabicMorphoFeatureSpecification.class.getName());
      System.exit(-1);
    }

    // Activate every feature named in args[1] on an Arabic specification.
    // The names are passed to MorphoFeatureType.valueOf unmodified (no trimming),
    // so they presumably must match the type's constant names exactly -- TODO confirm.
    MorphoFeatureSpecification fSpec = new ArabicMorphoFeatureSpecification();
    String[] feats = args[1].split(",");
    for(String feat : feats) {
      MorphoFeatureType fType = MorphoFeatureType.valueOf(feat);
      fSpec.activate(fType);
    }

    File fName = new File(args[0]);
    try {
      // NOTE(review): no charset is passed to InputStreamReader, so the input is
      // decoded with the JVM's default charset.
      BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(fName)));

      // nLine is incremented once per line read and reported after the loop.
      int nLine = 0;
      for(String line;(line = br.readLine()) != null; nLine++) {
        // Each trimmed input line is converted to features and echoed as
        // "<trimmed line>TAB<features>".
        MorphoFeatures mFeats = fSpec.strToFeatures(line.trim());
        System.out.printf("%s\t%s%n", line.trim(), mFeats.toString());
      }
      // NOTE(review): the reader is closed only on this success path; an exception
      // thrown inside the loop skips this call.
      br.close();
      System.out.printf("%nRead %d lines%n",nLine);
View Full Code Here


    // Excerpt: loads a training treebank from args[2] and a development treebank
    // from args[3], both created through the language-specific parser parameters.
    TreebankLangParserParams tlpp = Languages.getLanguageParams(language);
    Treebank trainTreebank = tlpp.diskTreebank();
    trainTreebank.loadPath(args[2]);
    Treebank devTreebank = tlpp.diskTreebank();
    devTreebank.loadPath(args[3]);
    MorphoFeatureSpecification morphoSpec;
    Options options = getOptions(language);
    // Pick the morphological specification and the matching "factored" option flag.
    // Only Arabic and French are handled; any other language aborts with
    // UnsupportedOperationException (thrown without a message).
    if (language.equals(Language.Arabic)) {
      morphoSpec = new ArabicMorphoFeatureSpecification();
      String[] languageOptions = {"-arabicFactored"};
      tlpp.setOptionFlag(languageOptions, 0);
    } else if (language.equals(Language.French)) {
      morphoSpec = new FrenchMorphoFeatureSpecification();
      String[] languageOptions = {"-frenchFactored"};
      tlpp.setOptionFlag(languageOptions, 0);
    } else {
      throw new UnsupportedOperationException();
    }
    // args[1] is a comma-separated list of feature names. The list as a whole is
    // trimmed, but the individual names are not, before being passed to
    // MorphoFeatureType.valueOf.
    String featureList = args[1];
    String[] features = featureList.trim().split(",");
    for (String feature : features) {
      morphoSpec.activate(MorphoFeatureType.valueOf(feature));
    }
    // Echo the configuration to standard output.
    System.out.println("Language: " + language.toString());
    System.out.println("Features: " + args[1]);

    // Create word and tag indices
View Full Code Here

      tlpp.setOptionFlag(options, 0);
    }
    // Excerpt: load the treebank found at args[1].
    Treebank tb = tlpp.diskTreebank();
    tb.loadPath(args[1]);

    // Arabic gets the Arabic specification; every other language value falls
    // through to the French specification.
    MorphoFeatureSpecification morphoSpec = language.equals(Language.Arabic) ?
        new ArabicMorphoFeatureSpecification() : new FrenchMorphoFeatureSpecification();

    // args[2] is a comma-separated list of feature names to activate.
    String[] features = args[2].trim().split(",");
    for (String feature : features) {
      morphoSpec.activate(MorphoFeatureType.valueOf(feature));
    }

    // Counters
    // The integer constructor arguments are presumably initial capacities -- TODO confirm.
    Counter<String> wordTagCounter = new ClassicCounter<String>(30000);
    Counter<String> morphTagCounter = new ClassicCounter<String>(500);
//    Counter<String> signatureTagCounter = new ClassicCounter<String>();
    Counter<String> morphCounter = new ClassicCounter<String>(500);
    Counter<String> wordCounter = new ClassicCounter<String>(30000);
    Counter<String> tagCounter = new ClassicCounter<String>(300);

    Counter<String> lemmaCounter = new ClassicCounter<String>(25000);
    Counter<String> lemmaTagCounter = new ClassicCounter<String>(25000);

    Counter<String> richTagCounter = new ClassicCounter<String>(1000);

    Counter<String> reducedTagCounter = new ClassicCounter<String>(500);

    Counter<String> reducedTagLemmaCounter = new ClassicCounter<String>(500);

    Map<String,Set<String>> wordLemmaMap = Generics.newHashMap();

    TwoDimensionalIntCounter<String,String> lemmaReducedTagCounter = new TwoDimensionalIntCounter<String,String>(30000);
    TwoDimensionalIntCounter<String,String> reducedTagTagCounter = new TwoDimensionalIntCounter<String,String>(500);
    TwoDimensionalIntCounter<String,String> tagReducedTagCounter = new TwoDimensionalIntCounter<String,String>(300);

    int numTrees = 0;
    for (Tree tree : tb) {
      // Apply the language-specific transformation to every non-leaf node,
      // passing the full tree as the second argument.
      for (Tree subTree : tree) {
        if (!subTree.isLeaf()) {
          tlpp.transformTree(subTree, tree);
        }
      }
      // Tags and words are paired by position below. The size check is an
      // assert, so it only runs when assertions are enabled.
      List<Label> pretermList = tree.preTerminalYield();
      List<Label> yield = tree.yield();
      assert yield.size() == pretermList.size();

      int yieldLen = yield.size();
      for (int i = 0; i < yieldLen; ++i) {
        String tag = pretermList.get(i).value();

        String word = yield.get(i).value();
        // The morphological string is read from the leaf's originalText();
        // the unchecked cast requires every leaf label to be a CoreLabel.
        String morph = ((CoreLabel) yield.get(i)).originalText();

        // Note: if there is no lemma, then we use the surface form.
        Pair<String,String> lemmaTag = MorphoFeatureSpecification.splitMorphString(word, morph);
        String lemma = lemmaTag.first();
        String richTag = lemmaTag.second();

        // WSGDEBUG
        // Lemmas whose tag contains "MW" get a "-MWE" suffix before counting.
        if (tag.contains("MW")) lemma += "-MWE";

        // NOTE(review): composite counter keys below are plain concatenations
        // with no separator between the two parts.
        lemmaCounter.incrementCount(lemma);
        lemmaTagCounter.incrementCount(lemma + tag);

        richTagCounter.incrementCount(richTag);

        // The reduced tag is the string form of the features that the active
        // specification extracts from the rich tag.
        String reducedTag = morphoSpec.strToFeatures(richTag).toString();
        reducedTagCounter.incrementCount(reducedTag);

        reducedTagLemmaCounter.incrementCount(reducedTag + lemma);

        wordTagCounter.incrementCount(word + tag);
View Full Code Here

      System.exit(-1);
    }

    // Excerpt: read the file named by args[0] line by line and print the
    // features the French specification extracts from each line.
    try {
      // NOTE(review): FileReader is constructed without a charset, so the input is
      // decoded with the JVM's default charset.
      BufferedReader br = new BufferedReader(new FileReader(args[0]));
      MorphoFeatureSpecification mfs = new FrenchMorphoFeatureSpecification();

      //Activate all features for debugging
      // (the three activated here are GEN, NUM and PER)
      mfs.activate(MorphoFeatureType.GEN);
      mfs.activate(MorphoFeatureType.NUM);
      mfs.activate(MorphoFeatureType.PER);

      for(String line; (line = br.readLine()) != null;) {
        // The raw (untrimmed) line is analysed, while the trimmed line is printed
        // as "<trimmed line>TAB<features>".
        MorphoFeatures feats = mfs.strToFeatures(line);
        System.out.printf("%s\t%s%n", line.trim(),feats.toString());
      }

      // NOTE(review): reached only when the loop completes without an exception.
      br.close();
View Full Code Here

    List<Label> yield = tree.yield();
    List<Label> preYield = tree.preTerminalYield();

    assert yield.size() == preYield.size();

    MorphoFeatureSpecification spec = new FrenchMorphoFeatureSpecification();
    for(int i = 0; i < yield.size(); i++) {
      // Morphological Analysis
      String morphStr = ((CoreLabel) yield.get(i)).originalText();
      if (morphStr == null || morphStr.equals("")) {
        morphStr = preYield.get(i).value();
        // POS subcategory
        String subCat = ((CoreLabel) yield.get(i)).category();
        if (subCat != null && subCat != "") {
          morphStr += "-" + subCat + "--";
        } else {
          morphStr += "---";
        }
      }
      MorphoFeatures feats = spec.strToFeatures(morphStr);
      if(feats.getAltTag() != null && !feats.getAltTag().equals("")) {
        CoreLabel cl = (CoreLabel) preYield.get(i);
        cl.setValue(feats.getAltTag());
        cl.setTag(feats.getAltTag());
      }
View Full Code Here

    // Excerpt: optional rewriting of a token based on features parsed from its tag.
    if (applyRewriteRules) {
      // Apply Arabic-specific re-write rules
      String rawToken = tokenLabel.word();
      String tag = tokenLabel.tag();
      // A fresh specification is built for this token with four features
      // activated (NGEN, NNUM, DEF, TENSE); the token's tag string is then parsed.
      MorphoFeatureSpecification featureSpec = new ArabicMorphoFeatureSpecification();
      featureSpec.activate(MorphoFeatureType.NGEN);
      featureSpec.activate(MorphoFeatureType.NNUM);
      featureSpec.activate(MorphoFeatureType.DEF);
      featureSpec.activate(MorphoFeatureType.TENSE);
      MorphoFeatures features = featureSpec.strToFeatures(tag);

      // Rule #1 : ت --> ة
      // The visible conditions require NGEN to equal "F", NNUM to equal "SG" and
      // the raw token to end with "ت-"; further conditions are not visible here.
      // NOTE(review): if getValue can return null for a feature that was not
      // found, the equals calls below would throw -- confirm.
      if (features.getValue(MorphoFeatureType.NGEN).equals("F") &&
          features.getValue(MorphoFeatureType.NNUM).equals("SG") &&
          rawToken.endsWith("ت-") &&
View Full Code Here

TOP

Related Classes of edu.stanford.nlp.international.morph.MorphoFeatureSpecification

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.