Package edu.stanford.nlp.international.morph

Examples of edu.stanford.nlp.international.morph.MorphoFeatures


  /**
   * Hand-written rules to convert SAMA analyses to feature structures.
   */
  @Override
  public MorphoFeatures strToFeatures(String spec) {
    MorphoFeatures features = new ArabicMorphoFeatures();

    // Check for the boundary symbol
    if(spec == null || spec.equals("")) {
      return features;
    }
    //Possessiveness
    if(isActive(MorphoFeatureType.POSS) && spec.contains("POSS")) {
      features.addFeature(MorphoFeatureType.POSS,possVals[0]);
    }

    //Nominals and pronominals. Mona ignores Pronominals in ERTS, but they seem to help...
    // NSUFF -- declinable nominals
    // VSUFF -- enclitic pronominals
    // PRON -- ordinary pronominals
    if(spec.contains("NSUFF") || spec.contains("NOUN") || spec.contains("ADJ")) {
      // Nominal phi feature indicators are different than the indicators
      // that we process with processInflectionalFeatures()
      if(isActive(MorphoFeatureType.NGEN)) {
        if(spec.contains("FEM")) {
          features.addFeature(MorphoFeatureType.NGEN, genVals[1]);
        } else if(spec.contains("MASC") || !pNounNoMorph.matcher(spec).find()) {
          features.addFeature(MorphoFeatureType.NGEN, genVals[0]);
        }
      }

      // WSGDEBUG -- Number for nominals only
      if(isActive(MorphoFeatureType.NNUM)) {
        if(spec.contains("DU")) {
          features.addFeature(MorphoFeatureType.NNUM, numVals[1]);
        } else if(spec.contains("PL")) {
          features.addFeature(MorphoFeatureType.NNUM, numVals[2]);
        } else if (!pNounNoMorph.matcher(spec).find()){ // (spec.contains("SG"))
          features.addFeature(MorphoFeatureType.NNUM, numVals[0]);
        }
      }

      //Definiteness
      if(isActive(MorphoFeatureType.DEF)) {
        if (spec.contains("DET")) {
          features.addFeature(MorphoFeatureType.DEF, defVals[1]);
        } else if (!pNounNoMorph.matcher(spec).find()){
          features.addFeature(MorphoFeatureType.DEF, defVals[0]);
        }
      }

      // Proper nouns (probably a stupid feature)
      if (isActive(MorphoFeatureType.PROP)) {
        if (spec.contains("PROP")) {
          features.addFeature(MorphoFeatureType.PROP,"");
        }
      }

    } else if(spec.contains("PRON") || (spec.contains("VSUFF_DO") && !pVerbMood.matcher(spec).find())) {
      if(spec.contains("DEM_PRON")) {
        features.addFeature(MorphoFeatureType.DEF, defVals[0]);
        Matcher m = pDemPronounFeatures.matcher(spec);
        if (m.find()) {
          spec = m.group(1);
          processInflectionalFeaturesHelper(features, spec);
        }

      } else {
        processInflectionalFeatures(features, spec);
      }

    // Verbs (marked for tense)
    } else if(pVerbTenseMarker.matcher(spec).find()) {

      // Tense feature
      if(isActive(MorphoFeatureType.TENSE)) {
        if(spec.contains("PV"))
          features.addFeature(MorphoFeatureType.TENSE, tenseVals[0]);
        else if(spec.contains("IV"))
          features.addFeature(MorphoFeatureType.TENSE, tenseVals[1]);
        else if(spec.contains("CV"))
          features.addFeature(MorphoFeatureType.TENSE, tenseVals[2]);
      }

      // Inflectional features
      processInflectionalFeatures(features, spec);

      if(isActive(MorphoFeatureType.MOOD)) {
        Matcher moodMatcher = pMood.matcher(spec);
        if(moodMatcher.find()) {
          String moodStr = moodMatcher.group(1);
          switch (moodStr) {
            case "I":
              features.addFeature(MorphoFeatureType.MOOD, moodVals[0]);
              break;
            case "S":
              features.addFeature(MorphoFeatureType.MOOD, moodVals[1]);
              break;
            case "J":
              features.addFeature(MorphoFeatureType.MOOD, moodVals[2]);
              break;
          }
        }
      }

      if(isActive(MorphoFeatureType.VOICE)) {
        if(spec.contains("PASS")) {
          features.addFeature(MorphoFeatureType.VOICE, voiceVals[1]);
        } else {
          features.addFeature(MorphoFeatureType.VOICE, voiceVals[0]);
        }
      }
    }
    return features;
  }
View Full Code Here


    try {
      BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(fName)));

      int nLine = 0;
      for(String line;(line = br.readLine()) != null; nLine++) {
        MorphoFeatures mFeats = fSpec.strToFeatures(line.trim());
        System.out.printf("%s\t%s%n", line.trim(), mFeats.toString());
      }
      br.close();
      System.out.printf("%nRead %d lines%n",nLine);

    } catch (FileNotFoundException e) {
View Full Code Here

    private static final long serialVersionUID = -4611776415583633186L;

    @Override
    public MorphoFeatures fromTagString(String str) {
      String[] feats = str.split("\\-");
      MorphoFeatures mFeats = new ArabicMorphoFeatures();
      // First element is the base POS
//      String baseTag = feats[0];
      for(int i = 1; i < feats.length; i++) {
        String[] keyValue = feats[i].split(KEY_VAL_DELIM);
        if(keyValue.length != 2) continue;
        MorphoFeatureType fName = MorphoFeatureType.valueOf(keyValue[0].trim());
        mFeats.addFeature(fName, keyValue[1].trim());
      }
      return mFeats;
    }
View Full Code Here

    if(t.isPreTerminal() && tagSpec != null) {
      if( !(t.firstChild().label() instanceof CoreLabel) || ((CoreLabel) t.firstChild().label()).originalText() == null )
        throw new RuntimeException(String.format("%s: Term lacks morpho analysis: %s",this.getClass().getName(),t.toString()));

      String morphoStr = ((CoreLabel) t.firstChild().label()).originalText();
      MorphoFeatures feats = tagSpec.strToFeatures(morphoStr);
      baseCat = feats.getTag(baseCat);
    }

    //Update the label(s)
    String newCat = baseCat + newCategory.toString();
    t.setValue(newCat);
View Full Code Here

      throw new IllegalArgumentException("French does not support feature type: " + feat.toString());
  }

  @Override
  public MorphoFeatures strToFeatures(String spec) {
    MorphoFeatures feats = new MorphoFeatures();

    //Usually this is the boundary symbol
    if(spec == null || spec.equals(""))
      return feats;

    boolean isOtherActive = isActive(MorphoFeatureType.OTHER);
   
    if(spec.startsWith("ADV")) {
      feats.setAltTag("ADV");
      if(spec.contains("int")) {
        if (isOtherActive) {
          feats.addFeature(MorphoFeatureType.OTHER, "advint");
        }
        feats.setAltTag("ADVWH");
      }

    } else if(spec.startsWith("A")) {
      feats.setAltTag("ADJ");
      if(spec.contains("int")) {
        if (isOtherActive) {
          feats.addFeature(MorphoFeatureType.OTHER, "adjint");
        }
        feats.setAltTag("ADJWH");
      }
     
      addPhiFeatures(feats,spec);

    } else if(spec.equals("CC") || spec.equals("C-C")) {
      if (isOtherActive) {
        feats.addFeature(MorphoFeatureType.OTHER, "Cc");
      }
      feats.setAltTag("CC");

    } else if(spec.equals("CS") || spec.equals("C-S")) {
      if (isOtherActive) {
        feats.addFeature(MorphoFeatureType.OTHER, "Cs");
      }
      feats.setAltTag("CS");

    } else if(spec.startsWith("CL")) {
      feats.setAltTag("CL");
      if(spec.contains("suj") || spec.equals("CL-S-3fp")) {//"CL-S-3fp" is equivalent to suj
        if (isOtherActive) {
          feats.addFeature(MorphoFeatureType.OTHER,"Sbj");
        }
        feats.setAltTag("CLS");

      } else if(spec.contains("obj")) {
        if (isOtherActive) {
          feats.addFeature(MorphoFeatureType.OTHER, "Obj");
        }
        feats.setAltTag("CLO");

      } else if(spec.contains("refl")) {
        if (isOtherActive) {
          feats.addFeature(MorphoFeatureType.OTHER, "Rfl");
        }
        feats.setAltTag("CLR");
      }

      addPhiFeatures(feats,spec);

    } else if(spec.startsWith("D")) {
      feats.setAltTag("DET");
      if(spec.contains("int")) {
        if (isOtherActive) {
          feats.addFeature(MorphoFeatureType.OTHER, "dint");
        }
        feats.setAltTag("DETWH");
      }

      addPhiFeatures(feats,spec);

    } else if(spec.startsWith("N")) {
      feats.setAltTag("N");//TODO These are usually N-card...make these CD?
      if(spec.contains("P")) {
        if (isOtherActive) {
          feats.addFeature(MorphoFeatureType.OTHER, "Np");
        }
        feats.setAltTag("NPP");

      } else if(spec.contains("C")) {
        if (isOtherActive) {
          feats.addFeature(MorphoFeatureType.OTHER, "Nc");
        }
        feats.setAltTag("NC");
      }

      addPhiFeatures(feats,spec);

    } else if(spec.startsWith("PRO")) {
      feats.setAltTag("PRO");
      if(spec.contains("int")) {
        if (isOtherActive) {
          feats.addFeature(MorphoFeatureType.OTHER,"Ni");
        }
        feats.setAltTag("PROWH");

      } else if(spec.contains("rel")) {
        if (isOtherActive) {
          feats.addFeature(MorphoFeatureType.OTHER, "Nr");
        }
        feats.setAltTag("PROREL");
      }

      addPhiFeatures(feats,spec);

    } else if(spec.startsWith("V")) {
      feats.setAltTag("V");
      if(spec.contains("Y")) {
        if (isOtherActive) {
          feats.addFeature(MorphoFeatureType.OTHER,"Vp");
        }
        feats.setAltTag("VIMP");

      } else if(spec.contains("W")) {
        if (isOtherActive) {
          feats.addFeature(MorphoFeatureType.OTHER, "Vf");
        }
        feats.setAltTag("VINF");
       
      } else if(spec.contains("S") || spec.contains("T")) {
        if (isOtherActive) {
          feats.addFeature(MorphoFeatureType.OTHER, "Vs");
        }
        feats.setAltTag("VS");
       
      } else if(spec.contains("K")) {
        if (isOtherActive) {
          feats.addFeature(MorphoFeatureType.OTHER, "Vp");
        }
        feats.setAltTag("VPP");
       
      } else if(spec.contains("G")) {
        if (isOtherActive) {
          feats.addFeature(MorphoFeatureType.OTHER, "Vr");
        }
        feats.setAltTag("VPR");
      }
     
      addPhiFeatures(feats,spec);
   
    } else if(spec.equals("P") || spec.equals("I")) {
      feats.setAltTag(spec);
     
    }
//    else {
//      System.err.println("Could not map spec: " + spec);
//    }
View Full Code Here

        morphStr += "-" + subCat + "--";
      } else {
        morphStr += "---";
      }
    }
    MorphoFeatures feats = morpho.strToFeatures(morphStr);
    if(feats.getAltTag() != null && !feats.getAltTag().equals("")) {
      label.setValue(feats.getAltTag());
      label.setTag(feats.getAltTag());
    }
  }
View Full Code Here

      mfs.activate(MorphoFeatureType.GEN);
      mfs.activate(MorphoFeatureType.NUM);
      mfs.activate(MorphoFeatureType.PER);

      for(String line; (line = br.readLine()) != null;) {
        MorphoFeatures feats = mfs.strToFeatures(line);
        System.out.printf("%s\t%s%n", line.trim(),feats.toString());
      }

      br.close();

    } catch (FileNotFoundException e) {
View Full Code Here

          morphStr += "-" + subCat + "--";
        } else {
          morphStr += "---";
        }
      }
      MorphoFeatures feats = spec.strToFeatures(morphStr);
      if(feats.getAltTag() != null && !feats.getAltTag().equals("")) {
        CoreLabel cl = (CoreLabel) preYield.get(i);
        cl.setValue(feats.getAltTag());
        cl.setTag(feats.getAltTag());
      }
    }
  }
View Full Code Here

      MorphoFeatureSpecification featureSpec = new ArabicMorphoFeatureSpecification();
      featureSpec.activate(MorphoFeatureType.NGEN);
      featureSpec.activate(MorphoFeatureType.NNUM);
      featureSpec.activate(MorphoFeatureType.DEF);
      featureSpec.activate(MorphoFeatureType.TENSE);
      MorphoFeatures features = featureSpec.strToFeatures(tag);

      // Rule #1 : ت --> ة
      if (features.getValue(MorphoFeatureType.NGEN).equals("F") &&
          features.getValue(MorphoFeatureType.NNUM).equals("SG") &&
          rawToken.endsWith("ت-") &&
          !stripRewrites) {
        lastLabel = RewriteSymbol;
      } else if (rawToken.endsWith("ة-")) {
        assert token.endsWith("ة");
        token = token.substring(0, token.length() - 1) + "ت";
        lastLabel = RewriteSymbol;
      }

      // Rule #2 : لل --> ل ال
      if (lastToken.equals("ل") &&
          features.getValue(MorphoFeatureType.DEF).equals("D")) {
        if (rawToken.startsWith("-ال")) {
          if (!token.startsWith("ا"))
            System.err.println("Bad REWAL: " + rawToken + " / " + token);
          token = token.substring(1);
          rewritten = rewritten.substring(1);
          if (!stripRewrites)
            firstLabel = RewriteSymbol;
        } else if (rawToken.startsWith("-ل")) {
          if (!token.startsWith("ل"))
            System.err.println("Bad REWAL: " + rawToken + " / " + token);
          if (!stripRewrites)
            firstLabel = RewriteSymbol;
        } else {
          System.err.println("Ignoring REWAL: " + rawToken + " / " + token);
        }
      }
     
      // Rule #3 : ي --> ى
      // Rule #4 : ا --> ى
      if (rawToken.endsWith("ى-")) {
        if (features.getValue(MorphoFeatureType.TENSE) != null) {
          // verb: ى becomes ا
          token = token.substring(0, token.length() - 1) + "ا";
        } else {
          // assume preposition:
          token = token.substring(0, token.length() - 1) + "ي";
View Full Code Here

    if( ! universalMap.containsKey(shortTag)) {
      System.err.printf("%s: No universal tag for LDC tag %s%n", this.getClass().getName(),shortTag);
      universalTag = shortTag;
    }
  
    MorphoFeatures feats = new MorphoFeatures(morphoSpec.strToFeatures(rawTag));
   
    String functionalTag = feats.getTag(universalTag);
   
    return functionalTag;
  }
View Full Code Here

TOP

Related Classes of edu.stanford.nlp.international.morph.MorphoFeatures

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.