Package edu.stanford.nlp.international.french

Source Code of edu.stanford.nlp.international.french.FrenchMorphoFeatureSpecification

package edu.stanford.nlp.international.french;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;

import edu.stanford.nlp.international.morph.MorphoFeatureSpecification;
import edu.stanford.nlp.international.morph.MorphoFeatures;

/**
* If MorphoFeatureType.OTHER is active, then the "CC tagset" is produced (see Tbl.2
* of (Crabbe and Candito, 2008). Additional support exists for GEN, NUM, and PER, which
* are (mostly) marked in the FTB annotation.
* <p>
* The actual CC tag is placed in the altTag field of the MorphoFeatures object.
*
* @author Spence Green
*
*/
public class FrenchMorphoFeatureSpecification extends MorphoFeatureSpecification {

  private static final long serialVersionUID = -58379347760106784L;

  public static final String[] genVals = {"M","F"};
  public static final String[] numVals = {"SG","PL"};
  public static final String[] perVals = {"1","2","3"};


  @Override
  public List<String> getValues(MorphoFeatureType feat) {
    if(feat == MorphoFeatureType.GEN)
      return Arrays.asList(genVals);
    else if(feat == MorphoFeatureType.NUM)
      return Arrays.asList(numVals);
    else if(feat == MorphoFeatureType.PER)
      return Arrays.asList(perVals);
    else
      throw new IllegalArgumentException("French does not support feature type: " + feat.toString());
  }

  @Override
  public MorphoFeatures strToFeatures(String spec) {
    MorphoFeatures feats = new MorphoFeatures();

    //Usually this is the boundary symbol
    if(spec == null || spec.equals(""))
      return feats;

    boolean isOtherActive = isActive(MorphoFeatureType.OTHER);
   
    if(spec.startsWith("ADV")) {
      feats.setAltTag("ADV");
      if(spec.contains("int")) {
        if (isOtherActive) {
          feats.addFeature(MorphoFeatureType.OTHER, "advint");
        }
        feats.setAltTag("ADVWH");
      }

    } else if(spec.startsWith("A")) {
      feats.setAltTag("ADJ");
      if(spec.contains("int")) {
        if (isOtherActive) {
          feats.addFeature(MorphoFeatureType.OTHER, "adjint");
        }
        feats.setAltTag("ADJWH");
      }
     
      addPhiFeatures(feats,spec);

    } else if(spec.equals("CC") || spec.equals("C-C")) {
      if (isOtherActive) {
        feats.addFeature(MorphoFeatureType.OTHER, "Cc");
      }
      feats.setAltTag("CC");

    } else if(spec.equals("CS") || spec.equals("C-S")) {
      if (isOtherActive) {
        feats.addFeature(MorphoFeatureType.OTHER, "Cs");
      }
      feats.setAltTag("CS");

    } else if(spec.startsWith("CL")) {
      feats.setAltTag("CL");
      if(spec.contains("suj") || spec.equals("CL-S-3fp")) {//"CL-S-3fp" is equivalent to suj
        if (isOtherActive) {
          feats.addFeature(MorphoFeatureType.OTHER,"Sbj");
        }
        feats.setAltTag("CLS");

      } else if(spec.contains("obj")) {
        if (isOtherActive) {
          feats.addFeature(MorphoFeatureType.OTHER, "Obj");
        }
        feats.setAltTag("CLO");

      } else if(spec.contains("refl")) {
        if (isOtherActive) {
          feats.addFeature(MorphoFeatureType.OTHER, "Rfl");
        }
        feats.setAltTag("CLR");
      }

      addPhiFeatures(feats,spec);

    } else if(spec.startsWith("D")) {
      feats.setAltTag("DET");
      if(spec.contains("int")) {
        if (isOtherActive) {
          feats.addFeature(MorphoFeatureType.OTHER, "dint");
        }
        feats.setAltTag("DETWH");
      }

      addPhiFeatures(feats,spec);

    } else if(spec.startsWith("N")) {
      feats.setAltTag("N");//TODO These are usually N-card...make these CD?
      if(spec.contains("P")) {
        if (isOtherActive) {
          feats.addFeature(MorphoFeatureType.OTHER, "Np");
        }
        feats.setAltTag("NPP");

      } else if(spec.contains("C")) {
        if (isOtherActive) {
          feats.addFeature(MorphoFeatureType.OTHER, "Nc");
        }
        feats.setAltTag("NC");
      }

      addPhiFeatures(feats,spec);

    } else if(spec.startsWith("PRO")) {
      feats.setAltTag("PRO");
      if(spec.contains("int")) {
        if (isOtherActive) {
          feats.addFeature(MorphoFeatureType.OTHER,"Ni");
        }
        feats.setAltTag("PROWH");

      } else if(spec.contains("rel")) {
        if (isOtherActive) {
          feats.addFeature(MorphoFeatureType.OTHER, "Nr");
        }
        feats.setAltTag("PROREL");
      }

      addPhiFeatures(feats,spec);

    } else if(spec.startsWith("V")) {
      feats.setAltTag("V");
      if(spec.contains("Y")) {
        if (isOtherActive) {
          feats.addFeature(MorphoFeatureType.OTHER,"Vp");
        }
        feats.setAltTag("VIMP");

      } else if(spec.contains("W")) {
        if (isOtherActive) {
          feats.addFeature(MorphoFeatureType.OTHER, "Vf");
        }
        feats.setAltTag("VINF");
       
      } else if(spec.contains("S") || spec.contains("T")) {
        if (isOtherActive) {
          feats.addFeature(MorphoFeatureType.OTHER, "Vs");
        }
        feats.setAltTag("VS");
       
      } else if(spec.contains("K")) {
        if (isOtherActive) {
          feats.addFeature(MorphoFeatureType.OTHER, "Vp");
        }
        feats.setAltTag("VPP");
       
      } else if(spec.contains("G")) {
        if (isOtherActive) {
          feats.addFeature(MorphoFeatureType.OTHER, "Vr");
        }
        feats.setAltTag("VPR");
      }
     
      addPhiFeatures(feats,spec);
   
    } else if(spec.equals("P") || spec.equals("I")) {
      feats.setAltTag(spec);
     
    }
//    else {
//      System.err.println("Could not map spec: " + spec);
//    }

    return feats;
  }

  private void addPhiFeatures(MorphoFeatures feats, String spec) {
    String[] toks = spec.split("\\-+");

    String morphStr;
    if(toks.length == 3 && toks[0].equals("PRO") && toks[2].equals("neg"))
      morphStr = toks[1];
    else
      morphStr = toks[toks.length-1];

    //wsg2011: The analyses have mixed casing....
    morphStr = morphStr.toLowerCase();

    if(isActive(MorphoFeatureType.GEN)) {
      if(morphStr.contains("m"))
        feats.addFeature(MorphoFeatureType.GEN, genVals[0]);
      else if(morphStr.contains("f"))
        feats.addFeature(MorphoFeatureType.GEN, genVals[1]);
    }

    if(isActive(MorphoFeatureType.PER)) {
      if(morphStr.contains("1"))
        feats.addFeature(MorphoFeatureType.PER, perVals[0]);
      else if(morphStr.contains("2"))
        feats.addFeature(MorphoFeatureType.PER, perVals[1]);
      else if(morphStr.contains("3"))
        feats.addFeature(MorphoFeatureType.PER, perVals[2]);
    }

    if(isActive(MorphoFeatureType.NUM)) {
      if(morphStr.contains("s"))
        feats.addFeature(MorphoFeatureType.NUM, numVals[0]);
      else if(morphStr.contains("p"))
        feats.addFeature(MorphoFeatureType.NUM, numVals[1]);
    }
  }


  /**
   * For debugging
   *
   * @param args
   */
  public static void main(String[] args) {
    if(args.length != 1) {
      System.err.printf("Usage: java %s file%n", FrenchMorphoFeatureSpecification.class.getName());
      System.exit(-1);
    }

    try {
      BufferedReader br = new BufferedReader(new FileReader(args[0]));
      MorphoFeatureSpecification mfs = new FrenchMorphoFeatureSpecification();

      //Activate all features for debugging
      mfs.activate(MorphoFeatureType.GEN);
      mfs.activate(MorphoFeatureType.NUM);
      mfs.activate(MorphoFeatureType.PER);

      for(String line; (line = br.readLine()) != null;) {
        MorphoFeatures feats = mfs.strToFeatures(line);
        System.out.printf("%s\t%s%n", line.trim(),feats.toString());
      }

      br.close();

    } catch (FileNotFoundException e) {
      e.printStackTrace();
    } catch (IOException e) {
      e.printStackTrace();
    }
  }
}
TOP

Related Classes of edu.stanford.nlp.international.french.FrenchMorphoFeatureSpecification

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.