Package com.markwatson.nlp.propernames

Source Code of com.markwatson.nlp.propernames.Names

// Copyright 2003-2005.  Mark Watson (markw@markwatson.com).  All rights reserved.
// This software is released under the LGPL (www.fsf.org)
// THIS SOFTWARE COMES WITH NO WARRANTY

package com.markwatson.nlp.propernames;

import com.markwatson.data.ScoredList;

import java.util.*;
import java.io.*;


public class Names {
    /**
     *        Facade method: get all place and human names from a text string
     */
    public ScoredList[] getProperNames(String [] words) {
        ScoredList placeNames = new ScoredList();
        ScoredList humanNames = new ScoredList();
        ScoredList[] ret = new ScoredList[2];
        ret[0] = humanNames; ret[1] = placeNames;
        if (words == null) return ret;
        for (int i=0; i<words.length; i++) {
            // 5 word human names:
            if (isHumanName(words, i, 5)) {
                String s = words[i] + " " + words[i+1] + " " + words[i+2] + " " + words[i+3] + " " + words[i+4];
                humanNames.addValue(s);
                i += 4;
                continue;
            }
            // 4 word human names:
            if (isHumanName(words, i, 4)) {
                String s = words[i] + " " + words[i+1] + " " + words[i+2] + " " + words[i+3];
                humanNames.addValue(s);
                i += 3;
                continue;
            }
            // 3 word names:
            if (isPlaceName(words, i, 3)) {
                String s = words[i] + " " + words[i+1] + " " + words[i+2];
                placeNames.addValue(s);
                i += 2;
                continue;
            }
            if (isHumanName(words, i, 3)) {
                String s = words[i] + " " + words[i+1] + " " + words[i+2];
                humanNames.addValue(s);
                i += 2;
                continue;
            }
            // 2 word names:
            if (isPlaceName(words, i, 2)) {
                String s = words[i] + " " + words[i+1];
                placeNames.addValue(s);
                i += 1;
                continue;
            }
            if (isHumanName(words, i, 2)) {
                String s = words[i] + " " + words[i+1];
                humanNames.addValue(s);
                i += 1;
                continue;
            }
            // 1 word names:
            if (isPlaceName(words, i, 1)) {
                placeNames.addValue(words[i]);
                continue;
            }
        }
        return ret;
    }
    public ScoredList[] getProperNames(String s) {
        String [] words = com.knowledgebooks.nlp.util.Tokenizer.wordsToArray(s);
        return getProperNames(words);
    }

    public boolean isPlaceName(String [] words, int startIndex, int numWords) {
        if ((startIndex + numWords) > words.lengthreturn false;
        if (numWords == 1) return isPlaceName(words[startIndex]);
        String s = "";
        for (int i=startIndex; i<(startIndex + numWords); i++) {
            if (i < (startIndex + numWords - 1)) s = s + words[startIndex] + " ";
            else                                 s = s + words[startIndex];
        }
        return isPlaceName(s);
    }

    public boolean isPlaceName(String name) {
        return placeNameHash.get(name) != null;
    }

    public boolean isHumanName(String s) {
        String [] ss = com.knowledgebooks.nlp.util.Tokenizer.wordsToArray(s);
        //System.out.print("Tokens: "); for (int i=0; i<ss.length; i++) System.out.print(ss[i] + " "); System.out.println();
        if (ss == nullreturn false;
        return isHumanName(ss);
    }


    static private String s1[] = new String[1];
    static private String s2[] = new String[2];
    static private String s3[] = new String[3];
    static private String s4[] = new String[4];
    static private String s5[] = new String[5];

    public boolean isHumanName(String[] words, int index, int numWords) {
        if ((index + numWords) > words.lengthreturn false;
        if (numWords == 1) {
            s1[0] = words[index];   return isHumanName(s1);
        }
        if (numWords == 2) {
            s2[0] = words[index]; s2[1] = words[index+1]return isHumanName(s2);
        }
        if (numWords == 3) {
            s3[0] = words[index]; s3[1] = words[index+1]; s3[2] = words[index+2]; return isHumanName(s3);
        }
        if (numWords == 4) {
            s4[0] = words[index]; s4[1] = words[index+1]; s4[2] = words[index+2]; s4[3] = words[index+3]; return isHumanName(s4);
        }
        if (numWords == 5) {
            s5[0] = words[index]; s5[1] = words[index+1]; s5[2] = words[index+2]; s5[3] = words[index+3]; s5[4] = words[index+4]; return isHumanName(s5);
        }
        return false;
    }

    public boolean isHumanName(String[] words) {
        int len = words.length;
        if (len == 1) {
            if (lastNameHash.get(words[0]) != null) return true;
        } else if (len == 2) {
            if (firstNameHash.get(words[0]) != null && lastNameHash.get(words[1]) != null) return true;
            if (prefixHash.get(words[0])    != null && lastNameHash.get(words[1]) != null) return true;
        } else if (len == 3) {
            if (firstNameHash.get(words[0]) != null &&
                firstNameHash.get(words[1]) != null &&
                lastNameHash.get(words[2]) != null) return true;
            if (prefixHash.get(words[0]) != null &&
                firstNameHash.get(words[1]) != null &&
                lastNameHash.get(words[2]) != null) return true;
            if (prefixHash.get(words[0]) != null &&
                words[1].equals(".") &&
                lastNameHash.get(words[2]) != null) return true;
        } else if (len == 4) {
            if (firstNameHash.get(words[0]) != null &&
                firstNameHash.get(words[1]) != null &&
                firstNameHash.get(words[2]) != null &&
                lastNameHash.get(words[3]) != null) return true;
            if (firstNameHash.get(words[0]) != null &&
                words[1].length() == 1 &&
                words[2].equals(".") &&
                lastNameHash.get(words[3]) != null) return true;
            if (prefixHash.get(words[0]) != null &&
                firstNameHash.get(words[1]) != null &&
                firstNameHash.get(words[2]) != null &&
                lastNameHash.get(words[3]) != null) return true;
            if (prefixHash.get(words[0]) != null &&
                firstNameHash.get(words[1]) != null &&
                words[2].length()==1 &&
                lastNameHash.get(words[3]) != null) return true;
        } else if (len == 5) {
            if (firstNameHash.get(words[0]) != null &&
                firstNameHash.get(words[1]) != null &&
                words[2].length()==1 &&
                words[3].equals(".") &&
                lastNameHash.get(words[4]) != null) return true;
            if (prefixHash.get(words[0]) != null &&
                firstNameHash.get(words[1]) != null &&
                words[2].length()==1 &&
                words[3].equals(".") &&
                lastNameHash.get(words[4]) != null) return true;
        }
        return false;
    }

    public Names() {
        this("data/propername/propername.ser");
    }

    public Names(String dataPath) {
        if (lastNameHash != null) return; // static data already loaded
        try {
            InputStream ins =
                this.getClass().getClassLoader().getResourceAsStream(dataPath);
            if (ins == null) {
                ins = this.getClass().getClassLoader().getResourceAsStream(dataPath);
            }
            if (ins == null) {
                ins = new FileInputStream(dataPath);
            }
            if (ins == null) {
                System.out.println("\ncom.markwatson.nlp.propernames.Names: failed to open '" + dataPath + "'\n");
                System.exit(1);
            } else {
                ObjectInputStream p = new ObjectInputStream(ins);
                lastNameHash = (Hashtable) p.readObject();
                firstNameHash = (Hashtable) p.readObject();
                placeNameHash = (Hashtable) p.readObject();
                prefixHash = (Hashtable) p.readObject();
                ins.close();
            }
        } catch (Exception ee) {
            ee.printStackTrace();
        }
    }

    static public void main(String[] args) {
        Names names = new Names();
        // initialize everything, before printing any output - trying to see what is taking so long!
        if (args.length>0) {
            ScoredList[] ret = names.getProperNames(args[0]);
            System.out.println("Human names: " + ret[0].getValuesAsString());
            System.out.println("Place names: " + ret[1].getValuesAsString());
        } else {
            names.isPlaceName("Paris");
            names.isHumanName("President Bush");
            names.isHumanName("President George Bush");
            names.isHumanName("President George W. Bush");
            System.out.println("Initialization complete....");
            System.out.println("Paris: " + names.isPlaceName("Paris"));
            System.out.println("Mexico: " + names.isPlaceName("Mexico"));
            System.out.println("Fresno: " + names.isPlaceName("Fresno"));
            System.out.println("Moscow: " + names.isPlaceName("Moscow"));
            System.out.println("France: " + names.isPlaceName("France"));
            System.out.println("Los Angeles: " + names.isPlaceName("Los Angeles"));
            System.out.println("President Bush: " + names.isHumanName("President Bush"));
            System.out.println("President George Bush: " + names.isHumanName("President George Bush"));
            System.out.println("President George W. Bush: " + names.isHumanName("President George W. Bush"));
            System.out.println("George W. Bush: " + names.isHumanName("George W. Bush"));
            System.out.println("Senator Barbara Boxer: " + names.isHumanName("Senator Barbara Boxer"));
            System.out.println("King Smith: " + names.isHumanName("King Smith"));
            ScoredList[] ret = names.getProperNames("George Bush played golf. President George W. Bush went to London England, Paris France and Mexico to see Mary Smith in Moscow. President Bush will return home Monday.");
            System.out.println("Human names: " + ret[0].getValuesAsString());
            System.out.println("Place names: " + ret[1].getValuesAsString());
        }
    }

    static Hashtable lastNameHash = null;
    static Hashtable firstNameHash = null;
    static Hashtable placeNameHash = null; // cache for database access
    static Hashtable prefixHash = null;

}
TOP

Related Classes of com.markwatson.nlp.propernames.Names

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.