package org.sf.mustru.test;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Date;
import junit.framework.TestCase;
import org.apache.log4j.PropertyConfigurator;
import org.sf.mustru.utils.Constants;
import org.sf.mustru.utils.LingpipeTools;
import org.sf.mustru.utils.StringTools;
import com.aliasi.util.Files;
/**
 * Runs {@link LingpipeTools} over sample text: named-entity extraction,
 * part-of-speech tagging and tokenization.
 *
 * The tests make no assertions on the linguistic output. Each one prints its
 * result and timings to standard output, and the entity and POS tests also
 * write a report file under <code>Constants.TESTINGDIR/samples</code>.
 * An I/O failure makes the test error out instead of being reported on
 * standard output only.
 *
 * Only {@link #testPOS()} carries the <code>test</code> prefix that JUnit 3
 * uses to discover test methods; the <code>ttest</code>-prefixed methods are
 * therefore not picked up by the runner (presumably disabled on purpose).
 */
public class TestLingpipeTools extends TestCase
{
    /** Rule written after each result in the generated report files. */
    private static final String SEPARATOR = "------------------------------------------------------------";

    /** Instance under test; re-created by {@link #setUp()} before every test. */
    private LingpipeTools ling = null;

    /**
     * Configures log4j from <code>Constants.LOG4J_FILE</code> and creates a
     * fresh LingpipeTools instance.
     */
    protected void setUp()
    {
        PropertyConfigurator.configure(Constants.LOG4J_FILE);
        ling = new LingpipeTools();
    }

    /**
     * Test the extraction of named entities from text. Prints the annotated
     * text and the timings, and writes the annotated text to
     * <code>ne_results&lt;i&gt;.txt</code> in the samples directory.
     *
     * @throws RuntimeException wrapping the IOException if the sample file
     *         cannot be read or the report cannot be written
     */
    public void ttestIE()
    {
        try
        {
            long started = System.currentTimeMillis();
            ling.setforIE();
            long initTime = System.currentTimeMillis() - started;

            long execTime = 0;
            //*-- currently a single sample (entities1.txt); widen the bound to process more
            for (int i = 1; i < 2; i++)
            {
                String text = StringTools.filterChars(Files.readFromFile(sampleFile("entities" + i + ".txt")));

                // NOTE(review): the sample file is still read (so a missing file is detected),
                // but its contents are replaced by the hard-coded passage below -- confirm
                // which of the two inputs is actually intended.
                // Alternative inputs:
                //text = "Jones, F.W. , the alley cat in the Capital of Mongolia was not well behaved for the Presidency and ran awaya with rapier at about 10.5 degrees " +
                //       " centigrade and 5 kms and about 10 acres and around 10 gallons at the rate of 10 miles per sec. and is about 50 Kgs. for Rs. 30 and 55 Rs.";
                //text = "Iranian Prime Minister Mir-Hossein Mousavi is currently in Damascus, and diplomats said he would seek Syrian help in preventing " +
                //       "a total Arab breach with Tehran.";
                //text = "Are timings broken down by component ?";
                text = "Rhone-Poulenc is a French-based chemical company. " +
                       "Weinberger said the platform had been used " +
                       "as a military base by Iran and that the attack responded to an Iranian Silkworm missile strike on the U.S.-flagged " +
                       "Kuwaiti tanker Sea Isle City on Friday. The trials will be conducted in New York at the Beth Israel Medical Centre. ";

                started = System.currentTimeMillis();
                String annotText = ling.getEntities(text);
                execTime += System.currentTimeMillis() - started;

                System.out.println("Annotated text: " + annotText);
                writeReport(sampleFile("ne_results" + i + ".txt"), "Annotated text: " + i + " " + annotText);
            } //*-- end of for

            System.out.println("Time for Initialization " + initTime);
            System.out.println("Time for execution " + execTime);
        }
        catch (IOException ie)
        {
            // Surface the failure to the test runner instead of letting the test pass
            throw new RuntimeException("IO Error: " + ie.getMessage(), ie);
        }
    } //*-- end of testIE

    /**
     * Test the POS tagger. Prints the tagged text and the timings, and writes
     * the tagged text to <code>pos_results.txt</code> in the samples directory.
     *
     * @throws RuntimeException wrapping the IOException if the report cannot
     *         be written
     */
    public void testPOS()
    {
        try
        {
            long started = System.currentTimeMillis();
            ling.setforPOS();
            long initTime = System.currentTimeMillis() - started;

            String text = "Once Eknath was writing certain accounts for his teacher. When he tallied the figures, he noticed a difference of a pai. He tallied it once again, but could find the error. It went on for many hours, it was past midnight, but Eknath kept at the accounts in the light of an oil lamp. When Janardhan Swamy saw the light, he came to see what was happening. He found Eknath engrossed in his work. Suddenly, Eknath found the mistake. \"Yes, I have found it.\" he said joyfully. \"What have you found that makes you so happy Eknath ?\", his teacher asked. Eknath was surprised and felt shy at the sight of his teacher. He narrated the whole story. His teacher was extremely pleased and blessed him.";
            // Alternative inputs:
            // text = "Are timings broken down by component ?";
            // text = "Jones, F.W. , the alley cat in the Capital of Mongolia was not well behaved for the Presidency and ran awaya with rapier at about 10.5 degrees " +
            //        " centigrade and 5 kms and 75 about 10 acres and around 10 gallons at the rate of 10 miles per sec. and is about 50 Kgs. for Rs. 30 and 55 Rs.";
            // text = "how heavy is the Earth ?";
            // text = "The state of affairs with regard to children is a compelling factor, they being 40 % of our population and with every third household having a working child and every fourth child in the age group of 5 to 15 being employed.";

            started = System.currentTimeMillis();
            String posText = ling.getPOS(text, true);
            long execTime = System.currentTimeMillis() - started;

            System.out.println("Tagged text: " + posText);
            writeReport(sampleFile("pos_results.txt"), "POS Tagged text: " + posText);

            System.out.println("Time for Initialization " + initTime);
            System.out.println("Time for execution " + execTime);
        }
        catch (IOException ie)
        {
            // Surface the failure to the test runner instead of letting the test pass
            throw new RuntimeException("IO Error: " + ie.getMessage(), ie);
        }
    } //*-- end of testPOS

    /**
     * Test the Tokenizer. Prints every token with its index, followed by the
     * timings.
     */
    public void ttestTokenizer()
    {
        // No initialization call is made before tokenizing, so there is nothing to time
        long initTime = 0;

        // Alternative inputs:
        // text = "Iranian Prime Minister Mir-Hossein Mousavi is currently in Damascus, and diplomats said he would seek Syrian help in preventing " +
        //        "a total Arab breach with Tehran. Rhone-Poulenc is a French-based chemical company. Weinberger said the platform had been used " +
        //        "as a military base by Iran and that the attack responded to an Iranian Silkworm missile strike on the U.S.-flagged " +
        //        "Kuwaiti tanker Sea Isle City on Friday. The trials will be conducted in New York at the Beth Israel Medical Centre. ";
        // text = "Are timings broken down by component ?";
        // text = "Jones, F.W. , the alley cat in the Capital of Mongolia was not well behaved for the Presidency and ran awaya with rapier at about 10.5 degrees " +
        //        " centigrade and 5 kms and 75 about 10 acres and around 10 gallons at the rate of 10 miles per sec. and is about 50 Kgs. for Rs. 30 and 55 Rs.";
        // text = "how heavy is the Earth ?";
        String text = "The state of affairs with regard to children is a compelling factor, they being 40 % of our population and with every third household having a working child and every fourth child in the age group of 5$ being employed.";

        long started = System.currentTimeMillis();
        String[] tokens = ling.tokenizer(text);
        long execTime = System.currentTimeMillis() - started;

        for (int i = 0; i < tokens.length; i++)
        {
            System.out.println(i + ": " + " " + tokens[i]);
        }
        System.out.println("Time for Initialization " + initTime);
        System.out.println("Time for execution " + execTime);
    } //*-- end of testTokenize

    /**
     * Builds the path of a file inside the samples directory of the testing area.
     *
     * @param name file name relative to the samples directory
     * @return the file <code>Constants.TESTINGDIR/samples/name</code>
     */
    private static File sampleFile(String name)
    {
        return new File(Constants.TESTINGDIR + File.separator + "samples" + File.separator + name);
    }

    /**
     * Writes a one-result report: the given line followed by a separator rule.
     * The file is overwritten if it already exists and is always closed.
     *
     * @param target file to (over)write
     * @param resultLine first line of the report
     * @throws IOException if the file cannot be opened or written
     */
    private static void writeReport(File target, String resultLine) throws IOException
    {
        PrintWriter pw = new PrintWriter(new FileOutputStream(target));
        try
        {
            pw.println(resultLine);
            pw.println(SEPARATOR);
            // PrintWriter records write failures instead of throwing them; ask explicitly
            if (pw.checkError())
            {
                throw new IOException("Could not write " + target.getPath());
            }
        }
        finally
        {
            pw.close();
        }
    }
} //*-- end of class