Package main

Source Code of main.Logic

/***************************************************************************
  Clinical Named Entity Recognizer and Normalizer(Clinical NERC), (v0.1).
    Copyright (C) 2013  Azad Dehghan
   
    Contact:  a.dehghan@manchester.ac.uk
*****************************************************************************/
package main;
import gate.Annotation;
import gate.Factory;
import gate.FeatureMap;
import gate.creole.ResourceInstantiationException;

import io.FileOps;
import io.RecurseFolder;

import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import classification.MappingPipeline;

import serialize.Serializ;
import stem.Stemmer;


public class Logic {

  // Single stemmer instance shared by every getStem(..) call: getStem feeds it the
  // characters of a term through add(..), runs stem() and reads the result back.
  private static Stemmer stem = new Stemmer();
 
  /**
   * construct feature file for CRF++
   * @param gateDoc
   * @param featureFile
   */
  public static void constructFeatureFile(gate.Document gateDoc, ArrayList<String> featureFile)
  { 
    List<Annotation> sentence = new ArrayList<Annotation>( gateDoc.getAnnotations().get("Sentence") );
    Collections.sort(sentence, gate.Utils.OFFSET_COMPARATOR);
   
    String aSentence = null;
   
    for(Annotation s: sentence)
    {
      aSentence = "";
      List<Annotation> token = new ArrayList<Annotation>( gateDoc.getAnnotations().get("Token", s.getStartNode().getOffset(), s.getEndNode().getOffset()) );
      Collections.sort(token, gate.Utils.OFFSET_COMPARATOR);
     
      for(int i = 0; i < token.size(); i++)
      {
        FeatureMap tAttributes = token.get(i).getFeatures();
        String str = gate.Utils.stringFor(gateDoc, token.get(i));   
        String orth = ""+tAttributes.get("orth"); if(orth.trim().equals("null")) {orth = "O";}
       
        //construct feature vectors
        aSentence += gateDoc.getName()+":"+token.get(i).getId() + "\t" + str +"\t"+ tAttributes.get("category") +
              "\t"+ tAttributes.get("chunk") + "\t" + tAttributes.get("kind") + "\t" + orth +
              "\t"+ getStem(str) + "\n";
      }
      featureFile.add(aSentence);
    }
    featureFile.add("\n");//insert empty line after each document
  }
  /**
   * Porter's stemmer algorithm
   * @param term
   * @return stem of term
   */
  public static String getStem(String term
  {
    term = term.toLowerCase();
   
    char[] c = term.toCharArray();
    for(int i=0;i<c.length;i++)
    {
      stem.add(c[i]);
    }
    stem.stem();
   
  return stem.toString();
  }
 
 
  /**
   * process sentence/string from IO/Socket
   * @param s text input
   * @throws InterruptedException
   * @throws IOException
   * @throws ResourceInstantiationException
   */
  public static String opt1(String s, MLPipeline p, MappingPipeline mp, Boolean nercFlag)
  { 
    gate.Document gateDoc = null;
   
    try {
      gateDoc = Factory.newDocument(s);
      gateDoc.setName("x");
      gateDoc = p.ml(gateDoc, mp, nercFlag);
    } catch (Exception e) {
      System.err.println("Logic.opt1(..): " +e.getMessage());
    }

  return Event.getEvents(gateDoc);
 
 
  /**
   * process a corpus (text document(s)) and generate GATE xml outputs
   * @param source_dir corpus/document(s) to process
   * @param op_dir output directory
   * @throws InterruptedException
   * @throws IOException
   * @throws ResourceInstantiationException
   */
  public static void opt2(String source_dir, String op_dir, Boolean nercFlag)
  {
    MLPipeline ml_p = new MLPipeline();
   
    try {
      ml_p.ml(source_dir, op_dir, nercFlag);
    } catch (Exception e) {
      System.err.println("Logic.opt2(..): " + e.getMessage());
    }
    System.out.println(".opt2.processing/output complete");
  }
 
  public static void opt3(String source_dir, String op_dir, Boolean nercFlag)
  {
    gate.Document gateDoc = null;
    MLPipeline ml_p = new MLPipeline();
    MappingPipeline mp = null;
   
    try {
      if(nercFlag)
        mp = new MappingPipeline();
    } catch (MalformedURLException e1) {
    }
   
    ArrayList<File> fileList = RecurseFolder.getFileList(source_dir);
    for(File f: fileList)
    {
      try {
        gateDoc = Factory.newDocument(FileOps.getFileContent(f.toURI().toURL()));
        gateDoc.setName(f.getName());
        gateDoc = ml_p.ml(gateDoc, mp, nercFlag);
               
      } catch (Exception e) {
        System.err.println("Logic.opt3(..): " + e.getMessage());
     
    Serializ.serialize(gateDoc, op_dir);
    }
    Factory.deleteResource(gateDoc);
    System.out.println(".opt3.processing/output complete");
 
 
  /**
   * process corpus and generate a offset file (MedNERC_offset.txt) with TEid results
   * @param source_dir
   * @param op_dir
   * @throws InterruptedException
   * @throws IOException
   * @throws ResourceInstantiationException
   */
  public static void opt4(String source_dir, String op_dir, Boolean nercFlag)
  {
    MappingPipeline mp = null;
   
    try {
      mp = new MappingPipeline();
    } catch (MalformedURLException e1) {
      System.err.println("Logic.op4(..): " + e1.getMessage());
    }
   
    MLPipeline ml_p = new MLPipeline();
    String all_EVENTs = "";
    gate.Document gateDoc = null;
   
    ArrayList<File> fileList = RecurseFolder.getFileList(source_dir);
    for(File f: fileList)
    {
      System.out.print("\r.processing: " + f.getName() + "\r");
     
      all_EVENTs += "filename:"+f.getName() + "\n";
      try {
        gateDoc = Factory.newDocument(FileOps.getFileContent(f.toURI().toURL()));
        gateDoc.setName(f.getName());
        gateDoc = ml_p.ml(gateDoc, mp, nercFlag);
      } catch (Exception e) {
        System.err.println("Logic.opt4(..): " +e.getMessage());
      }
      all_EVENTs += Event.getEvents(gateDoc) + "\n";
    }
    FileOps.saveFile(op_dir +"/" + "NER_offsets.txt", all_EVENTs);
    Factory.deleteResource(gateDoc);
    System.out.println(".opt4.processing/output complete");
 
 
}
TOP

Related Classes of main.Logic

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.