Package rouge.serializer

Source Code of rouge.serializer.RougeSeeFormatSerializer

package rouge.serializer;

import interfaces.IRougeSummaryModel;
import interfaces.ISerializer;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileFilter;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;

import common.RougeSummaryModel;

import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;

public class RougeSeeFormatSerializer implements ISerializer
{
    public static Boolean DEBUG = false;

    @Override
    public void serialize(Map<IRougeSummaryModel, Set<IRougeSummaryModel>> systemToModelsMapping, File outputDirectory)
    {
  if (outputDirectory == null)
  {
      throw new IllegalArgumentException("Output directory can not be null");
  }

  prepOutputFolders(outputDirectory);

  for (IRougeSummaryModel system : systemToModelsMapping.keySet())
  {
      writeToFile(new File(outputDirectory + "/" + "systems", system.getSourceFile().getName() + ".html"), createOutput(system));
  }

  for (Set<IRougeSummaryModel> models : systemToModelsMapping.values())
  {
      for (IRougeSummaryModel model : models)
      {
    writeToFile(new File(outputDirectory + "/" + "models", model.getSourceFile().getName() + ".html"), createOutput(model));
      }

  }

  writeToFile(new File(outputDirectory, "settings.xml"), createSettingXML(systemToModelsMapping));
    }

    @Override
    public Map<IRougeSummaryModel, Set<IRougeSummaryModel>> prepareForRouge(File inputDirectory, File goldStandardDirectory)
    {
  if (inputDirectory == null)
  {
      throw new IllegalArgumentException("Input directory can not be null");
  }

  Map<File, List<File>> fileMapping = mapSystemToModelFiles(inputDirectory, goldStandardDirectory);
  Map<IRougeSummaryModel, Set<IRougeSummaryModel>> systemToModelsMapping = createRougeSummaryModels(fileMapping);

  return systemToModelsMapping;
    }

    private void writeToFile(File outputFile, String data)
    {
  try
  {
      outputFile.createNewFile();
      FileOutputStream is = new FileOutputStream(outputFile);
      OutputStreamWriter osw = new OutputStreamWriter(is, "UTF-8");
      Writer w = new BufferedWriter(osw);
      w.write(data);
      w.close();
  }
  catch (FileNotFoundException e)
  {
      e.printStackTrace();
  }
  catch (IOException e)
  {
      e.printStackTrace();
  }
    }

    private Map<File, List<File>> mapSystemToModelFiles(File inputDirectory, File goldStandardDirectory)
    {
  File modelsFolder = goldStandardDirectory;

  if (modelsFolder == null)
  {
      System.out.println("models folder not found in the location specified: " + inputDirectory);
      return null;
  }
  FileFilter fileFilter = new FileFilter()
  {
      @Override
      public boolean accept(File arg0)
      {
    return arg0.isFile();
      }
  };

  List<File> systemFiles = Arrays.asList(inputDirectory.listFiles(fileFilter));
  List<File> modelFiles = Arrays.asList(modelsFolder.listFiles(fileFilter));

  Map<File, List<File>> systemToModelsMapping = new HashMap<File, List<File>>();

  for (File f : systemFiles)
  {
      if (DEBUG)
      {
    System.out.println("Matching models for: " + f);
      }
      String systemFileName = FilenameUtils.removeExtension(f.getName());
      if (DEBUG)
      {
    System.out.println("\tSystem ID: " + systemFileName);
      }

      List<File> matchedModels = new ArrayList<File>();

      for (File model : modelFiles)
      {
    if (FilenameUtils.removeExtension(model.getName()).equals(systemFileName))
    {
        if (DEBUG)
        {
      System.out.println("\t\tModel file: " + model + " Model ID: " + FilenameUtils.removeExtension(model.getName()));
        }
        matchedModels.add(model);
    }
      }

      systemToModelsMapping.put(f, matchedModels);
  }

  return systemToModelsMapping;
    }

    private String createSettingXML(Map<IRougeSummaryModel, Set<IRougeSummaryModel>> systemToModelsMapping)
    {
  StringBuilder sb = new StringBuilder();

  sb.append("<ROUGE_EVAL version=\"1.5.5\">\n");

  int id = 1;

  for (IRougeSummaryModel system : systemToModelsMapping.keySet())
  {
      sb.append("<EVAL ID=\"" + id + "\">\n");
      sb.append("<PEER-ROOT>systems</PEER-ROOT>\n");
      sb.append("<MODEL-ROOT>models</MODEL-ROOT>\n");
      sb.append("<INPUT-FORMAT TYPE=\"SEE\"></INPUT-FORMAT>\n");
      int pid = 1;
      sb.append("<PEERS>\n");
      sb.append("<P ID=\"" + pid++ + "\">" + system.getSourceFile().getName() + ".html" + "</P>\n");
      sb.append("</PEERS>\n");

      int mid = 1;
      sb.append("<MODELS>\n");
      for (IRougeSummaryModel model : systemToModelsMapping.get(system))
      {
    sb.append("<M ID=\"" + mid++ + "\">" + model.getSourceFile().getName() + ".html" + "</M>\n");
      }
      sb.append("</MODELS>\n");
      sb.append("</EVAL>\n");
      id++;
  }
  sb.append("</ROUGE_EVAL>");

  return sb.toString();
    }

    private String createOutput(IRougeSummaryModel summary)
    {
  StringBuilder sb = new StringBuilder();

  sb.append("<html>\n");
  sb.append("<head>\n");
  sb.append("<title>" + summary.getSourceFile().getName() + ".html" + "</title>\n");
  sb.append("</head>\n");
  sb.append("<body bgcolor=\"white\">\n");

  int id = 1;
  String title = summary.getTitle();
  sb.append("<a name=\"" + id + "\">[" + id + "]</a> <a href=\"#" + id + "\" id=" + id + ">");
  sb.append(title);
  sb.append("</a>\n");
  id++;
  for (String sentence : summary.getSentences())
  {
      String sent = sentence.toString();
      sb.append("<a name=\"" + id + "\">[" + id + "]</a> <a href=\"#" + id + "\" id=" + id + ">");
      sb.append(sent);
      sb.append("</a>\n");
      id++;
  }
  sb.append("</body>\n");
  sb.append("</html>");

  return sb.toString();
    }

    private boolean prepOutputFolders(File output)
    {
  String s = "y";
  if (output.exists() && output.isDirectory())
  {
      System.out.println("WARNING: Output folder exists, proceeding with the process will delete the contents of the folder");
      System.out.print("[y/n]");
      BufferedReader bufferRead = new BufferedReader(new InputStreamReader(System.in));
      try
      {
    s = bufferRead.readLine();
      }
      catch (IOException e1)
      {
    e1.printStackTrace();
      }

      if (s.equals("y"))
      {
    try
    {
        FileUtils.deleteDirectory(output);
    }
    catch (IOException e)
    {
        e.printStackTrace();
        return false;
    }
      }
      else
      {
    System.out.println("Process canceled.");
    return false;
      }
  }

  output.mkdir();

  File outputSystemFolder = new File(output, "systems");
  File outputModelFolder = new File(output, "models");
  outputModelFolder.mkdir();
  outputSystemFolder.mkdir();
  return true;
    }

    private Map<IRougeSummaryModel, Set<IRougeSummaryModel>> createRougeSummaryModels(Map<File, List<File>> fileMapping)
    {
  Map<IRougeSummaryModel, Set<IRougeSummaryModel>> mapping = new HashMap<IRougeSummaryModel, Set<IRougeSummaryModel>>();

  Properties props = new Properties();
  props.put("annotators", "tokenize ,ssplit");

  StanfordCoreNLP stanford = new StanfordCoreNLP(props);

  for (File modelFile : fileMapping.keySet())
  {
      RougeSummaryModel model = parseText(modelFile, stanford);

      Set<IRougeSummaryModel> listOfSystemFiles = new HashSet<IRougeSummaryModel>();
      for (File systemFile : fileMapping.get(modelFile))
      {
    RougeSummaryModel system = parseText(systemFile, stanford);
    listOfSystemFiles.add(system);
      }

      mapping.put(model, listOfSystemFiles);
  }

  return mapping;
    }

    private RougeSummaryModel parseText(File source, StanfordCoreNLP stanford)
    {
  String text = readTextFromFile(source);

  RougeSummaryModel summaryModel = new RougeSummaryModel(source);

  Annotation annotation = new Annotation(text);
  stanford.annotate(annotation);
  int id = 0;
  for (CoreMap sentence : annotation.get(SentencesAnnotation.class))
  {
      String sent = sentence.toString();
      if (id == 0)
      {
    summaryModel.setTitle(sent);
      }
      else
      {
    summaryModel.addSentence(sent);
      }
      id++;
  }

  return summaryModel;
    }

    private String readTextFromFile(File model)
    {
  byte[] encoded = null;

  try
  {
      encoded = Files.readAllBytes(Paths.get(model.getAbsolutePath()));
  }
  catch (IOException e)
  {
      e.printStackTrace();
  }

  String text = Charset.forName("UTF-8").decode(ByteBuffer.wrap(encoded)).toString().replaceAll("[\n|\r|\r\n]+", " ").trim();
  text = text.replaceAll("[\\u200C\\u200D\\u066C\\u202B]", "");
  return text;
    }
}
TOP

Related Classes of rouge.serializer.RougeSeeFormatSerializer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.