// Source Code of it.halfone.parser.Parser

package it.halfone.parser;


import it.halfone.exception.ParseException;
import it.halfone.hava.search.SearchKnot;
import it.halfone.parser.event.Event;
import it.halfone.parser.token.Token;


import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Stack;


/**
 * Parser - 03/set/2011
 * 
 * E' il risultato della compilazione di una Regex, detta genitrice.
 * <p>
 * L'operazione fondamentale svolta da un Parser e' quella di 'leggere' una stringa ed estrapolare da essa le componenti di interesse (segnalate
 * mediante la Regex genitrice).
 * <p>
 * 
 * @author <a href="mailto:larosaandrea84@gmail.com"> Andrea </a>
 */
public class Parser {
  
  /*
   * NOTE DI IMPLEMENTAZIONE
   * 
   * Per ben comprendere il seguito conviene tenere a mente la proporzione, non completamente corretta, 
   * Symbol : Regex = Token : Parser. 
   * In linea generale, un Parser � un grafo, i cui nodi sono rappresentati da Token, e i cui archi sono rappresentati da Step; 
   * nel grafo due Token rivestono un ruolo particolare, e sono identificati come 'start' ed 'end'.
   * Il grafo viene memorizzato mediante una mappa che associa a ciascun Token la lista degli archi (cioe' Step) uscenti da quel Token.
   * 
   */


  private Map<Token, List<Step>> connectionsMap;
  private Token start;
  private Token end;


  /**
   * Default constructor
   */
  public Parser(Map<Token, List<Step>> stepMap, Token start, Token end) {
    this.connectionsMap = stepMap;
    this.start = start;
    this.end = end;
  }


  /**
   * @param input
   * @throws ParseException
   */
  public List<Stream> parseOrThrow(String input) throws ParseException {
    return parseInput(input);
  }


  /**
   * @param input
   * @return
   */
  public List<Stream> parse(String input) {
    List<Stream> retVal = null;
    try {
      retVal = parseInput(input);
    } catch (ParseException e) {
      // doNothing();
    }
    return retVal;
  }


  /**
   * @param input
   * @return
   */
  public Stream parseBest(String input) {
    List<Stream> streamList = parse(input);
    return streamList != null ? streamList.get(0) : null;
  }


  /**
   * @param input
   * @return
   */
  public boolean match(String input) {
    return parse(input) != null;
  }


  /**
   * @param input
   * @return
   */
  public String bestMatch(String input){
    String retVal = input;
    try {
      parseOrThrow(input);
    } catch (ParseException e) {
      retVal = e.getBestParse();
    }
    return retVal;
  }
  
  /**
   * @param input
   * @return
   * @throws ParseException
   */
  private List<Stream> parseInput(String input) throws ParseException {
    List<Stream> retVal = new ArrayList<Stream>();


    // il primo passo e' fittizio e ci porta nel Token start senza che accada alcun evento
    SearchKnot<Step> root = new SearchKnot<Step>(new Step(start, new ArrayList<Event>()), null);


    // la boundaryList rappresenta la frontiera di ricerca
    List<SearchKnot<Step>> boundaryList = new ArrayList<SearchKnot<Step>>();
    boundaryList.add(root);
    
    List<SearchKnot<Step>> newBoundaryList = new ArrayList<SearchKnot<Step>>();
    
    String globalBestParse = null;
    String firstCycleBestParse = "";
    char toParse;
    boolean matched = false;
    /*
     * Primo ciclo dell'algoritmo, controlla semplicemente se esiste un cammino che parte dal Token start guidato dalla stringa di input.
     * Se ad esempio i diretti successori del Token start sono 'a', 'b' e 'c', e se la stringa di input indizia per 'd', allora sicuramente
     * il match fallisce gia' in questa prima fase. 
     */
    for (int i = 0; i < input.length(); i++) {
      toParse = input.charAt(i);
      matched = false;
      for (SearchKnot<Step> boundary : boundaryList) {
        for (Step step : connectionsMap.get(boundary.getValue().getDestination())) {
          if (step.getDestination().match(toParse)) {
            newBoundaryList.add(new SearchKnot<Step>(step, boundary));
            if(matched == false){
              firstCycleBestParse += toParse;
              matched = true;
            }
          }
        }
      }
      boundaryList.clear();
      boundaryList.addAll(newBoundaryList);
      newBoundaryList.clear();
    }


    if(boundaryList.isEmpty() == true){
      globalBestParse = firstCycleBestParse;
    }
    
    /*
     * Finito il primo ciclo, per ciascun nodo di ricerca presente nella boundaryList esiste un cammino plausibile nel grafo dei Token
     * guidato dalla stringa di input. Il secondo ciclo serve ad appurare quali fra questi cammini plausibili sono validi, costruendo
     * con ciascun cammino plausibile un relativo Path e tenendo conto di tutti gli eventi associati ai vari passi.
     *  
     * Per capire meglio la differenza fra plausibile e valido, considerare il seguente esempio:
     * Un Parser deve riconoscere stringhe che abbiano la forma "((( .... (0) ... )))", dove il numero di parentesi tonde aperte prima dello
     * zero deve coincidere col numero di parentesi tonde chiuse dopo lo zero. La stringa "0" deve inoltre essere riconosciuta.
     * Al Parser in questione viene data in input la stringa "(0))", che non deve ovviamente passare il test.
     * Con l'attuale implementazione, il Parser conterrebbe 5 Token: lo start, l'end e tre Token, uno per la "(", uno per lo "0" e l'ultimo
     * per la ")". I collegamenti sarebbero del tipo:
     * start -> "(" oppure "0"
     * "(" -> "(" oppure "0"
     * "0" -> ")"
     * ")" -> ")" oppure end
     * E' facile quindi verificare che nel primo ciclo si riuscirebbe a trovare un cammino plausibile da start ad end guidato dalla stringa
     * in input; il controllo di validita' fallirebbe nel secondo ciclo, dove vengono presi in considerazione anche gli eventi associati ai
     * vari passi (in questo caso, ogni percorso non terminerebbe a profondita' 0 e quindi non sarebbe valido).
     */
    String localBestParse = "";
    
    List<Path> validPathList = new ArrayList<Path>();
    Stack<Step> stepList = new Stack<Step>();
    SearchKnot<Step> nextKnot;
    Path path;
    int index = 0;
    for (SearchKnot<Step> boundary : boundaryList) {
      for (Step step : connectionsMap.get(boundary.getValue().getDestination())) {
        if (step.getDestination() == end) {
          stepList.clear();
          stepList.push(step);
          nextKnot = boundary;
          while (nextKnot != null) {
            stepList.push(nextKnot.getValue());
            nextKnot = nextKnot.getParent();
          }


          Step nextStep = stepList.pop();
          path = new Path(nextStep.getDestination());
          index = 0;
          localBestParse = "";
          while (stepList.isEmpty() == false && path.isValid()) {
            if(index > 0){
              localBestParse += input.charAt(index - 1);
            }
            nextStep = stepList.pop();
            for (Event event : nextStep.getEventList()) {
              event.managePath(path);
            }
            if (index < input.length()) {
              path.moveTo(nextStep.getDestination(), input.charAt(index) + "");
              index++;
            }
          }
          
          if(globalBestParse == null || localBestParse.contains(globalBestParse)){
            globalBestParse = localBestParse;
          }
          
          if (path.close()) {
            validPathList.add(path);
          }
        }
      }
    }


    /*
     * La terza parte dell'algoritmo ordina tutti i percorsi corretti in base alla likeness, e crea per ciascuno di essi
     * uno Stream con i valori raccolti dai Sink.
     */
    if (validPathList.size() > 0) {
      Collections.sort(validPathList, new Comparator<Path>() {
        @Override
        public int compare(Path arg0, Path arg1) {
          if(arg1.getLikeness() == arg0.getLikeness()){
            return arg1.getSecondaryLikeness() - arg0.getSecondaryLikeness();
          } else {
            return arg1.getLikeness() - arg0.getLikeness();
          }
        }
      });


      for (Path validPath : validPathList) {
        Map<String, List<String>> streamMap = new HashMap<String, List<String>>();
        for (Sink sink : validPath.getClosedSink()) {
          if (streamMap.get(sink.getName()) == null) {
            streamMap.put(sink.getName(), new ArrayList<String>());
          }
          streamMap.get(sink.getName()).add(sink.getStream());
        }
        retVal.add(new Stream(streamMap));
      }
    } else {
      String message = "the input text doesn't match the bnf provided\ninput string was:\n";
      message += input + "\nbest parsed string was:";
      message += globalBestParse + "\n";
      throw new ParseException(message, input, globalBestParse);
    }
    return retVal;
  }
}
// Source Code of it.halfone.parser.Parser

// Related Classes of it.halfone.parser.Parser