Package org.parboiled.parserunners

Source Code of org.parboiled.parserunners.RecoveringParseRunner$Handler

/*
* Copyright (C) 2009-2010 Mathias Doenitz
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.parboiled.parserunners;

import com.google.common.base.Preconditions;
import org.jetbrains.annotations.NotNull;
import org.parboiled.BaseParser;
import org.parboiled.MatchHandler;
import org.parboiled.MatcherContext;
import org.parboiled.Rule;
import org.parboiled.buffers.MutableInputBuffer;
import org.parboiled.errors.InvalidInputError;
import org.parboiled.matchers.*;
import org.parboiled.matchervisitors.*;
import org.parboiled.support.Chars;
import org.parboiled.support.MatcherPath;
import org.parboiled.support.ParsingResult;
import org.parboiled.support.ValueStack;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import static org.parboiled.support.Chars.*;

/**
* A {@link ParseRunner} implementation that is able to recover from {@link InvalidInputError}s in the input and therefore
* report more than just the first {@link InvalidInputError} if the input does not conform to the rule grammar.
* Error recovery is done by attempting to either delete an error character, insert a potentially missing character
* or do both at once (which is equivalent to a one char replace) whereby this implementation is able to determine
* itself which of these options is the best strategy.
* If the parse error cannot be overcome by either deleting, inserting or replacing one character a resynchronization
* rule is determined and the parsing process resynchronized, so that parsing can still continue.
* In this way the RecoveringParseRunner is able to completely parse all input texts (this ParseRunner never returns
* an unmatched {@link ParsingResult}).
* If the input is error free this {@link ParseRunner} implementation will only perform one parsing run, with the same
* speed as the {@link BasicParseRunner}. However, if there are {@link InvalidInputError}s in the input potentially
* many more runs are performed to properly report all errors and test the various recovery strategies.
*/
public class RecoveringParseRunner<V> extends BasicParseRunner<V> {

    // Index of the next parse error. Set from the recording handler in attemptRecordingMatch() and
    // adjusted by the fix methods; fixError() reports completion when it is -1.
    private int errorIndex;
    // Error obtained from the most recent error reporting run (null until the first such run).
    private InvalidInputError currentError;
    // Mutable wrapper around the input buffer; the fix methods insert their marker characters into it.
    private MutableInputBuffer buffer;

    /**
     * Create a new RecoveringParseRunner instance with the given rule and input text and returns the result of
     * its {@link #run(String)} method invocation.
     *
     * @param rule  the parser rule to run
     * @param input the input text to run on
     * @return the ParsingResult for the parsing run
     */
    public static <V> ParsingResult<V> run(@NotNull Rule rule, @NotNull String input) {
        return new RecoveringParseRunner<V>(rule).run(input);
    }

    /**
     * Creates a new RecoveringParseRunner instance for the given rule.
     * The rule is passed on unchanged to the superclass constructor.
     *
     * @param rule the parser rule (declared {@code @NotNull})
     */
    public RecoveringParseRunner(@NotNull Rule rule) {
        super(rule);
    }

    /**
     * Creates a new RecoveringParseRunner instance for the given rule using the given ValueStack instance.
     * Both arguments are passed on unchanged to the superclass constructor.
     *
     * @param rule       the parser rule (declared {@code @NotNull})
     * @param valueStack the value stack (declared {@code @NotNull})
     */
    public RecoveringParseRunner(@NotNull Rule rule, @NotNull ValueStack<V> valueStack) {
        super(rule, valueStack);
    }

    @Override
    protected boolean runRootContext() {
        // run a basic match
        if (super.runRootContext()) {
            return true;
        }

        if (attemptRecordingMatch()) {
            throw new IllegalStateException(); // we failed before so we must fail again
        }

        // in order to be able to apply fixes we need to wrap the input buffer with a mutability wrapper
        inputBuffer = buffer = new MutableInputBuffer(inputBuffer);

        do {
            performErrorReportingRun();
        } while (!fixError(errorIndex));

        return true;
    }

    protected boolean attemptRecordingMatch() {
        RecordingParseRunner.Handler handler = new RecordingParseRunner.Handler(getInnerHandler());
        boolean matched = runRootContext(handler, false); // run without fast string matching
        errorIndex = handler.getErrorIndex();
        return matched;
    }

    protected void performErrorReportingRun() {
        ReportingParseRunner.Handler handler = new ReportingParseRunner.Handler(errorIndex, getInnerHandler());
        if (runRootContext(handler, false)) {
            throw new IllegalStateException(); // we failed before so we should really be failing again
        }
        currentError = handler.getParseError();
    }

    protected MatchHandler getInnerHandler() {
        return errorIndex >= 0 ? new Handler(currentError) : new BasicParseRunner.Handler();
    }

    protected boolean fixError(int fixIndex) {
        if (tryFixBySingleCharDeletion(fixIndex)) return true;
        int nextErrorAfterDeletion = errorIndex;

        Character bestInsertionCharacter = findBestSingleCharInsertion(fixIndex);
        if (bestInsertionCharacter == null) return true;
        int nextErrorAfterBestInsertion = errorIndex;

        Character bestReplacementCharacter = findBestSingleCharReplacement(fixIndex);
        if (bestReplacementCharacter == null) return true;
        int nextErrorAfterBestReplacement = errorIndex;

        int nextErrorAfterBestSingleCharFix =
                Math.max(Math.max(nextErrorAfterDeletion, nextErrorAfterBestInsertion), nextErrorAfterBestReplacement);
        if (nextErrorAfterBestSingleCharFix > fixIndex) {
            // we are able to overcome the error with a single char fix, so apply the best one found
            if (nextErrorAfterBestSingleCharFix == nextErrorAfterDeletion) {
                buffer.insertChar(fixIndex, DEL_ERROR);
                errorIndex = nextErrorAfterDeletion + 1;
                currentError.shiftIndexDeltaBy(1);
            } else if (nextErrorAfterBestSingleCharFix == nextErrorAfterBestInsertion) {
                // we need to insert the characters in reverse order, since we insert twice at the same location
                buffer.insertChar(fixIndex, bestInsertionCharacter);
                buffer.insertChar(fixIndex, INS_ERROR);
                errorIndex = nextErrorAfterBestInsertion + 2;
                currentError.shiftIndexDeltaBy(2);
            } else {
                // we need to insert the characters in reverse order, since we insert three times at the same location
                buffer.insertChar(fixIndex + 1, bestReplacementCharacter);
                buffer.insertChar(fixIndex + 1, INS_ERROR);
                buffer.insertChar(fixIndex, DEL_ERROR);
                errorIndex = nextErrorAfterBestReplacement + 5;
                currentError.shiftIndexDeltaBy(1);
            }
        } else {
            // we can't fix the error with a single char fix, so fall back to resynchronization
            // however, if we are already at EOI there is not much more we can do
            if (buffer.charAt(fixIndex) == EOI) return true;
            buffer.insertChar(fixIndex, RESYNC);
            currentError.shiftIndexDeltaBy(1);
            attemptRecordingMatch(); // find the next parse error
        }
        return errorIndex == -1;
    }

    protected boolean tryFixBySingleCharDeletion(int fixIndex) {
        buffer.insertChar(fixIndex, DEL_ERROR);
        boolean nowErrorFree = attemptRecordingMatch();
        if (nowErrorFree) {
            currentError.shiftIndexDeltaBy(1); // compensate for the inserted DEL_ERROR char
        } else {
            buffer.undoCharInsertion(fixIndex);
            errorIndex = Math.max(errorIndex - 1, 0);
        }
        return nowErrorFree;
    }

    @SuppressWarnings({"ConstantConditions"})
    protected Character findBestSingleCharInsertion(int fixIndex) {
        GetStarterCharVisitor getStarterCharVisitor = new GetStarterCharVisitor();
        int bestNextErrorIndex = -1;
        Character bestChar = null;
        for (MatcherPath failedMatcherPath : currentError.getFailedMatchers()) {
            Character starterChar = failedMatcherPath.element.matcher.accept(getStarterCharVisitor);
            Preconditions.checkState(starterChar != null); // we should only have single character matchers
            if (starterChar == EOI) {
                continue; // we should never conjure up an EOI character (that would be cheating :)
            }
            buffer.insertChar(fixIndex, starterChar);
            buffer.insertChar(fixIndex, INS_ERROR);
            if (attemptRecordingMatch()) {
                currentError.shiftIndexDeltaBy(2); // compensate for the inserted chars
                return null; // success, exit immediately
            }
            buffer.undoCharInsertion(fixIndex);
            buffer.undoCharInsertion(fixIndex);
            errorIndex = Math.max(errorIndex - 2, 0);

            if (bestNextErrorIndex < errorIndex) {
                bestNextErrorIndex = errorIndex;
                bestChar = starterChar;
            }
        }
        errorIndex = bestNextErrorIndex;
        return bestChar;
    }

    protected Character findBestSingleCharReplacement(int fixIndex) {
        /*errorIndex = fixIndex;
        return 'x';*/

        buffer.insertChar(fixIndex, DEL_ERROR);
        Character bestChar = findBestSingleCharInsertion(fixIndex + 2);
        if (bestChar == null) { // success, we found a fix that renders the complete input error free
            currentError.shiftIndexDeltaBy(-1); // delta from DEL_ERROR char insertion and index shift by insertion method
        } else {
            buffer.undoCharInsertion(fixIndex);
            errorIndex = Math.max(errorIndex - 3, 0);
        }
        return bestChar;
    }

    /**
     * A {@link MatchHandler} implementation that recognizes the special {@link Chars#RESYNC} character
     * to overcome {@link InvalidInputError}s at the respective error indices.
     */
    public static class Handler implements MatchHandler {
        // Visitor used by match() to tell single character matchers apart from all other matchers.
        private final IsSingleCharMatcherVisitor isSingleCharMatcherVisitor = new IsSingleCharMatcherVisitor();
        // Error whose end index may be set while resynchronizing; may be null.
        private final InvalidInputError currentError;
        // Furthest input index reached by a successful single character match; resynchronize()
        // additionally moves it past the RESYNC marker.
        private int fringeIndex;
        // Matcher path of the single character match that last advanced fringeIndex.
        private MatcherPath lastMatchPath;

        /**
         * Creates a new Handler. If a non-null InvalidInputError is given the handler will set its endIndex
         * to the correct index if the error corresponds to an error that can only be overcome by resynchronizing.
         *
         * @param currentError an optional InvalidInputError whose endIndex is to be set during resyncing;
         *                     may be null
         */
        public Handler(InvalidInputError currentError) {
            this.currentError = currentError;
        }

        /**
         * Starts matching at the root by running the matcher of the given root context.
         *
         * @param rootContext the context of the root matcher
         * @return the result of running the root context's matcher
         */
        public boolean matchRoot(MatcherContext<?> rootContext) {
            return rootContext.runMatcher();
        }

        public boolean match(MatcherContext<?> context) {
            Matcher matcher = context.getMatcher();
            if (matcher.accept(isSingleCharMatcherVisitor)) {
                if (prepareErrorLocation(context) && matcher.match(context)) {
                    if (fringeIndex < context.getCurrentIndex()) {
                        fringeIndex = context.getCurrentIndex();
                        lastMatchPath = context.getPath();
                    }
                    return true;
                }
                return false;
            }

            if (matcher.match(context)) {
                return true;
            }

            // if we didn't match we might have to resynchronize, however we only resynchronize
            // if we are at a RESYNC location and the matcher is a SequenceMatchers that has already
            // matched at least one character and that is a parent of the last match
            return context.getInputBuffer().charAt(fringeIndex) == RESYNC &&
                    qualifiesForResync(context, matcher) &&
                    resynchronize(context);
        }

        @SuppressWarnings({"SimplifiableIfStatement"})
        private boolean qualifiesForResync(MatcherContext context, Matcher matcher) {
            if (matcher instanceof SequenceMatcher && context.getCurrentIndex() > context.getStartIndex() &&
                    context.getPath().isPrefixOf(lastMatchPath)) {
                return true;
            }
            return context.getParent() == null; // always resync on the root if there is nothing else
        }

        protected boolean prepareErrorLocation(MatcherContext context) {
            switch (context.getCurrentChar()) {
                case DEL_ERROR:
                    return willMatchDelError(context);
                case INS_ERROR:
                    return willMatchInsError(context);
            }
            return true;
        }

        protected boolean willMatchDelError(MatcherContext context) {
            int preSkipIndex = context.getCurrentIndex();
            context.advanceIndex(2); // skip del marker char and illegal char
            if (!runTestMatch(context)) {
                // if we wouldn't succeed with the match do not swallow the ERROR char & Co
                context.setCurrentIndex(preSkipIndex);
                return false;
            }
            context.setStartIndex(context.getCurrentIndex());
            context.clearNodeSuppression();
            if (context.getParent() != null) context.getParent().markError();
            return true;
        }

        protected boolean willMatchInsError(MatcherContext context) {
            int preSkipIndex = context.getCurrentIndex();
            context.advanceIndex(1); // skip ins marker char
            if (!runTestMatch(context)) {
                // if we wouldn't succeed with the match do not swallow the ERROR char
                context.setCurrentIndex(preSkipIndex);
                return false;
            }
            context.setStartIndex(context.getCurrentIndex());
            context.clearNodeSuppression();
            context.markError();
            return true;
        }

        protected boolean runTestMatch(MatcherContext context) {
            TestMatcher testMatcher = new TestMatcher(context.getMatcher());
            MatcherContext testContext = testMatcher.getSubContext(context);
            return prepareErrorLocation(testContext) && testContext.runMatcher();
        }

        protected boolean resynchronize(MatcherContext context) {
            context.clearNodeSuppression();
            context.markError();

            // create a node for the failed Sequence, taking ownership of all sub nodes created so far
            context.createNode();

            // by resyncing we flip an unmatched sequence to a matched one, so in order to keep the value stack
            // consistent we go into a special "error action mode" and execute the minimal set of actions underneath
            // the resync sequence
            executeErrorActions(context);

            // skip over all characters that are not legal followers of the failed Sequence
            context.advanceIndex(1); // gobble RESYNC marker
            fringeIndex++;
            List<Matcher> followMatchers = new FollowMatchersVisitor().getFollowMatchers(context);
            int endIndex = gobbleIllegalCharacters(context, followMatchers);

            if (currentError != null && currentError.getStartIndex() == fringeIndex && endIndex - fringeIndex > 1) {
                currentError.setEndIndex(endIndex);
            }

            return true;
        }

        private void executeErrorActions(MatcherContext context) {
            // the context is for the resync action, which at this point has FAILED, i.e. ALL its sub actions haven't
            // had a chance to change the value stack, even the ones having run before the actual parse error matcher
            // so we need to rerun all sub matchers of the resync sequence up to the point of the parse error
            // and then run the minimal set of action in "error action mode"

            context.setCurrentIndex(context.getStartIndex()); // restart matching the resync sequence

            Matcher lastGoodSub = lastMatchPath == null ? null :
                    lastMatchPath.getElementAtLevel(context.getLevel() + 1).matcher;
            boolean errorMode = false;

            for (Matcher sub : context.getMatcher().getChildren()) {
                if (errorMode) {
                    for (ActionMatcher action : sub.accept(new CollectResyncActionsVisitor())) {
                        action.getSubContext(context).runMatcher();
                    }
                    continue;
                }
                // as long as we are before the error matcher we simply execute normally
                sub.getSubContext(context).runMatcher();
               
                if (sub == lastGoodSub) {
                    // run an empty matcher which all error actions will see as the immediately preceding rule
                    context.getSubContext((Matcher) BaseParser.EMPTY).runMatcher();
                    errorMode = true;
                }
            }
        }

        protected int gobbleIllegalCharacters(MatcherContext context, List<Matcher> followMatchers) {
            while_loop:
            while (true) {
                char currentChar = context.getCurrentChar();
                if (currentChar == EOI) break;
                for (Matcher followMatcher : followMatchers) {
                    if (followMatcher.accept(new IsStarterCharVisitor(currentChar))) {
                        break while_loop;
                    }
                }
                context.advanceIndex(1);
            }
            return context.getCurrentIndex();
        }
    }

    /**
     * This MatcherVisitor collects the minimal set of actions that has to run underneath a resyncronization sequence
     * in order to maintain a consistent Value Stack state.
     */
    private static class CollectResyncActionsVisitor extends DefaultMatcherVisitor<List<ActionMatcher>> {

        private final Set<Matcher> visited = new HashSet<Matcher>();
        private final List<ActionMatcher> actions = new ArrayList<ActionMatcher>();

        @Override
        public List<ActionMatcher> visit(ActionMatcher matcher) {
            actions.add(matcher);
            return actions;
        }

        @Override
        public List<ActionMatcher> visit(FirstOfMatcher matcher) {
            // go through all subs in reverse order (because the simplest fall-back cases are often in last position)
            // and try all of them until we hit a path that does not lead to a recursion
            List<Matcher> children = matcher.getChildren();
            for (int i = children.size() - 1; i >= 0; i--) {
                if (children.get(i).accept(this) != null) return actions;
            }
            throw new IllegalStateException(); // a FirstOf where all subs lead to recursions?
        }

        @Override
        public List<ActionMatcher> visit(OneOrMoreMatcher matcher) {
            return matcher.subMatcher.accept(this);
        }

        @Override
        public List<ActionMatcher> visit(SequenceMatcher matcher) {
            if (visited.contains(matcher)) {
                // we hit a recursion, so signal to the next FirstOf parent that we need to take another path in order
                // to collect all actions
                return null;
            }

            visited.add(matcher);
            for (Matcher sub : matcher.getChildren()) {
                sub.accept(this);
            }
            return actions;
        }

        @Override
        public List<ActionMatcher> defaultValue(AbstractMatcher matcher) {
            return actions;
        }
    }

}
TOP

Related Classes of org.parboiled.parserunners.RecoveringParseRunner$Handler

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.