Package dk.brics.string.charset

Source Code of dk.brics.string.charset.CharSet

package dk.brics.string.charset;

import java.util.HashSet;
import java.util.List;
import java.util.PriorityQueue;
import java.util.TreeSet;

import dk.brics.automaton.Automaton;
import dk.brics.automaton.State;
import dk.brics.automaton.Transition;

/**
* Character set.
*/
public class CharSet implements Cloneable {

    private HashSet<Interval> intervals = new HashSet<Interval>();

    /**
     * Constructs new empty character set.
     */
    public CharSet() {
    }

    /**
     * Returns new character set with all characters in strings in the given language.
     */
    public CharSet(Automaton a) {
        Automaton b = a.singleChars();
        for (Transition t : b.getInitialState().getTransitions()) {
            intervals.add(new Interval(t.getMin(), t.getMax()));
        }
    }
    /**
     * Returns new character set with all characters in the specified string.
     */
    public CharSet(String s) {
        for (int i=0; i<s.length(); i++) {
            intervals.add(new Interval(s.charAt(i), s.charAt(i+1)));
        }
    }

    @Override
    public CharSet clone() {
        CharSet a;
        try {
            a = (CharSet) super.clone();
        } catch (CloneNotSupportedException e) {
            throw new RuntimeException(e);
        }
        a.intervals = new HashSet<Interval>(intervals);
        return a;
    }

    @Override
    public boolean equals(Object obj) {
        if (obj instanceof CharSet) {
            CharSet a = (CharSet) obj;
            return intervals.equals(a.intervals);
        } else {
            return false;
        }
    }

    @Override
    public int hashCode() {
        return intervals.hashCode();
    }

    @Override
    public String toString() {
        StringBuilder b = new StringBuilder();
        boolean first = true;
        for (Interval v : new TreeSet<Interval>(intervals)) {
            if (first) {
                first = false;
            } else {
                b.append(",");
            }
            if (v.getMin() == v.getMax()) {
                appendChar(b, v.getMin());
            } else {
                appendChar(b, v.getMin());
                b.append("-");
                appendChar(b, v.getMax());
            }
        }
        return b.toString();
    }

    private void appendChar(StringBuilder b, char c) {
        b.append('\'');
        if (c >= 0x21 && c <= 0x7e && c != '-') {
            b.append(c);
        } else {
            b.append("\\u");
            String t = Integer.toHexString(c & 0xffff);
            for (int j = 0; j + t.length() < 4; j++) {
                b.append('0');
            }
            b.append(t);
        }
        b.append('\'');
    }

    void reduce() {
        TreeSet<Interval> s = new TreeSet<Interval>(intervals);
        intervals.clear();
        int min = -1, max = -1;
        for (Interval v : s) {
            if (v.getMin() <= max + 1) {
                if (v.getMax() > max) {
                    max = v.getMax();
                }
            } else {
                if (min > 0) {
                    intervals.add(new Interval((char) min, (char) max));
                }
                min = v.getMin();
                max = v.getMax();
            }
        }
        if (min > 0) {
            intervals.add(new Interval((char) min, (char) max));
        }
    }

    /**
     * Returns new character set with every character.
     */
    public static CharSet makeAnychars() {
        CharSet a = new CharSet();
        a.intervals.add(new Interval(Character.MIN_VALUE, Character.MAX_VALUE));
        return a;
    }

    /**
     * Constructs union of this character set and the given one.
     */
    public CharSet union(CharSet a) {
        CharSet b = clone();
        b.intervals.addAll(new HashSet<Interval>(a.intervals));
        b.reduce();
        return b;
    }

    /**
     * Constructs union of the given character sets.
     */
    public static CharSet union(List<CharSet> c) {
        CharSet a = new CharSet();
        for (CharSet b : c) {
            a.intervals.addAll(new HashSet<Interval>(b.intervals));
        }
        a.reduce();
        return a;
    }

    /**
     * Constructs character set as this one but removes the given character.
     */
    public CharSet remove(char c) {
        CharSet b = clone();
        for (Interval v : intervals) {
            if (v.getMin() <= c && c <= v.getMax()) {
                b.intervals.remove(v);
                if (v.getMin() < c) {
                    b.intervals.add(new Interval(v.getMin(), (char) (c - 1)));
                }
                if (c < v.getMax()) {
                    b.intervals.add(new Interval((char) (c + 1), v.getMax()));
                }
            }
        }
        return b;
    }

    /**
     * Constructs character set as this one but adds the given character.
     */
    public CharSet add(char c) {
        CharSet b = clone();
        b.intervals.add(new Interval(c));
        b.reduce();
        return b;
    }
   
    /**
     * Constructs a new character set as the intersection of this set and
     * the specified set.
     */
    public CharSet intersection(CharSet other) {
        PriorityQueue<Interval> mine = new PriorityQueue<Interval>(intervals);
        PriorityQueue<Interval> yours = new PriorityQueue<Interval>(other.intervals);
        CharSet result = new CharSet();
        while (!mine.isEmpty() && !yours.isEmpty()) {
            Interval a = mine.peek();
            Interval b = yours.peek();
           
            Interval inter = a.intersection(b);
           
            if (inter != null) {
                result.intervals.add(inter);
            }
           
            if (a.getMax() < b.getMax()) {
                mine.remove();
            } else {
                yours.remove();
            }
        }
        return result;
    }
   
    boolean isTotal() {
        if (intervals.size() == 1) {
            Interval v = intervals.iterator().next();
            return v.getMin() == Character.MIN_VALUE && v.getMax() == Character.MAX_VALUE;
        } else {
            return false;
        }
    }

    /**
     * Constructs character set as this one and performs uppercase conversion of all characters.
     */
    public CharSet toLowerCase() {
        if (isTotal()) {
            return clone();
        }
        CharSet b = new CharSet();
        for (Interval v : intervals) {
            for (int c = v.getMin(); c <= v.getMax(); c++) {
              String s = String.valueOf((char)c).toLowerCase();
              for (int i=0; i<s.length(); i++) {
                b.intervals.add(new Interval(s.charAt(i)));
              }
            }
        }
        b.reduce();
        return b;
    }
   
    /**
     * Constructs character set as this one and performs lowercase conversion of all characters,
     * as done by {@link String#toUpperCase()} (which occasionally differs from character-wise application of {@link Character#toUpperCase(char)})
     */
    public CharSet toUpperCase() {
        if (isTotal()) {
            return clone();
        }
        CharSet b = new CharSet();
        for (Interval v : intervals) {
            for (int c = v.getMin(); c <= v.getMax(); c++) {
              // some characters, like the German "�", become multiple characters after being turned into upper case
              String s = String.valueOf((char)c).toUpperCase();
              for (int i=0; i<s.length(); i++) {
                b.intervals.add(new Interval(s.charAt(i)));
              }
            }
        }
        b.reduce();
        return b;
    }

    /**
     * Constructs automaton accepting strings with zero or more characters from this set.
     */
    public Automaton toAutomaton() {
        Automaton a = new Automaton();
        State s = a.getInitialState();
        s.setAccept(true);
        for (Interval v : intervals) {
            s.addTransition(new Transition(v.getMin(), v.getMax(), s));
        }
        return a;
    }

    /**
     * Checks whether a particular characters is in the set.
     */
    public boolean contains(char c) {
        for (Interval v : intervals) {
            if (v.getMin() <= c && c <= v.getMax()) {
                return true;
            }
        }
        return false;
    }
}
TOP

Related Classes of dk.brics.string.charset.CharSet

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.