Package org.apache.fop.complexscripts.scripts

Source Code of org.apache.fop.complexscripts.scripts.IndicScriptProcessor

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/* $Id$ */

package org.apache.fop.complexscripts.scripts;

import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.Vector;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.apache.fop.complexscripts.fonts.GlyphTable;
import org.apache.fop.complexscripts.util.CharScript;
import org.apache.fop.complexscripts.util.GlyphContextTester;
import org.apache.fop.complexscripts.util.GlyphSequence;
import org.apache.fop.complexscripts.util.ScriptContextTester;

// CSOFF: AvoidNestedBlocksCheck
// CSOFF: NoWhitespaceAfterCheck
// CSOFF: InnerAssignmentCheck
// CSOFF: SimplifyBooleanReturnCheck
// CSOFF: EmptyForIteratorPadCheck
// CSOFF: WhitespaceAfterCheck
// CSOFF: ParameterNumberCheck
// CSOFF: LineLengthCheck

/**
* <p>The <code>IndicScriptProcessor</code> class implements a script processor for
* performing glyph substitution and positioning operations on content associated with the Indic script.</p>
*
* <p>This work was originally authored by Glenn Adams (gadams@apache.org).</p>
*/
public class IndicScriptProcessor extends DefaultScriptProcessor {

    /** logging instance */
    private static final Log log = LogFactory.getLog(IndicScriptProcessor.class);                                      // CSOK: ConstantNameCheck

    /** required features to use for substitutions */
    private static final String[] gsubReqFeatures =                                                                    // CSOK: ConstantNameCheck
    {
        "abvf",                                                 // above base forms
        "abvs",                                                 // above base substitutions
        "akhn",                                                 // akhand
        "blwf",                                                 // below base forms
        "blws",                                                 // below base substitutions
        "ccmp",                                                 // glyph composition/decomposition
        "cjct",                                                 // conjunct forms
        "clig",                                                 // contextual ligatures
        "half",                                                 // half forms
        "haln",                                                 // halant forms
        "locl",                                                 // localized forms
        "nukt",                                                 // nukta forms
        "pref",                                                 // pre-base forms
        "pres",                                                 // pre-base substitutions
        "pstf",                                                 // post-base forms
        "psts",                                                 // post-base substitutions
        "rkrf",                                                 // rakar forms
        "rphf",                                                 // reph form
        "vatu"                                                  // vattu variants
    };

    /** optional features to use for substitutions */
    private static final String[] gsubOptFeatures =                                                                     // CSOK: ConstantNameCheck
    {
        "afrc",                                                 // alternative fractions
        "calt",                                                 // contextual alternatives
        "dlig"                                                  // discretionary ligatures
    };

    /** required features to use for positioning */
    private static final String[] gposReqFeatures =                                                                     // CSOK: ConstantNameCheck
    {
        "abvm",                                                 // above base marks
        "blwm",                                                 // below base marks
        "dist",                                                 // distance (adjustment)
        "kern"                                                  // kerning
    };

    /** required features to use for positioning */
    private static final String[] gposOptFeatures =                                                                     // CSOK: ConstantNameCheck
    {
    };

    private static class SubstitutionScriptContextTester implements ScriptContextTester {
        private static Map/*<String,GlyphContextTester>*/ testerMap = new HashMap/*<String,GlyphContextTester>*/();
        public GlyphContextTester getTester ( String feature ) {
            return (GlyphContextTester) testerMap.get ( feature );
        }
    }

    private static class PositioningScriptContextTester implements ScriptContextTester {
        private static Map/*<String,GlyphContextTester>*/ testerMap = new HashMap/*<String,GlyphContextTester>*/();
        public GlyphContextTester getTester ( String feature ) {
            return (GlyphContextTester) testerMap.get ( feature );
        }
    }

    /**
     * Make script specific flavor of Indic script processor.
     * @param script tag
     * @return script processor instance
     */
    public static ScriptProcessor makeProcessor ( String script ) {
        switch ( CharScript.scriptCodeFromTag ( script ) ) {
        case CharScript.SCRIPT_DEVANAGARI:
        case CharScript.SCRIPT_DEVANAGARI_2:
            return new DevanagariScriptProcessor ( script );
        case CharScript.SCRIPT_GUJARATI:
        case CharScript.SCRIPT_GUJARATI_2:
            return new GujaratiScriptProcessor ( script );
        case CharScript.SCRIPT_GURMUKHI:
        case CharScript.SCRIPT_GURMUKHI_2:
            return new GurmukhiScriptProcessor ( script );
        // [TBD] implement other script processors
        default:
            return new IndicScriptProcessor ( script );
        }
    }

    private final ScriptContextTester subContextTester;
    private final ScriptContextTester posContextTester;

    IndicScriptProcessor ( String script ) {
        super ( script );
        this.subContextTester = new SubstitutionScriptContextTester();
        this.posContextTester = new PositioningScriptContextTester();
    }

    /** {@inheritDoc} */
    public String[] getSubstitutionFeatures() {
        return gsubReqFeatures;
    }

    /** {@inheritDoc} */
    public String[] getOptionalSubstitutionFeatures() {
        return gsubOptFeatures;
    }

    /** {@inheritDoc} */
    public ScriptContextTester getSubstitutionContextTester() {
        return subContextTester;
    }

    /** {@inheritDoc} */
    public String[] getPositioningFeatures() {
        return gposReqFeatures;
    }

    /** {@inheritDoc} */
    public String[] getOptionalPositioningFeatures() {
        return gposOptFeatures;
    }

    /** {@inheritDoc} */
    public ScriptContextTester getPositioningContextTester() {
        return posContextTester;
    }

    /** {@inheritDoc} */
    @Override
    public GlyphSequence substitute ( GlyphSequence gs, String script, String language, GlyphTable.UseSpec[] usa, ScriptContextTester sct ) {
        assert usa != null;
        // 1. syllabize
        GlyphSequence[] sa = syllabize ( gs, script, language );
        // 2. process each syllable
        for ( int i = 0, n = sa.length; i < n; i++ ) {
            GlyphSequence s = sa [ i ];
            // apply basic shaping subs
            for ( int j = 0, m = usa.length; j < m; j++ ) {
                GlyphTable.UseSpec us = usa [ j ];
                if ( isBasicShapingUse ( us ) ) {
                    s.setPredications ( true );
                    s = us.substitute ( s, script, language, sct );
                }
            }
            // reorder pre-base matra
            s = reorderPreBaseMatra ( s );
            // reorder reph
            s = reorderReph ( s );
            // apply presentation subs
            for ( int j = 0, m = usa.length; j < m; j++ ) {
                GlyphTable.UseSpec us = usa [ j ];
                if ( isPresentationUse ( us ) ) {
                    s.setPredications ( true );
                    s = us.substitute ( s, script, language, sct );
                }
            }
            // record result
            sa [ i ] = s;
        }
        // 3. return reassembled substituted syllables
        return unsyllabize ( gs, sa );
    }

    /**
     * Get script specific syllabizer class.
     * @return a syllabizer class object or null
     */
    protected Class<? extends Syllabizer> getSyllabizerClass() {
        return null;
    }

    private GlyphSequence[] syllabize ( GlyphSequence gs, String script, String language ) {
        return Syllabizer.getSyllabizer ( script, language, getSyllabizerClass() ) . syllabize ( gs );
    }

    private GlyphSequence unsyllabize ( GlyphSequence gs, GlyphSequence[] sa ) {
        return GlyphSequence.join ( gs, sa );
    }

    private static Set<String> basicShapingFeatures;
    private static final String[] basicShapingFeatureStrings = {                                                        // CSOK: ConstantNameCheck
        "abvf",
        "akhn",
        "blwf",
        "cjct",
        "half",
        "locl",
        "nukt",
        "pref",
        "pstf",
        "rkrf",
        "rphf",
        "vatu",
    };
    static {
        basicShapingFeatures = new HashSet<String>();
        for ( String s : basicShapingFeatureStrings ) {
            basicShapingFeatures.add ( s );
        }
    }
    private boolean isBasicShapingUse ( GlyphTable.UseSpec us ) {
        assert us != null;
        if ( basicShapingFeatures != null ) {
            return basicShapingFeatures.contains ( us.getFeature() );
        } else {
            return false;
        }
    }

    private static  Set<String> presentationFeatures;
    private static final String[] presentationFeatureStrings = {                                                        // CSOK: ConstantNameCheck
        "abvs",
        "blws",
        "calt",
        "haln",
        "pres",
        "psts",
    };
    static {
        presentationFeatures = new HashSet<String>();
        for ( String s : presentationFeatureStrings ) {
            presentationFeatures.add ( s );
        }
    }
    private boolean isPresentationUse ( GlyphTable.UseSpec us ) {
        assert us != null;
        if ( presentationFeatures != null ) {
            return presentationFeatures.contains ( us.getFeature() );
        } else {
            return false;
        }
    }

    private GlyphSequence reorderPreBaseMatra ( GlyphSequence gs ) {
        int source;
        if ( ( source = findPreBaseMatra ( gs ) ) >= 0 ) {
            int target;
            if ( ( target = findPreBaseMatraTarget ( gs, source ) ) >= 0 ) {
                if ( target != source ) {
                    gs = reorder ( gs, source, target );
                }
            }
        }
        return gs;
    }

    /**
     * Find pre-base matra in sequence.
     * @param gs input sequence
     * @return index of pre-base matra or -1 if not found
     */
    protected int findPreBaseMatra ( GlyphSequence gs ) {
        return -1;
    }

    /**
     * Find pre-base matra target in sequence.
     * @param gs input sequence
     * @param source index of pre-base matra
     * @return index of pre-base matra target or -1
     */
    protected int findPreBaseMatraTarget ( GlyphSequence gs, int source ) {
        return -1;
    }

    private GlyphSequence reorderReph ( GlyphSequence gs ) {
        int source;
        if ( ( source = findReph ( gs ) ) >= 0 ) {
            int target;
            if ( ( target = findRephTarget ( gs, source ) ) >= 0 ) {
                if ( target != source ) {
                    gs = reorder ( gs, source, target );
                }
            }
        }
        return gs;
    }

    /**
     * Find reph in sequence.
     * @param gs input sequence
     * @return index of reph or -1 if not found
     */
    protected int findReph ( GlyphSequence gs ) {
        return -1;
    }

    /**
     * Find reph target in sequence.
     * @param gs input sequence
     * @param source index of reph
     * @return index of reph target or -1
     */
    protected int findRephTarget ( GlyphSequence gs, int source ) {
        return -1;
    }

    private GlyphSequence reorder ( GlyphSequence gs, int source, int target ) {
        return GlyphSequence.reorder ( gs, source, 1, target );
    }

    /** {@inheritDoc} */
    @Override
    public boolean position ( GlyphSequence gs, String script, String language, int fontSize, GlyphTable.UseSpec[] usa, int[] widths, int[][] adjustments, ScriptContextTester sct ) {
        boolean adjusted = super.position ( gs, script, language, fontSize, usa, widths, adjustments, sct );
        return adjusted;
    }

    /** Abstract syllabizer. */
    protected abstract static class Syllabizer implements Comparable {
        private String script;
        private String language;
        Syllabizer ( String script, String language ) {
            this.script = script;
            this.language = language;
        }
        /**
         * Subdivide glyph sequence GS into syllabic segments each represented by a distinct
         * output glyph sequence.
         * @param gs input glyph sequence
         * @return segmented syllabic glyph sequences
         */
        abstract GlyphSequence[] syllabize ( GlyphSequence gs );
        /** {@inheritDoc} */
        public int hashCode() {
            int hc = 0;
            hc =  7 * hc + ( hc ^ script.hashCode() );
            hc = 11 * hc + ( hc ^ language.hashCode() );
            return hc;
        }
        /** {@inheritDoc} */
        public boolean equals ( Object o ) {
            if ( o instanceof Syllabizer ) {
                Syllabizer s = (Syllabizer) o;
                if ( ! s.script.equals ( script ) ) {
                    return false;
                } else if ( ! s.language.equals ( language ) ) {
                    return false;
                } else {
                    return true;
                }
            } else {
                return false;
            }
        }
        /** {@inheritDoc} */
        public int compareTo ( Object o ) {
            int d;
            if ( o instanceof Syllabizer ) {
                Syllabizer s = (Syllabizer) o;
                if ( ( d = script.compareTo ( s.script ) ) == 0 ) {
                    d = language.compareTo ( s.language );
                }
            } else {
                d = -1;
            }
            return d;
        }
        private static Map<String,Syllabizer> syllabizers = new HashMap<String,Syllabizer>();
        static Syllabizer getSyllabizer ( String script, String language, Class<? extends Syllabizer> syllabizerClass ) {
            String sid = makeSyllabizerId ( script, language );
            Syllabizer s = syllabizers.get ( sid );
            if ( s == null ) {
                if ( ( s = makeSyllabizer ( script, language, syllabizerClass ) ) == null ) {
                    s = new DefaultSyllabizer ( script, language );
                }
                syllabizers.put ( sid, s );
            }
            return s;
        }
        static String makeSyllabizerId ( String script, String language ) {
            return script + ":" + language;
        }
        static Syllabizer makeSyllabizer ( String script, String language, Class<? extends Syllabizer> syllabizerClass ) {
            Syllabizer s;
            try {
                Constructor<? extends Syllabizer> cf = syllabizerClass.getDeclaredConstructor ( new Class[] { String.class, String.class } );
                s = (Syllabizer) cf.newInstance ( script, language );
            } catch ( NoSuchMethodException e ) {
                s = null;
            } catch ( InstantiationException e ) {
                s = null;
            } catch ( IllegalAccessException e ) {
                s = null;
            } catch ( InvocationTargetException e ) {
                s = null;
            }
            return s;
        }
    }

    /** Default syllabizer. */
    protected static class DefaultSyllabizer extends Syllabizer {
        DefaultSyllabizer ( String script, String language ) {
            super ( script, language );
        }
        /** {@inheritDoc} */
        @Override
        GlyphSequence[] syllabize ( GlyphSequence gs ) {
            int[] ca = gs.getCharacterArray ( false );
            int   nc = gs.getCharacterCount();
            if ( nc == 0 ) {
                return new GlyphSequence[] { gs };
            } else {
                return segmentize ( gs, segmentize ( ca, nc ) );
            }
        }
        /**
         * Construct array of segements from original character array (associated with original glyph sequence)
         * @param ca input character sequence
         * @param nc number of characters in sequence
         * @return array of syllable segments
         */
        protected Segment[] segmentize ( int[] ca, int nc ) {
            Vector<Segment> sv = new Vector<Segment> ( nc );
            for ( int s = 0, e = nc; s < e; ) {
                int i;
                if ( ( i = findStartOfSyllable ( ca, s, e ) ) > s ) {
                    // from s to i is non-syllable segment
                    sv.add ( new Segment ( s, i, Segment.OTHER ) );
                    s = i; // move s to start of syllable
                } else if ( i > s ) {
                    // from s to e is non-syllable segment
                    sv.add ( new Segment ( s, e, Segment.OTHER ) );
                    s = e; // move s to end of input sequence
                }
                if ( ( i = findEndOfSyllable ( ca, s, e ) ) > s ) {
                    // from s to i is syllable segment
                    sv.add ( new Segment ( s, i, Segment.SYLLABLE ) );
                    s = i; // move s to end of syllable
                } else {
                    // from s to e is non-syllable segment
                    sv.add ( new Segment ( s, e, Segment.OTHER ) );
                    s = e; // move s to end of input sequence
                }
            }
            return sv.toArray ( new Segment [ sv.size() ] );
        }
        /**
         * Construct array of glyph sequences from original glyph sequence and segment array.
         * @param gs original input glyph sequence
         * @param sa segment array
         * @return array of glyph sequences each belonging to an (ordered) segment in SA
         */
        protected GlyphSequence[] segmentize ( GlyphSequence gs, Segment[] sa ) {
            int   ng = gs.getGlyphCount();
            int[] ga = gs.getGlyphArray ( false );
            GlyphSequence.CharAssociation[] aa = gs.getAssociations ( 0, -1 );
            Vector<GlyphSequence> nsv = new Vector<GlyphSequence>();
            for ( int i = 0, ns = sa.length; i < ns; i++ ) {
                Segment s = sa [ i ];
                Vector<Integer> ngv = new Vector<Integer> ( ng );
                Vector<GlyphSequence.CharAssociation> nav = new Vector<GlyphSequence.CharAssociation> ( ng );
                for ( int j = 0; j < ng; j++ ) {
                    GlyphSequence.CharAssociation ca = aa [ j ];
                    if ( ca.contained ( s.getOffset(), s.getCount() ) ) {
                        ngv.add ( ga [ j ] );
                        nav.add ( ca );
                    }
                }
                if ( ngv.size() > 0 ) {
                    nsv.add ( new GlyphSequence ( gs, null, toIntArray ( ngv ), null, null, nav.toArray ( new GlyphSequence.CharAssociation [ nav.size() ] ), null ) );
                }
            }
            if ( nsv.size() > 0 ) {
                return nsv.toArray ( new GlyphSequence [ nsv.size() ] );
            } else {
                return new GlyphSequence[] { gs };
            }
        }
        /**
         * Find start of syllable in character array, starting at S, ending at E.
         * @param ca character array
         * @param s start index
         * @param e end index
         * @return index of start or E if no start found
         */
        protected int findStartOfSyllable ( int[] ca, int s, int e ) {
            return e;
        }
        /**
         * Find end of syllable in character array, starting at S, ending at E.
         * @param ca character array
         * @param s start index
         * @param e end index
         * @return index of start or S if no end found
         */
        protected int findEndOfSyllable ( int[] ca, int s, int e ) {
            return s;
        }
        private static int[] toIntArray ( Vector<Integer> iv ) {
            int ni = iv.size();
            int[] ia = new int [ iv.size() ];
            for ( int i = 0, n = ni; i < n; i++ ) {
                ia [ i ] = (int) iv.get ( i );
            }
            return ia;
        }
    }

    /** Syllabic segment. */
    protected static class Segment {

        static final int OTHER = 0;            // other (non-syllable) characters
        static final int SYLLABLE = 1;         // (orthographic) syllable

        private int start;
        private int end;
        private int type;

        Segment ( int start, int end, int type ) {
            this.start = start;
            this.end = end;
            this.type = type;
        }

        int getStart() {
            return start;
        }

        int getEnd() {
            return end;
        }

        int getOffset() {
            return start;
        }

        int getCount() {
            return end - start;
        }

        int getType() {
            return type;
        }
    }
}
TOP

Related Classes of org.apache.fop.complexscripts.scripts.IndicScriptProcessor

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.