Package smilehouse.opensyncro.defaultcomponents.converter.ascii

Source Code of smilehouse.opensyncro.defaultcomponents.converter.ascii.ASCIItoXMLConverter$ASCIItoXMLGUI

/* OpenSyncro - A web-based enterprise application integration tool
* Copyright (C) 2008 Smilehouse Oy, support@opensyncro.org
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/

/*
* Created on Feb 1, 2005
*
* ASCIItoXMLConverter converts text data to XML by performing a series of
* hierarchical regular expression matches. It executes a script consisting of
* lines specifying:
*
* 1. name of the source data (XML element) to pass to the matcher
* 2. names of new XML elements to contain regular expression group matches
* 3. the actual regular expression with match groups (the parts of the
*    pattern enclosed in parentheses)
*
* For more information on using this component, see the OpenSyncro component
* reference manual.
*
* ---
*
* TODO: Data debug mode, which outputs regexp match offset as an attribute to
*       each XML element.
*
* TODO: BUGFIX: allow multiple root level entries to make it possible to write
*       the following type of scripts without the first dummy line...
*       temp row (.+)
*       row messagetype1 m1(.*)
*       row messagetype2 m2(.*)
*       row messagetype3 m3(.*)
*
* TODO: BUGFIX: order of elements on the same nested level is determined by
*               their order of appearance in the script instead of the input data?
*/
package smilehouse.opensyncro.defaultcomponents.converter.ascii;

import java.util.LinkedList;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.xerces.util.XMLChar;

import smilehouse.gui.html.fieldbased.FieldInfo;
import smilehouse.gui.html.fieldbased.GUIContext;
import smilehouse.gui.html.fieldbased.editor.TextAreaEditor;
import smilehouse.gui.html.fieldbased.model.DefaultModelModifier;
import smilehouse.gui.html.fieldbased.model.ModelModifier;
import smilehouse.opensyncro.pipes.component.AbortTransferException;
import smilehouse.opensyncro.pipes.component.ConverterIF;
import smilehouse.opensyncro.pipes.component.FailTransferException;
import smilehouse.opensyncro.pipes.component.PipeComponentData;
import smilehouse.opensyncro.pipes.component.PipeComponentUtils;
import smilehouse.opensyncro.pipes.gui.GUIConfigurationIF;
import smilehouse.opensyncro.pipes.gui.GUIDefinition;
import smilehouse.opensyncro.pipes.log.MessageLogger;
import smilehouse.opensyncro.pipes.metadata.ConversionInfo;
import smilehouse.opensyncro.system.Environment;
import smilehouse.util.Utils;
import smilehouse.xml.XMLEscape;

public class ASCIItoXMLConverter implements ConverterIF, GUIConfigurationIF {

    private static final String defaultRootElementName = "file";

    private static final String SCRIPT_ATTRIBUTE = "script";
    private static final String XMLHEADERLINE_ATTRIBUTE = "xmlheaderline";
    private static final String ROOTELEMENTNAME_ATTRIBUTE = "rootelementname";

    private static final String lineSeparatorPattern = "\r\n|\r|\n";
    private static final char fieldNameSeparatorChar = ',';

   
    private static boolean regexpGroupAmountWarningOutput;

    private static GUIDefinition gui = new ASCIItoXMLGUI();

    private static class ASCIItoXMLGUI extends GUIDefinition {
        public ASCIItoXMLGUI() {
            try {

                //set unique id and description labelkey
                String id = "script";
                String label = "script";

                ModelModifier modifier = new DefaultModelModifier() {

                    public Object getModelValue(Object model) throws Exception {
                        return ((ASCIItoXMLConverter) model).getScript();
                    }

                    public void setModelValue(Object model, Object value) throws Exception {
                        ((ASCIItoXMLConverter) model).setScript((String) value);
                    }

                };

                TextAreaEditor editor = new TextAreaEditor();
                editor.setCols(120);
                editor.setRows(20);

                //and finally create the configurationObject
                FieldInfo fieldInfo = new FieldInfo(id, label, modifier, editor);

                //add the configuration to the context for usage in the http-requests.
                addField(id, fieldInfo);

                //add edit field for XML root element name
                addSimpleTextFieldForComponent(
                    ROOTELEMENTNAME_ATTRIBUTE,
                    ROOTELEMENTNAME_ATTRIBUTE,
                    40);

                //add XML header line edit field
                addSimpleTextFieldForComponent(
                    XMLHEADERLINE_ATTRIBUTE,
                    XMLHEADERLINE_ATTRIBUTE,
                    100);

            }

            catch(Exception e) {
                Environment.getInstance().log("Couldn't initialize ASCIItoXMLConverter GUI", e);
            }
        }
    }

    public GUIContext getGUIContext() {
        return gui.getGUIContext();
    }

    public String getGUITemplate() {
        return "<table border=0>" + "<tr><td>$" + SCRIPT_ATTRIBUTE + "$</td></tr>" + "<tr><td>$"
                + ROOTELEMENTNAME_ATTRIBUTE + "$</td></tr>" + "<tr><td>$" + XMLHEADERLINE_ATTRIBUTE
                + "$</td></tr></table>";
    }

    public ASCIItoXMLConverter() {}

    // ---
   
    public ASCIItoXMLConverter( Object pipeComponentData ) {
        setData((PipeComponentData) pipeComponentData);
    }

   
    protected PipeComponentData data;
   
    public void setData(PipeComponentData data) {
        this.data = data;
    }

    public PipeComponentData getData() {
        return data;
    }
   
    public final int getType() {
        return TYPE_CONVERTER;
    }
   
    public String getName() {
        return "ASCIItoXMLConverter";
    }

    public String getID() {
        return "smilehouse.opensyncro.defaultcomponents.converter.ascii.ASCIItoXMLConverter";
    }

    public String getDescription(Locale locale) {
        return PipeComponentUtils.getDescription(locale, this.getClass());
    }
   
    // Dummy methods due to no iteration supported
    public int open(ConversionInfo info, MessageLogger logger) throws FailTransferException {
        return ITERATION_OPEN_STATUS_OK;
    }

    public int close(ConversionInfo info, MessageLogger logger) throws FailTransferException {
        return ITERATION_CLOSE_STATUS_OK;
    }

    public void lastBlockStatus(int statusCode) { }

   
    /**
     * The method actually called by pipe during the conversion. This default implementation uses
     * the convert-method to convert all the input records separately and is usually sufficient so
     * you only have to implement it. If you however need access to all the input when converting
     * (foer example Join-converter) you need to override this.
     */
    public String[] convertAll(String[] data, ConversionInfo info, MessageLogger logger)
            throws FailTransferException, AbortTransferException {
        String[][] allResults = new String[data.length][];
        int resultCount = 0;
        for(int i = 0; i < data.length; i++) {
            allResults[i] = convert(data[i], info, logger);
            resultCount += allResults[i].length;
        }
        if(data.length == 1)
            return allResults[0];
        else {
            String[] combinedResult = new String[resultCount];
            int c = 0;
            for(int i = 0; i < allResults.length; i++) {
                for(int j = 0; j < allResults[i].length; j++, c++) {
                    combinedResult[c] = allResults[i][j];
                }
            }
            return combinedResult;
        }
    }
   
   
   
    // ---

   
   
    public String getScript() {
        String script = this.data.getAttribute(SCRIPT_ATTRIBUTE);
        if(script != null)
            return script;

        return "";
    }

    public void setScript(String script) {
        this.data.setAttribute(SCRIPT_ATTRIBUTE, script);
    }

    public String getRootElementName() {
        String rootElementName = this.data.getAttribute(ROOTELEMENTNAME_ATTRIBUTE);
        if(rootElementName != null)
            return rootElementName;

        return defaultRootElementName;
    }

    public void setRootElementName(String rootElementName) {
        this.data.setAttribute(ROOTELEMENTNAME_ATTRIBUTE, rootElementName);
    }

    public String getXMLHeaderLine() {
        String XMLHeaderLine = this.data.getAttribute(XMLHEADERLINE_ATTRIBUTE);
        if(XMLHeaderLine != null)
            return XMLHeaderLine;

        return "";
    }

    public void setXMLHeaderLine(String XMLHeaderLine) {
        this.data.setAttribute(XMLHEADERLINE_ATTRIBUTE, XMLHeaderLine);
    }

    private class scriptLine {

        /*
         * Store original script line numbers (1-n) here for use in error messages, since the script
         * parser skips all comment lines
         */
        public int lineNumber;

        public String sourceFieldName;
        public String[] destinationFieldNames;
        public String regExp;

        public scriptLine(int lineNumber,
                          String sourceFieldName,
                          String[] destinationFieldNames,
                          String regExp) {
            this.lineNumber = lineNumber;
            this.sourceFieldName = sourceFieldName;
            this.destinationFieldNames = destinationFieldNames;
            this.regExp = regExp;
        }

    }

    private boolean testValidXMLNamesInArray(String[] stringArray, MessageLogger logger) {

        for(int i = 0; i < stringArray.length; i++) {
            if(XMLChar.isValidName(stringArray[i]) == false) {
                logger.logMessage("Invalid XML destination element name \"" + stringArray[i]
                        + "\" detected, aborting", this, MessageLogger.ERROR);
                return false;
            }
        }
        return true;
    }

    private boolean testDestinationFieldNameArray(String[] stringArray, MessageLogger logger) {
        // There should at least one destination element on the list
        if(stringArray.length == 0)
            return false;

        return testValidXMLNamesInArray(stringArray, logger);
    }

    public void processElement(int scriptIndex,
                               int currentBufferStartOffset,
                               String currentBuffer,
                               StringBuffer resultBuffer,
                               scriptLine[] scriptLineArray,
                               MessageLogger logger) throws FailTransferException {



        Pattern pattern = Pattern.compile(scriptLineArray[scriptIndex].regExp);
        Matcher matcher = pattern.matcher(currentBuffer);

        while(matcher.find()) {

            // resultBuffer.append("Debug: Match start: " + matcher.start() + ", Match end: " +
            // matcher.end() + "\n");

            resultBuffer.append("<" + scriptLineArray[scriptIndex].sourceFieldName + ">\n");
            int gCount = matcher.groupCount();

            if(gCount > 0) {

                if(gCount > scriptLineArray[scriptIndex].destinationFieldNames.length) {
                    logger.logMessage(
                        "Regular expression matched " + gCount + " groups at input data offset "
                                + currentBufferStartOffset + matcher.start() + ", but only "
                                + scriptLineArray[scriptIndex].destinationFieldNames.length
                                + " destination element name(s) were specified for "
                                + scriptLineArray[scriptIndex].sourceFieldName + " on line "
                                + scriptLineArray[scriptIndex].lineNumber + ". Aborting.",
                        this,
                        MessageLogger.ERROR);
                    PipeComponentUtils.failTransfer();
                }

                /*
                 * Warn if there are more destination elements than regular expression match groups.
                 * Output warning only once per component execution
                 */
                if(!regexpGroupAmountWarningOutput
                        && gCount < scriptLineArray[scriptIndex].destinationFieldNames.length) {

                    logger.logMessage(
                        "Regular expression matched " + gCount + " group(s) at input data offset "
                                + currentBufferStartOffset + matcher.start() + ", but "
                                + scriptLineArray[scriptIndex].destinationFieldNames.length
                                + " destination element names were specified for "
                                + scriptLineArray[scriptIndex].sourceFieldName + " on line "
                                + scriptLineArray[scriptIndex].lineNumber
                                + ". Suppressing further warnings of this type.",
                        this,
                        MessageLogger.WARNING);

                    regexpGroupAmountWarningOutput = true;
                }


                for(int j = 0; j < gCount; j++) {

                    String fieldValue;
                    int scriptIndexOfNewRegExp = -1;

                    fieldValue = matcher.group(j + 1);
                    String currDestFieldName = scriptLineArray[scriptIndex].destinationFieldNames[j];

                    for(int k = 0; k < scriptLineArray.length; k++) {

                        if((k != scriptIndex)
                                && (scriptLineArray[k].sourceFieldName.compareTo(currDestFieldName) == 0)) {
                            scriptIndexOfNewRegExp = k;

                            processElement(
                                scriptIndexOfNewRegExp,
                                currentBufferStartOffset + matcher.start(j + 1),
                                fieldValue,
                                resultBuffer,
                                scriptLineArray,
                                logger);

                        }

                    }

                    // There weren't any further regexps to process, just output the content as is
                    if(scriptIndexOfNewRegExp == -1)

                    {
                        String outputFieldValue = Utils.filterInvalidXMLCharacters(fieldValue);
                        if(outputFieldValue.length() > 0) {
                            resultBuffer.append("<" + currDestFieldName + ">"
                                + XMLEscape.escape(outputFieldValue) + "</" + currDestFieldName + ">");
                        } else {
                            resultBuffer.append("<" + currDestFieldName + "/>");
                        }
                    }
                }
            }

            resultBuffer.append("</" + scriptLineArray[scriptIndex].sourceFieldName + ">\n");

        }

    }

    public String[] convert(String data, ConversionInfo info, MessageLogger logger)
            throws FailTransferException, AbortTransferException {

        String xmlDeclarationString = getXMLHeaderLine();
        // boolean trimFieldValues = true;

        // Reset warning output status
        regexpGroupAmountWarningOutput = false;

        scriptLine[] scriptLineArray;
        String[] scriptLineStrings = getScript().split(lineSeparatorPattern);

        // Initialize result buffer with XML declaration
        StringBuffer resultBuffer = new StringBuffer(xmlDeclarationString + "\n");

        LinkedList scriptLineList = new LinkedList();


        /* Parse script */

        /*
         * Extract 3 columns separated by white-space character, allow the 3rd column ("regExp") to
         * contain also spaces
         */
        Pattern p = Pattern.compile("(\\S+)?\\s+(\\S+)?\\s+(.+)?");

        for(int i = 0; i < scriptLineStrings.length; i++) {

            String sourceFieldName = "", destinationFieldNameString = "", regExp = "";
           
            // Skip empty lines and comment lines beginning with '#' character
            if(scriptLineStrings[i].length() > 0 &&
                    !(scriptLineStrings[i].startsWith("#") || scriptLineStrings[i].startsWith("\n"))) {
                Matcher m = p.matcher(scriptLineStrings[i]);
                if(m.find()) {
                    int gCount = m.groupCount();

                    if(gCount < 3) {

                        /*
                         * Script line column amount check does not seem to work currently, this
                         * code is never reached.
                         */
                        logger.logMessage(
                            ("Syntax error, line " + (i + 1) + ": less than 3 columns, aborting."),
                            this,
                            MessageLogger.ERROR);
                        PipeComponentUtils.failTransfer();

                    } else {
                        sourceFieldName = m.group(1);
                        if(XMLChar.isValidName(sourceFieldName) == false) {
                            logger.logMessage(
                                "Invalid XML source element name \"" + sourceFieldName
                                        + "\" detected, aborting",
                                this,
                                MessageLogger.ERROR);
                            PipeComponentUtils.failTransfer();
                        }

                        destinationFieldNameString = m.group(2);

                        /*
                         * Check that the destination element names are valid names in XML and that
                         * the element list is not empty
                         */

                        // ToDo: remove duplicate destinationFieldNameString split operation
                        if(testDestinationFieldNameArray(destinationFieldNameString.split(Character
                            .toString(fieldNameSeparatorChar)), logger) == false) {
                            logger.logMessage(
                                "Invalid or missing destination element name(s) on line " + (i + 1)
                                        + ", aborting.",
                                this,
                                MessageLogger.ERROR);
                            PipeComponentUtils.failTransfer();
                        }

                        regExp = m.group(3);
                        scriptLineList.add(new scriptLine(
                            i + 1,
                            sourceFieldName,
                            destinationFieldNameString.split(Character
                                .toString(fieldNameSeparatorChar)),
                            regExp));
                    }
                } else {
                    logger.logMessage(
                        "Invalid syntax at script line " + (i + 1) + ", aborting.",
                        this,
                        MessageLogger.ERROR);
                    PipeComponentUtils.failTransfer();
                }
            }

        }

        scriptLineArray = (scriptLine[]) scriptLineList.toArray(new scriptLine[scriptLineList
            .size()]);

        String tempMessage = "Successfully parsed " + scriptLineArray.length
                + " script command lines";
        if(scriptLineStrings.length > scriptLineArray.length)
            tempMessage = tempMessage + ", skipped "
                    + (scriptLineStrings.length - scriptLineArray.length)
                    + " comment or empty lines";
        logger.logMessage(tempMessage, this, MessageLogger.DEBUG);

        if(XMLChar.isValidName(getRootElementName()) == true) {
            resultBuffer.append("<" + getRootElementName() + ">\n");
        } else {
            logger.logMessage("Invalid XML root element name \"" + getRootElementName()
                    + "\" detected, aborting", this, MessageLogger.ERROR);
            PipeComponentUtils.failTransfer();
        }

        // Start script execution from line 1
        processElement(0, 0, data, resultBuffer, scriptLineArray, logger); // First column on the
                                                                           // first script line
                                                                           // contains root element
                                                                           // name

        resultBuffer.append("</" + getRootElementName() + ">\n");

        return new String[] {resultBuffer.toString()};
    }


} // ASCIItoXMLConverter
TOP

Related Classes of smilehouse.opensyncro.defaultcomponents.converter.ascii.ASCIItoXMLConverter$ASCIItoXMLGUI

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.