Package org.eobjects.datacleaner.regexparser

Source Code of org.eobjects.datacleaner.regexparser.RegexParserTransformer

/**
* eobjects.org DataCleaner
* Copyright (C) 2010 eobjects.org
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA  02110-1301  USA
*/
package org.eobjects.datacleaner.regexparser;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.eobjects.analyzer.beans.api.Categorized;
import org.eobjects.analyzer.beans.api.Configured;
import org.eobjects.analyzer.beans.api.Description;
import org.eobjects.analyzer.beans.api.OutputColumns;
import org.eobjects.analyzer.beans.api.Transformer;
import org.eobjects.analyzer.beans.api.TransformerBean;
import org.eobjects.analyzer.beans.categories.MatchingAndStandardizationCategory;
import org.eobjects.analyzer.beans.categories.ScriptingCategory;
import org.eobjects.analyzer.beans.categories.StringManipulationCategory;
import org.eobjects.analyzer.data.InputColumn;
import org.eobjects.analyzer.data.InputRow;

@TransformerBean("Regex parser")
@Description("Parses strings using a regular expression and transforms it into substrings based on regex groups")
@Categorized({ StringManipulationCategory.class, ScriptingCategory.class, MatchingAndStandardizationCategory.class })
public class RegexParserTransformer implements Transformer<String> {

  @Configured
  InputColumn<String> column;

  @Configured
  @Description("A regular expression containing\ngroup tokens, marked by parantheses.\n\nFor example:\n([a-z]+)_(\\d*)")
  Pattern pattern;

  @Override
  public OutputColumns getOutputColumns() {
    String[] columns = new String[pattern.matcher("").groupCount()];
    for (int i = 0; i < columns.length; i++) {
      columns[i] = column.getName() + " (group " + (i + 1) + ")";
    }
    return new OutputColumns(column.getName() + " (matched part)", columns);
  }

  @Override
  public String[] transform(InputRow inputRow) {
    final Matcher matcher = pattern.matcher("");
    final String value = inputRow.getValue(column);
    final boolean match = value != null && matcher.reset(value).matches();

    String[] result = new String[matcher.groupCount() + 1];
    for (int i = 0; i < result.length; i++) {
      result[i] = match ? matcher.group(i) : null;
    }
    return result;
  }
}
TOP

Related Classes of org.eobjects.datacleaner.regexparser.RegexParserTransformer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.