// Package de.fuberlin.wiwiss.d2rq.values
//
// Source code of de.fuberlin.wiwiss.d2rq.values.Pattern$ColumnFunction

package de.fuberlin.wiwiss.d2rq.values;

import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;

import de.fuberlin.wiwiss.d2rq.D2RQException;
import de.fuberlin.wiwiss.d2rq.algebra.Attribute;
import de.fuberlin.wiwiss.d2rq.algebra.ColumnRenamer;
import de.fuberlin.wiwiss.d2rq.algebra.OrderSpec;
import de.fuberlin.wiwiss.d2rq.algebra.ProjectionSpec;
import de.fuberlin.wiwiss.d2rq.expr.AttributeExpr;
import de.fuberlin.wiwiss.d2rq.expr.Concatenation;
import de.fuberlin.wiwiss.d2rq.expr.Conjunction;
import de.fuberlin.wiwiss.d2rq.expr.Constant;
import de.fuberlin.wiwiss.d2rq.expr.Equality;
import de.fuberlin.wiwiss.d2rq.expr.Expression;
import de.fuberlin.wiwiss.d2rq.mapgen.IRIEncoder;
import de.fuberlin.wiwiss.d2rq.nodes.NodeSetFilter;
import de.fuberlin.wiwiss.d2rq.sql.ResultRow;
import de.fuberlin.wiwiss.d2rq.sql.SQL;

/**
* A pattern that combines one or more database columns into a String. Often
* used as an UriPattern for generating URIs from a column's primary key.
*
* @author Richard Cyganiak (richard@cyganiak.de)
*/
public class Pattern implements ValueMaker {
  public final static String DELIMITER = "@@";
  private final static java.util.regex.Pattern embeddedColumnRegex =
    java.util.regex.Pattern.compile("@@([^@]+?)(?:\\|(urlencode|urlify|encode))?@@");

  private String pattern;
  private String firstLiteralPart;
  private List<Attribute> columns = new ArrayList<Attribute>(3);
  private List<ColumnFunction> columnFunctions = new ArrayList<ColumnFunction>(3);
  private List<String> literalParts = new ArrayList<String>(3);
  private Set<ProjectionSpec> columnsAsSet;
  private java.util.regex.Pattern regex;
 
  /**
   * Constructs a new Pattern instance from a pattern syntax string
   * @param pattern a pattern syntax string
   * @throws D2RQException on malformed pattern
   */
  public Pattern(String pattern) {
    this.pattern = pattern;
    parsePattern();
    this.columnsAsSet = new HashSet<ProjectionSpec>(this.columns);
  }

  public String firstLiteralPart() {
    return firstLiteralPart;
  }
 
  public String lastLiteralPart() {
    if (literalParts.isEmpty()) {
      return firstLiteralPart;
    }
    return literalParts.get(literalParts.size() - 1);
  }
 
  public boolean literalPartsMatchRegex(String regex) {
    if (!this.firstLiteralPart.matches(regex)) {
      return false;
    }
    for (String literalPart: literalParts) {
      if (!literalPart.matches(regex)) {
        return false;
      }
    }
    return true;
  }
 
  public List<Attribute> attributes() {
    return this.columns;
  }
 
  public void describeSelf(NodeSetFilter c) {
    c.limitValuesToPattern(this);
  }

  public boolean matches(String value) {
    return !valueExpression(value).isFalse();
  }
 
  public Expression valueExpression(String value) {
    if (value == null) {
      return Expression.FALSE;
    }
    Matcher match = this.regex.matcher(value);
    if (!match.matches()) {
      return Expression.FALSE;
    }
    Collection<Expression> expressions = new ArrayList<Expression>(columns.size());
    for (int i = 0; i < this.columns.size(); i++) {
      Attribute attribute = columns.get(i);
      ColumnFunction function = columnFunctions.get(i);
      String attributeValue = function.decode(match.group(i + 1));
      if (attributeValue == null) {
        return Expression.FALSE;
      }
      expressions.add(Equality.createAttributeValue(attribute, attributeValue));
    }
    return Conjunction.create(expressions);
  }

  public Set<ProjectionSpec> projectionSpecs() {
    return this.columnsAsSet;
  }

  /**
   * Constructs a String from the pattern using the given database row.
   * @param row a database row
   * @return the pattern's value for the given row
   */
  public String makeValue(ResultRow row) {
    int index = 0;
    StringBuffer result = new StringBuffer(this.firstLiteralPart);
    while (index < this.columns.size()) {
      Attribute column = columns.get(index);
      ColumnFunction function = columnFunctions.get(index);
      String value = row.get(column);
      if (value == null) {
        return null;
      }
      value = function.encode(value);
      if (value == null) {
        return null;
      }
      result.append(value);
      result.append(this.literalParts.get(index));
      index++;
    }
    return result.toString();
  }
 
  public List<OrderSpec> orderSpecs(boolean ascending) {
    List<OrderSpec> result = new ArrayList<OrderSpec>(columns.size());
    for (Attribute column: columns) {
      result.add(new OrderSpec(new AttributeExpr(column), ascending));
    }
    return result;
  }
 
  public String toString() {
    return "Pattern(" + this.pattern + ")";
  }

  public boolean equals(Object otherObject) {
    if (!(otherObject instanceof Pattern)) {
      return false;
    }
    Pattern other = (Pattern) otherObject;
    return this.pattern.equals(other.pattern);
  }
 
  public int hashCode() {
    return this.pattern.hashCode();
  }
 
  /**
   * @return <code>true</code> if the pattern is identical or differs only in
   * the column names
   */
  public boolean isEquivalentTo(Pattern p) {
    return this.firstLiteralPart.equals(p.firstLiteralPart)
        && this.literalParts.equals(p.literalParts)
        && this.columnFunctions.equals(p.columnFunctions);
  }
 
  public ValueMaker renameAttributes(ColumnRenamer renames) {
    int index = 0;
    StringBuffer newPattern = new StringBuffer(this.firstLiteralPart);
    while (index < this.columns.size()) {
      Attribute column = columns.get(index);
      ColumnFunction function = columnFunctions.get(index);
      newPattern.append(DELIMITER);
      newPattern.append(renames.applyTo(column).qualifiedName());
      if (function.name() != null) {
        newPattern.append("|");
        newPattern.append(function.name());
      }
      newPattern.append(DELIMITER);
      newPattern.append(this.literalParts.get(index));
      index++;
    }
    return new Pattern(newPattern.toString());
  }
 
  private void parsePattern() {
    Matcher match = embeddedColumnRegex.matcher(this.pattern);
    boolean matched = match.find();
    int firstLiteralEnd = matched ? match.start() : this.pattern.length();
    this.firstLiteralPart = this.pattern.substring(0, firstLiteralEnd);
    String regexPattern = "\\Q" + this.firstLiteralPart + "\\E";
    while (matched) {
      this.columns.add(SQL.parseAttribute(match.group(1)));
      this.columnFunctions.add(getColumnFunction(match.group(2)));
      int nextLiteralStart = match.end();
      matched = match.find();
      int nextLiteralEnd = matched ? match.start() : this.pattern.length();
      String nextLiteralPart = this.pattern.substring(nextLiteralStart, nextLiteralEnd);
      this.literalParts.add(nextLiteralPart);
      regexPattern += "(.*?)\\Q" + nextLiteralPart + "\\E";
    }
    this.regex = java.util.regex.Pattern.compile(regexPattern, java.util.regex.Pattern.DOTALL);
  }
 
  public Iterator<Object> partsIterator() {
      return new Iterator<Object>() {
        private int i = 0;
          public boolean hasNext() {
              return i < columns.size() + literalParts.size() + 1;
          }
          public Object next() {
              i++;
              if (i == 1) {
                return firstLiteralPart;
              } else if (i % 2 == 0) {
                return columns.get(i / 2 - 1);
              }
              return literalParts.get(i / 2 - 1);
          }
          public void remove() {
            throw new UnsupportedOperationException();
          }
      };
  }

  // FIXME: This doesn't take column functions other than IDENTITY into account
  // The usesColumnFunctions() method is here to allow detection of this case.
  public Expression toExpression() {
    List<Expression> parts = new ArrayList<Expression>(literalParts.size() * 2 + 1);
    parts.add(new Constant(firstLiteralPart));
    for (int i = 0; i < columns.size(); i++) {
      parts.add(new AttributeExpr(columns.get(i)));
      parts.add(new Constant(literalParts.get(i)));
    }
    return Concatenation.create(parts);
  }
 
  /**
   * @return TRUE if this pattern uses any column function (encode, urlify, etc.)
   */
  public boolean usesColumnFunctions() {
    for (ColumnFunction f: columnFunctions) {
      if (f != IDENTITY) return true;
    }
    return false;
  }
 
  private final static ColumnFunction IDENTITY = new IdentityFunction();
  private final static ColumnFunction URLENCODE = new URLEncodeFunction();
  private final static ColumnFunction ENCODE = new EncodeFunction();
  private final static ColumnFunction URLIFY = new URLifyFunction();
 
  private ColumnFunction getColumnFunction(String functionName) {
    if ("urlencode".equals(functionName)) {
      return URLENCODE;
    }
    if ("urlify".equals(functionName)) {
      return URLIFY;
    }
    if ("encode".equals(functionName)) {
      return ENCODE;
    }
    if ("".equals(functionName) || functionName == null) {
      return IDENTITY;
    }
    // Shouldn't happen
    throw new D2RQException("Unrecognized column function '" + functionName + "'");
  }
 
  private interface ColumnFunction {
    String encode(String s);
    String decode(String s);
    String name();
  }
 
  static class IdentityFunction implements ColumnFunction {
    public String encode(String s) { return s; }
    public String decode(String s) { return s; }
    public String name() { return null; }
  }
 
  static class URLEncodeFunction implements ColumnFunction {
    public String encode(String s) {
      try {
        return URLEncoder.encode(s, "utf-8");
      } catch (UnsupportedEncodingException ex) {
        // Can't happen, UTF-8 is always supported
        throw new RuntimeException(ex);
      }
    }
    public String decode(String s) {
      try {
        return URLDecoder.decode(s, "utf-8");
      } catch (UnsupportedEncodingException ex) {
        // Can't happen, UTF-8 is always supported
        throw new RuntimeException(ex);
      } catch (IllegalArgumentException ex) {
        // Broken encoding
        return null;
      }
    }
    public String name() { return "urlencode"; }
  }
  static class URLifyFunction implements ColumnFunction {
    public String encode(String s) {
      try {
        return URLEncoder.encode(s, "utf-8")
            .replaceAll("_", "%5F").replace('+', '_');
      } catch (UnsupportedEncodingException ex) {
        // Can't happen, UTF-8 is always supported
        throw new RuntimeException(ex);
      }
    }
    public String decode(String s) {
      try {
        return URLDecoder.decode(s.replace('_', '+'), "utf-8");
      } catch (UnsupportedEncodingException ex) {
        // Can't happen, UTF-8 is always supported
        throw new RuntimeException(ex);
      } catch (IllegalArgumentException ex) {
        // Broken encoding
        return null;
      }
    }
    public String name() { return "urlify"; }
  }
  public static class EncodeFunction implements ColumnFunction {
    public String encode(String s) {
      return IRIEncoder.encode(s);
    }
    public String decode(String s) {
      try {
        return URLDecoder.decode(s.replaceAll("%20", "+"), "utf-8");
      } catch (UnsupportedEncodingException ex) {
        // Can't happen, UTF-8 is always supported
        throw new RuntimeException(ex);
      } catch (IllegalArgumentException ex) {
        // Broken encoding
        return null;
      }
    }
    public String name() { return "encode"; }
  }
}
// TOP
//
// Related Classes of de.fuberlin.wiwiss.d2rq.values.Pattern$ColumnFunction
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.