Package de.sub.goobi.metadaten.copier

Source Code of de.sub.goobi.metadaten.copier.MetadataPathSelector

/**
* This file is part of the Goobi Application - a Workflow tool for the support
* of mass digitization.
*
* (c) 2014 Goobi. Digitalisieren im Verein e.V. <contact@goobi.org>
*
* Visit the websites for more information.
*         - http://www.goobi.org/en/
*         - https://github.com/goobi
*
* This program is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 59 Temple
* Place, Suite 330, Boston, MA 02111-1307 USA
*
* Linking this library statically or dynamically with other modules is making a
* combined work based on this library. Thus, the terms and conditions of the
* GNU General Public License cover the whole combination. As a special
* exception, the copyright holders of this library give you permission to link
* this library with independent modules to produce an executable, regardless of
* the license terms of these independent modules, and to copy and distribute
* the resulting executable under terms of your choice, provided that you also
* meet, for each linked independent module, the terms and conditions of the
* license of that module. An independent module is a module which is not
* derived from or based on this library. If you modify this library, you may
* extend this exception to your version of the library, but you are not obliged
* to do so. If you do not wish to do so, delete this exception statement from
* your version.
*/
package de.sub.goobi.metadaten.copier;

import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.configuration.ConfigurationException;
import org.apache.log4j.Logger;

import ugh.dl.DocStruct;
import ugh.dl.DocStructType;
import ugh.exceptions.TypeNotAllowedAsChildException;
import ugh.exceptions.TypeNotAllowedForParentException;

import com.sharkysoft.util.UnreachableCodeException;

/**
* A MetadataPathSelector provides methods to retrieve or modify document
* structure nodes on a document structure node.
*
* @author Matthias Ronge <matthias.ronge@zeutschel.de>
*/
public class MetadataPathSelector extends MetadataSelector {
  /**
   * Symbol meaning that all indices are to be matched.
   */
  private static final String ALL_CHILDREN_SYMBOL = "*";

  /**
   * Symbol meaning that any metadata types are to be matched.
   */
  private static final String ANY_METADATA_TYPE_SYMBOL = "*";

  @SuppressWarnings("javadoc")
  private static final Logger LOG = Logger.getLogger(MetadataPathSelector.class);
  /**
   * The constant METADATA_SPLIT_PATH_SCHEME holds a regular expression used
   * to extract the first metadata path segment.
   */
  private static final Pattern METADATA_SPLIT_PATH_SCHEME = Pattern.compile("^" + METADATA_PATH_SEPARATOR + "([^"
      + METADATA_PATH_SEPARATOR + METADATA_SEPARATOR + "]+)");

  /**
   * The constant SEGMENT_WITH_ELEMENT_SELELCTOR_SCHEME holds a regular
   * expression used to detect and extract a quantifier expression at the end
   * of the string.
   */
  private static final Pattern SEGMENT_WITH_ELEMENT_SELELCTOR_SCHEME = Pattern.compile("(.*?)\\[(.+?)\\]");

  /**
   * DocStructType name of the structure element to look for or create. "*"
   * may be used to look up "any element", but will not work if an element
   * needs to be constructed at this level.
   */
  private final String docStructType;

  /**
   * Integer of the element referenced, where Integer.MAX_VALUE indicates the
   * "last" element, or null if none.
   */
  private final Object index;

  /**
   * A metadata selector resolving the subsequent path
   */
  private final MetadataSelector selector;

  /**
   * Creates a new MetadataPathSelector.
   *
   * @param path
   *            path to create sub-selector, passed to {
   *            {@link #create(String)}.
   * @throws ConfigurationException
   *             if the path is invalid
   */

  public MetadataPathSelector(String path) throws ConfigurationException {
    String pathSegment = matchCurrentPathSegment(path);
    Matcher pathSelectorHasElementSelector = SEGMENT_WITH_ELEMENT_SELELCTOR_SCHEME.matcher(pathSegment);
    if (pathSelectorHasElementSelector.matches()) {
      docStructType = pathSelectorHasElementSelector.group(1);
      String indexSymbol = pathSelectorHasElementSelector.group(2);
      try {
        index = getIndexValue(indexSymbol);
        if (index instanceof Integer && ((Integer) index).intValue() < 0) {
          throw new ConfigurationException("Negative element count is not allowed, in path: " + path);
        }
      } catch (NumberFormatException e) {
        throw new ConfigurationException("Cannot create metadata path selector: " + e.getMessage(), e);
      }
    } else {
      docStructType = pathSegment;
      index = null;
    }
    selector = super.create(path.substring(pathSegment.length() + 1));
  }

  /**
   * Creates a new metadata path selector as specified by the arguments
   * passed.
   *
   * @param docStructType
   *            docStructType name to match
   * @param index
   *            index to match
   * @param selector
   *            selector for the subsequent path
   */
  private MetadataPathSelector(String docStructType, int index, MetadataSelector selector) {
    this.docStructType = docStructType;
    this.index = Integer.valueOf(index);
    this.selector = selector;
  }

  /**
   * Creates a metadatum with the given value if the full path is applied and
   * no such metadatum is already available under at the path. Leaves the
   * document structure element unchanged if such a metadatum already exists.
   * This works recursively, by calling itself on the subnode, if found, or
   * returning null otherwise. Metadata creation is, by definition, always
   * done in a {@link LocalMetadataSelector}.
   *
   * @param data
   *            data to work on
   * @param logicalNode
   *            document structure node to start from, intended for recursion
   * @param value
   *            value to write if no metadatum is available at the path’s end
   * @see de.sub.goobi.metadaten.copier.MetadataSelector#createIfPathExistsOnly(CopierData,
   *      DocStruct, String)
   */
  @Override
  protected void createIfPathExistsOnly(CopierData data, DocStruct logicalNode, String value) {
    DocStruct subnode = getSubnode(logicalNode);
    if (subnode == null) {
      return;
    }
    selector.createIfPathExistsOnly(data, subnode, value);
  }

  /**
   * Sets the metadatum identified by the given path if available, otherwise
   * creates the path and metadatum. This works recursively. Metadata creation
   * is done in a {@link LocalMetadataSelector}.
   *
   * @param data
   *            data to work on
   * @param logicalNode
   *            document structure node to start from, intended for recursion
   * @param value
   *            value to write
   * @see de.sub.goobi.metadaten.copier.MetadataSelector#createOrOverwrite(CopierData,
   *      DocStruct, String)
   */
  @Override
  protected void createOrOverwrite(CopierData data, DocStruct logicalNode, String value) {
    DocStruct subnode = getSubnode(logicalNode);
    if (subnode == null) {
      try {
        // TODO: after merge of newspaper module the following three
        //       lines can be subsumed as:
        // subnode = logicalNode.createChild(docStructType, data.getDigitalDocument(), data.getPreferences());
        DocStructType dsType = data.getPreferences().getDocStrctTypeByName(docStructType);
        subnode = data.getDigitalDocument().createDocStruct(dsType);
        logicalNode.addChild(subnode);
      } catch (TypeNotAllowedAsChildException e) {
        // copy rules aren’t related to the rule set but depend on it,
        // so copy rules that don’t work with the current rule set are
        // ignored
        LOG.debug("Cannot create structural element " + docStructType + " as child of "
            + (logicalNode.getType() != null ? logicalNode.getType().getName() : "without type")
            + " because it isn’t allowed by the rule set.");
      } catch (TypeNotAllowedForParentException e) {
        throw new UnreachableCodeException("TypeNotAllowedForParentException is never thrown"); // see https://github.com/goobi/goobi-ugh/issues/2
      } catch (Exception e) {
        // copy rule failed, skip it
        LOG.debug(
            "Cannot create structural element " + docStructType + " as child of "
                + (logicalNode.getType() != null ? logicalNode.getType().getName() : "without type")
                + ": Accessing the rule set failed with exception: "
                + (e.getMessage() != null ? e.getMessage() : e.getClass().getSimpleName()), e);
        return;
      }
    }
    selector.createOrOverwrite(data, subnode, value);
  }

  /**
   * The function findAll() returns all concrete metadata selectors the
   * potentially generic metadata selector expression resolves to.
   *
   * @param logicalNode
   *            Node of the logical document structure to work on
   * @return all metadata selectors the expression resolves to
   *
   * @see de.sub.goobi.metadaten.copier.MetadataSelector#findAll(ugh.dl.DocStruct)
   */
  @Override
  protected Iterable<MetadataSelector> findAll(DocStruct logicalNode) {
    LinkedList<MetadataSelector> result = new LinkedList<MetadataSelector>();
    List<DocStruct> children = logicalNode.getAllChildren();
    if (children == null) {
      children = Collections.emptyList();
    }
    int lastChild = children.size() - 1;
    int count = 0;
    for (DocStruct child : children) {
      if (typeCheck(child) && indexCheck(count, lastChild)) {
        for (MetadataSelector cms : selector.findAll(child)) {
          result.add(new MetadataPathSelector(ANY_METADATA_TYPE_SYMBOL, count, cms));
        }
      }
      count++;
    }
    return result;
  }

  /**
   * Returns the value of the metadatum named by the path used to construct
   * the metadata selector, or null if either the path or the metadatum at the
   * end of the path aren’t available. This works recursively, by calling
   * itself on the subnode, if found, or returning null otherwise.
   *
   * @see de.sub.goobi.metadaten.copier.MetadataSelector#findIn(ugh.dl.DocStruct)
   */
  @Override
  protected String findIn(DocStruct supernode) {
    DocStruct subnode = getSubnode(supernode);
    if (subnode == null) {
      return null;
    } else {
      return selector.findIn(subnode);
    }
  }

  /**
   * Returns the numeric index of the metadata selector, if any. If no index
   * is specified ({@code null}), or generically refers to all or the last
   * element, {@code -1} is returned.
   *
   * @return the index number of the metadata selector
   */
  public int getIndex() {
    if (index != null && index instanceof Integer) {
      int a = ((Integer) index).intValue();
      if (a < Integer.MAX_VALUE) {
        return a;
      }
    }
    return -1;
  }

  /**
   * The function getIndexValue() returns the numerical value represented by
   * the symbolic (String) representation passed in. Since the method is
   * called from the constructor it must not be overridden in subclasses.
   *
   * @param indexSymbol
   *            an integer value or ">" to refer to Integer.MAX_VALUE
   * @return the integer value of the string, or Integer.MAX_VALUE for the
   *         symbol ">".
   */
  private final Object getIndexValue(String indexSymbol) {
    try {
      return Integer.valueOf(indexSymbol);
    } catch (NumberFormatException cannotParseInt) {
      if (LAST_CHILD_QUANTIFIER.equals(indexSymbol)) {
        return Integer.MAX_VALUE;
      } else {
        return indexSymbol;
      }
    }
  }

  /**
   * Returns the selector for the rest of the expression.
   *
   * @return the subsequent selector
   */
  public MetadataSelector getSelector() {
    return selector;
  }

  /**
   * Returns the subnode identified by the path segment this metadata path
   * selector is responsible for. Returns null if no such node can be found.
   *
   * @param logicalNode
   *            document structure node to retrieve the subnode from
   * @return the subnode in question
   * @throws RuntimeException
   *             if there is more than one element matching but no index was
   *             given to chose among them
   */
  private DocStruct getSubnode(DocStruct logicalNode) {
    List<DocStruct> children = logicalNode.getAllChildrenByTypeAndMetadataType(docStructType,
        ANY_METADATA_TYPE_SYMBOL);
    if (children == null) {
      children = Collections.emptyList();
    }
    switch (children.size()) {
    case 0:
      return null;
    case 1:
      if (index == null || index.equals(0) || index.equals(Integer.MAX_VALUE)) {
        return children.get(0);
      }
    default:
      if (index == null) {
        throw new RuntimeException("Could not resolve metadata path: Path selector is ambiguous for "
            + docStructType);
      } else {
        if (!(index instanceof Integer)) {
          throw new RuntimeException("Could not resolve metadata path: In this regard, index \"" + index
              + "\" is not allowed.");
        } else {
          if (index.equals(Long.MAX_VALUE)) {
            return children.get(children.size() - 1);
          }
          if (children.size() >= ((Integer) index).intValue()) {
            return children.get(((Integer) index).intValue());
          } else {
            return null;
          }
        }
      }
    }
  }

  /**
   * The function indexCheck() calculates whether the given child’s index is
   * to be matched by this metadata path selector. A child index is to match
   * if
   * <ul>
   * <li>the metadata path selector doesn’t specify an index and the index of
   * the last child is equal to {@code 0},</li>
   * <li>the metadata path selector specifies all children,</li>
   * <li>the metadata path selector exactly points to the given index, or</li>
   * <li>generically to the last element, and the given index is the last
   * index.</li>
   * </ul>
   *
   * @param childIndex
   *            index to check
   * @param lastChildIndex
   *            last available index
   * @return whether the index is to be matched
   */
  private boolean indexCheck(int childIndex, int lastChildIndex) {
    if (index == null && lastChildIndex == 0 || ALL_CHILDREN_SYMBOL.equals(index)) {
      return true;
    }
    int comparee = ((Integer) index).intValue();
    if (childIndex == comparee || (comparee == Integer.MAX_VALUE && childIndex == lastChildIndex)) {
      return true;
    }
    throw new RuntimeException("Could not resolve metadata path: Path selector is ambiguous for " + docStructType);
  }

  /**
   * The function matchCurrentPathSegment() returns the path segment this
   * metadata path selector is responsible to represent. Since the method is
   * called from the constructor it must not be overridden in subclasses.
   *
   * @param path
   *            path expression to parse
   * @return the path segment for this selector
   * @throws ConfigurationException
   *             if the path cannot be parsed
   */
  private final String matchCurrentPathSegment(String path) throws ConfigurationException {
    Matcher metadataPathSplitter = METADATA_SPLIT_PATH_SCHEME.matcher(path);
    if (!metadataPathSplitter.find()) {
      throw new ConfigurationException(
          "Cannot create metadata path selector: Path must contain path segment, but is: " + path);
    }
    return metadataPathSplitter.group(1);
  }

  /**
   * Returns a string that textually represents this MetadataPathSelector.
   *
   * @return a string representation of this MetadataPathSelector
   * @see java.lang.Object#toString()
   */
  @Override
  public String toString() {
    StringBuilder result = new StringBuilder(40);
    result.append(METADATA_PATH_SEPARATOR);
    result.append(docStructType);
    if (index != null) {
      result.append('[');
      result.append(index.equals(Integer.MAX_VALUE) ? LAST_CHILD_QUANTIFIER : index.toString());
      result.append(']');
    }
    result.append(selector);
    return result.toString();
  }

  /**
   * The function typeCheck() calculates whether the given child is to be
   * matched by type name by this metadata path selector. A child is to match
   * if
   * <ul>
   * <li>the metadata path selector specifies all children, or</li>
   * <li>the metadata path selector specifies exactly the type of the child.</li>
   * </ul>
   *
   * @param child
   *            child whose type shall be checked
   * @return whether the child type is to be matched
   */
  private boolean typeCheck(DocStruct child) {
    return ANY_METADATA_TYPE_SYMBOL.equals(docStructType) || docStructType.equals(child.getType().getName());
  }
}
TOP

Related Classes of de.sub.goobi.metadaten.copier.MetadataPathSelector

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.