/**
* This file is part of the Goobi Application - a Workflow tool for the support
* of mass digitization.
*
* (c) 2014 Goobi. Digitalisieren im Verein e.V. <contact@goobi.org>
*
* Visit the websites for more information.
* - http://www.goobi.org/en/
* - https://github.com/goobi
*
* This program is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 59 Temple
* Place, Suite 330, Boston, MA 02111-1307 USA
*
* Linking this library statically or dynamically with other modules is making a
* combined work based on this library. Thus, the terms and conditions of the
* GNU General Public License cover the whole combination. As a special
* exception, the copyright holders of this library give you permission to link
* this library with independent modules to produce an executable, regardless of
* the license terms of these independent modules, and to copy and distribute
* the resulting executable under terms of your choice, provided that you also
* meet, for each linked independent module, the terms and conditions of the
* license of that module. An independent module is a module which is not
* derived from or based on this library. If you modify this library, you may
* extend this exception to your version of the library, but you are not obliged
* to do so. If you do not wish to do so, delete this exception statement from
* your version.
*/
package de.sub.goobi.metadaten.copier;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.log4j.Logger;
import ugh.dl.DocStruct;
import ugh.dl.DocStructType;
import ugh.exceptions.TypeNotAllowedAsChildException;
import ugh.exceptions.TypeNotAllowedForParentException;
import com.sharkysoft.util.UnreachableCodeException;
/**
 * A MetadataPathSelector represents one segment of a metadata path, that is
 * the name of a document structure type with an optional element index, and
 * delegates the remainder of the path to a subsequent
 * {@link MetadataSelector}. It provides methods to retrieve or modify
 * document structure nodes on a document structure node.
 *
 * @author Matthias Ronge <matthias.ronge@zeutschel.de>
 */
public class MetadataPathSelector extends MetadataSelector {
    /**
     * Symbol meaning that all indices are to be matched.
     */
    private static final String ALL_CHILDREN_SYMBOL = "*";

    /**
     * Symbol meaning that any metadata types are to be matched.
     */
    private static final String ANY_METADATA_TYPE_SYMBOL = "*";

    @SuppressWarnings("javadoc")
    private static final Logger LOG = Logger.getLogger(MetadataPathSelector.class);

    /**
     * The constant METADATA_SPLIT_PATH_SCHEME holds a regular expression used
     * to extract the first metadata path segment.
     */
    private static final Pattern METADATA_SPLIT_PATH_SCHEME = Pattern.compile("^" + METADATA_PATH_SEPARATOR + "([^"
            + METADATA_PATH_SEPARATOR + METADATA_SEPARATOR + "]+)");

    /**
     * The constant SEGMENT_WITH_ELEMENT_SELELCTOR_SCHEME holds a regular
     * expression used to detect and extract a quantifier expression at the end
     * of the string.
     */
    private static final Pattern SEGMENT_WITH_ELEMENT_SELELCTOR_SCHEME = Pattern.compile("(.*?)\\[(.+?)\\]");

    /**
     * DocStructType name of the structure element to look for or create. "*"
     * may be used to look up "any element", but will not work if an element
     * needs to be constructed at this level.
     */
    private final String docStructType;

    /**
     * Index of the element referenced. This is either an {@link Integer},
     * where Integer.MAX_VALUE indicates the "last" element, or a
     * {@link String} holding a symbolic index that could not be parsed as a
     * number (such as the all-children symbol), or null if no index was
     * given.
     */
    private final Object index;

    /**
     * A metadata selector resolving the subsequent path
     */
    private final MetadataSelector selector;

    /**
     * Creates a new MetadataPathSelector.
     *
     * @param path
     *            path to create sub-selector, passed to
     *            {@link #create(String)}.
     * @throws ConfigurationException
     *             if the path is invalid
     */
    public MetadataPathSelector(String path) throws ConfigurationException {
        String pathSegment = matchCurrentPathSegment(path);
        Matcher elementSelector = SEGMENT_WITH_ELEMENT_SELELCTOR_SCHEME.matcher(pathSegment);
        if (elementSelector.matches()) {
            docStructType = elementSelector.group(1);
            // getIndexValue() never throws: anything that is not a number is
            // returned as symbolic String
            index = getIndexValue(elementSelector.group(2));
            if (index instanceof Integer && ((Integer) index).intValue() < 0) {
                throw new ConfigurationException("Negative element count is not allowed, in path: " + path);
            }
        } else {
            docStructType = pathSegment;
            index = null;
        }
        // skip the leading path separator (presumably a single character,
        // hence the "+ 1") and the segment handled by this selector
        selector = super.create(path.substring(pathSegment.length() + 1));
    }

    /**
     * Creates a new metadata path selector as specified by the arguments
     * passed.
     *
     * @param docStructType
     *            docStructType name to match
     * @param index
     *            index to match
     * @param selector
     *            selector for the subsequent path
     */
    private MetadataPathSelector(String docStructType, int index, MetadataSelector selector) {
        this.docStructType = docStructType;
        this.index = Integer.valueOf(index);
        this.selector = selector;
    }

    /**
     * Creates a metadatum with the given value if the full path is applied and
     * no such metadatum is already available under at the path. Leaves the
     * document structure element unchanged if such a metadatum already exists.
     * This works recursively, by calling the subsequent selector on the
     * subnode, if found, or doing nothing otherwise. Metadata creation is, by
     * definition, always done in a {@link LocalMetadataSelector}.
     *
     * @param data
     *            data to work on
     * @param logicalNode
     *            document structure node to start from, intended for recursion
     * @param value
     *            value to write if no metadatum is available at the path’s end
     * @see de.sub.goobi.metadaten.copier.MetadataSelector#createIfPathExistsOnly(CopierData,
     *      DocStruct, String)
     */
    @Override
    protected void createIfPathExistsOnly(CopierData data, DocStruct logicalNode, String value) {
        DocStruct subnode = getSubnode(logicalNode);
        if (subnode != null) {
            selector.createIfPathExistsOnly(data, subnode, value);
        }
    }

    /**
     * Sets the metadatum identified by the given path if available, otherwise
     * creates the path and metadatum. This works recursively. Metadata creation
     * is done in a {@link LocalMetadataSelector}. If the missing structural
     * element cannot be created (because the rule set does not allow it or
     * cannot be accessed), the failure is logged on debug level and nothing
     * is written.
     *
     * @param data
     *            data to work on
     * @param logicalNode
     *            document structure node to start from, intended for recursion
     * @param value
     *            value to write
     * @see de.sub.goobi.metadaten.copier.MetadataSelector#createOrOverwrite(CopierData,
     *      DocStruct, String)
     */
    @Override
    protected void createOrOverwrite(CopierData data, DocStruct logicalNode, String value) {
        DocStruct subnode = getSubnode(logicalNode);
        if (subnode == null) {
            try {
                // TODO: after merge of newspaper module the following three
                // lines can be subsumed as:
                // subnode = logicalNode.createChild(docStructType, data.getDigitalDocument(), data.getPreferences());
                DocStructType dsType = data.getPreferences().getDocStrctTypeByName(docStructType);
                subnode = data.getDigitalDocument().createDocStruct(dsType);
                logicalNode.addChild(subnode);
            } catch (TypeNotAllowedAsChildException e) {
                // copy rules aren’t related to the rule set but depend on it,
                // so copy rules that don’t work with the current rule set are
                // ignored
                LOG.debug("Cannot create structural element " + docStructType + " as child of "
                        + typeNameOf(logicalNode) + " because it isn’t allowed by the rule set.");
                // the new node was created but could not be attached, so do
                // not continue writing into a detached node
                return;
            } catch (TypeNotAllowedForParentException e) {
                throw new UnreachableCodeException("TypeNotAllowedForParentException is never thrown"); // see https://github.com/goobi/goobi-ugh/issues/2
            } catch (Exception e) {
                // copy rule failed, skip it
                LOG.debug(
                        "Cannot create structural element " + docStructType + " as child of "
                                + typeNameOf(logicalNode)
                                + ": Accessing the rule set failed with exception: "
                                + (e.getMessage() != null ? e.getMessage() : e.getClass().getSimpleName()), e);
                return;
            }
        }
        selector.createOrOverwrite(data, subnode, value);
    }

    /**
     * Returns the type name of the given node for use in log messages, or
     * the text "without type" if the node has no type.
     *
     * @param node
     *            node whose type name shall be returned
     * @return the type name, or "without type"
     */
    private String typeNameOf(DocStruct node) {
        return node.getType() != null ? node.getType().getName() : "without type";
    }

    /**
     * The function findAll() returns all concrete metadata selectors the
     * potentially generic metadata selector expression resolves to. Each
     * result addresses a child by its position among all children of the
     * given node, combined with the any-type symbol.
     *
     * @param logicalNode
     *            Node of the logical document structure to work on
     * @return all metadata selectors the expression resolves to
     * @throws RuntimeException
     *             if the index of this selector cannot be applied, see
     *             {@link #indexCheck(int, int)}
     *
     * @see de.sub.goobi.metadaten.copier.MetadataSelector#findAll(ugh.dl.DocStruct)
     */
    @Override
    protected Iterable<MetadataSelector> findAll(DocStruct logicalNode) {
        List<MetadataSelector> result = new LinkedList<MetadataSelector>();
        List<DocStruct> children = logicalNode.getAllChildren();
        if (children == null) {
            return result;
        }
        int lastChild = children.size() - 1;
        int position = 0;
        for (DocStruct child : children) {
            if (typeCheck(child) && indexCheck(position, lastChild)) {
                for (MetadataSelector concreteSelector : selector.findAll(child)) {
                    result.add(new MetadataPathSelector(ANY_METADATA_TYPE_SYMBOL, position, concreteSelector));
                }
            }
            position++;
        }
        return result;
    }

    /**
     * Returns the value of the metadatum named by the path used to construct
     * the metadata selector, or null if either the path or the metadatum at the
     * end of the path aren’t available. This works recursively, by calling
     * the subsequent selector on the subnode, if found, or returning null
     * otherwise.
     *
     * @param supernode
     *            document structure node to start from
     * @return the value found, or null
     * @see de.sub.goobi.metadaten.copier.MetadataSelector#findIn(ugh.dl.DocStruct)
     */
    @Override
    protected String findIn(DocStruct supernode) {
        DocStruct subnode = getSubnode(supernode);
        return subnode != null ? selector.findIn(subnode) : null;
    }

    /**
     * Returns the numeric index of the metadata selector, if any. If no index
     * is specified ({@code null}), or generically refers to all or the last
     * element, {@code -1} is returned.
     *
     * @return the index number of the metadata selector
     */
    public int getIndex() {
        // instanceof is false for null, so no separate null check is needed
        if (index instanceof Integer) {
            int value = ((Integer) index).intValue();
            if (value < Integer.MAX_VALUE) {
                return value;
            }
        }
        return -1;
    }

    /**
     * The function getIndexValue() returns the value represented by the
     * symbolic (String) representation passed in. Since the method is called
     * from the constructor it must not be overridden in subclasses, which is
     * ensured by it being private.
     *
     * @param indexSymbol
     *            an integer value or the last child quantifier to refer to
     *            Integer.MAX_VALUE
     * @return the integer value of the string, Integer.MAX_VALUE for the last
     *         child quantifier, or the unchanged string if it is neither
     */
    private Object getIndexValue(String indexSymbol) {
        try {
            return Integer.valueOf(indexSymbol);
        } catch (NumberFormatException cannotParseInt) {
            if (LAST_CHILD_QUANTIFIER.equals(indexSymbol)) {
                return Integer.valueOf(Integer.MAX_VALUE);
            }
            // keep symbolic indices (e.g. the all-children symbol) as String
            return indexSymbol;
        }
    }

    /**
     * Returns the selector for the rest of the expression.
     *
     * @return the subsequent selector
     */
    public MetadataSelector getSelector() {
        return selector;
    }

    /**
     * Returns the subnode identified by the path segment this metadata path
     * selector is responsible for. Returns null if no such node can be found,
     * which includes the case that a numeric index lies beyond the last
     * matching child.
     *
     * @param logicalNode
     *            document structure node to retrieve the subnode from
     * @return the subnode in question
     * @throws RuntimeException
     *             if there is more than one element matching but no index was
     *             given to chose among them, or if the index is a symbol that
     *             cannot be used to select a single element
     */
    private DocStruct getSubnode(DocStruct logicalNode) {
        List<DocStruct> children = logicalNode.getAllChildrenByTypeAndMetadataType(docStructType,
            ANY_METADATA_TYPE_SYMBOL);
        if (children == null || children.isEmpty()) {
            return null;
        }
        if (index == null) {
            if (children.size() == 1) {
                return children.get(0);
            }
            throw new RuntimeException("Could not resolve metadata path: Path selector is ambiguous for "
                    + docStructType);
        }
        if (!(index instanceof Integer)) {
            throw new RuntimeException("Could not resolve metadata path: In this regard, index \"" + index
                    + "\" is not allowed.");
        }
        int position = ((Integer) index).intValue();
        if (position == Integer.MAX_VALUE) {
            // "last element" quantifier
            return children.get(children.size() - 1);
        }
        // valid positions are 0 .. size - 1
        return position < children.size() ? children.get(position) : null;
    }

    /**
     * The function indexCheck() calculates whether the given child’s index is
     * to be matched by this metadata path selector. A child index is to match
     * if
     * <ul>
     * <li>the metadata path selector doesn’t specify an index and the index of
     * the last child is equal to {@code 0},</li>
     * <li>the metadata path selector specifies all children,</li>
     * <li>the metadata path selector exactly points to the given index, or</li>
     * <li>generically to the last element, and the given index is the last
     * index.</li>
     * </ul>
     *
     * @param childIndex
     *            index to check
     * @param lastChildIndex
     *            last available index
     * @return whether the index is to be matched
     * @throws RuntimeException
     *             if no index was given although there is more than one
     *             child, or if the index is a symbol other than the
     *             all-children symbol
     */
    private boolean indexCheck(int childIndex, int lastChildIndex) {
        if (index == null) {
            if (lastChildIndex == 0) {
                return true;
            }
            throw new RuntimeException("Could not resolve metadata path: Path selector is ambiguous for "
                    + docStructType);
        }
        if (ALL_CHILDREN_SYMBOL.equals(index)) {
            return true;
        }
        if (!(index instanceof Integer)) {
            throw new RuntimeException("Could not resolve metadata path: In this regard, index \"" + index
                    + "\" is not allowed.");
        }
        int comparee = ((Integer) index).intValue();
        return childIndex == comparee || (comparee == Integer.MAX_VALUE && childIndex == lastChildIndex);
    }

    /**
     * The function matchCurrentPathSegment() returns the path segment this
     * metadata path selector is responsible to represent. Since the method is
     * called from the constructor it must not be overridden in subclasses,
     * which is ensured by it being private.
     *
     * @param path
     *            path expression to parse
     * @return the path segment for this selector
     * @throws ConfigurationException
     *             if the path cannot be parsed
     */
    private String matchCurrentPathSegment(String path) throws ConfigurationException {
        Matcher metadataPathSplitter = METADATA_SPLIT_PATH_SCHEME.matcher(path);
        if (!metadataPathSplitter.find()) {
            throw new ConfigurationException(
                    "Cannot create metadata path selector: Path must contain path segment, but is: " + path);
        }
        return metadataPathSplitter.group(1);
    }

    /**
     * Returns a string that textually represents this MetadataPathSelector.
     *
     * @return a string representation of this MetadataPathSelector
     * @see java.lang.Object#toString()
     */
    @Override
    public String toString() {
        StringBuilder result = new StringBuilder(40);
        result.append(METADATA_PATH_SEPARATOR);
        result.append(docStructType);
        if (index != null) {
            result.append('[');
            result.append(index.equals(Integer.MAX_VALUE) ? LAST_CHILD_QUANTIFIER : index.toString());
            result.append(']');
        }
        result.append(selector);
        return result.toString();
    }

    /**
     * The function typeCheck() calculates whether the given child is to be
     * matched by type name by this metadata path selector. A child is to match
     * if
     * <ul>
     * <li>the metadata path selector specifies all children, or</li>
     * <li>the metadata path selector specifies exactly the type of the child.</li>
     * </ul>
     * A child without a type is matched only if all children are specified.
     *
     * @param child
     *            child whose type shall be checked
     * @return whether the child type is to be matched
     */
    private boolean typeCheck(DocStruct child) {
        if (ANY_METADATA_TYPE_SYMBOL.equals(docStructType)) {
            return true;
        }
        DocStructType childType = child.getType();
        return childType != null && docStructType.equals(childType.getName());
    }
}