Package org.apache.jena.riot.checker

Source Code of org.apache.jena.riot.checker.CheckerLiterals

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.jena.riot.checker;

import java.util.regex.Pattern ;

import org.apache.jena.riot.SysRIOT ;
import org.apache.jena.riot.system.ErrorHandler ;
import org.apache.xerces.impl.dv.InvalidDatatypeValueException ;
import org.apache.xerces.impl.dv.ValidatedInfo ;
import org.apache.xerces.impl.dv.ValidationContext ;
import org.apache.xerces.impl.dv.XSSimpleType ;
import org.apache.xerces.impl.validation.ValidationState ;

import com.hp.hpl.jena.datatypes.RDFDatatype ;
import com.hp.hpl.jena.datatypes.xsd.impl.XSDAbstractDateTimeType ;
import com.hp.hpl.jena.datatypes.xsd.impl.XSDBaseNumericType ;
import com.hp.hpl.jena.datatypes.xsd.impl.XSDDouble ;
import com.hp.hpl.jena.datatypes.xsd.impl.XSDFloat ;
import com.hp.hpl.jena.graph.Node ;

public class CheckerLiterals implements NodeChecker
{
    // A flag to enable the test suite to read bad data.
    public static boolean WarnOnBadLiterals = true ;
   
    private ErrorHandler handler ;
    public CheckerLiterals(ErrorHandler handler)
    {
        this.handler = handler ;
    }
   
    @Override
    public boolean check(Node node, long line, long col)
    { return node.isLiteral() && checkLiteral(node, handler, line, col) ; }
   
    final static private Pattern langPattern = Pattern.compile("[a-zA-Z]{1,8}(-[a-zA-Z0-9]{1,8})*") ;

    public static boolean checkLiteral(Node node, ErrorHandler handler, long line, long col)
    {
        if ( ! node.isLiteral() )
        {
            handler.error("Not a literal: "+node, line, col) ;
            return false ;
        }
      
        return checkLiteral(node.getLiteralLexicalForm(), node.getLiteralLanguage(), node.getLiteralDatatype()
                            handler, line, col) ;
    }
   
   
    public static boolean checkLiteral(String lexicalForm, RDFDatatype datatype,
                                       ErrorHandler handler, long line, long col)
    {
        return checkLiteral(lexicalForm, null, datatype, handler, line, col) ;
    }

    public static boolean checkLiteral(String lexicalForm, String lang,
                                       ErrorHandler handler, long line, long col)
    {
        return checkLiteral(lexicalForm, lang, null, handler, line, col) ;
    }
   
    public static boolean checkLiteral(String lexicalForm, String lang, RDFDatatype datatype,
                                       ErrorHandler handler, long line, long col)
    {
        if ( ! WarnOnBadLiterals )
            return true ;
       
        boolean hasLang = lang != null && ! lang.equals("") ;
       
        if ( datatype != null && hasLang )
            handler.error("Literal has datatype and language", line, col) ;
       
        // Datatype check (and plain literals are always well formed)
        if ( datatype != null )
            return validateByDatatype(lexicalForm, datatype, handler, line, col) ;
       
        // No datatype.  Language?

        if ( hasLang )
        {
            // Not a perfect test.
            if ( lang.length() > 0 && ! langPattern.matcher(lang).matches() )
            {
                handler.warning("Language not valid: "+lang, line, col) ;
                return false;
            }
        }
       
        return true ;
    }

    protected static boolean validateByDatatype(String lexicalForm, RDFDatatype datatype, ErrorHandler handler, long line, long col)
    {
        // XXX Reconsider.
        if ( SysRIOT.StrictXSDLexicialForms )
        {
            if ( datatype instanceof XSDBaseNumericType || datatype instanceof XSDFloat || datatype instanceof XSDDouble )
                return validateByDatatypeNumeric(lexicalForm, datatype, handler, line, col) ;
            if ( datatype instanceof XSDAbstractDateTimeType )
                return validateByDatatypeDateTime(lexicalForm, datatype, handler, line, col) ;
        }
        return validateByDatatypeJena(lexicalForm, datatype, handler, line, col) ;
    }

    protected static boolean validateByDatatypeJena(String lexicalForm, RDFDatatype datatype, ErrorHandler handler, long line, long col)
    {
        if ( datatype.isValid(lexicalForm) )
            return true ;
        handler.warning("Lexical form '"+lexicalForm+"' not valid for datatype "+datatype.getURI(), line, col) ;
        return false ;
    }
   
    protected static boolean validateByDatatypeDateTime(String lexicalForm, RDFDatatype datatype, ErrorHandler handler, long line, long col)
    {
        if ( lexicalForm.contains(" ") )  { handler.warning("Whitespace in XSD date or time literal: '"+lexicalForm+"'", line, col) ; return false ; }
        if ( lexicalForm.contains("\n") ) { handler.warning("Newline in XSD date or time literal: '"+lexicalForm+"'", line, col) ; return false ; }
        if ( lexicalForm.contains("\r") ) { handler.warning("Newline in XSD date or time literal: '"+lexicalForm+"'", line, col) ; return false ; }
        //if ( ! StrictXSDLexicialForms )
        // Jena is already strict.
        return validateByDatatypeJena(lexicalForm, datatype, handler, line, col) ;
    }
   
    protected static boolean validateByDatatypeNumeric(String lexicalForm, RDFDatatype datatype, ErrorHandler handler, long line, long col)
    {
        // Do a white space check as well for numerics.
        if ( lexicalForm.contains(" ") )  { handler.warning("Whitespace in numeric XSD literal: '"+lexicalForm+"'", line, col) ; return false ; }
        if ( lexicalForm.contains("\n") ) { handler.warning("Newline in numeric XSD literal: '"+lexicalForm+"'", line, col) ; return false ; }
        if ( lexicalForm.contains("\r") ) { handler.warning("Carriage return in numeric XSD literal: '"+lexicalForm+"'", line, col) ; return false ; }
       
//        if ( lit.getDatatype() instanceof XSDAbstractDateTimeType )
//        {
//            // Do a white space check as well for numerics.
//            if ( lex.contains(" ") )  { handler.warning("Whitespace in XSD date or time literal: "+node, line, col) ; return false ; }
//            if ( lex.contains("\n") ) { handler.warning("Newline in XSD date or time literal: "+node, line, col) ; return false ; }
//            if ( lex.contains("\r") ) { handler.warning("Newline in XSD date or time literal: "+node, line, col) ; return false ; }
//        }
//
        if ( ! SysRIOT.StrictXSDLexicialForms )
            return validateByDatatypeJena(lexicalForm, datatype, handler, line, col) ;
       
        // From Jena 2.6.3, XSDDatatype.parse
        XSSimpleType typeDeclaration = (XSSimpleType)datatype.extendedTypeDefinition() ;
        try {
            ValidationContext context = new ValidationState();
            ValidatedInfo resultInfo = new ValidatedInfo();
            Object result = typeDeclaration.validate(lexicalForm, context, resultInfo);
            return true ;
        } catch (InvalidDatatypeValueException e) {
            handler.warning("Lexical form '"+lexicalForm+"' not valid for datatype "+datatype.getURI(), line, col) ;
            return false ;
        }
    }
}
TOP

Related Classes of org.apache.jena.riot.checker.CheckerLiterals

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.