Package org.marc4j

Examples of org.marc4j.MarcException


            input.mark(10);
            if (input.read() == -1)
                return false;
            input.reset();
        } catch (IOException e) {
            throw new MarcException(e.getMessage(), e);
        }
        return true;
    }
View Full Code Here


                record.setLeader(l);
            }
            return(record);
        }
        catch (EOFException e) {
            throw new MarcException("Premature end of file encountered", e);
        }
        catch (IOException e) {
            throw new MarcException("an error occured reading input", e);
        }  
    }
View Full Code Here

        try {               
            parseLeader(ldr, byteArray);
            directoryLength = ldr.getBaseAddressOfData() - (24 + 1);
        }
        catch (IOException e) {
            throw new MarcException("error parsing leader with data: "
                    + new String(byteArray), e);
        }
        catch (MarcException e) {
            if (permissive)
            {
                if (recordBuf[recordBuf.length-1] == Constants.RT && recordBuf[recordBuf.length-2] == Constants.FT)
                {
                    errors.addError("unknown", "n/a", "n/a", ErrorHandler.MAJOR_ERROR,
                                    "Error parsing leader, trying to re-read leader either shorter or longer");
                    // make an attempt to recover record.
                    int offset = 0;
                    while (offset < recordBuf.length)
                    {
                        if (recordBuf[offset] == Constants.FT)
                        {
                            break;
                        }
                        offset++;
                    }
                    if (offset % 12 == 1)
                    {
                        // move one byte from body to leader, make new leader, and try again
                        errors.addError("unknown", "n/a", "n/a", ErrorHandler.MAJOR_ERROR,
                                        "Leader appears to be too short, moving one byte from record body to leader, and trying again");
                        byte oldBody[] = recordBuf;
                        recordBuf = new byte[oldBody.length-1];
                        System.arraycopy(oldBody, 1, recordBuf, 0, oldBody.length-1);
                        directoryLength = offset-1;
                        ldr.setIndicatorCount(2);
                        ldr.setSubfieldCodeLength(2);
                        ldr.setImplDefined1((""+(char)byteArray[7]+" ").toCharArray());
                        ldr.setImplDefined2((""+(char)byteArray[18]+(char)byteArray[19]+(char)byteArray[20]).toCharArray());
                        ldr.setEntryMap("4500".toCharArray());
                        if (byteArray[10] == (byte)' ' || byteArray[10] == (byte)'a') // if its ' ' or 'a'
                        {
                            ldr.setCharCodingScheme((char)byteArray[10]);
                        }
                    }
                    else if (offset % 12 == 11)
                    {
                        errors.addError("unknown", "n/a", "n/a", ErrorHandler.MAJOR_ERROR,
                                        "Leader appears to be too long, moving one byte from leader to record body, and trying again");
                        byte oldBody[] = recordBuf;
                        recordBuf = new byte[oldBody.length+1];
                        System.arraycopy(oldBody, 0, recordBuf, 1, oldBody.length);
                        recordBuf[0] = (byte)'0';
                        directoryLength = offset+1;
                        ldr.setIndicatorCount(2);
                        ldr.setSubfieldCodeLength(2);
                        ldr.setImplDefined1((""+(char)byteArray[7]+" ").toCharArray());
                        ldr.setImplDefined2((""+(char)byteArray[16]+(char)byteArray[17]+(char)byteArray[18]).toCharArray());
                        ldr.setEntryMap("4500".toCharArray());
                        if (byteArray[8] == (byte)' ' || byteArray[8] == (byte)'a') // if its ' ' or 'a'
                        {
                            ldr.setCharCodingScheme((char)byteArray[10]);
                        }
                        if (byteArray[10] == (byte)' ' || byteArray[10] == (byte)'a') // if its ' ' or 'a'
                        {
                            ldr.setCharCodingScheme((char)byteArray[10]);
                        }
                    }
                    else
                    {
                        errors.addError("unknown", "n/a", "n/a", ErrorHandler.FATAL,
                                       "error parsing leader with data: " + new String(byteArray));
                        throw new MarcException("error parsing leader with data: "
                                + new String(byteArray), e);
                    }
                }
            }
            else
            {
                throw new MarcException("error parsing leader with data: "
                        + new String(byteArray), e);
            }
        }
        char tmp[] = ldr.getEntryMap();
        if (permissive && !(""+ tmp[0]+tmp[1]+tmp[2]+tmp[3]).equals("4500"))
        {
            if (tmp[0] >= '0' && tmp[0] <= '9' &&
                    tmp[1] >= '0' && tmp[1] <= '9' &&
                    tmp[2] >= '0' && tmp[2] <= '9' &&
                    tmp[3] >= '0' && tmp[3] <= '9')
            {
                errors.addError("unknown", "n/a", "n/a", ErrorHandler.ERROR_TYPO,
                            "Unusual character found at end of leader [ "+tmp[0]+tmp[1]+tmp[2]+tmp[3]+" ]");
            }
            else
            {
                errors.addError("unknown", "n/a", "n/a", ErrorHandler.ERROR_TYPO,
                                "Erroneous character found at end of leader [ "+tmp[0]+tmp[1]+tmp[2]+tmp[3]+" ]; changing them to the standard \"4500\"");
                ldr.setEntryMap("4500".toCharArray());
            }
        }

        // if MARC 21 then check encoding
        switch (ldr.getCharCodingScheme()) {
        case 'a':
            encoding = "UTF8";
            break;
        case ' ':
            if (convertToUTF8)
                encoding = defaultEncoding;
            else
                encoding = "ISO8859_1";
            break;
        default:
            if (convertToUTF8)
                if (permissive)
                {
                    errors.addError("unknown", "n/a", "n/a", ErrorHandler.MINOR_ERROR,
                                    "Record character encoding should be 'a' or ' ' in this record it is '"+ldr.getCharCodingScheme()+"'. Attempting to guess the correct encoding.");
                    encoding = "BESTGUESS";
                }
                else
                    encoding = defaultEncoding;
            else
                encoding = "ISO8859_1";
            break;

        }
        String utfCheck;
        if (encoding.equalsIgnoreCase("BESTGUESS"))
        {
            try
            {
                String marc8EscSeqCheck = new String(recordBuf, "ISO-8859-1");
                //  If record has MARC8 character set selection strings, it must be MARC8 encoded
                if (marc8EscSeqCheck.split("\\e[-(,)$bsp]", 2).length > 1)
                {
                    encoding = "MARC8";
                }
                else
                {
                    boolean hasHighBitChars = false;
                    for (int i = 0; i < recordBuf.length; i++)
                    {
                        if (recordBuf[i] < 0) // the high bit is set
                        {
                            hasHighBitChars = true;
                            break;
                        }
                    }
                    if (!hasHighBitChars)
                    {
                        encoding = "ISO8859_1";  //  You can choose any encoding you want here, the results will be the same.
                    }
                    else
                    {
                        utfCheck = new String(recordBuf, "UTF-8");
                        byte byteCheck[] = utfCheck.getBytes("UTF-8");
                        encoding = "UTF8";
                        if (recordBuf.length == byteCheck.length)
                        {
                            for (int i = 0; i < recordBuf.length; i++)
                            {
                                if (byteCheck[i] != recordBuf[i])
                                {
                                    encoding = "MARC8-Maybe";
                                    break;
                                }
                            }
                        }
                        else
                        {
                            encoding = "MARC8-Maybe";
                        }
                    }
                }
            }
            catch (UnsupportedEncodingException e)
            {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
        }
        else if (permissive && encoding.equals("UTF8"))
        {
            try
            {
                utfCheck = new String(recordBuf, "UTF-8");
                byte byteCheck[] = utfCheck.getBytes("UTF-8");
                if (recordBuf.length != byteCheck.length)
                {
                    boolean foundESC = false;
                    for (int i = 0; i < recordBuf.length; i++)
                    {
                        if (recordBuf[i] == 0x1B)
                        {
                            errors.addError("unknown", "n/a", "n/a", ErrorHandler.MINOR_ERROR,
                                            "Record claims to be UTF-8, but its not. Its probably MARC8.");
                            encoding = "MARC8-Maybe";
                            foundESC = true;
                            break;
                        }
                        if (byteCheck[i] != recordBuf[i])
                        {
                            encoding = "MARC8-Maybe";
                        }
                       
                    }
                    if (!foundESC)
                    {
                        errors.addError("unknown", "n/a", "n/a", ErrorHandler.MINOR_ERROR,
                                "Record claims to be UTF-8, but its not. It may be MARC8, or maybe UNIMARC, or maybe raw ISO-8859-1 ");
                    }
                }
                if (utfCheck.contains("a$1!"))
                {
                    encoding = "MARC8-Broken";
                    errors.addError("unknown", "n/a", "n/a", ErrorHandler.MAJOR_ERROR,
                                "Record claims to be UTF-8, but its not. It seems to be MARC8-encoded but with missing escape codes.");
                }
            }
            catch (UnsupportedEncodingException e)
            {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
        }
        else if (permissive && !encoding.equals("UTF8") && convertToUTF8)
        {
            try
            {
                utfCheck = new String(recordBuf, "UTF-8");
                byte byteCheck[] = utfCheck.getBytes("UTF-8");
                if (recordBuf.length == byteCheck.length)
                {
                  for (int i = 0; i < recordBuf.length; i++)
                  {
                      // need to check for byte < 0 to see if the high bit is set, because Java doesn't have unsigned types.
                      if (recordBuf[i] < 0x00 || byteCheck[i] != recordBuf[i])
                      {
                          errors.addError("unknown", "n/a", "n/a", ErrorHandler.MINOR_ERROR,
                                        "Record claims not to be UTF-8, but it seems to be.");
                            encoding = "UTF8-Maybe";
                            break;
                      }
                  }
                }
             }
            catch (UnsupportedEncodingException e)
            {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
        }
        record.setLeader(ldr);
       
        boolean discardOneAtStartOfDirectory = false;
        boolean discardOneSomewhereInDirectory = false;
       
        if ((directoryLength % 12) != 0)
        {
            if (permissive && directoryLength % 12 == 11 && recordBuf[1] != (byte)'0')
            {
                errors.addError("unknown", "n/a", "n/a", ErrorHandler.MAJOR_ERROR,
                                "Directory length is not a multiple of 12 bytes long.  Prepending a zero and trying to continue.");
                byte oldBody[] = recordBuf;
                recordBuf = new byte[oldBody.length+1];
                System.arraycopy(oldBody, 0, recordBuf, 1, oldBody.length);
                recordBuf[0] = (byte)'0';
                directoryLength = directoryLength+1;
            }
            else
            {
                if (permissive && directoryLength % 12 == 1 && recordBuf[1] == (byte)'0' && recordBuf[2] == (byte)'0')
                {
                    discardOneAtStartOfDirectory = true;
                    errors.addError("unknown", "n/a", "n/a", ErrorHandler.MAJOR_ERROR,
                                    "Directory length is not a multiple of 12 bytes long. Discarding byte from start of directory and trying to continue.");
                }
                else if (permissive && directoryLength % 12 == 1 && recordLength > 10000 && recordBuf[0] == (byte)'0' &&
                         recordBuf[1] == (byte)'0' && recordBuf[2] > (byte)'0' && recordBuf[2] <= (byte)'9')
                {
                    discardOneSomewhereInDirectory = true;
                    errors.addError("unknown", "n/a", "n/a", ErrorHandler.MAJOR_ERROR,
                                    "Directory length is not a multiple of 12 bytes long.  Will look for oversized field and try to work around it.");
                }               
                else
                {
                    if (errors != null)               
                    {   
                        errors.addError("unknown", "n/a", "n/a", ErrorHandler.FATAL,
                                "Directory length is not a multiple of 12 bytes long. Unable to continue.");
                    }
                    throw new MarcException("Directory length is not a multiple of 12 bytes long. Unable to continue.");
                }
            }
        }
        DataInputStream inputrec = new DataInputStream(new ByteArrayInputStream(recordBuf));
        int size = directoryLength / 12;

        String[] tags = new String[size];
        int[] lengths = new int[size];

        byte[] tag = new byte[3];
        byte[] length = new byte[4];
        byte[] start = new byte[5];

        String tmpStr;
        try {
            if (discardOneAtStartOfDirectory)  inputrec.read();
            int totalOffset = 0;
            for (int i = 0; i < size; i++)
            {
                inputrec.readFully(tag);               
                tmpStr = new String(tag);
                tags[i] = tmpStr;
   
                boolean proceedNormally = true;
                if (discardOneSomewhereInDirectory)
                {
                    byte lenCheck[] = new byte[10];
                    inputrec.mark(20);
                    inputrec.readFully(lenCheck);               
                    if (byteCompare(lenCheck, 4, 5, totalOffset)) // proceed normally
                    {
                        proceedNormally = true;
                    }
                    else if (byteCompare(lenCheck, 5, 5, totalOffset)) // field length is 5 bytes!  Bad Marc record, proceed normally
                    {
                        discardOneSomewhereInDirectory = false;
                        errors.addError("unknown", "n/a", "n/a", ErrorHandler.FATAL,
                                        "Field is longer than 9999 bytes.  Writing this record out will result in a bad record.");
                        proceedNormally = false;
                    }
                    else
                    {
                        errors.addError("unknown", "n/a", "n/a", ErrorHandler.FATAL,
                                        "Unable to reconcile problems in directory. Unable to continue.");                   
                        throw new MarcException("Directory length is not a multiple of 12 bytes long. Unable to continue.");
                    }
                    inputrec.reset();
                }
                if (proceedNormally)
                {
                    inputrec.readFully(length);
                    tmpStr = new String(length);
                    lengths[i] = Integer.parseInt(tmpStr);
   
                    inputrec.readFully(start);
                }
                else // length is 5 bytes long
                {
                    inputrec.readFully(start);
                    tmpStr = new String(start);
                    lengths[i] = Integer.parseInt(tmpStr);
   
                    inputrec.readFully(start);                   
                }
                totalOffset += lengths[i];
            }
           
            // If we still haven't found the extra byte, throw out the last byte and try to continue;
            if (discardOneSomewhereInDirectory)  inputrec.read();
   
            if (inputrec.read() != Constants.FT)
            {
                errors.addError("unknown", "n/a", "n/a", ErrorHandler.FATAL,
                                "Expected field terminator at end of directory. Unable to continue.");
                throw new MarcException("expected field terminator at end of directory");
            }
           
            int numBadLengths = 0;
           
            int totalLength = 0;
            for (int i = 0; i < size; i++)
            {
                int fieldLength = getFieldLength(inputrec);
                if (fieldLength+1 != lengths[i] && permissive)
                {
                    if (numBadLengths < 3 && (totalLength + fieldLength < recordLength + 26))
                    {
                        inputrec.mark(9999);
                        byteArray = new byte[lengths[i]];
                        inputrec.readFully(byteArray);
                        inputrec.reset();
                        if (fieldLength+1 < lengths[i] && byteArray[lengths[i]-1] == Constants.FT)
                        {
                            errors.addError("unknown", "n/a", "n/a", ErrorHandler.MINOR_ERROR,
                                            "Field Terminator character found in the middle of a field.");
                        }
                        else
                        {
                            numBadLengths++;
                            lengths[i] = fieldLength+1;
                            errors.addError("unknown", "n/a", "n/a", ErrorHandler.MINOR_ERROR,
                                            "Field length found in record different from length stated in the directory.");
                            if (fieldLength+1 > 9999)
                            {
                                errors.addError("unknown", "n/a", "n/a", ErrorHandler.FATAL,
                                            "Field length is greater than 9999, record cannot be represented as a binary Marc record.");
                            }
                        }

                    }
                }
                totalLength += lengths[i];
                if (isControlField(tags[i]))
                {
                    byteArray = new byte[lengths[i] - 1];
                    inputrec.readFully(byteArray);
   
                    if (inputrec.read() != Constants.FT)
                    {
                        errors.addError("unknown", "n/a", "n/a", ErrorHandler.FATAL,
                                        "Expected field terminator at end of field. Unable to continue.");
                        throw new MarcException("expected field terminator at end of field");
                    }
   
                    ControlField field = factory.newControlField();
                    field.setTag(tags[i]);
                    field.setData(getDataAsString(byteArray));
                    record.addVariableField(field);
   
                }
                else
                {
                    byteArray = new byte[lengths[i]];
                    inputrec.readFully(byteArray);
                    try {
                        record.addVariableField(parseDataField(tags[i], byteArray));
                    } catch (IOException e) {
                        throw new MarcException(
                                "error parsing data field for tag: " + tags[i]
                                        + " with data: "
                                        + new String(byteArray), e);
                    }
                }
            }
           
            // We've determined that although the record says it is UTF-8, it is not.
            // Here we make an attempt to determine the actual encoding of the data in the record.
            if (permissive && conversionCheck1.length() > 1 &&
                    conversionCheck2.length() > 1 && conversionCheck3.length() > 1)
            {
                guessAndSelectCorrectNonUTF8Encoding();
            }
            if (inputrec.read() != Constants.RT)
            {
                errors.addError("unknown", "n/a", "n/a", ErrorHandler.FATAL,
                                "Expected record terminator at end of record. Unable to continue.");
                throw new MarcException("expected record terminator");
            }
        }
        catch (IOException e)
        {
            errors.addError("unknown", "n/a", "n/a", ErrorHandler.FATAL,
                            "Error reading from data file. Unable to continue.");
            throw new MarcException("an error occured reading input", e);           
        }
    }
View Full Code Here

        try {
            length = Integer.parseInt(new String(tmp));
        } catch (NumberFormatException e) {
            errors.addError(ErrorHandler.FATAL,
                            "Unable to parse record length, Unable to Continue");
            throw new MarcException("unable to parse record length", e);
        }
        return(length);
    }
View Full Code Here

                // All Marc21 records should have indicatorCount '2'
                errors.addError(ErrorHandler.ERROR_TYPO, "bogus indicator count - byte value =  " + Integer.toHexString(indicatorCount & 0xff));
                ldr.setIndicatorCount(2);
            }
            else {
                throw new MarcException("unable to parse indicator count", e);
            }
        }
        try {
            ldr.setSubfieldCodeLength(Integer.parseInt(String
                    .valueOf(subfieldCodeLength)));
        } catch (NumberFormatException e) {
            if (permissive) {
                // All Marc21 records should have subfieldCodeLength '2'
                errors.addError(ErrorHandler.ERROR_TYPO, "bogus subfield count - byte value =  " + Integer.toHexString(subfieldCodeLength & 0xff));
                ldr.setSubfieldCodeLength(2);
            }
            else {
                throw new MarcException("unable to parse subfield code length", e);
            }
        }
        try {
            ldr.setBaseAddressOfData(Integer.parseInt(new String(baseAddr)));
        } catch (NumberFormatException e) {
            throw new MarcException("unable to parse base address of data", e);
        }

    }
View Full Code Here

        {
            try {
                dataElement = new String(bytes, "UTF-8");
            }
            catch (UnsupportedEncodingException e) {
                throw new MarcException("unsupported encoding", e);
            }
        }
        else if (encoding.equals("UTF8-Maybe"))
        {
            try {
                dataElement = new String(bytes, "UTF-8");
            }
            catch (UnsupportedEncodingException e) {
                throw new MarcException("unsupported encoding", e);
            }
        }
        else if (encoding.equals("MARC-8") || encoding.equals("MARC8"))
        {
            dataElement = getMarc8Conversion(bytes);
        }
        else if (encoding.equalsIgnoreCase("Unimarc") || encoding.equals("IS05426"))
        {
            dataElement = getUnimarcConversion(bytes);
        }
        else if (encoding.equals("MARC8-Maybe"))
        {
            String dataElement1 = getMarc8Conversion(bytes);
            String dataElement2 = getUnimarcConversion(bytes);
            String dataElement3 = null;
            try
            {
                dataElement3 = new String(bytes, "ISO-8859-1");
            }
            catch (UnsupportedEncodingException e)
            {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
            if (dataElement1.equals(dataElement2) && dataElement1.equals(dataElement3))
            {
                dataElement = dataElement1;
            }
            else
            {
                conversionCheck1 = conversionCheck1 + "|>" + Normalizer.compose(dataElement1, false);
                conversionCheck2 = conversionCheck2 + "|>" + dataElement2;
                conversionCheck3 = conversionCheck3 + "|>" + dataElement3;
                dataElement = dataElement1 + "%%@%%" + dataElement2 + "%%@%%" + dataElement3;               
            }           
        }
        else if (encoding.equals("MARC8-Broken"))
        {
            try
            {
                dataElement = new String(bytes, "ISO-8859-1");
            }
            catch (UnsupportedEncodingException e)
            {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
            String newdataElement = dataElement.replaceAll("&lt;", "<");
            newdataElement = newdataElement.replaceAll("&gt;", ">");
            newdataElement = newdataElement.replaceAll("&amp;", "&");
            newdataElement = newdataElement.replaceAll("&apos;", "'");
            newdataElement = newdataElement.replaceAll("&quot;", "\"");
            if (!newdataElement.equals(dataElement))  
            {
                dataElement = newdataElement;
                errors.addError(ErrorHandler.ERROR_TYPO, "Subfield contains escaped html character entities, un-escaping them. ");
            }
            String rep1 = ""+(char)0x1b+"\\$1$1";
            String rep2 = ""+(char)0x1b+"\\(B";                   
            newdataElement = dataElement.replaceAll("\\$1(.)", rep1);
            newdataElement = newdataElement.replaceAll("\\(B", rep2);
            if (!newdataElement.equals(dataElement))  
            {
                dataElement = newdataElement;
                errors.addError(ErrorHandler.MAJOR_ERROR, "Subfield seems to be missing MARC8 escape sequences, trying to restore them.");
            }
            try
            {
                dataElement = getMarc8Conversion(dataElement.getBytes("ISO-8859-1"));
            }
            catch (UnsupportedEncodingException e)
            {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }

        }
        else if (encoding.equals("ISO-8859-1") || encoding.equals("ISO8859_1"))
        {
            try {
                dataElement = new String(bytes, "ISO-8859-1");
            }
            catch (UnsupportedEncodingException e) {
                throw new MarcException("unsupported encoding", e);
            }
        }
        else
        {
            throw new MarcException("Unknown or unsupported Marc character encoding:" + encoding);          
        }
        if (errors != null && dataElement.matches("[^&]*&[a-z]*;.*"))
        {
            String newdataElement = dataElement.replaceAll("&lt;", "<");
            newdataElement = newdataElement.replaceAll("&gt;", ">");
View Full Code Here

      charset = saxUms.getCharSets();
      combining = saxUms.getCombiningChars();

    } catch (Exception e) {
        throw new MarcException(e.getMessage(), e);
    }

  }
View Full Code Here

      charset = saxUms.getCharSets();
      combining = saxUms.getCombiningChars();

    } catch (Exception e) {
        throw new MarcException(e.getMessage(), e);
    }
  }
View Full Code Here

      charset = saxUms.getCharSets();
      combining = saxUms.getCombiningChars();

    } catch (Exception e) {
        throw new MarcException(e.getMessage(), e);
    }
  }
View Full Code Here

            if ((line = br.readLine()) != null)
                return true;
            else
                return false;
        } catch (IOException e) {
            throw new MarcException(e.getMessage(), e);
        }
    }
View Full Code Here

TOP

Related Classes of org.marc4j.MarcException

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.