A utility to convert MARC-8 data to non-precomposed UCS/Unicode.
The MARC-8 to Unicode mapping used is the version with the March 2005 revisions.
Lines 58–68:
// Copy every record from the MARC stream reader into a byte-array-backed
// stream writer (constructed with the "UTF8" encoding argument), counting
// how many records were written.
MarcStreamReader reader = new MarcStreamReader(input);
ByteArrayOutputStream out = new ByteArrayOutputStream();
MarcStreamWriter writer = new MarcStreamWriter(out, "UTF8");
// Records are written through an AnselToUnicode converter.
writer.setConverter(new AnselToUnicode());
int counter = 0;
// counter advances only after a record has been written successfully.
for (; reader.hasNext(); counter++) {
    writer.write(reader.next());
}
Lines 57–67:
Lines 1074–1084:
static AnselToUnicode conv = null; private void cleanupBadFieldSeperators(byte[] field) { if (conv == null) conv = new AnselToUnicode(true); boolean hasEsc = false; boolean inMultiByte = false; boolean justCleaned = false; int mbOffset = 0;
Lines 1470–1480:
static byte overbar[] = { (byte)(char)(0xaf) }; private String getMarc8Conversion(byte[] bytes) { String dataElement = null; if (converterAnsel == null) converterAnsel = new AnselToUnicode(errors); if (permissive && (byteArrayContains(bytes, badEsc) || byteArrayContains(bytes, overbar))) { String newDataElement = null; try {
Lines 51–61:
// Load the sample records from the class-relative resource and write each
// one through a MarcXmlWriter built from the caller-supplied result and
// stylesheet, using an AnselToUnicode converter.
InputStream input =
        ReadMarcExample.class.getResourceAsStream("resources/summerland.mrc");
MarcReader reader = new MarcStreamReader(input);

MarcXmlWriter writer = new MarcXmlWriter(result, stylesheet);
writer.setConverter(new AnselToUnicode());

for (; reader.hasNext(); ) {
    writer.write((Record) reader.next());
}
// Close the writer once every record has been written.
writer.close();
Lines 406–416:
throw new MarcException("unsupported encoding", e); } } else if (encoding.equals("MARC-8") || encoding.equals("MARC8")) { if (converterAnsel == null) converterAnsel = new AnselToUnicode(); dataElement = converterAnsel.convert(bytes); } else if (encoding.equals("ISO-8859-1") || encoding.equals("ISO8859_1") || encoding.equals("ISO_8859_1")) { try {
Lines 202–212:
writer.setIndent(true); if (convert != null) { CharConverter charconv = null; if (Constants.MARC_8_ENCODING.equals(convert)) charconv = new AnselToUnicode(); else if (Constants.ISO5426_ENCODING.equals(convert)) charconv = new Iso5426ToUnicode(); else if (Constants.ISO6937_ENCODING.equals(convert)) charconv = new Iso6937ToUnicode(); else {
Lines 46–56:
// Write every record from the MARC stream into a DOMResult via MarcXmlWriter,
// using an AnselToUnicode converter.
DOMResult result = new DOMResult();
MarcXmlWriter writer = new MarcXmlWriter(result);
writer.setConverter(new AnselToUnicode());

MarcReader reader = new MarcStreamReader(input);
while (true) {
    if (!reader.hasNext()) {
        break;
    }
    writer.write((Record) reader.next());
}
// Close the writer once every record has been written.
writer.close();
Lines 43–53:
.getResourceAsStream("resources/summerland.mrc"); MarcReader reader = new MarcStreamReader(input); MarcWriter writer = new MarcXmlWriter(System.out, true); AnselToUnicode converter = new AnselToUnicode(); writer.setConverter(converter); while (reader.hasNext()) { Record record = reader.next(); writer.write(record);
.getResourceAsStream("resources/brkrtest.mrc"); MarcReader reader = new MarcStreamReader(input); MarcWriter writer = new MarcStreamWriter(System.out, "UTF8"); AnselToUnicode converter = new AnselToUnicode(); writer.setConverter(converter); while (reader.hasNext()) { Record record = reader.next(); writer.write(record);