Package org.mmisw.orrclient.core.util.charset

Source Code of org.mmisw.orrclient.core.util.charset.ConverterTest

package org.mmisw.orrclient.core.util.charset;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.util.Collection;
import java.util.Iterator;
import java.util.SortedMap;

import org.apache.commons.io.IOUtils;
import org.mmisw.orrclient.core.util.Utf8Util;
import org.mmisw.orrclient.gwt.client.rpc.ReadFileResult;

/**
* Charset conversion of a file.
* <p>
* Note: this was testing code previously located in Utf8Util, but now kept here
* mainly for back-up purposes (it's not used anywhere in the ORR modules at this point).
* Code minimally maintained; could be improved.
*
* @author Carlos Rueda
*/
public class ConverterTest  {

 
  /**
   * Reads a file.
   * If the file can be read using charset UTF-8, no conversion is attempted,
   * but an error is signaled so the main program does not create the output file.
   * Otherwise, conversion to UTF-8 is attempted.
   *
   * @param file The file to read
   * @return the result of the operation.
   * @throws Exception
   */
  private static ReadFileResult readFileWithConversionToUtf8(File file) throws Exception {
    ReadFileResult result = new ReadFileResult();
   
    try {
      String str = IOUtils.toString(new FileInputStream(file), "UTF-8");
      result.setLogInfo("OK: file can be read as UTF-8 (no conversion necessary)");
      result.setError("setting error to avoid unnecesary conversion");
      result.setContents(str);
      return result;

    }
    catch(Throwable ex) {
      result.setLogInfo("OK: file cannot be read as UTF-8 directly.");
    }
   
    byte[] bytes = IOUtils.toByteArray(new FileInputStream(file));
   
    Collection<String> charsets = Utf8Util.isUtf8(bytes);
   
    if ( charsets == null ) {
      // charsets == null means the bytes are good UTF-8, so this
      // should NOT happen.
      result.addLogInfo("OK: already in UTF-8.");
      result.setError("setting error to avoid unnecesary conversion");
      result.setContents(new String(bytes, "UTF-8"));
      return result;
    }
   
    result.addLogInfo("Charset of the file may be one of: " +charsets+ "\n");
    result.addLogInfo("Attempting conversiones..\n");
   
    for ( String charsetName : charsets ) {
      try {
        String outputStr = _asString(bytes, charsetName);
        result.addLogInfo("Conversion from " +charsetName+ ": OK.\n");
        result.setContents(outputStr);
        return result;
      }
      catch(CharacterCodingException ex) {
        // continue with the other possible charsets...
      }
    }
   
    result.setError("None of the conversions from the possible detected charsets "
        + " was successful: " +charsets
    );
    result.setContents(null);
    return result;
  }
 

  /**
   * A test program to convert a file that may not pass the UTF-8 test into one that is UTF-8.
   * If the input file is already UTF-8, it does nothing.
   * @param args NOT used.
   * @throws Exception
   */
  public static void main(String[] args) throws Exception {
    if ( false ) availableCharsets();
   
    String filename;
    if ( args.length > 0 ) {
      filename = args[0];
      System.out.println("File: " +filename);
    }
    else {
      filename = "resource/utf8/theme-windows-1250.owl";
      System.out.println("No arg given. Using a hard-coded filename: " +filename);
    }

    File file = new File(filename);
   
    File outFile = new File(file.getParent(), "utf8-" +file.getName());
    String outFilename = outFile.getPath();
   
    ReadFileResult result = readFileWithConversionToUtf8(file);
    System.out.println("readFileWithConversionToUtf8:");
    String error = result.getError();
    if ( error != null ) {
      System.out.println("error: " +result.getError());
    }
    System.out.println("logInfo:\n\t" +result.getLogInfo().replaceAll("\n", "\n\t"));
   
    if ( error == null ) {
      _writeStringTo(result.getContents(), outFilename);
      System.out.println("Output written to " +outFilename)
     
    }
  }
 
 
  private static void _writeStringTo(String outputStr, String outFilename) throws IOException {
    File outputFile = new File(outFilename);
   
    OutputStreamWriter os = new OutputStreamWriter(new FileOutputStream(outputFile), "UTF-8");
    try {     
      IOUtils.write(outputStr, os);
    }
    finally {
      IOUtils.closeQuietly(os);
    }
  }

  private static String _asString(byte[] bytes,  String charsetName) throws CharacterCodingException, UnsupportedEncodingException {
//    http://www.exampledepot.com/egs/java.nio.charset/ConvertChar.html
   
    Charset charset = Charset.forName(charsetName);

    CharsetDecoder decoder = charset.newDecoder();
    decoder
      .onMalformedInput(CodingErrorAction.REPORT)
      .onUnmappableCharacter(CodingErrorAction.REPORT)
    ;
   
    ByteBuffer bbuf = ByteBuffer.wrap(bytes);
    CharBuffer cbuf = decoder.decode(bbuf);
    String str = cbuf.toString();
   
    return str;
  }
 
 

  private static void availableCharsets() {
    System.out.println("availableCharsets");

    SortedMap<String, Charset> map = Charset.availableCharsets();
    Iterator<String> it = map.keySet().iterator();
    while (it.hasNext()) {
      String charsetName = (String)it.next();
      Charset charset = Charset.forName(charsetName);
      System.out.println(charset);
    }
  }

}
TOP

Related Classes of org.mmisw.orrclient.core.util.charset.ConverterTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.