Package cn.bran.strings

Source Code of cn.bran.strings.Strings$DirectEncoder

package cn.bran.strings;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.util.ArrayList;
import java.util.Date;

import org.junit.Test;

public class Strings {
    /**
     * Common contract for the UTF-8 encoding strategies compared in this file.
     * Every implementation in this file is constructed around a caller-supplied
     * destination byte array.
     */
    private interface UTF8Encoder {
        /**
         * Encodes {@code source} as UTF-8 into the destination.
         * Assumes destination is large enough to hold encoded source.
         *
         * @param source the string to encode
         * @return the number of bytes produced (the implementations in this file
         *         return the buffer position or the encoded array length)
         */
        int encode(String source);
        /**
         * Encodes {@code source} as UTF-8 and returns the encoded bytes as an array.
         *
         * @param source the string to encode
         * @return the encoded bytes
         */
        byte[] encodeToArray(String source);

        /**
         * Creates another encoder; the implementations in this file return a new
         * encoder of the same concrete type built around the same destination array.
         */
        UTF8Encoder newInstance();
    }

    /** UTF-8 charset from which the {@link CharsetEncoder}-based encoders obtain their encoder. */
    private static final Charset UTF8 = Charset.forName("UTF-8");

    private static abstract class UTF8CharsetEncoder implements UTF8Encoder {
        private final byte[] destination;
        protected final ByteBuffer wrapper;
        protected final CharsetEncoder encoder;

        protected UTF8CharsetEncoder(byte[] destination) {
            this.destination = destination;
            wrapper = ByteBuffer.wrap(destination);
            //~ wrapper = ByteBuffer.allocateDirect(destination.length);
            encoder = UTF8.newEncoder();
        }

       
        public byte[] encodeToArray(String source) {
            int length = encode(source);
            byte[] out = new byte[length];
            System.arraycopy(destination, 0, out, 0, length);
            return out;
        }

    }

    private static final class DirectEncoder extends UTF8CharsetEncoder {
        public DirectEncoder(byte[] destination) { super(destination); }

        @Override
        public  int encode(String source) {
            wrapper.clear();
            encoder.reset();

            CharBuffer in = CharBuffer.wrap(source);
            CoderResult result = encoder.encode(in, wrapper, true);
            assert result == CoderResult.UNDERFLOW;
            result = encoder.flush(wrapper);
            assert result == CoderResult.UNDERFLOW;

            return wrapper.position();
        }

        public DirectEncoder newInstance() {
            return new DirectEncoder(wrapper.array());
        }
    }

    private static final class CharBufferCopyEncoder extends UTF8CharsetEncoder {
        private final CharBuffer tempWrapper = CharBuffer.allocate(1024);
        private final char[] tempChars = tempWrapper.array();

        public CharBufferCopyEncoder(byte[] destination) { super(destination); }

        @Override
        public int encode(String source) {
            wrapper.clear();
            encoder.reset();
           
            //~ final CharBuffer tempWrapper = CharBuffer.allocate(1024);
            //~ final char[] tempChars = tempWrapper.array();

            int readOffset = 0;
            boolean done = false;
            while (!done) {
                int readLength = source.length() - readOffset;
                if (readLength > tempChars.length) {
                    readLength = tempChars.length;
                }

                // Copy the chunk into our temporary buffer
                source.getChars(0, readLength, tempChars, 0);
                tempWrapper.clear();
                tempWrapper.limit(readLength);
                readOffset += readLength;

                done = readOffset == source.length();
                CoderResult result = encoder.encode(tempWrapper, wrapper, done);
                assert result == CoderResult.UNDERFLOW;
            }
            CoderResult result = encoder.flush(wrapper);
            assert result == CoderResult.UNDERFLOW;

            return wrapper.position();
        }

        public CharBufferCopyEncoder newInstance() {
            return new CharBufferCopyEncoder(wrapper.array());
        }
    }

    private static final class StringEncoder implements UTF8Encoder {
        private final byte[] destination;

        public StringEncoder(byte[] destination) {
            this.destination = destination;
        }

        public int encode(String source) {
            byte[] array = encodeToArray(source);
            System.arraycopy(array, 0, destination, 0, array.length);
            return array.length;
        }
       
        public byte[] encodeToArray(String source) {
            try {
                return source.getBytes("UTF-8");
            } catch (UnsupportedEncodingException e) {
                throw new RuntimeException(e);
            }
        }

        public StringEncoder newInstance() {
            return new StringEncoder(destination);
        }
    }

    private static void error() {
        System.err.println("(bytebuffer|string|chars) (once|reuse) (buffer|array) (input strings)");
        System.exit(1);
    }

    public static void main(String[] args) throws IOException {
        if (args.length != 4) {
            error();
            return;
        }

        byte[] destination = new byte[4096];

        UTF8Encoder encoder;
        if (args[0].equals("bytebuffer")) {
            encoder = new DirectEncoder(destination);
        } else if (args[0].equals("string")) {
            encoder = new StringEncoder(destination);
        } else if (args[0].equals("chars")) {
            encoder = new CharBufferCopyEncoder(destination);
        } else {
            error();
            return;
        }

        boolean reuseEncoder = true;
        if (args[1].equals("once")) {
            reuseEncoder = false;
        } else if (!args[1].equals("reuse")) {
            error();
            return;
        }

        boolean toBuffer = true;
        if (args[2].equals("array")) {
            toBuffer = false;
        } else if (!args[2].equals("buffer")) {
            error();
            return;
        }

        ArrayList<String> strings = new ArrayList<String>();
        BufferedReader reader =
                new BufferedReader(new InputStreamReader(new FileInputStream(args[3]), "UTF-8"));
        String line;
        while ((line = reader.readLine()) != null) {
            strings.add(line);
        }

        //~ final int ITERATIONS = 5000000;
        //~ final int ITERATIONS = 1000000;
        //~ final int ITERATIONS = 10000;
        final int ITERATIONS = 400;
        for (int j = 0; j < 10; ++j) {
            long start = System.nanoTime();
            for (int i = 0; i < ITERATIONS; ++i) {
                for (String value : strings) {
                    UTF8Encoder temp = encoder;
                    if (!reuseEncoder) {
                        temp = encoder.newInstance();
                    }

                    if (toBuffer) {
                        int bytes = temp.encode(value);
                        assert new String(destination, 0, bytes, "UTF-8").equals(value);
                    } else {
                        byte[] out = temp.encodeToArray(strings.get(i));
                        assert new String(out, "UTF-8").equals(value);
                    }
                }
            }
            long end = System.nanoTime();
            System.out.println(((double) end-start)/1000000000. + " seconds");
        }
    }
   
    /**
     * show printwriter is about 15% - 20% slower than stringbuilder
     */
    @Test public void strinBuilderVsPrintWriter() {
      ByteArrayOutputStream baos = new ByteArrayOutputStream(1000);
      PrintWriter spw = new PrintWriter(baos);
   
      StringBuilder sb = new StringBuilder(1000);
      long tsb = 0, tpw = 0;
      long t = 0;
     
      String s1 = "社光明的枯草满足在在晒太阳\n";
    // warm up
      spw.print(s1);
      spw.print(s1);
      spw.print(s1);
     
      sb.append(s1);
      sb.append(s1);
      sb.append(s1);
     
      for (int i =0; i < 10000; i++) {
//        baos.reset();
        t = System.nanoTime();
        spw.print(s1);
        tpw += System.nanoTime() - t;
//        sb.
        t = System.nanoTime();
        sb.append(s1);
        tsb += System.nanoTime() - t;
      }
     
      System.out.println("string builder: " + tsb);
      System.out.println("print writer: " + tpw);
     
    }
    @Test public void testArrays() {
      Object[] oa = new String[] {"1", "2"};
      assertEquals(String[].class, oa.getClass());
      assertTrue(oa instanceof Object[]);
      mymethod(oa);
      oa = new Date[] {new Date()};
      assertTrue(oa instanceof Object[]);
      int[] ia = new int[] {};
      // ia instanceof Object[] is a syntax error
//      memethod(ia);
    }
   
    /** Intentionally empty; called from testArrays with an array passed as {@code Object[]}. */
    private void mymethod(Object[] oa) {}
}
TOP

Related Classes of cn.bran.strings.Strings$DirectEncoder

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.