Package org.exoplatform.services.chars.chardet

Examples of org.exoplatform.services.chars.chardet.Detector


      FileInputStream input = new FileInputStream(file);
      BufferedInputStream buffer = new BufferedInputStream(input);
      byte[] data = new byte[buffer.available()];
      int available = -1;

      Detector det = new Detector(PSMDetector.ALL);

      //Set an observer...
      //The Notify() will be called when a matching charset is found.
      det.init(new ICharsetDetectionObserver()
      {
         public void notify(String charset)
         {
            System.out.println("CHARSET === " + charset);
         }
      });

      boolean done = false;
      boolean isAscii = true;

      while ((available = buffer.read(data)) > -1)
      {
         //Khoilv'code.
         //System.out.print(data);
         if (isAscii)
            isAscii = det.isAscii(data, available);

         //DoIt if non-ascii and not done yet.
         if (!isAscii && !done)
            done = det.doIt(data, available, false);
      }

      det.dataEnd();

      if (isAscii)
      {
         System.out.println("CHARSET = ASCII");
         found = true;
      }

      if (!found)
      {
         String prob[] = det.getProbableCharsets();
         for (int i = 0; i < prob.length; i++)
         {
            System.out.println("Probable Charset = " + prob[i]);
         }
      }
View Full Code Here


         return;
      }

      //Initialize the nsDetector();
      int lang = (argv.length == 2) ? Integer.parseInt(argv[1]) : PSMDetector.ALL;
      Detector det = new Detector(lang);

      //Set an observer...
      //The Notify() will be called when a matching charset is found.

      det.init(new ICharsetDetectionObserver()
      {
         public void notify(String charset)
         {
            HtmlCharsetDetector.found = true;
            System.out.println("CHARSET = " + charset);
         }
      });

      URL url = new URL(argv[0]);
      BufferedInputStream imp = new BufferedInputStream(url.openStream());

      byte[] buf = new byte[1024];
      int len;
      boolean done = false;
      boolean isAscii = true;

      while ((len = imp.read(buf, 0, buf.length)) != -1)
      {
         //Check if the stream is only ascii.
         if (isAscii)
            isAscii = det.isAscii(buf, len);

         //DoIt if non-ascii and not done yet.
         if (!isAscii && !done)
            done = det.doIt(buf, len, false);
      }
      det.dataEnd();

      if (isAscii)
      {
         System.out.println("CHARSET = ARSII");
         found = true;
      }

      if (!found)
      {
         String prob[] = det.getProbableCharsets();
         for (int i = 0; i < prob.length; i++)
         {
            System.out.println("Probable Charset = " + prob[i]);
         }
      }
View Full Code Here

      return createDocument(reader.load(file), charset);
   }

   public static String detect(byte[] buf)
   {
      Detector det = new Detector(PSMDetector.ALL);
      charset_ = null;
      det.init(new ICharsetDetectionObserver()
      {
         public void notify(String charset)
         {
            charset_ = charset;
         }
      });

      boolean isAscii = true;
      int len = buf.length;

      isAscii = det.isAscii(buf, len);
      if (!isAscii)
         det.doIt(buf, len, false);
      det.dataEnd();

      if (isAscii)
         charset_ = "ASCII";
      return charset_;
   }
View Full Code Here

TOP

Related Classes of org.exoplatform.services.chars.chardet.Detector

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.