/**
* Copyright (C) 2009 eXo Platform SAS.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/
/*
* DO NOT EDIT THIS DOCUMENT MANUALLY !!!
* THIS FILE IS AUTOMATICALLY GENERATED BY THE TOOLS UNDER
* AutoDetect/tools/
*/
package org.exoplatform.test;
import org.exoplatform.component.test.AbstractGateInTest;
import org.exoplatform.services.chars.chardet.Detector;
import org.exoplatform.services.chars.chardet.ICharsetDetectionObserver;
import org.exoplatform.services.chars.chardet.PSMDetector;
import java.io.BufferedInputStream;
import java.net.URL;
/**
 * Created by The eXo Platform SARL
 * Author : Lai Van Khoi
 * laivankhoi46pm1@yahoo.com
 * Nov 27, 2006
 * <p>
 * Test that downloads a fixed web page, feeds its bytes to a {@link Detector}
 * and prints the charset(s) the detector reports on standard output.
 */
public class HtmlCharsetDetector extends AbstractGateInTest
{
   /**
    * Set to {@code true} once a charset has been reported, either by the
    * detector's observer callback or because the whole stream was ASCII.
    */
   public static boolean found = false;

   /** Page whose charset is detected by {@link #testClass()}. */
   private static final String PAGE_URL = "http://www.laodong.com.vn/Home/khoilv/2006/9/4343.laodong";

   /**
    * Language hint handed to the {@link Detector} constructor:
    * 1 => Japanese, 2 => Chinese, 3 => Simplified Chinese,
    * 4 => Traditional Chinese, 5 => Korean, 6 => Don't know (default).
    */
   private static final int LANGUAGE_HINT = 6;

   /**
    * Reads {@link #PAGE_URL} in 1024-byte chunks, passes them to the detector
    * and prints either the detected charset, "ASCII" when every chunk was
    * ASCII, or the list of probable charsets when nothing was reported.
    *
    * @throws Exception if the URL cannot be opened or read
    */
   public void testClass() throws Exception
   {
      // The flag is static: clear any value left over from a previous run so
      // that the "probable charsets" fallback below is not wrongly skipped.
      found = false;

      Detector det = new Detector(LANGUAGE_HINT);

      // Register an observer; notify() is invoked when a matching charset is found.
      det.init(new ICharsetDetectionObserver()
      {
         public void notify(String charset)
         {
            HtmlCharsetDetector.found = true;
            System.out.println("CHARSET = " + charset);
         }
      });

      boolean isAscii = true;
      BufferedInputStream imp = new BufferedInputStream(new URL(PAGE_URL).openStream());
      try
      {
         byte[] buf = new byte[1024];
         boolean done = false;
         int len;
         while ((len = imp.read(buf, 0, buf.length)) != -1)
         {
            // Keep checking for pure ASCII only while every chunk so far was ASCII.
            if (isAscii)
            {
               isAscii = det.isAscii(buf, len);
            }
            // Feed the detector once non-ASCII data has been seen.
            if (!isAscii && !done)
            {
               done = det.doIt(buf, len, false);
            }
            // Once the detector is done, later chunks would be ignored anyway.
            if (done)
            {
               break;
            }
         }
      }
      finally
      {
         // Always release the underlying connection, even when reading fails.
         imp.close();
      }
      det.dataEnd();

      if (isAscii)
      {
         System.out.println("CHARSET = ASCII");
         found = true;
      }
      if (!found)
      {
         String[] prob = det.getProbableCharsets();
         for (int i = 0; i < prob.length; i++)
         {
            System.out.println("Probable Charset = " + prob[i]);
         }
      }
   }
}