Examples of nu.validator.htmlparser.io.Encoding

nu.validator.htmlparser.io.Encoding

        // revised -- 2008-03-17 hsivonen
        sourceType = INPUT_READER;
        characterEncoding = Encoding.toAsciiLowerCase(actualName);
        encoding = Encoding.toAsciiLowerCase(encoding);
        try {
            Encoding cs = Encoding.forName(encoding);
            String canonName = cs.getCanonName();
            if (requireAsciiSuperset) {
                if (!cs.isAsciiSuperset()) {
                    fatal("The encoding \u201C"
                            + actualName
                            + "\u201D is not an ASCII superset and, therefore, cannot be used in an internal encoding declaration.");
                }
            }
            if (!cs.isRegistered()) {
                if (encoding.startsWith("x-")) {
                    err("The encoding \u201C"
                            + actualName
                            + "\u201D is not an IANA-registered encoding. (Charmod C022)");                    
                } else {
                    err("The encoding \u201C"
                            + actualName
                            + "\u201D is not an IANA-registered encoding and did not use the \u201Cx-\u201D prefix. (Charmod C023)");
                }
            } else if (!canonName.equals(encoding)) {
                err("The encoding \u201C"
                        + actualName
                        + "\u201D is not the preferred name of the character encoding in use. The preferred name is \u201C"
                        + canonName + "\u201D. (Charmod C024)");
            }
            if (!("utf-8".equals(encoding) || "utf-16".equals(encoding)
                    || "utf-16be".equals(encoding)
                    || "utf-16le".equals(encoding)
                    || "iso-8859-1".equals(encoding) || "us-ascii".equals(encoding))) {
                handler.warn("XML processors are required to support the UTF-8 and UTF-16 character encodings. The encoding was \u201C"
                        + actualName
                        + "\u201D instead, which is an incompatibility risk.");
            }
            Encoding htmlActual = cs.getActualHtmlEncoding();
            if (htmlActual != null) {
                handler.warn("Documents encoded as \u201C"
                        + htmlActual.getCanonName()
                        + "\u201D are often mislabeled as \u201C"
                        + actualName
                        + "\u201D, which is the declared encoding of this document.");
            }
            CharsetDecoder decoder = cs.newDecoder();

View Full Code Here

        // revised -- 2008-03-17 hsivonen
        sourceType = INPUT_READER;
        characterEncoding = Encoding.toAsciiLowerCase(actualName);
        encoding = Encoding.toAsciiLowerCase(encoding);
        try {
            Encoding cs = Encoding.forName(encoding);
            String canonName = cs.getCanonName();
            if (requireAsciiSuperset) {
                if (!cs.isAsciiSuperset()) {
                    fatal("The encoding \u201C"
                            + actualName
                            + "\u201D is not an ASCII superset and, therefore, cannot be used in an internal encoding declaration.");
                }
            }
            if (!cs.isRegistered()) {
                if (encoding.startsWith("x-")) {
                    err("The encoding \u201C"
                            + actualName
                            + "\u201D is not an IANA-registered encoding. (Charmod C022)");                    
                } else {
                    err("The encoding \u201C"
                            + actualName
                            + "\u201D is not an IANA-registered encoding and did not use the \u201Cx-\u201D prefix. (Charmod C023)");
                }
            } else if (!canonName.equals(encoding)) {
                err("The encoding \u201C"
                        + actualName
                        + "\u201D is not the preferred name of the character encoding in use. The preferred name is \u201C"
                        + canonName + "\u201D. (Charmod C024)");
            }
            if (!("utf-8".equals(encoding) || "utf-16".equals(encoding)
                    || "utf-16be".equals(encoding)
                    || "utf-16le".equals(encoding)
                    || "iso-8859-1".equals(encoding) || "us-ascii".equals(encoding))) {
                handler.warn("XML processors are required to support the UTF-8 and UTF-16 character encodings. The encoding was \u201C"
                        + actualName
                        + "\u201D instead, which is an incompatibility risk.");
            }
            Encoding htmlActual = cs.getActualHtmlEncoding();
            if (htmlActual != null) {
                handler.warn("Documents encoded as \u201C"
                        + htmlActual.getCanonName()
                        + "\u201D are often mislabeled as \u201C"
                        + actualName
                        + "\u201D, which is the declared encoding of this document.");
            }
            CharsetDecoder decoder = cs.newDecoder();

View Full Code Here

        }
    }


    public void Notify(String charsetName) {
        try {
            Encoding enc = Encoding.forName(charsetName);
            Encoding actual = enc.getActualHtmlEncoding();
            if (actual != null) {
                enc = actual;
            }
            returnValue = enc;
        } catch (UnsupportedCharsetException e) {

View Full Code Here

    public Encoding sniff() throws IOException {
        try {
            CharsetDetector detector = new CharsetDetector();
            detector.setText(this);
            CharsetMatch match = detector.detect();
            Encoding enc = Encoding.forName(match.getName());
            Encoding actual = enc.getActualHtmlEncoding();
            if (actual != null) {
                enc = actual;
            }
            if (enc != Encoding.WINDOWS1252 && enc.isAsciiSuperset()) {
                return enc;

View Full Code Here

TOP

Related Classes of nu.validator.htmlparser.io.Encoding

nu.validator.gnu.xml.aelfred2.XmlParser

nu.validator.htmlparser.extra.ChardetSniffer

nu.validator.htmlparser.extra.IcuDetectorSniffer

java.io.InputStreamReader

java.nio.charset.UnsupportedCharsetException

java.nio.charset.CharsetDecoder

java.io.Reader

java.io.ByteArrayInputStream

All source code is the property of its respective owners. Java is a trademark originally registered by Sun Microsystems, Inc. and now owned by Oracle Corporation. Contact coftware#gmail.com.