Package net.htmlparser.jericho

Examples of net.htmlparser.jericho.Source


            fileForParsing = new File(pathToFile);
            length = (int) fileForParsing.length();

            if (length != 0) {
                Source source;
                char[] cbuf = new char[length];
                InputStreamReader isr = new InputStreamReader(
                        new FileInputStream(fileForParsing), fileEnc);
                final int read = isr.read(cbuf);

                fileContent = new String(cbuf, 0, read);
                isr.close();

                source = new Source(fileContent);
                source.setLogger(null);
                fileContent = source.getTextExtractor().toString();

                pathToFile = null;
                source = null;
                isr = null;
                fileForParsing = null;
View Full Code Here



            if (MainFrame.downloadTomcatFlag.isSelected()) {

                Pattern pattern = Pattern.compile("^http://.*/tomcat/.*bin/apache-tomcat-[[0-9]+\\.]+zip");
                Source source = new Source(new URL("http://tomcat.apache.org/download-70.cgi"));
                source.setLogger(null);
                source.fullSequentialParse();
                List<Element> linkElements = source.getAllElements(HTMLElementName.A);

                for (Element linkElement : linkElements) {
                    String href = linkElement.getAttributeValue("href");
                    if (href != null) {
                        Matcher matcher = pattern.matcher(href);
View Full Code Here

            fileForParsing = new File(pathToFile);
            length = (int) fileForParsing.length();

            if (length != 0) {
                Source source;
                AddDataToIndex AddDataToIndex = new AddDataToIndex(null);
                char[] cbuf = new char[length];
                InputStreamReader isr = new InputStreamReader(
                        new FileInputStream(fileForParsing), fileEnc);
                final int read = isr.read(cbuf);

                fileContent = new String(cbuf, 0, read);
                isr.close();

                if (!fileEnc.equals("UTF-8")) {
                    fileContent = (new String(fileContent.getBytes("UTF-8"), "UTF-8"));
                }

                source = new Source(fileContent);
                source.setLogger(null);
                fileContent = source.getTextExtractor().toString();

                AddDataToIndex.doAddData(fileContent, pathToFile, fileName);

                pathToFile = null;
                AddDataToIndex = null;
View Full Code Here

      return false;
    }
   
    System.out.print("Convering HTML file ("+filename+" -> "+targetFilename+")... ");
   
    Source source = new Source(htmlText);
    OutputDocument outputDocument = new OutputDocument(source);
   
    int index = 1;
    HashMap<String, String> diagramList = new HashMap<String, String>();
    for(Element element : source.getAllElements("pre")) {
      StartTag tag = element.getStartTag();
      Attribute classAttr = tag.getAttributes().get("class");
      if(classAttr != null
          && classAttr.hasValue()
          && classAttr.getValue().equals(TAG_CLASS)) {
View Full Code Here

    private Source getHTMLSource(String htmlURL) throws IOException {
        if (htmlURL.startsWith(HTTP_STRING)
                || htmlURL.startsWith(HTTPS_STRING)
                || htmlURL.startsWith(FTP_STRING)) {
            try {
                return new Source(new URL(htmlURL));
            } catch (MalformedURLException ex) {
                logger.error("Invalid URL: " + htmlURL, ex);
            }
        } else {
            return new Source(reader = new FileReader(htmlURL));
        }
        return null;
    }
View Full Code Here

   *
   * @throws IOException
   */
  private static HashMap<String, KMLReferenceField> parseKmlReferenceForSuitableJavaDoc(final String kmlReferenceHtmlPage)
      throws MalformedURLException, IOException {
    final Source source = new Source(new URL(kmlReferenceHtmlPage));

    // look out for these tags
    final ArrayList<String> lookForTheseTags = new ArrayList<String>();
    lookForTheseTags.add("p");
    lookForTheseTags.add("pre");
    lookForTheseTags.add("ul");
    lookForTheseTags.add("dl");
    lookForTheseTags.add("ol");
    lookForTheseTags.add("h4");
    lookForTheseTags.add("table");

    // returns a list with all (html-) elements found in the kml-reference-guide
    final List<Element> allHtmlElements = source.getAllElements();
    final HashMap<String, KMLReferenceField> kmlElements = preParseElements(allHtmlElements);

    for (final Element htmlElement : allHtmlElements) {
      // each kml element has a h2-heading!
      if (htmlElement.getName().equals("h2")) {
View Full Code Here

            }
            result.setHtmlOutput(portletOutput);

            // what we need to do now is to do special processing for <script> tags, and on the client side we will
            // create them dynamically.
            Source source = new Source(portletOutput);
            source = new Source((new SourceFormatter(source)).toString());
            List<StartTag> scriptTags = source.getAllStartTags(HTMLElementName.SCRIPT);
            for (StartTag curScriptTag : scriptTags) {
                if ((curScriptTag.getAttributeValue("src") != null) &&
                        (!curScriptTag.getAttributeValue("src").equals(""))) {
                    result.getScriptsWithSrc().add(curScriptTag.getAttributeValue("src"));
                } else {
View Full Code Here

        }

        long timer = System.currentTimeMillis();
        boolean modified = false;

        Source src = new Source(content);
        OutputDocument out = new OutputDocument(src);
        for (String filteredTagName : filteredTags) {
            for (StartTag startTag : src.getAllStartTags(filteredTagName)) {
                if (startTag.getTagType() == StartTagType.NORMAL) {
                    Element element = startTag.getElement();
                    EndTag endTag = element.getEndTag();
                    if (removeContentBetweenTags && endTag != null) {
                        out.remove(element);
View Full Code Here

            // replace /live/ by /default/ in href and src attributes as it represents same image
            if(original.contains("/files/"+Constants.EDIT_WORKSPACE+"/")||amendment.contains("/files/"+Constants.EDIT_WORKSPACE+"/")) {
                original = original.replaceAll("/"+ Constants.LIVE_WORKSPACE+"/","/"+Constants.EDIT_WORKSPACE+"/");
                amendment = amendment.replaceAll("/"+ Constants.LIVE_WORKSPACE+"/","/"+Constants.EDIT_WORKSPACE+"/");
            }
            original = new SourceFormatter(new Source(original)).toString();
            amendment = new SourceFormatter(new Source(amendment)).toString();
            final ContentHandler postProcess = filter.xsl(result, "jahiahtmlheader.xsl");

            final Locale locale = Locale.ENGLISH;
            final String prefix = "diff";
View Full Code Here

    }

    protected String processCss(String previousOut, String serverUrl, HttpServletRequest request,
            HttpServletResponse response) {

        Source source = new Source(previousOut);
        OutputDocument document = new OutputDocument(source);
        StringBuilder sb = new StringBuilder();
        List<StartTag> linkStartTags = source.getAllStartTags(HTMLElementName.LINK);
        for (StartTag linkTag : linkStartTags) {
            Attributes attributes = linkTag.getAttributes();
            String rel = attributes.getValue("rel");
            if (rel == null || !"stylesheet".equalsIgnoreCase(rel)) {
                continue;
View Full Code Here

TOP

Related Classes of net.htmlparser.jericho.Source

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.