Package org.apache.lenya.lucene.parser

Examples of org.apache.lenya.lucene.parser.HTMLParser


     * @throws Exception if obtaining the base document, creating the HTML parser, or parsing the file fails
     */
    public Document getDocument(File file, File htdocsDumpDir) throws Exception {
        Document document = super.getDocument(file, htdocsDumpDir);

        HTMLParser parser = HTMLParserFactory.newInstance(file);
        parser.parse(file);

        document.add(Field.Text("title", parser.getTitle()));
        document.add(Field.Text("keywords", parser.getKeywords()));
        document.add(Field.Text("contents", parser.getReader()));

        return document;
    }
View Full Code Here


     * @return DOCUMENT ME!
     *
     * @throws Exception DOCUMENT ME!
     */
    public static String getBodyText(File file) throws Exception {
        HTMLParser parser = HTMLParserFactory.newInstance(file);
        parser.parse(file);

        Reader reader = parser.getReader();
        Writer writer = new StringWriter();

        int c;

        while ((c = reader.read()) != -1)
View Full Code Here

     * @throws Exception if obtaining the base document, creating the HTML parser, or parsing the file fails
     */
    public Document getDocument(File file, File htdocsDumpDir) throws Exception {
        Document document = super.getDocument(file, htdocsDumpDir);

        HTMLParser parser = HTMLParserFactory.newInstance(file);
        parser.parse(file);

        document.add(Field.Text("title", parser.getTitle()));
        document.add(Field.Text("keywords", parser.getKeywords()));
        document.add(Field.Text("contents", parser.getReader()));

        return document;
    }
View Full Code Here

TOP

Related Classes of org.apache.lenya.lucene.parser.HTMLParser

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by Oracle Inc. Contact coftware#gmail.com.