Package it.unimi.dsi.mg4j.document

Examples of it.unimi.dsi.mg4j.document.HtmlDocumentFactory$HtmlDocument


    TRECDocumentCollection collection = new TRECDocumentCollection(
        new String[] { temp.toString() },
        CompositeDocumentFactory
            .getFactory(new DocumentFactory[] {
                new TRECHeaderDocumentFactory(),
                new HtmlDocumentFactory( new String[] { "encoding=ISO-8859-1" } ) } ),
        4, // Very small, to induce fragmentation
        false);

    try {
      DocumentIterator iter = collection.iterator();
View Full Code Here


    TRECDocumentCollection collection = new TRECDocumentCollection(
        new String[] { temp.toString(), tempAgain.toString() },
        CompositeDocumentFactory
            .getFactory(new DocumentFactory[] {
                new TRECHeaderDocumentFactory(),
                new HtmlDocumentFactory( new String[] { "encoding=ISO-8859-1" } ) } ),
        4, // Very small, to induce fragmentation
        false);

    DocumentIterator iter = collection.iterator();
    Document d = null;
View Full Code Here

  public void processForms(IInjectionModuleContext ctx, HttpUriRequest request, IHttpResponse response) {
    final IHTMLParseResult html = response.getParsedHTML();
    if(html == null)
      return;

    final HTMLDocument document = html.getDOMDocument();
    final HTMLCollection forms = document.getForms();

    for(int i = 0; i < forms.getLength(); i++) {
      Node n = forms.item(i);
      if(n instanceof Element) {
        processFormElement(ctx, request, (Element) n);
View Full Code Here

    this.htmlDocument = document;
  }
 
  @Override
  public void jsConstructor(Object ob) {
    final HTMLDocument d = (HTMLDocument) Context.jsToJava(ob, HTMLDocument.class);
    this.htmlDocument = d;
    this.document = d;
    setNode(d);
    setDocumentJS(this);
  }
View Full Code Here

    final IHTMLParseResult htmlResult = response.getParsedHTML();
    if(htmlResult == null) {
      return null;
    }
    final Context cx = Context.getCurrentContext();
    final HTMLDocument domDocument = htmlResult.getDOMDocument();
    final Scriptable scope = ScriptableObject.getTopLevelScope(this);
    final Object docOb = Context.javaToJS(domDocument, scope);
    final Object[] args = { docOb };
    return cx.newObject(scope, "HTMLDocument", args);
  }
View Full Code Here

TOP

Related Classes of it.unimi.dsi.mg4j.document.HtmlDocumentFactory$HtmlDocument

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.