Package com.google.caja.plugin.stages

Source Code of com.google.caja.plugin.stages.ResolveUriStage

// Copyright (C) 2009 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.caja.plugin.stages;

import com.google.caja.lang.html.HTML;
import com.google.caja.lang.html.HtmlSchema;
import com.google.caja.lexer.FilePosition;
import com.google.caja.lexer.escaping.UriUtil;
import com.google.caja.parser.html.AttribKey;
import com.google.caja.parser.html.Dom;
import com.google.caja.parser.html.ElKey;
import com.google.caja.parser.html.Nodes;
import com.google.caja.plugin.Job;
import com.google.caja.plugin.JobEnvelope;
import com.google.caja.plugin.Jobs;
import com.google.caja.plugin.PluginMessageType;
import com.google.caja.reporting.MessagePart;
import com.google.caja.reporting.MessageQueue;
import com.google.caja.util.ContentType;
import com.google.caja.util.Pipeline;
import com.google.caja.util.Strings;

import java.net.URI;
import java.net.URISyntaxException;
import java.util.ListIterator;

import org.w3c.dom.Attr;
import org.w3c.dom.Element;
import org.w3c.dom.Node;

/**
* For each HTML job, tries to determine a base URI.  If it can, it will resolve
* well-formed relative URIs in attributes.
*
* @author mikesamuel@gmail.com
*/
public class ResolveUriStage implements Pipeline.Stage<Jobs> {
  private static final ElKey BASE = ElKey.forHtmlElement("base");
  private static final AttribKey BASE_HREF
      = AttribKey.forHtmlAttrib(BASE, "href");

  private final HtmlSchema schema;

  public ResolveUriStage(HtmlSchema schema) {
    this.schema = schema;
  }

  private static boolean isBaseUri(URI uri) {
    return uri != null && uri.isAbsolute() && !uri.isOpaque()
        // Don't interpret FilePosition.UNKNOWN as a base URI.
        && !Strings.eqIgnoreCase("unknown", uri.getScheme());
  }

  private URI baseUri(Node root, URI uri, FilePosition pos) {
    URI baseUri = baseUriForDoc(root);
    if (!isBaseUri(baseUri)) {
      baseUri = uri;
      if (!isBaseUri(baseUri)) {
        // TODO(mikesamuel): this is problematic for DOM nodes parsed without
        // proper debugging info.
        baseUri = pos.source().getUri();
        if (!isBaseUri(baseUri)) { return null; }
      }
    }
    return baseUri;
  }

  private URI baseUriForDoc(Node root) {
    if (root instanceof Element) {
      Element el = (Element) root;
      if (BASE.is((Element) root)) {
        return uriFromBaseElement(el);
      } else {
        for (Element base : Nodes.nodeListIterable(
            el.getElementsByTagNameNS(BASE.ns.uri, BASE.localName),
            Element.class)) {
          URI uri = uriFromBaseElement(base);
          if (uri != null) { return uri; }
        }
      }
      return null;
    } else {
      for (Node c : Nodes.childrenOf(root)) {
        URI uri = baseUriForDoc(c);
        if (uri != null) { return uri; }
      }
    }
    return null;
  }

  private URI uriFromBaseElement(Element base) {
    Attr a = base.getAttributeNodeNS(BASE_HREF.ns.uri, BASE_HREF.localName);
    if (a == null) { return null; }
    String value = a.getValue();
    try {
      URI uri = new URI(value);
      return isBaseUri(uri) ? uri : null;
    } catch (URISyntaxException ex) {
      return null;
    }
  }

  public boolean apply(Jobs jobs) {
    MessageQueue mq = jobs.getMessageQueue();
    for (ListIterator<JobEnvelope> it = jobs.getJobs().listIterator();
         it.hasNext();) {
      JobEnvelope env = it.next();
      if (env.fromCache) { continue; }
      Job job = env.job;
      if (job.getType() != ContentType.HTML) { continue; }
      Dom dom = (Dom) job.getRoot();
      Node node = dom.getValue();
      URI baseUri = baseUri(node, job.getBaseUri(), dom.getFilePosition());
      if (baseUri != null) {
        try {
          baseUri = URI.create(UriUtil.normalizeUri(baseUri.toString()));
        } catch (URISyntaxException ex) {
          mq.addMessage(
              PluginMessageType.MALFORMED_URL, dom.getFilePosition(),
              MessagePart.Factory.valueOf(baseUri.toString()));
          baseUri = null;
        }
      }
      if (baseUri != null) {
        resolveRelativeUrls(node, baseUri, mq);
        it.set(env.withJob(Job.domJob(dom, baseUri)));
      }
    }
    return true;
  }

  private void resolveRelativeUrls(Node n, URI base, MessageQueue mq) {
    if (n instanceof Element) {
      Element el = (Element) n;
      ElKey elKey = ElKey.forElement(el);
      for (Attr a : Nodes.attributesOf(el)) {
        AttribKey aKey = AttribKey.forAttribute(elKey, a);
        // If we ignored a relative base href, don't make it valid based on a
        // later one.
        if (BASE_HREF.equals(aKey)) { continue; }
        HTML.Attribute attrInfo = schema.lookupAttribute(aKey);
        if (attrInfo != null && attrInfo.getType() == HTML.Attribute.Type.URI) {
          String value = a.getValue();
          // Don't muck with inter-document references.
          if (value.startsWith("#")) { continue; }
          URI uri = null;
          try {
            uri = UriUtil.resolve(base, value);
          } catch (URISyntaxException ex) {
            mq.addMessage(
                PluginMessageType.MALFORMED_URL,
                Nodes.getFilePositionForValue(a),
                MessagePart.Factory.valueOf(value));
          }
          if (uri != null && uri.isAbsolute()) {
            FilePosition valuePos = Nodes.getFilePositionForValue(a);
            a.setValue(base.resolve(uri).toString());
            Nodes.setFilePositionForValue(a, valuePos);
          }
        }
      }
    }
    for (Node c = n.getFirstChild(); c != null; c = c.getNextSibling()) {
      resolveRelativeUrls(c, base, mq);
    }
  }
}
TOP

Related Classes of com.google.caja.plugin.stages.ResolveUriStage

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.