Package com.google.caja.plugin.templates

Source Code of com.google.caja.plugin.templates.Localizer

// Copyright (C) 2009 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.caja.plugin.templates;

import com.google.caja.SomethingWidgyHappenedError;
import com.google.caja.lexer.CharProducer;
import com.google.caja.lexer.FilePosition;
import com.google.caja.lexer.HtmlLexer;
import com.google.caja.lexer.HtmlTokenType;
import com.google.caja.lexer.ParseException;
import com.google.caja.lexer.Token;
import com.google.caja.parser.html.Namespaces;
import com.google.caja.parser.html.Nodes;
import com.google.caja.render.Concatenator;
import com.google.caja.reporting.MarkupRenderMode;
import com.google.caja.reporting.MessagePart;
import com.google.caja.reporting.MessageQueue;
import com.google.caja.reporting.RenderContext;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentFragment;
import org.w3c.dom.Element;
import org.w3c.dom.Node;

/**
* Replaces message strings in IHTML with ones from a message bundle.
*
* <p>
* This does not require that the IHTML document contain any templates, so can
* be used on a subset of IHTML that only includes {@code ihtml:message},
* {@code ihtml:ph} and {@code ihtml:eph} to do message message substitution
* that returns plain HTML.
*
* <p>
* This assumes that the input has been {@link IhtmlSanityChecker sanitized}.
*
* @author mikesamuel@gmail.com
*/
public class Localizer {
  private final MessageQueue mq;

  /**
   * @param mq receives errors and warnings about undefined messages; and
   *     missing, and extraneous placeholders.
  */
  public Localizer(MessageQueue mq) {
    this.mq = mq;
  }

  /**
   * Replace <code>ihtml:message</code> elements with the messages in cx.
   */
  public void localize(Element root, IhtmlL10NContext cx) {
    final Document doc = root.getOwnerDocument();
    for (Element message : snapshot(allMessages(root))) {
      String name = IHTML.getName(message).getValue();
      // presence of name verified by the IhtmlSanityCheck class
      final Map<String, Placeholder> placeholders
          = byName(extractPlaceholders(message));
      LocalizedHtml substitution = cx.getMessageByName(name);
      if (substitution == null) {
        notifyUntranslatedMessage(message, cx.getLocale());
        substitution = messageToLocalizedHtml(message);
      }
      DocumentFragment substitutedMessage;
      try {
        substitutedMessage = substitution.substitute(
            doc, new LocalizedHtml.PlaceholderHandler() {
              public Iterator<Token<HtmlTokenType>> substitutePlaceholder(
                  String placeholderName, FilePosition placeholderLoc) {
                Placeholder ph = placeholders.get(placeholderName);
                if (ph == null) {
                  notifyMissingPlaceholder(placeholderLoc);
                  return Collections.<Token<HtmlTokenType>>emptyList()
                      .iterator();
                }
                return Localizer.tokensFromNode(ph.start).iterator();
              }
            });
      } catch (ParseException ex) {
        notifyMalformedMessage(message, name);
        ex.toMessageQueue(mq);
        substitutedMessage = doc.createDocumentFragment();
      }
      for (Node child : Nodes.childrenOf(substitutedMessage)) {
        message.getParentNode().insertBefore(child, message);
      }
      message.getParentNode().removeChild(message);
    }
  }

  private List<Placeholder> extractPlaceholders(Element message) {
    List<Placeholder> placeholders = new ArrayList<Placeholder>();
    Element ph = null;
    for (Element el : IHTML.getPlaceholders(message)) {
      // Allow placeholders that are not siblings as per
      // <ihtml:ph name="startLink"/>
      // <a><ihtml:attribute><ihtml:dynamic expr="href"/></ihtml:attribute>
      // <ihtml:eph/>
      // Click
      // <ihtml:ph name="endLink"/>
      // </a>
      // <ihtml:eph/>
      if (ph != null) {
        Element eph = el;
        assert IHTML.isPh(ph)// Enforced by IhtmlSanityCheck
        assert IHTML.isEph(eph)// Enforced by IhtmlSanityCheck
        placeholders.add(new Placeholder(ph));
        ph = null;
      } else {
        ph = el;
      }
    }
    assert ph == null// Enforced by IhtmlSanityCheck
    return placeholders;
  }

  private Map<String, Placeholder> byName(List<Placeholder> phs) {
    Map<String, Placeholder> byName = new LinkedHashMap<String, Placeholder>();
    for (Placeholder ph : phs) {
      String name = IHTML.getName(ph.start).getValue();
      if (byName.containsKey(name)) {
        notifyDupePlaceholder(ph, byName.get(name));
      }
      byName.put(name, ph);
    }
    return byName;
  }

  private static final String XML_NS = Namespaces.XML_NAMESPACE_URI;
  public IhtmlL10NContext extractMessages(Element ihtmlRoot) {
    Locale locale = ihtmlRoot.hasAttributeNS(XML_NS, "lang")
        ? new Locale(ihtmlRoot.getAttributeNS(XML_NS, "lang").replace('-', '_'))
        // Choose a default that is independent of the default locale since we
        // typically run tests in the Turkish locale.
        : Locale.ENGLISH;
    Map<String, Element> messageEls = new HashMap<String, Element>();
    Map<String, LocalizedHtml> messages
        = new LinkedHashMap<String, LocalizedHtml>();
    for (Element message : allMessages(ihtmlRoot)) {
      LocalizedHtml extracted = messageToLocalizedHtml(message);
      if (extracted == null) { continue; }
      LocalizedHtml orig = messages.get(extracted.getName());
      if (orig != null) {
        if (!orig.getSerializedForm().equals(extracted.getSerializedForm())) {
          notifyDupeMessage(message, messageEls.get(extracted.getName()));
        }
      } else {
        messages.put(extracted.getName(), extracted);
        messageEls.put(extracted.getName(), message);
      }
    }
    return new IhtmlL10NContext(locale, messages);
  }

  private LocalizedHtml messageToLocalizedHtml(Element message) {
    // Clone the message.
    message = (Element) message.cloneNode(true);
    String name = IHTML.getName(message).getValue();
    StringBuilder filteredXhtml = new StringBuilder();
    if (message.getFirstChild() != null) {
      // Render an XHTML string containing the message content, with embedded
      // <ph> elements.
      StringBuilder xhtml = new StringBuilder();
      RenderContext rc = new RenderContext(new Concatenator(xhtml))
          .withMarkupRenderMode(MarkupRenderMode.XML);
      for (Node c : Nodes.childrenOf(message)) {
        Nodes.render(c, rc);
      }
      rc.getOut().noMoreTokens();
      HtmlLexer lexer = new HtmlLexer(
          CharProducer.Factory.fromString(
              xhtml.toString(),
              Nodes.getFilePositionFor(message.getFirstChild())));
      lexer.setTreatedAsXml(true);
      // 1 - saw <ihtml:ph
      // 2 - saw <ihtml:ph >
      // 3 - saw <ihtml:ph > </ihtml:ph
      // 4 - saw <ihtml:ph > </ihtml:ph > or <ihtml:ph />
      // 5 - saw <ihtml:eph
      // 6 - saw <ihtml:eph >
      // 7 - saw <ihtml:eph > </ihtml:eph
      int state = 0;
      // Filter out everything from the end of the <ihtml:ph> exclusive to the
      // end of the <ihtml:eph> inclusive.
      try {
        while (lexer.hasNext()) {
          Token<HtmlTokenType> tt = lexer.next();
          boolean emit = state < 4;
          switch (tt.type) {
            case TAGBEGIN:
              if (state == 0 && "<ihtml:ph".equals(tt.text)) {
                state = 1;
              } else if (state == 2 && "</ihtml:ph".equals(tt.text)) {
                state = 3;
              } else if (state == 4 && "<ihtml:eph".equals(tt.text)) {
                state = 5;
              } else if (state == 6 && "</ihtml:eph".equals(tt.text)) {
                state = 7;
              }
              break;
            case TAGEND:
              boolean selfclose = "/>".equals(tt.text);
              if (state == 1 && !selfclose) {
                state = 2;
              } else if (state == 1 && selfclose || state == 3) {
                state = 4;
              } else if (state == 5 && !selfclose) {
                state = 6;
              } else if (state == 5 && selfclose || state == 7) {
                state = 0;
              }
              if (emit && "/>".equals(tt.text)) { filteredXhtml.append(' '); }
              break;
            case ATTRNAME:
              if (emit) { filteredXhtml.append(' '); }
              break;
            case ATTRVALUE:
              if (emit) { filteredXhtml.append('='); }
              break;
            default: break;
          }
          if (emit) { filteredXhtml.append(tt.text); }
        }
      } catch (ParseException ex) {
        throw new SomethingWidgyHappenedError(
            "IOException reading from String", ex);
      }
    }

    return new LocalizedHtml(name, filteredXhtml.toString());
  }

  private static class Placeholder {
    final Element start;
    Placeholder(Element ph) {
      this.start = ph;
    }
  }

  private static Iterable<Element> allMessages(Element root) {
    return Nodes.nodeListIterable(
        root.getElementsByTagNameNS(IHTML.NAMESPACE, "message"), Element.class);
  }

  private static <T> List<T> snapshot(Iterable<T> it) {
    List<T> snapshot = new ArrayList<T>();
    for (T el : it) {
      snapshot.add(el);
    }
    return snapshot;
  }

  private void notifyUntranslatedMessage(Element message, Locale locale) {
    mq.addMessage(
        IhtmlMessageType.UNTRANSLATED_MESSAGE,
        Nodes.getFilePositionFor(message),
        MessagePart.Factory.valueOf(IHTML.getName(message).getValue()),
        MessagePart.Factory.valueOf(locale.toString()));
  }

  private void notifyMissingPlaceholder(FilePosition phLoc) {
    mq.addMessage(IhtmlMessageType.MISSING_PLACEHOLDER, phLoc);
  }

  private void notifyMalformedMessage(Element message, String name) {
    mq.addMessage(
        IhtmlMessageType.MALFORMED_MESSAGE,
        Nodes.getFilePositionFor(message), MessagePart.Factory.valueOf(name));
  }

  private void notifyDupeMessage(Element a, Element b) {
    mq.addMessage(
        IhtmlMessageType.DUPLICATE_MESSAGE,
        Nodes.getFilePositionFor(a),
        MessagePart.Factory.valueOf(IHTML.getName(a).getValue()),
        Nodes.getFilePositionFor(b));
  }

  private void notifyDupePlaceholder(Placeholder ph, Placeholder orig) {
    mq.addMessage(
        IhtmlMessageType.DUPLICATE_PLACEHOLDER,
        Nodes.getFilePositionFor(ph.start),
        MessagePart.Factory.valueOf(IHTML.getName(ph.start).getValue()),
        Nodes.getFilePositionFor(orig.start));
  }

  public static Iterable<Token<HtmlTokenType>> tokensFromNode(Node n) {
    List<Token<HtmlTokenType>> toks = new ArrayList<Token<HtmlTokenType>>();

    // The end placeholder must be a descendant of a sibling of an ancestor of
    // n as in
    // (1) <a><ihtml:ph/></a><ihtml:eph/>
    //        ^^^^^^^^^^^    ^^^^^^^^^^^^
    //     This is n.     Stop here.
    // where the DOM structure looks like
    // DocumentFragment
    //   Element : a
    //     Element : ihtml:ph   <-- n
    //   Element : ihtml:eph
    // and in
    // (2) <ihtml:ph/><a><ihtml:eph/>
    //     ^^^^^^^^^^    ^^^^^^^^^^^^
    //     This is n.     Stop here.
    // where the DOM structure looks like
    // DocumentFragment
    //   Element : ihtml:ph   <-- n
    //   Element : a
    //     Element : ihtml:eph
    //
    // In (1) the below finds no siblings of n, so it descends to the parent,
    // emitting the close element for the <a> element in the process.  Then
    // it reenters the loop and calls emitTokens with the next sibling of the
    // <a> element.  emitTokens recognizes that it was passed an <ihtml:eph>
    // element and returns false stopping the whole process.
    // In (2) next sibling is an <a> element, and so emitTokens writes out
    // the start tag, before recursing.  On the first recursion, it finds
    // the <ihtml:eph> and returns false.  That false bubbles all the way up
    // to this method which stops the whole process.
    Node ancestor = n;
    emit_loop:
    do {
      for (Node sib = ancestor; (sib = sib.getNextSibling()) != null ;) {
        if (!emitTokens(sib, toks)) { break emit_loop; }
      }
      ancestor = ancestor.getParentNode();
      if (ancestor == null) { break; }
      emitEndOf(ancestor, toks);
    } while (true);
    return toks;
  }

  private static boolean emitTokens(Node n, List<Token<HtmlTokenType>> out) {
    FilePosition pos = Nodes.getFilePositionFor(n);
    switch (n.getNodeType()) {
      case Node.TEXT_NODE:
      case Node.CDATA_SECTION_NODE:
        out.add(Token.instance(
            Nodes.encode(n.getNodeValue()), HtmlTokenType.TEXT, pos));
        break;
      case Node.ELEMENT_NODE:
        if (IHTML.isEph(n)) { return false; }
        Element e = (Element) n;
        FilePosition spos = FilePosition.startOf(pos);
        out.add(Token.instance("<" + tagName(e), HtmlTokenType.TAGBEGIN, spos));
        FilePosition cpos = spos;
        for (Attr a : Nodes.attributesOf(e)) {
          emitTokens(a, out);
          cpos = Nodes.getFilePositionForValue(a);
        }
        cpos = FilePosition.endOf(cpos);
        if (n.getFirstChild() == null) {
          out.add(Token.instance("/>", HtmlTokenType.TAGEND, cpos));
        } else {
          out.add(Token.instance(">", HtmlTokenType.TAGEND, cpos));
          for (Node child : Nodes.childrenOf(n)) {
            if (!emitTokens(child, out)) { return false; }
          }
          emitEndOf(e, out);
        }
        break;
      case Node.ATTRIBUTE_NODE:
        Attr a = (Attr) n;
        out.add(Token.instance(a.getName(), HtmlTokenType.ATTRNAME, pos));
        out.add(Token.instance(
            '"' + Nodes.encode(a.getValue()) + '"', HtmlTokenType.ATTRVALUE,
            Nodes.getFilePositionForValue(a)));
        break;
      default: break;
    }
    return true;
  }

  private static void emitEndOf(Node n, List<Token<HtmlTokenType>> out) {
    if (n instanceof Element) {
      Element e = (Element) n;
      FilePosition epos = FilePosition.endOf(Nodes.getFilePositionFor(e));
      out.add(Token.instance(
          "</" + tagName(e), HtmlTokenType.TAGBEGIN, epos));
      out.add(Token.instance(">", HtmlTokenType.TAGEND, epos));
    }
  }

  private static String tagName(Element e) {
    Namespaces ns = Namespaces.COMMON.forUri(e.getNamespaceURI());
    String prefix = ns != null ? ns.prefix : "";
    String localName = e.getLocalName();
    return "".equals(prefix) ? localName : prefix + ":" + localName;
  }
}
TOP

Related Classes of com.google.caja.plugin.templates.Localizer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.