Source Code of winterwell.markdown.pagemodel.MarkdownPage$Header

/**
 * Copyright winterwell Mathematics Ltd.
 * @author Daniel Winterstein
 * 11 Jan 2007
 */
package winterwell.markdown.pagemodel;


import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


import org.eclipse.jface.preference.IPreferenceStore;


import winterwell.markdown.Activator;
import winterwell.markdown.StringMethods;
import winterwell.markdown.preferences.MarkdownPreferencePage;
import winterwell.utils.FailureException;
import winterwell.utils.Process;
import winterwell.utils.StrUtils;
import winterwell.utils.Utils;
import winterwell.utils.io.FileUtils;


import com.petebevin.markdown.MarkdownProcessor;


/**
 * Understands Markdown syntax.
 * 
 * @author Daniel Winterstein
 */
public class MarkdownPage {


  /**
   * Strip leading and trailing #s and whitespace
   * 
   * @param line
   * @return cleaned up line
   */
  private String cleanHeader(String line) {
    for (int j = 0; j < line.length(); j++) {
      char c = line.charAt(j);
      if (c != '#' && !Character.isWhitespace(c)) {
        line = line.substring(j);
        break;
      }
    }
    for (int j = line.length() - 1; j > 0; j--) {
      char c = line.charAt(j);
      if (c != '#' && !Character.isWhitespace(c)) {
        line = line.substring(0, j + 1);
        break;
      }
    }
    return line;
  }


  /**
   * Represents information about a section header. E.g. ## Misc Warblings
   * 
   * @author daniel
   */
  public class Header {
    /**
     * 1 = top-level (i.e. #), 2= 2nd-level (i.e. ##), etc.
     */
    final int level;
    /**
     * The text of the Header
     */
    final String heading;
    /**
     * Sub-sections, if any
     */
    final List<Header> subHeaders = new ArrayList<Header>();
    /**
     * The line on which this header occurs.
     */
    final int lineNumber;


    public int getLineNumber() {
      return lineNumber;
    }


    /**
     * 
     * @return the next section (at this depth if possible), null if none
     */
    public Header getNext() {
      if (parent == null) {
        int ti = level1Headers.indexOf(this);
        if (ti == -1 || ti == level1Headers.size() - 1)
          return null;
        return level1Headers.get(ti + 1);
      }
      int i = parent.subHeaders.indexOf(this);
      assert i != -1 : this;
      if (i == parent.subHeaders.size() - 1)
        return parent.getNext();
      return parent.subHeaders.get(i + 1);
    }
    /**
     * 
     * @return the next section (at this depth if possible), null if none
     */
    public Header getPrevious() {
      if (parent == null) {
        int ti = level1Headers.indexOf(this);
        if (ti == -1 || ti == 0)
          return null;
        return level1Headers.get(ti - 1);
      }
      int i = parent.subHeaders.indexOf(this);
      assert i != -1 : this;
      if (i == 0)
        return parent.getPrevious();
      return parent.subHeaders.get(i - 1);
    }
    


    /**
     * The parent section. Can be null.
     */
    private Header parent;


    /**
     * Create a marker for a section Header
     * 
     * @param level
     *            1 = top-level (i.e. #), 2= 2nd-level (i.e. ##), etc.
     * @param lineNumber
     *            The line on which this header occurs
     * @param heading
     *            The text of the Header, trimmed of #s
     * @param currentHeader
     *            The previous Header. This is used to find the parent
     *            section if there is one. Can be null.
     */
    Header(int level, int lineNumber, String heading, Header currentHeader) {
      this.lineNumber = lineNumber;
      this.level = level;
      this.heading = cleanHeader(heading);
      // Heading Tree
      setParent(currentHeader);
    }


    private void setParent(Header currentHeader) {
      if (currentHeader == null) {
        parent = null;
        return;
      }
      if (currentHeader.level < level) {
        parent = currentHeader;
        parent.subHeaders.add(this);
        return;
      }
      setParent(currentHeader.parent);
    }


    public Header getParent() {
      return parent;
    }


    /**
     * Sub-sections. May be zero-length, never null.
     */
    public List<Header> getSubHeaders() {
      return subHeaders;
    }


    @Override
    public String toString() {
      return heading;
    }


    public int getLevel() {
      return level;
    }
  }


  /**
   * The raw text, broken up into individual lines. Assigned in setText
   * from StringMethods.splitLines(text).
   * NOTE(review): html() and toString() concatenate these entries without
   * inserting separators, so each entry presumably keeps its own line
   * terminator -- confirm against StringMethods.splitLines.
   */
  private List<String> lines;


  /**
   * The raw text, broken up into individual lines.
   */
  public List<String> getText() {
    return Collections.unmodifiableList(lines);
  }


  /**
   * Classification of a single line of the page.
   * <p>
   * The declaration order matters: setText selects a header type with
   * KLineType.values()[level], so H1..H6 must stay at positions 1..6
   * (NORMAL occupies position 0).
   */
  public enum KLineType {
    NORMAL, H1, H2, H3, H4, H5, H6, BLANK,
    // TODO LIST, BLOCKQUOTE,
    /** A line marking Markdown info about the preceding line, e.g. ====== */
    MARKER,
    /** A line containing meta-data, e.g. title: My Page */
    META
  }


  /**
   * Information about each line. Rebuilt by setText; html() asserts that it
   * has the same size as the list of lines.
   */
  private List<KLineType> lineTypes;
  /**
   * Objects attached to lines, keyed by line number. setText stores the
   * Header object for each header line here.
   */
  private Map<Integer,Object> pageObjects = new HashMap<Integer, Object>();


  /**
   * Whether setText looks for a MultiMarkdown meta-data block at the top of
   * the text. Never reassigned within this class.
   */
  private boolean multiMarkdownSupport = true;
  // TODO meta-data, footnotes, tables, link & image attributes
  /**
   * Recognises a "key: value" meta-data line. Because ".+" is greedy,
   * group 1 extends up to the LAST colon on the line, not the first.
   */
  private static Pattern multiMarkdownTag = Pattern.compile("(.+):(.*)");
  /**
   * Meta-data collected by setText from the MultiMarkdown header block.
   */
  private Map<String, String> multiMarkdownTags = new HashMap<String, String>();


  /**
   * The top-level headers. setText starts this list with a dummy level-1
   * header (empty text, line 0) and removes the dummy again if it ended up
   * with no sub-headers. FIXME handle documents which have a 2nd level
   * header before any 1st level ones
   */
  private final List<Header> level1Headers = new ArrayList<Header>();


  /**
   * Create a page from raw text. All analysis (splitting into lines,
   * classifying lines, building the header tree) is done immediately by
   * setText.
   * 
   * @param text
   *            the raw page text; passed to StringMethods.splitLines by
   *            setText
   */
  public MarkdownPage(String text) {
    setText(text);
  }


  /**
   * Reset the text for this page.
   * 
   * @param text
   */
  private void setText(String text) {
    // Get lines
    lines = StringMethods.splitLines(text);
    // Clean out old
    level1Headers.clear();
    lineTypes = new ArrayList<KLineType>(lines.size());
    pageObjects.clear();
    // Dummy level-1 header in case there are none    
    Header dummyTopHeader = new Header(1, 0, "", null);
    level1Headers.add(dummyTopHeader);
    Header currentHeader = dummyTopHeader;    
    // Identify line types    
    int lineNum = 0;
    // Multi-markdown header
    if (multiMarkdownSupport) {
      // The key is the text before the colon, and the data is the text
      // after the
      // colon. In the above example, notice that there are two lines of
      // information
      // for the Author key. If you end a line with “space-space-newline”,
      // the newline
      // will be included when converted to other formats.
      //
      // There must not be any whitespace above the metadata, and the
      // metadata block
      // ends with the first whitespace only line. The metadata is
      // stripped from the
      // document before it is passed on to the syntax parser.
      String data = "";
      String tag = "";
      for (; lineNum < lines.size(); lineNum++) {
        String line = lines.get(lineNum);
        if (Utils.isBlank(line)) {
          break;
        }
        Matcher m = multiMarkdownTag.matcher(line);
        if (!m.find()) {
          if (lineNum == 0)
            break;
          // Multi-line tag
          lineTypes.add(KLineType.META);
          data += StrUtils.LINEEND + line.trim();
          multiMarkdownTags.put(tag, data);
        } else {
          lineTypes.add(KLineType.META);
          tag = m.group(0);
          data = m.group(1).trim();
          if (m.group(1).endsWith(line))
            multiMarkdownTags.put(tag, data);
        }
      }
    }
    for (; lineNum < lines.size(); lineNum++) {
      String line = lines.get(lineNum);
      // Headings
      int h = numHash(line);
      String hLine = line;
      int hLineNum = lineNum;
      int underline = -1;
      if (lineNum != 0) {
        underline = just(line, '=') ? 1 : just(line, '-') ? 2 : -1;
      }
      if (underline != -1) {
        h = underline;
        hLineNum = lineNum - 1;
        hLine = lines.get(lineNum - 1);
        lineTypes.set(hLineNum, KLineType.values()[h]);
        lineTypes.add(KLineType.MARKER);
      }
      // Create a Header object
      if (h > 0) {
        if (underline == -1)
          lineTypes.add(KLineType.values()[h]);
        Header header = new Header(h, hLineNum, hLine, currentHeader);
        if (h == 1) {
          level1Headers.add(header);
        }
        pageObjects.put(hLineNum, header);
        currentHeader = header;
        continue;
      }
      // TODO List
      // TODO Block quote
      // Blank line
      if (Utils.isBlank(line)) {
        lineTypes.add(KLineType.BLANK);
        continue;
      }
      // Normal
      lineTypes.add(KLineType.NORMAL);
    } // end line-loop
    // Remove dummy header?
    if (dummyTopHeader.getSubHeaders().size() == 0) {
      level1Headers.remove(dummyTopHeader);
    }
  }


  /**
   * @param line
   * @param c
   * @return true if line is just cs (and whitespace at the start/end)
   */
  boolean just(String line, char c) {
    return line.matches("\\s*"+c+"+\\s*");
  }


  /**
   * @param line
   * @return The number of # symbols prepending the line.
   */
  private int numHash(String line) {
    for (int i = 0; i < line.length(); i++) {
      if (line.charAt(i) != '#')
        return i;
    }
    return line.length();
  }


  /**
   * 
   * @param parent
   *            Can be null for top-level
   * @return List of sub-headers. Never null. FIXME handle documents which
   *         have a 2nd level header before any 1st level ones
   */
  public List<Header> getHeadings(Header parent) {
    if (parent == null) {
      return Collections.unmodifiableList(level1Headers);
    }
    return Collections.unmodifiableList(parent.subHeaders);
  }


  // public WebPage getWebPage() {
  // WebPage page = new WebPage();
  // // Add the lines, one by one
  // boolean inParagraph = false;
  // for (int i=0; i<lines.size(); i++) {
  // String line = lines.get(i);
  // KLineType type = lineTypes.get(i);
  // switch(type) {
  // // Heading?
  // case H1: case H2: case H3:
  // case H4: case H5: case H6:
  // if (inParagraph) page.addText("</p>");
  // line = cleanHeader(line);
  // page.addText("<"+type+">"+line+"</"+type+">");
  // continue;
  // case MARKER: // Ignore
  // continue;
  // // TODO List?
  // // TODO Block quote?
  // }
  // // Paragraph end?
  // if (Utils.isBlank(line)) {
  // if (inParagraph) page.addText("</p>");
  // continue;
  // }
  // // Paragraph start?
  // if (!inParagraph) {
  // page.addText("<p>");
  // inParagraph = true;
  // }
  // // Plain text
  // page.addText(line);
  // }
  // return page;
  // }


  /**
   * Get the HTML for this page. Uses the MarkdownJ project.
   */
  public String html() {
    IPreferenceStore pStore = Activator.getDefault().getPreferenceStore();
    // Section numbers??
    boolean sectionNumbers = pStore
        .getBoolean(MarkdownPreferencePage.PREF_SECTION_NUMBERS);
    // Chop out multi-markdown header
    StringBuilder sb = new StringBuilder();
    assert lines.size() == lineTypes.size();
    for (int i = 0, n = lines.size(); i < n; i++) {
      KLineType type = lineTypes.get(i);
      if (type == KLineType.META)
        continue;
      String line = lines.get(i);
      if (sectionNumbers && isHeader(type) && line.contains("$section")) {
        // TODO Header section = headers.get(i);
        // String secNum = section.getSectionNumber();
        // line.replace("$section", secNum);
      }
      sb.append(line);
    }
    String text = sb.toString();
    // Use external converter?
    final String cmd = pStore
        .getString(MarkdownPreferencePage.PREF_MARKDOWN_COMMAND);
    if (Utils.isBlank(cmd)
        || (cmd.startsWith("(") && cmd.contains("MarkdownJ"))) {
      // Use MarkdownJ
      MarkdownProcessor markdown = new MarkdownProcessor();
      // MarkdownJ doesn't convert £s for some reason
      text = text.replace("£", "&pound;");
      String html = markdown.markdown(text);
      return html;
    }
    // Attempt to run external command
    try {
      final File md = File.createTempFile("tmp", ".md");
      FileUtils.write(md, text);
      Process process = new Process(cmd+" "+md.getAbsolutePath());
      process.run();
      int ok = process.waitFor(10000);
      if (ok != 0) throw new FailureException(cmd+" failed:\n"+process.getError());
      String html = process.getOutput();
      FileUtils.delete(md);
      return html;
    } catch (Exception e) {
      throw Utils.runtime(e);
    }
  }


  /**
   * @param type
   * @return
   */
  private boolean isHeader(KLineType type) {
    return type == KLineType.H1 || type == KLineType.H2
        || type == KLineType.H3 || type == KLineType.H4
        || type == KLineType.H5 || type == KLineType.H6;
  }


  /**
   * Return the raw text of this page.
   */
  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    for (String line : lines) {
      sb.append(line);
    }
    return sb.toString();
  }


  /**
   * Line type information for the raw text.
   * 
   * @return
   */
  public List<KLineType> getLineTypes() {
    return Collections.unmodifiableList(lineTypes);
  }


  /**
   * @param line
   * @return
   */
  public Object getPageObject(int line) {    
    return pageObjects.get(line);
  }


}
Source Code of winterwell.markdown.pagemodel.MarkdownPage$Header

Related Classes of winterwell.markdown.pagemodel.MarkdownPage$Header