Package edu.stanford.nlp.pipeline

Examples of edu.stanford.nlp.pipeline.CoreNLPProtos$Document


    // Construct a raw surrogate pair character and confirm it is output hex-escaped, even when the encoding is UTF-8
    @Test
    public void test_RawSurrogatePairUTF8() throws JDOMException, IOException {
      SAXBuilder builder = new SAXBuilder();
      builder.setExpandEntities(true);
      Document doc = builder.build(new StringReader("<?xml version=\"1.0\"?><root>\uD800\uDC00</root>"));
      Format format = Format.getCompactFormat().setEncoding("UTF-8");
      XMLOutputter outputter = new XMLOutputter(format);
      ByteArrayOutputStream baos = new ByteArrayOutputStream();
      outputter.output(doc, baos);
      String xml = baos.toString();
View Full Code Here


    // Construct illegal XML and check if the parser notices
    @Test
    public void test_ErrorSurrogatePair() throws JDOMException, IOException {
      SAXBuilder builder = new SAXBuilder();
      builder.setExpandEntities(true);
      Document doc = builder.build(new StringReader("<?xml version=\"1.0\"?><root></root>"));
      try {
        doc.getRootElement().setText("\uD800\uDBFF");
        fail("Illegal surrogate pair should have thrown an exception");
      }
      catch (IllegalDataException e) {
        // do nothing
      } catch (Exception e) {
View Full Code Here

    // Manually construct illegal XML and make sure the outputter notices
    @Test
    public void test_ErrorSurrogatePairOutput() throws JDOMException, IOException {
      SAXBuilder builder = new SAXBuilder();
      builder.setExpandEntities(true);
      Document doc = builder.build(new StringReader("<?xml version=\"1.0\"?><root></root>"));
      Text t = new UncheckedJDOMFactory().text("\uD800\uDBFF");
      doc.getRootElement().setContent(t);
      Format format = Format.getCompactFormat().setEncoding("ISO-8859-1");
      XMLOutputter outputter = new XMLOutputter(format);
      ByteArrayOutputStream baos = new ByteArrayOutputStream();
      try {
        outputter.output(doc, baos);
View Full Code Here

    assertNotNull(out.toString());
  }
 
  @Test
  public void testCRNLEscaping() {
    Document doc = new Document();
    Element root = new Element("root");
    Element child1 = new Element("child1");
    Element child2 = new Element("child2");
    Text stuff = new Text("foo");
    root.addContent(child1);
    root.addContent(stuff);
    root.addContent(child2);
    doc.setRootElement(root);
    XMLOutputter xout = new XMLOutputter(Format.getPrettyFormat());
    String actual = xout.outputString(doc);
    String expect = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n"
        + "<root>\r\n"
        + "  <child1 />\r\n"
View Full Code Here

        outputString(ftfw,     root));
  }
 
  @Test
  public void testDocumentSimple() {
    Document content = new Document();
    assertEquals("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",
        outputString(fraw,     content));
    assertEquals("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",
        outputString(fcompact, content));
    assertEquals("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",
View Full Code Here

        outputString(ftfw,     content));
  }
 
  @Test
  public void testDocumentDocType() {
    Document content = new Document();
    content.setDocType(new DocType("root"));
    assertEquals("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE root>\n",
        outputString(fraw,     content));
    assertEquals("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE root>\n",
        outputString(fcompact, content));
    assertEquals("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE root>\n",
View Full Code Here

        outputString(ftfw,     content));
  }
 
  @Test
  public void testDocumentComment() {
    Document content = new Document();
    content.addContent(new Comment("comment"));
    assertEquals("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!--comment-->\n",
        outputString(fraw,     content));
    assertEquals("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!--comment-->\n",
        outputString(fcompact, content));
    assertEquals("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!--comment-->\n",
View Full Code Here

    checkOutput(new DocType("root", "publicID", "systemID"), dec, dec, dec, dec, dec);
  }

  @Test
  public void testOutputDocumentSimple() {
    Document doc = new Document();
    doc.addContent(new Element("root"));
    String xmldec = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
    String rtdec = "<root />";
    checkOutput(doc,
        xmldec + "\n" + rtdec + "\n",
        xmldec + "\n" + rtdec + "\n",
View Full Code Here

        xmldec + "\n" + rtdec + "\n");
  }

  @Test
  public void testOutputDocumentOmitEncoding() {
    Document doc = new Document();
    doc.addContent(new Element("root"));
    String xmldec = "<?xml version=\"1.0\"?>";
    FormatSetup setup = new FormatSetup() {
      @Override
      public void setup(Format fmt) {
        fmt.setOmitEncoding(true);
View Full Code Here

        xmldec + "\n" + rtdec + "\n");
  }

  @Test
  public void testOutputDocumentOmitDeclaration() {
    Document doc = new Document();
    doc.addContent(new Element("root"));
    FormatSetup setup = new FormatSetup() {
      @Override
      public void setup(Format fmt) {
        fmt.setOmitDeclaration(true);
      }
View Full Code Here

TOP

Related Classes of edu.stanford.nlp.pipeline.CoreNLPProtos$Document

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.