Examples of edu.ucla.sspace.text.StringDocument

edu.ucla.sspace.text.StringDocument
A {@code Document} implementation backed by a {@code String} whose contents are used for the document text.

                // process it as a single document.  For consistency, strip off
                // the USENET threading formatting, e.g. >>>, from the front of
                // each line.
                while ((line = usenetReader.readLine()) != null) {
                    if (line.contains(END_OF_DOCUMENT)) 
                        return new StringDocument(cleanDoc(content.toString()));
                    else {
                        int lineStart = 0;
                        // Find the first non '>' or ' ' in the line to
                        // determine where the auto-threading formatting stops.
                        for (char c = line.charAt(lineStart);

View Full Code Here

                        context.append("|||| ");
                        tokens[i] = lemma;
                    }
                    context.append(tokens[i]).append(" ");
                }
                contexts.add(new StringDocument(context.toString()));
                context.setLength(0);
            }
        }

View Full Code Here

                inHead = false;
            else if (name.equals("lexelt"))
                inLexElement = false;
            else if (name.equals("context")) {
                inContext = false;
                contexts.add(new StringDocument(context.toString()));
                context.setLength(0);
            }
        }

View Full Code Here

                        // that substring.
                        if (endIndex > startIndex) {
                            String extractedContent = 
                                line.substring(startIndex, endIndex);
                            extractedContent = cleanDoc(extractedContent);
                            return new StringDocument(extractedContent);
                        }
                        // Otherwise create a new builder and everything
                        // appearing after the content tag.
                        else  {
                            content = new StringBuilder(line.substring(
                                        startIndex));
                            inContent = true;
                        }
                    } else if (line.contains("</content>")) {
                        inContent = false;
                        // If this is the end of the content, extract everything
                        // before it and return the total amount of text
                        // extracted.
                        int endIndex = line.lastIndexOf("<");
                        content.append(line.substring(0, endIndex));


                        return new StringDocument(cleanDoc(content.toString()));
                    } else if (line.contains("<updated>") && content != null) {
                        // When the line has an updated tag and content is not
                        // null, we need to extract the date time and prepend it
                        // to the content.
                        int startIndex = line.indexOf(">")+1;
                        int endIndex = line.lastIndexOf("<");
                        String date = line.substring(startIndex, endIndex);
                        long dateTime = date.equals("")
                            ? 0 :
                            Timestamp.valueOf(date).getTime();
                        String doc = String.format(
                                "%d %s", dateTime,
                                cleanDoc(content.toString()));
                        return new StringDocument(doc);
                    } else if (inContent && content != null) {
                        // If the content builder has been created, we know this
                        // line contains content.  Add it to the builder.
                        content.append(line);
                    }

View Full Code Here

                    addTextFromUtterance((Element) utterances.item(i),
                                         utteranceBuilder);
                    utteranceBuilder.append(". ");
                }
            }
            return new StringDocument(utteranceBuilder.toString());
        }

View Full Code Here


        /**
         * {@inheritDoc}
         */
        public synchronized Document next() {
            Document doc = new StringDocument(currentDoc);
            currentDoc = advance();
            return doc;
        }

View Full Code Here


        /**
         * {@inheritDoc}
         */
        public Document next() {
            Document doc = new StringDocument(next);
            next = advance();
            return doc;
        }

View Full Code Here


        /**
         * {@inheritDoc}
         */
        public Document next() {
            Document doc = new StringDocument(next);
            next = advance();
            return doc;
        }

View Full Code Here

                              Arrays.asList(expectedRelations)));
    }


    @Test public void testSingleExtraction() throws Exception {
        DependencyExtractor extractor = new CoNLLDependencyExtractor();
        Document doc = new StringDocument(toTabs(SINGLE_PARSE));
        DependencyTreeNode[] nodes = extractor.readNextTree(doc.reader());


        assertEquals(12, nodes.length);


        // Check the basics of the node.
        assertEquals("review", nodes[8].word());

View Full Code Here

                    Arrays.asList(expectedRelations)));
    }


    @Test public void testDoubleExtraction() throws Exception {
        DependencyExtractor extractor = new CoNLLDependencyExtractor();
        Document doc = new StringDocument("\n\n" +
                                          toTabs(SINGLE_PARSE) +
                                          "\n" +
                                          toTabs(SECOND_PARSE));
        BufferedReader reader = doc.reader();
        DependencyTreeNode[] relations = extractor.readNextTree(reader);
        assertTrue(relations != null);
        assertEquals(12, relations.length);


        testFirstRoot(relations, 2);

View Full Code Here

0 1

TOP

Related Classes of edu.ucla.sspace.text.StringDocument

edu.ucla.sspace.dependency.CoNLLDependencyExtractorTest

edu.ucla.sspace.dependency.WaCKyDependencyExtractorTest

edu.ucla.sspace.text.corpora.BloglinesCorpusReader$BloglinesIterator

edu.ucla.sspace.text.corpora.ChildesCorpusReader$ChildesFileIterator

edu.ucla.sspace.text.corpora.PukWacCorpusReader$UkWacIterator

edu.ucla.sspace.text.corpora.PukWacDependencyCorpusReader$UkWacIterator

edu.ucla.sspace.text.corpora.SemEvalCorpusReader$SemEvalHandler

edu.ucla.sspace.text.corpora.SemEvalLexSubReader$SemEvalHandler

edu.ucla.sspace.text.corpora.SenseEvalDependencyCorpusReader$SenseEvalIterator

edu.ucla.sspace.text.corpora.UsenetCorpusReader$UseNetIterator

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.