Package opennlp.tools.util

Examples of opennlp.tools.util.Span


    }

    @Test
    public void testSimpleSentenceMerge() throws ExecException {
        String sentence = JOHN_SENTENCE;
        List<Span> names = Arrays.asList(new Span(0, 10, "person"), new Span(
                19, 36, "organization"));
        String merged = merger.merge(sentence, names);
        assertEquals("<START:person> John Smith <END> works"
                + " at <START:organization> Smith Consulting <END> .", merged);

        names = Arrays.asList(new Span(0, 10), new Span(19, 36));
        merged = merger.merge(sentence, names);
        assertEquals("<START> John Smith <END> works"
                + " at <START> Smith Consulting <END> .", merged);
    }
View Full Code Here


        DataBag paragraphBag = (DataBag) t2;

        // convert the bag of links into absolute spans over the text
        List<Span> linkSpans = new ArrayList<Span>();
        for (Tuple l : links) {
            linkSpans.add(new Span((Integer) l.get(1), (Integer) l.get(2),
                    (String) l.get(0)));
        }
        Collections.sort(linkSpans);

        // iterate over the paragraphs and extract sentence locations
        int order = 0;
        for (Tuple p : paragraphBag) {
            Integer beginParagraph = (Integer) p.get(1);
            Integer endParagraph = (Integer) p.get(2);
            Span[] spans = sentenceDetector.sentPosDetect(text.substring(
                    beginParagraph, endParagraph));
            for (Span sentenceRelative : spans) {
                // for each sentence found in that paragraph, compute the
                // absolute span of the text
                order++;
                Span absoluteSentence = new Span(beginParagraph
                        + sentenceRelative.getStart(), beginParagraph
                        + sentenceRelative.getEnd(), sentenceRelative.getType());

                String sentence = text.substring(absoluteSentence.getStart(),
                        absoluteSentence.getEnd());
                // replace some formatting white-spaces without changing the
                // number of chars not to break the annotations
                sentence = sentence.replaceAll("\n", " ");
                sentence = sentence.replaceAll("\t", " ");

                // for each link in that sentence, emit a tuple
                for (Span link : linkSpans) {
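                    // TODO: optimize me by leveraging the link ordering
                    // NOTE(review): the early-exit test below uses `> 1`; if
                    // Span.compareTo only ever returns -1, 0 or 1 the break is
                    // never taken -- confirm the intended condition.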
                    if (absoluteSentence.contains(link)) {
                        int begin = link.getStart()
                                - absoluteSentence.getStart();
                        int end = link.getEnd() - absoluteSentence.getStart();
                        output.add(tupleFactory.newTupleNoCopy(Arrays.asList(
                                order, sentence, link.getType(), begin, end)));
                    } else if (link.compareTo(absoluteSentence) > 1) {
                        break;
                    }
View Full Code Here

        Collections.sort(links);
        List<Span> tokens = Arrays.asList(tokenizer.tokenizePos(text));
        Iterator<Span> tokensIterator = tokens.iterator();
        Iterator<Span> linksIterator = links.iterator();

        Span nextToken = null;
        Span activeLink = null;
        Span nextLink = null;

        StringBuilder sb = new StringBuilder();
        while (linksIterator.hasNext()) {
            // peek at the next link
            nextLink = linksIterator.next();
            while (nextLink != null
                    && (nextToken != null || tokensIterator.hasNext())) {
                nextToken = nextToken == null ? tokensIterator.next()
                        : nextToken;
                if (nextLink.contains(nextToken)) {
                    activeLink = nextLink;
                    nextLink = null;
                    if (activeLink.getType() != null) {
                        sb.append(NameSampleDataStream.START_TAG_PREFIX);
                        sb.append(activeLink.getType());
View Full Code Here

  @Test
  public void testRetrievingContent() {
   
    String sentence = "A test";
   
    TokenSample sample = new TokenSample(sentence, new Span[]{new Span(0, 1),
        new Span(2, 6)});
  
    assertEquals("A test", sample.getText());
   
    assertEquals(new Span(0, 1), sample.getTokenSpans()[0]);
    assertEquals(new Span(2, 6), sample.getTokenSpans()[1]);
  }
View Full Code Here

   
    assertEquals("start (" + TokenSample.DEFAULT_SEPARATOR_CHARS + ") end" + TokenSample.DEFAULT_SEPARATOR_CHARS + ".", a.toString());
   
    assertEquals(5, a.getTokenSpans().length);
   
    assertEquals(new Span(0, 5), a.getTokenSpans()[0]);
    assertEquals(new Span(6, 7), a.getTokenSpans()[1]);
    assertEquals(new Span(7, 8), a.getTokenSpans()[2]);
    assertEquals(new Span(9, 12), a.getTokenSpans()[3]);
    assertEquals(new Span(12, 13), a.getTokenSpans()[4]);
  }
View Full Code Here

      }
      ns = ds.read();
    }

    assertEquals(expectedNames.length, names.size());
    assertEquals(new Span(6,8), spans.get(0));
    assertEquals(new Span(3,4), spans.get(1));
    assertEquals(new Span(1,3), spans.get(2));
    assertEquals(new Span(4,6), spans.get(3));
    assertEquals(new Span(1,2), spans.get(4));
    assertEquals(new Span(4,6), spans.get(5));
    assertEquals(new Span(2,3), spans.get(6));
    assertEquals(new Span(16,17), spans.get(7));
    assertEquals(new Span(0,2), spans.get(8));
    assertEquals(new Span(0,1), spans.get(9));
    assertEquals(new Span(3,5), spans.get(10));
    assertEquals(new Span(3,5), spans.get(11));
    assertEquals(new Span(10,12), spans.get(12));
    assertEquals(new Span(1,3), spans.get(13));
    assertEquals(new Span(6,8), spans.get(14));
    assertEquals(new Span(6,8), spans.get(15));
    assertEquals(new Span(8,10), spans.get(16));
    assertEquals(new Span(12,14), spans.get(17));
    assertEquals(new Span(1,3), spans.get(18));
    assertEquals(new Span(0,1), spans.get(19));
    assertEquals(new Span(2,4), spans.get(20));
    assertEquals(new Span(5,6), spans.get(21));
  }
View Full Code Here

    assertEquals(expectedPerson.length, names.get(person).size());
    assertEquals(expectedDate.length, names.get(date).size());
    assertEquals(expectedLocation.length, names.get(location).size());
    assertEquals(expectedOrganization.length, names.get(organization).size());
   
    assertEquals(new Span(5,7, person), spans.get(person).get(0));
    assertEquals(expectedPerson[0], names.get(person).get(0));
    assertEquals(new Span(10,11, person), spans.get(person).get(1));
    assertEquals(expectedPerson[1], names.get(person).get(1));
    assertEquals(new Span(29,30, person), spans.get(person).get(2));
    assertEquals(expectedPerson[2], names.get(person).get(2));
    assertEquals(new Span(23,27, person), spans.get(person).get(3));
    assertEquals(expectedPerson[3], names.get(person).get(3));
    assertEquals(new Span(1,2, person), spans.get(person).get(4));
    assertEquals(expectedPerson[4], names.get(person).get(4));
    assertEquals(new Span(8,9, person), spans.get(person).get(5));
    assertEquals(expectedPerson[5], names.get(person).get(5));
    assertEquals(new Span(0,2, person), spans.get(person).get(6));
    assertEquals(expectedPerson[6], names.get(person).get(6));
    assertEquals(new Span(25,26, person), spans.get(person).get(7));
    assertEquals(expectedPerson[7], names.get(person).get(7));
    assertEquals(new Span(1,2, person), spans.get(person).get(8));
    assertEquals(expectedPerson[8], names.get(person).get(8));
    assertEquals(new Span(6,7, person), spans.get(person).get(9));
    assertEquals(expectedPerson[9], names.get(person).get(9));
    assertEquals(new Span(14,15, person), spans.get(person).get(10));
    assertEquals(expectedPerson[10], names.get(person).get(10));
    assertEquals(new Span(0,2, person), spans.get(person).get(11));
    assertEquals(expectedPerson[11], names.get(person).get(11));
    assertEquals(new Span(12,13, person), spans.get(person).get(12));
    assertEquals(expectedPerson[12], names.get(person).get(12));
    assertEquals(new Span(12,13, person), spans.get(person).get(13));
    assertEquals(expectedPerson[13], names.get(person).get(13));

    assertEquals(new Span(7,8, date), spans.get(date).get(0));
    assertEquals(expectedDate[0], names.get(date).get(0));
    assertEquals(new Span(27,28, date), spans.get(date).get(1));
    assertEquals(expectedDate[1], names.get(date).get(1));
    assertEquals(new Span(15,16, date), spans.get(date).get(2));
    assertEquals(expectedDate[2], names.get(date).get(2));
   
    assertEquals(new Span(0, 4, location), spans.get(location).get(0));
    assertEquals(expectedLocation[0], names.get(location).get(0));
    assertEquals(new Span(10,12, location), spans.get(location).get(1));
    assertEquals(expectedLocation[1], names.get(location).get(1));
    assertEquals(new Span(28,30, location), spans.get(location).get(2));
    assertEquals(expectedLocation[2], names.get(location).get(2));
    assertEquals(new Span(3,4, location), spans.get(location).get(3));
    assertEquals(expectedLocation[3], names.get(location).get(3));
    assertEquals(new Span(5,7, location), spans.get(location).get(4));
    assertEquals(expectedLocation[4], names.get(location).get(4));
    assertEquals(new Span(16,18, location), spans.get(location).get(5));
    assertEquals(expectedLocation[5], names.get(location).get(5));
    assertEquals(new Span(1,3, location), spans.get(location).get(6));
    assertEquals(expectedLocation[6], names.get(location).get(6));
    assertEquals(new Span(5,9, location), spans.get(location).get(7));
    assertEquals(expectedLocation[7], names.get(location).get(7));
    assertEquals(new Span(0,2, location), spans.get(location).get(8));
    assertEquals(expectedLocation[8], names.get(location).get(8));
    assertEquals(new Span(4,6, location), spans.get(location).get(9));
    assertEquals(expectedLocation[9], names.get(location).get(9));
    assertEquals(new Span(10,11, location), spans.get(location).get(10));
    assertEquals(expectedLocation[10], names.get(location).get(10));
    assertEquals(new Span(6,8, location), spans.get(location).get(11));
    assertEquals(expectedLocation[11], names.get(location).get(11));
    assertEquals(new Span(4,6, location), spans.get(location).get(12));
    assertEquals(expectedLocation[12], names.get(location).get(12));
    assertEquals(new Span(10,11, location), spans.get(location).get(13));
    assertEquals(expectedLocation[13], names.get(location).get(13));
    assertEquals(new Span(12,13, location), spans.get(location).get(14));
    assertEquals(expectedLocation[14], names.get(location).get(14));
    assertEquals(new Span(5,9, location), spans.get(location).get(15));
    assertEquals(expectedLocation[15], names.get(location).get(15));
    assertEquals(new Span(11,12, location), spans.get(location).get(16));
    assertEquals(expectedLocation[16], names.get(location).get(16));
   
    assertEquals(new Span(7,15, organization), spans.get(organization).get(0));
    assertEquals(expectedOrganization[0], names.get(organization).get(0));
   
  }
View Full Code Here

    assertFalse(createPredSample().equals(createGoldSample()));
    assertFalse(createPredSample().equals(new Object()));
  }
 
  public static TokenSample createGoldSample() {
    return new TokenSample("A test.", new Span[] { new Span(0, 1),
        new Span(2, 6) });
  }
View Full Code Here

    assertEquals("Advanced", ns.getSentence()[1]);
    assertEquals("Integrated", ns.getSentence()[2]);
    assertEquals("Pest", ns.getSentence()[3]);
    assertEquals("Management", ns.getSentence()[4]);
    assertEquals("</li>", ns.getSentence()[5]);
    assertEquals(new Span(1, 5, organization), ns.getNames()[0]);
   
    // <li> <START:organization> Bay Cities Produce Co., Inc. <END> </li>
    ns = ds.read();
    assertEquals(7, ns.getSentence().length);
    assertEquals("<li>", ns.getSentence()[0]);
    assertEquals("Bay", ns.getSentence()[1]);
    assertEquals("Cities", ns.getSentence()[2]);
    assertEquals("Produce", ns.getSentence()[3]);
    assertEquals("Co.,", ns.getSentence()[4]);
    assertEquals("Inc.", ns.getSentence()[5]);
    assertEquals("</li>", ns.getSentence()[6]);
    assertEquals(new Span(1, 6, organization), ns.getNames()[0]);
   
    ns = ds.read();
    assertEquals(1, ns.getSentence().length);
    assertEquals("</ul>", ns.getSentence()[0]);
   
View Full Code Here

    return new TokenSample("A test.", new Span[] { new Span(0, 1),
        new Span(2, 6) });
  }

  public static TokenSample createPredSample() {
    return new TokenSample("A test.", new Span[] { new Span(0, 3),
        new Span(2, 6) });
  }
View Full Code Here

TOP

Related Classes of opennlp.tools.util.Span

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.