Package edu.harvard.wcfia.yoshikoder.document.tokenizer

Examples of edu.harvard.wcfia.yoshikoder.document.tokenizer.TokenList


  // first pass to get vocab
  protected List<String> getVocab(List<YKDocument> docs) throws IOException, TokenizationException {
    Set<String> vocab = new HashSet<String>();
    TokenizationCache tcache = yoshikoder.getTokenizationCache();
    for (YKDocument doc : docs) {
      TokenList tl = tcache.getTokenList(doc);
      if (tl == null){
        tl = TokenizationService.getTokenizationService().tokenize(doc);
        tcache.putTokenList(doc, tl);
      }
      WordFrequencyMap map = new WordFrequencyMap(tl);
View Full Code Here


      writer.write("," + FileUtil.escapeForCsv(word));
    writer.write(",Total\n");
   
    TokenizationCache tcache = yoshikoder.getTokenizationCache();
    for (YKDocument doc : docs) {
      TokenList tl = tcache.getTokenList(doc);
      if (tl == null){
        tl = TokenizationService.getTokenizationService().tokenize(doc);
        tcache.putTokenList(doc, tl);
      }
      WordFrequencyMap map = new WordFrequencyMap(tl);
View Full Code Here

        cell.setCellValue("Total");
   
        int rowNumber = 1;
    TokenizationCache tcache = yoshikoder.getTokenizationCache();
    for (YKDocument doc : docs) {
      TokenList tl = tcache.getTokenList(doc);
      if (tl == null){
        tl = TokenizationService.getTokenizationService().tokenize(doc);
        tcache.putTokenList(doc, tl);
      }
      WordFrequencyMap map = new WordFrequencyMap(tl);
View Full Code Here

                    DocumentList dl = new DocumentListImpl();
                    dl.add(doc1);
                    dl.add(doc2);

                    TokenizationCache tcache = yoshikoder.getTokenizationCache();
                    TokenList tl1 = tcache.getTokenList(doc1);
                    TokenList tl2 = tcache.getTokenList(doc2);
                    if (tl1 == null){
                        tl1 = TokenizationService.getTokenizationService().tokenize(doc1);
                        tcache.putTokenList(doc1, tl1);
                    }
                    if (tl2 == null){
View Full Code Here

        if (doc == null) return;
       
        dworker = new DialogWorker(yoshikoder){
            protected void doWork() throws Exception {
                TokenizationCache tcache = yoshikoder.getTokenizationCache();
                TokenList tl = tcache.getTokenList(doc);
                if (tl == null){
                    tl = TokenizationService.getTokenizationService().tokenize(doc);
                    tcache.putTokenList(doc, tl);
                }
               
View Full Code Here

              sb.append(doclabDimmed);
          else
            sb.append(doclab);
          counter++;
         
          TokenList lhs = line.getLeftHandSide();
          sb.append("<td class='right'>");
          for (Iterator<Token> iterator = lhs.iterator(); iterator.hasNext();) {
            Token word = iterator.next();
            sb.append(escapeXML(word.getText()) + " ")
          }
          sb.append("</td><td><b>");
          sb.append(escapeXML(line.getTarget().getText()));
          TokenList rhs = line.getRightHandSide();
          sb.append("</b>");
          for (Iterator<Token> iterator = rhs.iterator(); iterator.hasNext();) {
            Token word = iterator.next();
            sb.append(" " + escapeXML(word.getText()))
          }
          sb.append("</td></tr>\n");
        }
View Full Code Here

          sb.append(toXML(doc));
          Concordance conc = map.get(doc);
          for (Iterator<ConcordanceLine> iter = conc.iterator(); iter.hasNext();) {
            ConcordanceLine line = iter.next();
            sb.append("<line>");
            TokenList lhs = line.getLeftHandSide();
            for (Iterator<Token> iterator = lhs.iterator(); iterator.hasNext();) {
              Token word = iterator.next();
              sb.append("<w txt=\"" + escapeXML(word.getText()) + "\"/>")
            }
            sb.append( "<w txt=\"" + escapeXML(line.getTarget().getText())
            "\" target=\"true\"/>");
            TokenList rhs = line.getRightHandSide();
            for (Iterator<Token> iterator = rhs.iterator(); iterator.hasNext();) {
              Token word = iterator.next();
              sb.append("<w txt=\"" + escapeXML(word.getText()) + "\"/>")
            }
            sb.append("</line>\n");
          }
View Full Code Here

        sb.append(" windowsize=\"" + conc.getWindowSize() + "\"")
        sb.append(" creationdate=\"" + (new java.util.Date()) + "\">\n");
        for (Iterator<ConcordanceLine> iter = conc.iterator(); iter.hasNext();) {
            ConcordanceLine line = iter.next();
            sb.append("    <line>");
            TokenList lhs = line.getLeftHandSide();
            for (Iterator<Token> iterator = lhs.iterator(); iterator.hasNext();) {
                Token word = iterator.next();
                sb.append("<w txt=\"" + escapeXML(word.getText()) + "\"/>")
            }
            sb.append( "<w txt=\"" + escapeXML(line.getTarget().getText())
            "\" target=\"true\"/>");
            TokenList rhs = line.getRightHandSide();
            for (Iterator<Token> iterator = rhs.iterator(); iterator.hasNext();) {
                Token word = iterator.next();
                sb.append("<w txt=\"" + escapeXML(word.getText()) + "\"/>")
            }
            sb.append("</line>\n");
        }
View Full Code Here

        }
    }

    public void endElement(String uri, String localName, String qName){                                                 
        if (qName.equals("line")) { //$NON-NLS-1$
            TokenList tl = new TokenListImpl();
            for (Iterator iter = lhs.iterator(); iter.hasNext();) {
                String lhs = (String) iter.next();
                Token t = new TokenImpl(lhs, 0, 0);
                tl.add(t);
            }
            TokenList rl = new TokenListImpl();
            for (Iterator iter = rhs.iterator(); iter.hasNext();) {
                String rhs = (String) iter.next();
                Token t = new TokenImpl(rhs, 0, 0);
                rl.add(t);
            }
            Token targ = new TokenImpl(target, 0, 0);
            ConcordanceLine line = new ConcordanceLineImpl(tl, targ, rl);
            clist.add(line);
        }
View Full Code Here

       
        TaskWorker tworker = new TaskWorker(yoshikoder){
            YKDictionaryReportDialog dia;
          protected void doWork() throws Exception {
                TokenizationCache tcache = yoshikoder.getTokenizationCache();
                TokenList tl = tcache.getTokenList(doc);
                if (tl == null){
                    tl = TokenizationService.getTokenizationService().tokenize(doc);
                    tcache.putTokenList(doc, tl);
                }
               
View Full Code Here

TOP

Related Classes of edu.harvard.wcfia.yoshikoder.document.tokenizer.TokenList

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.