Examples of edu.harvard.wcfia.yoshikoder.document.tokenizer.TokenList

Package edu.harvard.wcfia.yoshikoder.document.tokenizer

Examples of edu.harvard.wcfia.yoshikoder.document.tokenizer.TokenList

edu.harvard.wcfia.yoshikoder.document.tokenizer.TokenList

  // first pass to get vocab
  protected List<String> getVocab(List<YKDocument> docs) throws IOException, TokenizationException {
    Set<String> vocab = new HashSet<String>();
    TokenizationCache tcache = yoshikoder.getTokenizationCache();
    for (YKDocument doc : docs) {
      TokenList tl = tcache.getTokenList(doc);
      if (tl == null){
        tl = TokenizationService.getTokenizationService().tokenize(doc);
        tcache.putTokenList(doc, tl);
      }
      WordFrequencyMap map = new WordFrequencyMap(tl);

View Full Code Here

      writer.write("," + FileUtil.escapeForCsv(word));
    writer.write(",Total\n");
    
    TokenizationCache tcache = yoshikoder.getTokenizationCache();
    for (YKDocument doc : docs) {
      TokenList tl = tcache.getTokenList(doc);
      if (tl == null){
        tl = TokenizationService.getTokenizationService().tokenize(doc);
        tcache.putTokenList(doc, tl);
      }
      WordFrequencyMap map = new WordFrequencyMap(tl);

View Full Code Here

        cell.setCellValue("Total");
    
        int rowNumber = 1;
    TokenizationCache tcache = yoshikoder.getTokenizationCache();
    for (YKDocument doc : docs) {
      TokenList tl = tcache.getTokenList(doc);
      if (tl == null){
        tl = TokenizationService.getTokenizationService().tokenize(doc);
        tcache.putTokenList(doc, tl);
      }
      WordFrequencyMap map = new WordFrequencyMap(tl);

View Full Code Here

                    DocumentList dl = new DocumentListImpl();
                    dl.add(doc1);
                    dl.add(doc2);


                    TokenizationCache tcache = yoshikoder.getTokenizationCache();
                    TokenList tl1 = tcache.getTokenList(doc1);
                    TokenList tl2 = tcache.getTokenList(doc2);
                    if (tl1 == null){
                        tl1 = TokenizationService.getTokenizationService().tokenize(doc1);
                        tcache.putTokenList(doc1, tl1);
                    }
                    if (tl2 == null){

View Full Code Here

        if (doc == null) return;
        
        dworker = new DialogWorker(yoshikoder){
            protected void doWork() throws Exception {
                TokenizationCache tcache = yoshikoder.getTokenizationCache();
                TokenList tl = tcache.getTokenList(doc);
                if (tl == null){
                    tl = TokenizationService.getTokenizationService().tokenize(doc);
                    tcache.putTokenList(doc, tl);
                }

View Full Code Here

              sb.append(doclabDimmed);
          else
            sb.append(doclab);
          counter++;
          
          TokenList lhs = line.getLeftHandSide();
          sb.append("<td class='right'>");
          for (Iterator<Token> iterator = lhs.iterator(); iterator.hasNext();) {
            Token word = iterator.next();
            sb.append(escapeXML(word.getText()) + " ");  
          }
          sb.append("</td><td><b>");
          sb.append(escapeXML(line.getTarget().getText()));
          TokenList rhs = line.getRightHandSide();
          sb.append("</b>");
          for (Iterator<Token> iterator = rhs.iterator(); iterator.hasNext();) {
            Token word = iterator.next();
            sb.append(" " + escapeXML(word.getText()));  
          }
          sb.append("</td></tr>\n"); 
        }

View Full Code Here

          sb.append(toXML(doc));
          Concordance conc = map.get(doc);
          for (Iterator<ConcordanceLine> iter = conc.iterator(); iter.hasNext();) {
            ConcordanceLine line = iter.next();
            sb.append("<line>"); 
            TokenList lhs = line.getLeftHandSide();
            for (Iterator<Token> iterator = lhs.iterator(); iterator.hasNext();) {
              Token word = iterator.next();
              sb.append("<w txt=\"" + escapeXML(word.getText()) + "\"/>");  
            }
            sb.append( "<w txt=\"" + escapeXML(line.getTarget().getText()) +  
            "\" target=\"true\"/>"); 
            TokenList rhs = line.getRightHandSide();
            for (Iterator<Token> iterator = rhs.iterator(); iterator.hasNext();) {
              Token word = iterator.next();
              sb.append("<w txt=\"" + escapeXML(word.getText()) + "\"/>");  
            }
            sb.append("</line>\n"); 
          }

View Full Code Here

        sb.append(" windowsize=\"" + conc.getWindowSize() + "\"");  
        sb.append(" creationdate=\"" + (new java.util.Date()) + "\">\n"); 
        for (Iterator<ConcordanceLine> iter = conc.iterator(); iter.hasNext();) {
            ConcordanceLine line = iter.next();
            sb.append("    <line>"); 
            TokenList lhs = line.getLeftHandSide();
            for (Iterator<Token> iterator = lhs.iterator(); iterator.hasNext();) {
                Token word = iterator.next();
                sb.append("<w txt=\"" + escapeXML(word.getText()) + "\"/>");  
            }
            sb.append( "<w txt=\"" + escapeXML(line.getTarget().getText()) +  
            "\" target=\"true\"/>"); 
            TokenList rhs = line.getRightHandSide();
            for (Iterator<Token> iterator = rhs.iterator(); iterator.hasNext();) {
                Token word = iterator.next();
                sb.append("<w txt=\"" + escapeXML(word.getText()) + "\"/>");  
            }
            sb.append("</line>\n"); 
        }

View Full Code Here

        }
    }


    public void endElement(String uri, String localName, String qName){                                                  
        if (qName.equals("line")) { //$NON-NLS-1$
            TokenList tl = new TokenListImpl();
            for (Iterator iter = lhs.iterator(); iter.hasNext();) {
                String lhs = (String) iter.next();
                Token t = new TokenImpl(lhs, 0, 0);
                tl.add(t);
            }
            TokenList rl = new TokenListImpl();
            for (Iterator iter = rhs.iterator(); iter.hasNext();) {
                String rhs = (String) iter.next();
                Token t = new TokenImpl(rhs, 0, 0);
                rl.add(t);
            }
            Token targ = new TokenImpl(target, 0, 0);
            ConcordanceLine line = new ConcordanceLineImpl(tl, targ, rl);
            clist.add(line);
        }

View Full Code Here

        
        TaskWorker tworker = new TaskWorker(yoshikoder){
            YKDictionaryReportDialog dia;
          protected void doWork() throws Exception {
                TokenizationCache tcache = yoshikoder.getTokenizationCache();
                TokenList tl = tcache.getTokenList(doc);
                if (tl == null){
                    tl = TokenizationService.getTokenizationService().tokenize(doc);
                    tcache.putTokenList(doc, tl);
                }

View Full Code Here

0 1 2

TOP

Related Classes of edu.harvard.wcfia.yoshikoder.document.tokenizer.TokenList

edu.harvard.wcfia.yoshikoder.AddHighlightsAction

edu.harvard.wcfia.yoshikoder.ConcordanceFrequencyReportAction

edu.harvard.wcfia.yoshikoder.dictionary.AbstractYKDictionary

edu.harvard.wcfia.yoshikoder.dictionary.Dict

edu.harvard.wcfia.yoshikoder.DictionaryRRDocumentComparisonAction

edu.harvard.wcfia.yoshikoder.MakeConcordanceAction

edu.harvard.wcfia.yoshikoder.MultipleConcordanceFrequencyReportAction

edu.harvard.wcfia.yoshikoder.SingleDocumentDictionaryReportAction

edu.harvard.wcfia.yoshikoder.SingleDocumentWordFrequencyReportAction

edu.harvard.wcfia.yoshikoder.UnifiedDictionaryFrequencyReportAction

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.