// Source Code of CiteCrawler

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.StreamTokenizer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;


/**
 * Collects the citation keys referenced by {@code \cite{...}} commands in one
 * LaTeX file, or in every {@code *.tex} file directly inside a directory.
 *
 * <p>Only the plain {@code cite} command name is recognised. Text after an
 * unescaped {@code %} up to the end of the line is treated as a comment and
 * ignored. Keys are reported once each, in order of first appearance.
 */
public class CiteCrawler {

  /** Command name (without the backslash) that introduces a citation. */
  private static final String CITE = "cite";

  /** Accepts file names ending in {@code .tex}. */
  private static final FilenameFilter TEX_FILTER = new FilenameFilter() {
    public boolean accept(File dir, String name) {
      return name.endsWith(".tex");
    }
  };

  /** Position of the scanner relative to a citation command. */
  private enum State {
    /** Not inside or directly after a citation command. */
    IDLE,
    /** The command name was just read; a {@code [} or {@code {} is expected. */
    AFTER_CITE,
    /** Inside an optional {@code [...]} argument, which holds no keys. */
    IN_OPTIONAL,
    /** Inside the mandatory {@code {...}} argument that lists the keys. */
    IN_KEYS
  }

  /** Files that {@link #getEntries()} scans. */
  private final List<File> texFiles;

  /** Citation keys found so far, without duplicates, in order of discovery. */
  private final List<String> list;

  /**
   * Creates a crawler for a single file or for a directory of LaTeX files.
   *
   * @param tex a file to scan, or a directory whose {@code *.tex} children
   *     (non-recursive) are scanned
   */
  public CiteCrawler(File tex) {
    if (tex.isDirectory()) {
      // listFiles returns null when the directory cannot be read; treat that
      // as "nothing to scan" instead of failing inside Arrays.asList.
      File[] found = tex.listFiles(TEX_FILTER);
      if (found == null) {
        texFiles = new ArrayList<File>();
      } else {
        texFiles = Arrays.asList(found);
      }
    } else {
      texFiles = new ArrayList<File>();
      texFiles.add(tex);
    }
    list = new ArrayList<String>();
  }

  /**
   * Scans all configured files and returns the citation keys found.
   *
   * <p>Prints a progress line to standard output before scanning. A file that
   * cannot be read is reported on standard error and skipped. The returned
   * list is this crawler's own list; calling the method again rescans the
   * files but does not add duplicates.
   *
   * @return the distinct citation keys in order of first appearance
   */
  public List<String> getEntries() {
    System.out.println("Analyzing " + texFiles + "...");
    for (File tex : texFiles) {
      collectEntries(tex);
    }
    return list;
  }

  /**
   * Scans one file for citation keys, always closing the reader afterwards.
   *
   * @param tex the file to scan
   */
  private void collectEntries(File tex) {
    try {
      BufferedReader reader = new BufferedReader(new FileReader(tex));
      try {
        scan(newTokenizer(reader));
      } finally {
        // The original never closed the reader, leaking one handle per file.
        reader.close();
      }
    } catch (IOException e) {
      e.printStackTrace();
    }
  }

  /**
   * Builds a tokenizer in which letters, digits, {@code -}, space and
   * {@code ,} form words, {@code %} starts a line comment and every other
   * character is returned as a single-character token.
   */
  private static StreamTokenizer newTokenizer(BufferedReader reader) {
    StreamTokenizer toki = new StreamTokenizer(reader);
    toki.resetSyntax();
    toki.eolIsSignificant(false);
    toki.commentChar('%');
    toki.wordChars('a', 'z');
    toki.wordChars('A', 'Z');
    toki.wordChars('0', '9');
    toki.wordChars('-', '-');
    toki.wordChars(' ', ' ');
    toki.wordChars(',', ',');
    return toki;
  }

  /**
   * Walks the token stream and records the keys of every citation command.
   *
   * <p>After the command name, any number of {@code [...]} optional arguments
   * are skipped; the text between the following {@code {} and {@code }} is
   * gathered verbatim so that keys may contain characters such as {@code :}
   * or {@code _} and may be spread over several lines.
   *
   * @throws IOException if reading from the underlying file fails
   */
  private void scan(StreamTokenizer toki) throws IOException {
    StringBuilder keys = new StringBuilder();
    State state = State.IDLE;
    int type;
    while ((type = toki.nextToken()) != StreamTokenizer.TT_EOF) {
      boolean isWord = type == StreamTokenizer.TT_WORD;
      boolean isCite = isWord && CITE.equals(toki.sval);
      switch (state) {
        case IN_KEYS:
          if (type == '}') {
            addKeys(keys.toString());
            keys.setLength(0);
            state = State.IDLE;
          } else if (isWord) {
            keys.append(toki.sval);
          } else {
            // Non-word tokens here are single characters (numbers are not
            // parsed after resetSyntax), so the token type is the character.
            keys.append((char) type);
          }
          break;
        case IN_OPTIONAL:
          if (type == ']') {
            state = State.AFTER_CITE;
          }
          break;
        case AFTER_CITE:
          if (type == '{') {
            state = State.IN_KEYS;
          } else if (type == '[') {
            state = State.IN_OPTIONAL;
          } else {
            // Anything else means this was not a citation with arguments.
            state = isCite ? State.AFTER_CITE : State.IDLE;
          }
          break;
        default:
          if (isCite) {
            state = State.AFTER_CITE;
          }
          break;
      }
    }
  }

  /**
   * Splits a comma-separated key list and stores each new, non-blank key.
   *
   * @param keyList the raw text found between the braces of a citation
   */
  private void addKeys(String keyList) {
    for (String item : keyList.split(",")) {
      String key = item.trim();
      if (key.length() > 0 && !list.contains(key)) {
        list.add(key);
      }
    }
  }
}
// Source Code of CiteCrawler

// Related Classes of CiteCrawler