Package edu.buffalo.cse.ir.wikiindexer.wikipedia

Examples of edu.buffalo.cse.ir.wikiindexer.wikipedia.IndexableDocument


    Thread.sleep(2000);

    long completed = 0, totalTasks = tpe.getTaskCount();
    long remaining = totalTasks - completed;
   
    IndexableDocument idoc = null;
    SharedDictionary docDict = new SharedDictionary(properties, INDEXFIELD.LINK);
    int currDocId;
    ThreadedIndexerRunner termRunner = new ThreadedIndexerRunner(properties);
    SingleIndexerRunner authIdxer = new SingleIndexerRunner(properties, INDEXFIELD.AUTHOR, INDEXFIELD.LINK, docDict, false);
    SingleIndexerRunner catIdxer = new SingleIndexerRunner(properties, INDEXFIELD.CATEGORY, INDEXFIELD.LINK, docDict, false);
    SingleIndexerRunner linkIdxer = new SingleIndexerRunner(properties, INDEXFIELD.LINK, INDEXFIELD.LINK, docDict, true);
    Map<String, Integer> tokenmap;
   
    try {
      while (remaining > 0) {
        idoc = pool.take().get();
        if (idoc != null) {
          currDocId = docDict.lookup(idoc.getDocumentIdentifier());
          TokenStream stream;
          try {
            for (INDEXFIELD fld : INDEXFIELD.values()) {
              stream = idoc.getStream(fld);

              if (stream != null) {
                tokenmap = stream.getTokenMap();

                if (tokenmap != null) {
View Full Code Here


      pool.submit(new DocumentTransformer(tknizerMap, doc));
    }
   
    System.out.println("Submitted tokenization: " + (System.currentTimeMillis() - start));
   
    IndexableDocument idoc;
    SharedDictionary docDict = new SharedDictionary(properties, INDEXFIELD.LINK);
    int currDocId;
    ThreadedIndexerRunner termRunner = new ThreadedIndexerRunner(properties);
    SingleIndexerRunner authIdxer = new SingleIndexerRunner(properties, INDEXFIELD.AUTHOR, INDEXFIELD.LINK, docDict, false);
    SingleIndexerRunner catIdxer = new SingleIndexerRunner(properties, INDEXFIELD.CATEGORY, INDEXFIELD.LINK, docDict, false);
    SingleIndexerRunner linkIdxer = new SingleIndexerRunner(properties, INDEXFIELD.LINK, INDEXFIELD.LINK, docDict, true);
    Map<String, Integer> tokenmap;
   
    System.out.println("Starting indexing.....");
    start = System.currentTimeMillis();
    double pctComplete = 0;
    for (int i = 0; i < numdocs; i++) {
      try {
        idoc = pool.take().get();
        if (idoc != null) {
          currDocId = docDict.lookup(idoc.getDocumentIdentifier());
          TokenStream stream;
          try {
            for (INDEXFIELD fld : INDEXFIELD.values()) {
              stream = idoc.getStream(fld);

              if (stream != null) {
                tokenmap = stream.getTokenMap();

                if (tokenmap != null) {
View Full Code Here

TOP

Related Classes of edu.buffalo.cse.ir.wikiindexer.wikipedia.IndexableDocument

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.