Package ivory.core.data.index

Examples of ivory.core.data.index.TermPositions


    StringBuffer s = new StringBuffer("");
    try {
      Reader r = this.getReader();
      while (r.hasMoreTerms()) {
        int id = r.nextTerm();
        TermPositions pos = new TermPositions();
        r.getPositions(pos);
        s.append(String.format("(%d, %d, %s)", map.getTerm(id), pos.getTf(), pos));
      }
      s.append("]");
    } catch (Exception e) {
      e.printStackTrace();
    }
View Full Code Here


      try {
        bytesOut = new ByteArrayOutputStream();
        bitsOut = new BitOutputStream(bytesOut);

        ArrayListOfInts positions;
        TermPositions tp = new TermPositions();
        String term;

        for (Map.Entry<String, ArrayListOfInts> posting : termPositionsMap.entrySet()) {
          term = posting.getKey();
          positions = posting.getValue();
          tp.set(positions.getArray(), (short) positions.size());

          // Write the term.
          out.writeUTF(term);
          // Write out the tf value.
          bitsOut.writeGamma((short) positions.size());
View Full Code Here

        + rawBytes + "," + terms + "\n" + "[");
    try {
      Reader r = this.getReader();
      while (r.hasMoreTerms()) {
        String id = r.nextTerm();
        TermPositions pos = new TermPositions();
        r.getPositions(pos);
        s.append("(" + id + ", " + pos.getTf() + ", " + pos + ")");
      }
      s.append("]");
    } catch (Exception e) {
      e.printStackTrace();
    }
View Full Code Here

            continue;
          }
        }
        positions.add(pos[i]);
      }
      smallTermPositions.add(new TermPositions(pos, (short) pos.length));
    }
  }
View Full Code Here

            continue;
          }
        }
        positions.add(pos[i]);
      }
      largeTermPositions.add(new TermPositions(pos, (short) pos.length));
    }
  }
View Full Code Here

      bitsOut = new BitOutputStream(bytesOut);

      Iterator<Map.Entry<Integer, int[]>> it = termPositionsMap.entrySet().iterator();
      Map.Entry<Integer, int[]> posting = it.next();
      int[] positions = posting.getValue();
      TermPositions tp = new TermPositions();
      // Write out the first termid.
      int lastTerm = posting.getKey().intValue();
      bitsOut.writeBinary(32, lastTerm);
      // Write out the tf value.
      bitsOut.writeGamma((short) positions.length);
      tp.set(positions, (short) positions.length);
      // Write out the positions.
      writePositions(bitsOut, tp);

      int curTerm;
      while (it.hasNext()) {
        posting = it.next();
        curTerm = posting.getKey().intValue();
        positions = posting.getValue();
        int tgap = curTerm - lastTerm;
        if (tgap <= 0) {
          throw new RuntimeException("Error: encountered invalid t-gap. termid=" + curTerm);
        }
        // Write out the gap.
        bitsOut.writeGamma(tgap);
        tp.set(positions, (short) positions.length);
        // Write out the tf value.
        bitsOut.writeGamma((short) positions.length);
        // Write out the positions.
        writePositions(bitsOut, tp);
        lastTerm = curTerm;
View Full Code Here

      if (partialPostings.size() == 0) {
        return true;
      }

      TermPositions tp = new TermPositions();
      // Start the timer.
      long startTime = System.currentTimeMillis();
      for (MapIV.Entry<PostingsAccumulator> e : partialPostings.entrySet()) {
        // Emit a partial posting list for each term.
        TERM.set(e.getKey());
        context.setStatus("t" + TERM.get());
        PostingsAccumulator pl = e.getValue();
        postingsList.clear();
        postingsList.setCollectionDocumentCount(collectionDocumentCount);
        postingsList.setNumberOfPostings(pl.size());

        int[] docnos = pl.getDocnos();
        int[][] positions = pl.getPositions();
        QuickSort.quicksortWithStack(positions, docnos, 0, pl.size() - 1);
        for (int i = 0; i < pl.size(); i++) {
          tp.set(positions[i], (short) positions[i].length);
          postingsList.add(docnos[i], tp.getTf(), tp);
        }
        context.write(TERM, postingsList);
      }
      context.getCounter(MapTime.Spilling).increment(System.currentTimeMillis() - startTime);
      partialPostings.clear();
View Full Code Here

        }

        numPostings = 0;
        Iterator<TermPositions> iter = values.iterator();
        while (iter.hasNext()) {
          TermPositions positions = iter.next();
          numPostings += positions.getPositions()[0];
        }

        postings.setNumberOfPostings(numPostings);
        return;
      }

      Iterator<TermPositions> iter = values.iterator();
      TermPositions positions = iter.next();
      postings.add(pair.getRightElement(), positions.getTf(), positions);

      if (iter.hasNext()) {
        throw new RuntimeException(
            String.format("Error: values with the same (term, docno): docno=%d, term=%d",
                pair.getRightElement(), curTerm));
View Full Code Here

      bitsOut = new BitOutputStream(bytesOut);

      Iterator<Map.Entry<Integer, int[]>> it = termPositionsMap.entrySet().iterator();
      Map.Entry<Integer, int[]> posting = it.next();
      int[] positions = posting.getValue();
      TermPositions tp = new TermPositions();
      // Write out the first termid.
      int lastTerm = posting.getKey().intValue();
      bitsOut.writeBinary(32, lastTerm);
      // Write out the tf value.
      bitsOut.writeGamma((short) positions.length);
      tp.set(positions, (short) positions.length);
      // Write out the positions.
      writePositions(bitsOut, tp);

      int curTerm;
      while (it.hasNext()) {
        posting = it.next();
        curTerm = posting.getKey().intValue();
        positions = posting.getValue();
        int tgap = curTerm - lastTerm;
        if (tgap <= 0) {
          throw new RuntimeException("Error: encountered invalid t-gap. termid=" + curTerm);
        }
        // Write out the gap.
        bitsOut.writeGamma(tgap);
        tp.set(positions, (short) positions.length);
        // Write out the tf value.
        bitsOut.writeGamma((short) positions.length);
        // Write out the positions.
        writePositions(bitsOut, tp);
        lastTerm = curTerm;
View Full Code Here

    StringBuffer s = new StringBuffer("[");
    try {
      Reader r = this.getReader();
      while (r.hasMoreTerms()) {
        int id = r.nextTerm();
        TermPositions pos = new TermPositions();
        r.getPositions(pos);
        s.append("(" + id + ", " + pos.getTf() + ", " + pos + ")");
      }
      s.append("]");
    } catch (Exception e) {
      e.printStackTrace();
    }
View Full Code Here

TOP

Related Classes of ivory.core.data.index.TermPositions

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.