Package uk.ac.ucl.panda.utility.structure

Examples of uk.ac.ucl.panda.utility.structure.SegmentInfo


  /* If any of our segments are using a directory != ours
   * then copy them over.  Currently this is only used by
   * addIndexesNoOptimize(). */
  private synchronized void copyExternalSegments() throws CorruptIndexException, IOException {
    final int numSegments = segmentInfos.size();
    for(int i=0;i<numSegments;i++) {
      SegmentInfo info = segmentInfos.info(i);
      if (info.dir != directory) {
        MergePolicy.OneMerge merge = new MergePolicy.OneMerge(segmentInfos.range(i, 1+i), info.getUseCompoundFile());
        if (registerMerge(merge)) {
          pendingMerges.remove(merge);
          runningMerges.add(merge);
          merge(merge);
        } else
View Full Code Here
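
In the first example, copyExternalSegments() walks the live SegmentInfos and, whenever a SegmentInfo's dir differs from the writer's own directory, wraps that single segment in a OneMerge so the ordinary merge machinery copies it into the local index. Below is a minimal self-contained sketch of that pattern; the Dir and Segment stub types are hypothetical stand-ins for the Panda classes, not part of the API.

import java.util.List;

// Hypothetical stand-ins for the Panda/Lucene types used above.
class Dir {
  final String name;
  Dir(String name) { this.name = name; }
}

class Segment {
  final String name;
  final Dir dir;
  Segment(String name, Dir dir) { this.name = name; this.dir = dir; }
}

public class CopyExternalsSketch {
  public static void main(String[] args) {
    Dir local = new Dir("local");
    Dir external = new Dir("external");
    List<Segment> segmentInfos = List.of(
        new Segment("_0", local), new Segment("_1", external), new Segment("_2", local));

    // Mirror the loop in copyExternalSegments(): each segment living in a
    // foreign directory becomes a single-segment "merge" into our directory.
    for (int i = 0; i < segmentInfos.size(); i++) {
      Segment info = segmentInfos.get(i);
      if (info.dir != local)
        System.out.println("would merge external segment " + info.name + " into " + local.name);
    }
  }
}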


    optimize();            // start with zero or 1 seg

    final String mergedName = newSegmentName();
    SegmentMerger merger = new SegmentMerger(this, mergedName, null);

    SegmentInfo info;

    IndexReader sReader = null;
    try {
      if (segmentInfos.size() == 1){ // add existing index, if any
        sReader = SegmentReader.get(segmentInfos.info(0));
        merger.add(sReader);
      }

      for (int i = 0; i < readers.length; i++)      // add new indexes
        merger.add(readers[i]);

      boolean success = false;

      startTransaction();

      try {
        int docCount = merger.merge();                // merge 'em

        if(sReader != null) {
          sReader.close();
          sReader = null;
        }

        segmentInfos.setSize(0);                      // pop old infos & add new
        info = new SegmentInfo(mergedName, docCount, directory, false, true,
                               -1, null, false);
        segmentInfos.addElement(info);

        success = true;

      } finally {
        if (!success) {
          if (infoStream != null)
            message("hit exception in addIndexes during merge");

          rollbackTransaction();
        } else {
          commitTransaction();
        }
      }
    } finally {
      if (sReader != null) {
        sReader.close();
      }
    }
   
    if (mergePolicy instanceof LogMergePolicy && getUseCompoundFile()) {

      boolean success = false;

      startTransaction();

      try {
        merger.createCompoundFile(mergedName + ".cfs");
        info.setUseCompoundFile(true);
      } finally {
        if (!success) {
          if (infoStream != null)
            message("hit exception building compound file in addIndexes during merge");
View Full Code Here
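
The addIndexes example above relies on a transaction idiom that recurs throughout these snippets: set a success flag, do the work between startTransaction() and commitTransaction(), and roll back from the finally block if the flag was never set. Here is a compact sketch of just that idiom; only the three transaction method names are taken from the snippet, and their bodies are illustrative:

public class TransactionIdiomSketch {

  private void startTransaction()    { System.out.println("begin"); }
  private void commitTransaction()   { System.out.println("commit"); }
  private void rollbackTransaction() { System.out.println("rollback"); }

  // Same shape as addIndexes(): the success flag, checked in finally,
  // decides whether the changes are kept or undone.
  public void doWork(boolean fail) {
    boolean success = false;
    startTransaction();
    try {
      if (fail)
        throw new RuntimeException("simulated failure during merge");
      success = true;
    } finally {
      if (!success)
        rollbackTransaction();
      else
        commitTransaction();
    }
  }

  public static void main(String[] args) {
    TransactionIdiomSketch w = new TransactionIdiomSketch();
    w.doWork(false);                                      // begin, commit
    try { w.doWork(true); } catch (RuntimeException e) {} // begin, rollback
  }
}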

      return false;
    }

    try {

      SegmentInfo newSegment = null;

      final int numDocs = docWriter.getNumDocsInRAM();

      // Always flush docs if there are any
      boolean flushDocs = numDocs > 0;

      // With autoCommit=true we must always flush the doc
      // stores when we flush
      flushDocStores |= autoCommit;
      String docStoreSegment = docWriter.getDocStoreSegment();
      if (docStoreSegment == null)
        flushDocStores = false;

      // Always flush deletes if there are any delete terms.
      // TODO: when autoCommit=false we don't have to flush
      // deletes with every flushed segment; we can save
      // CPU/IO by buffering longer & flushing deletes only
      // when they are full or writer is being closed.  We
      // have to fix the "applyDeletesSelectively" logic to
      // apply to more than just the last flushed segment
      boolean flushDeletes = docWriter.hasDeletes();

      if (infoStream != null) {
        message("  flush: segment=" + docWriter.getSegment() +
                " docStoreSegment=" + docWriter.getDocStoreSegment() +
                " docStoreOffset=" + docWriter.getDocStoreOffset() +
                " flushDocs=" + flushDocs +
                " flushDeletes=" + flushDeletes +
                " flushDocStores=" + flushDocStores +
                " numDocs=" + numDocs +
                " numBufDelTerms=" + docWriter.getNumBufferedDeleteTerms());
        message("  index before flush " + segString());
      }

      int docStoreOffset = docWriter.getDocStoreOffset();

      // docStoreOffset should only be non-zero when
      // autoCommit == false
      assert !autoCommit || 0 == docStoreOffset;

      boolean docStoreIsCompoundFile = false;

      // Check if the doc stores must be separately flushed
      // because other segments, besides the one we are about
      // to flush, reference them
      if (flushDocStores && (!flushDocs || !docWriter.getSegment().equals(docWriter.getDocStoreSegment()))) {
        // We must separately flush the doc store
        if (infoStream != null)
          message("  flush shared docStore segment " + docStoreSegment);
     
        docStoreIsCompoundFile = flushDocStores();
        flushDocStores = false;
      }

      String segment = docWriter.getSegment();

      // If we are flushing docs, segment must not be null:
      assert segment != null || !flushDocs;

      if (flushDocs || flushDeletes) {

        SegmentInfos rollback = null;

        if (flushDeletes)
          rollback = (SegmentInfos) segmentInfos.clone();

        boolean success = false;

        try {
          if (flushDocs) {

            if (0 == docStoreOffset && flushDocStores) {
              // This means we are flushing private doc stores
              // with this segment, so it will not be shared
              // with other segments
              assert docStoreSegment != null;
              assert docStoreSegment.equals(segment);
              docStoreOffset = -1;
              docStoreIsCompoundFile = false;
              docStoreSegment = null;
            }

            int flushedDocCount = docWriter.flush(flushDocStores);
         
            newSegment = new SegmentInfo(segment,
                                         flushedDocCount,
                                         directory, false, true,
                                         docStoreOffset, docStoreSegment,
                                         docStoreIsCompoundFile);
            segmentInfos.addElement(newSegment);
          }

          if (flushDeletes) {
            // we should be able to change this so we can
            // buffer deletes longer and then flush them to
            // multiple flushed segments, when
            // autoCommit=false
            applyDeletes(flushDocs);
            doAfterFlush();
          }

          checkpoint();
          success = true;
        } finally {
          if (!success) {

            if (infoStream != null)
              message("hit exception flushing segment " + segment);
               
            if (flushDeletes) {

              // Carefully check if any partial .del files
              // should be removed:
              final int size = rollback.size();
              for(int i=0;i<size;i++) {
                final String newDelFileName = segmentInfos.info(i).getDelFileName();
                final String delFileName = rollback.info(i).getDelFileName();
                if (newDelFileName != null && !newDelFileName.equals(delFileName))
                  deleter.deleteFile(newDelFileName);
              }

              // Fully replace the segmentInfos since flushed
              // deletes could have changed any of the
              // SegmentInfo instances:
              segmentInfos.clear();
              segmentInfos.addAll(rollback);
             
            } else {
              // Remove segment we added, if any:
              if (newSegment != null &&
                  segmentInfos.size() > 0 &&
                  segmentInfos.info(segmentInfos.size()-1) == newSegment)
                segmentInfos.remove(segmentInfos.size()-1);
            }
            if (flushDocs)
              docWriter.abort(null);
            deletePartialSegmentsFile();
            deleter.checkpoint(segmentInfos, false);

            if (segment != null)
              deleter.refresh(segment);
          }
        }

        deleter.checkpoint(segmentInfos, autoCommit);

        if (flushDocs && mergePolicy.useCompoundFile(segmentInfos,
                                                     newSegment)) {
          success = false;
          try {
            docWriter.createCompoundFile(segment);
            newSegment.setUseCompoundFile(true);
            checkpoint();
            success = true;
          } finally {
            if (!success) {
              if (infoStream != null)
                message("hit exception creating compound file for newly flushed segment " + segment);
              newSegment.setUseCompoundFile(false);
              deleter.deleteFile(segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION);
              deletePartialSegmentsFile();
            }
          }
View Full Code Here
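
Both flush paths above build a SegmentInfo through the same eight-argument constructor. The stand-in below mirrors that call shape; the parameter names are inferred from how the call sites produce each value and are assumptions, not the class's own declaration:

// A stand-in mirroring the eight-argument constructor seen above.
// Parameter names are inferred from the call sites and are assumptions.
public class SegmentInfoSketch {
  final String name;                     // segment name, e.g. from newSegmentName()
  final int docCount;                    // documents in this segment
  final String dir;                      // directory holding the segment's files
  final boolean useCompoundFile;         // false at creation in these snippets
  final boolean hasSingleNormFile;       // true in these snippets
  final int docStoreOffset;              // -1 = segment has private doc stores
  final String docStoreSegment;          // name of the shared doc store, or null
  final boolean docStoreIsCompoundFile;  // whether the shared store is compound

  SegmentInfoSketch(String name, int docCount, String dir,
                    boolean useCompoundFile, boolean hasSingleNormFile,
                    int docStoreOffset, String docStoreSegment,
                    boolean docStoreIsCompoundFile) {
    this.name = name; this.docCount = docCount; this.dir = dir;
    this.useCompoundFile = useCompoundFile;
    this.hasSingleNormFile = hasSingleNormFile;
    this.docStoreOffset = docStoreOffset;
    this.docStoreSegment = docStoreSegment;
    this.docStoreIsCompoundFile = docStoreIsCompoundFile;
  }

  public static void main(String[] args) {
    // Flush with private doc stores: offset -1, no shared store name.
    SegmentInfoSketch privateStores =
        new SegmentInfoSketch("_3", 120, "dir", false, true, -1, null, false);
    // Flush against a shared doc store: non-negative offset plus store name.
    SegmentInfoSketch sharedStores =
        new SegmentInfoSketch("_4", 80, "dir", false, true, 120, "_0", false);
    System.out.println(privateStores.name + " / " + sharedStores.docStoreSegment);
  }
}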

    final int numSegments = segmentInfos.size();
   
    final int numSegmentsToMerge = merge.segments.size();
    for(int i=0;i<numSegmentsToMerge;i++) {
      final SegmentInfo info = merge.segments.info(i);

      if (first + i >= numSegments || !segmentInfos.info(first+i).equals(info)) {
        if (segmentInfos.indexOf(info) == -1)
          throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the index");
        else
View Full Code Here

      BitVector deletes = null;
      int docUpto = 0;

      final int numSegmentsToMerge = sourceSegments.size();
      for(int i=0;i<numSegmentsToMerge;i++) {
        final SegmentInfo previousInfo = sourceSegmentsClone.info(i);
        final SegmentInfo currentInfo = sourceSegments.info(i);

        assert currentInfo.docCount == previousInfo.docCount;

        final int docCount = currentInfo.docCount;

        if (previousInfo.hasDeletions()) {

          // There were deletes on this segment when the merge
          // started.  The merge has collapsed away those
          // deletes, but, if new deletes were flushed since
          // the merge started, we must now carefully keep any
          // newly flushed deletes, mapping them to the new
          // docIDs.

          assert currentInfo.hasDeletions();

          // Load deletes present @ start of merge, for this segment:
          BitVector previousDeletes = new BitVector(previousInfo.dir, previousInfo.getDelFileName());

          if (!currentInfo.getDelFileName().equals(previousInfo.getDelFileName())) {
            // This means this segment has had new deletes
            // committed since we started the merge, so we
            // must merge them:
            if (deletes == null)
              deletes = new BitVector(merge.info.docCount);

            BitVector currentDeletes = new BitVector(currentInfo.dir, currentInfo.getDelFileName());
            for(int j=0;j<docCount;j++) {
              if (previousDeletes.get(j))
                assert currentDeletes.get(j);
              else {
                if (currentDeletes.get(j))
                  deletes.set(docUpto);
                docUpto++;
              }
            }
          } else
            docUpto += docCount - previousDeletes.count();
       
        } else if (currentInfo.hasDeletions()) {
          // This segment had no deletes before but now it
          // does:
          if (deletes == null)
            deletes = new BitVector(merge.info.docCount);
          BitVector currentDeletes = new BitVector(directory, currentInfo.getDelFileName());

          for(int j=0;j<docCount;j++) {
            if (currentDeletes.get(j))
              deletes.set(docUpto);
            docUpto++;
          }

        } else
          // No deletes before or after
          docUpto += currentInfo.docCount;

        merge.checkAborted(directory);
      }

      if (deletes != null) {
        merge.info.advanceDelGen();
        deletes.write(directory, merge.info.getDelFileName());
      }
      success = true;
    } finally {
      if (!success) {
        if (infoStream != null)
          message("hit exception creating merged deletes file");
        deleter.refresh(merge.info.name);
      }
    }

    // Simple optimization: if the doc store we are using
    // has been closed and is now in compound format (but
    // wasn't when we started), then we will switch to the
    // compound format as well:
    final String mergeDocStoreSegment = merge.info.getDocStoreSegment();
    if (mergeDocStoreSegment != null && !merge.info.getDocStoreIsCompoundFile()) {
      final int size = segmentInfos.size();
      for(int i=0;i<size;i++) {
        final SegmentInfo info = segmentInfos.info(i);
        final String docStoreSegment = info.getDocStoreSegment();
        if (docStoreSegment != null &&
            docStoreSegment.equals(mergeDocStoreSegment) &&
            info.getDocStoreIsCompoundFile()) {
          merge.info.setDocStoreIsCompoundFile(true);
          break;
        }
      }
    }
View Full Code Here
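
The heart of the merged-deletes example is the docID remapping loop: documents that were already deleted when the merge started do not exist in the merged segment, so only surviving documents advance docUpto, and any delete that arrived during the merge is re-recorded at its post-merge docID. A self-contained sketch of that loop, using boolean arrays as a hypothetical simplification of BitVector:

// Sketch of the docID remapping above, with boolean[] standing in for
// BitVector: docs deleted when the merge started are gone from the merged
// segment, so only surviving docs advance docUpto; deletes that arrived
// during the merge are re-recorded at their new docIDs.
public class MergeDeletesSketch {
  static boolean[] remap(boolean[] previousDeletes, boolean[] currentDeletes, int mergedDocCount) {
    boolean[] merged = new boolean[mergedDocCount];
    int docUpto = 0;
    for (int j = 0; j < previousDeletes.length; j++) {
      if (previousDeletes[j]) {
        assert currentDeletes[j];   // deletes only accumulate
      } else {
        if (currentDeletes[j])
          merged[docUpto] = true;   // new delete, at its post-merge docID
        docUpto++;
      }
    }
    return merged;
  }

  public static void main(String[] args) {
    boolean[] before = {true, false, false, false};  // doc 0 deleted pre-merge
    boolean[] after  = {true, false, true,  false};  // doc 2 deleted mid-merge
    boolean[] merged = remap(before, after, 3);      // 3 docs survive the merge
    // Old doc 2 became merged doc 1, so merged[1] is true:
    System.out.println(java.util.Arrays.toString(merged));
  }
}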

    final SegmentInfos sourceSegmentsClone = merge.segmentsClone;
    final int numSegmentsToMerge = sourceSegmentsClone.size();
    assert merge.increfDone;
    merge.increfDone = false;
    for(int i=0;i<numSegmentsToMerge;i++) {
      final SegmentInfo previousInfo = sourceSegmentsClone.info(i);
      // Decref all files for this SegmentInfo (this
      // matches the incref in mergeInit):
      if (previousInfo.dir == directory)
        deleter.decRef(previousInfo.files());
    }
  }
View Full Code Here
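
This snippet releases the file references taken in mergeInit: while the merge runs, every file of every source SegmentInfo is pinned via incRef so the deleter cannot remove it, and the matching decRef here makes the files deletable again. A minimal sketch of that reference-counting protocol (the deleter stub is hypothetical):

import java.util.HashMap;
import java.util.Map;

// Minimal sketch of the incRef/decRef protection around a merge: files are
// pinned in mergeInit and released afterwards, so the deleter cannot remove
// them while the merge is still reading them.
public class RefCountSketch {
  private final Map<String, Integer> refCounts = new HashMap<>();

  void incRef(String file) { refCounts.merge(file, 1, Integer::sum); }

  void decRef(String file) {
    int count = refCounts.merge(file, -1, Integer::sum);
    if (count == 0) {
      refCounts.remove(file);
      System.out.println("deleting " + file);   // no references left
    }
  }

  public static void main(String[] args) {
    RefCountSketch deleter = new RefCountSketch();
    deleter.incRef("_0.fdt");   // mergeInit pins the file
    deleter.decRef("_0.fdt");   // merge finished: file becomes deletable
  }
}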

      return true;

    final int count = merge.segments.size();
    boolean isExternal = false;
    for(int i=0;i<count;i++) {
      final SegmentInfo info = merge.segments.info(i);
      if (mergingSegments.contains(info))
        return false;
      if (segmentInfos.indexOf(info) == -1)
        return false;
      if (info.dir != directory)
View Full Code Here
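
registerMerge() admits a merge only if every chosen SegmentInfo is still usable: not already part of a running merge, and still present in the live segment list. A small sketch of those admission checks, with segment names standing in for SegmentInfo instances (the stub types are hypothetical):

import java.util.List;
import java.util.Set;

// Sketch of the registerMerge() admission checks above: a merge is rejected
// if any chosen segment is already being merged or has dropped out of the
// live segment list.
public class RegisterMergeSketch {
  static boolean canRegister(List<String> candidate, List<String> liveSegments,
                             Set<String> mergingSegments) {
    for (String name : candidate) {
      if (mergingSegments.contains(name)) return false;   // already merging
      if (liveSegments.indexOf(name) == -1) return false; // no longer in index
    }
    return true;
  }

  public static void main(String[] args) {
    System.out.println(canRegister(List.of("_1"), List.of("_0", "_1"), Set.of("_0"))); // true
    System.out.println(canRegister(List.of("_0"), List.of("_0", "_1"), Set.of("_0"))); // false
  }
}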

    final String currentDocStoreSegment = docWriter.getDocStoreSegment();

    // Test each segment to be merged: check if we need to
    // flush/merge doc stores
    for (int i = 0; i < end; i++) {
      SegmentInfo si = sourceSegments.info(i);

      // If it has deletions we must merge the doc stores
      if (si.hasDeletions())
        mergeDocStores = true;

      // If it has its own (private) doc stores we must
      // merge the doc stores
      if (-1 == si.getDocStoreOffset())
        mergeDocStores = true;

      // If it has a different doc store segment than
      // previous segments, we must merge the doc stores
      String docStoreSegment = si.getDocStoreSegment();
      if (docStoreSegment == null)
        mergeDocStores = true;
      else if (lastDocStoreSegment == null)
        lastDocStoreSegment = docStoreSegment;
      else if (!lastDocStoreSegment.equals(docStoreSegment))
        mergeDocStores = true;

      // Segments' docStoreOffsets must be in-order,
      // contiguous.  For the default merge policy this
      // will always be the case, but an arbitrary merge
      // policy may break it
      if (-1 == next)
        next = si.getDocStoreOffset() + si.docCount;
      else if (next != si.getDocStoreOffset())
        mergeDocStores = true;
      else
        next = si.getDocStoreOffset() + si.docCount;
     
      // If the segment comes from a different directory
      // we must merge
      if (lastDir != si.dir)
        mergeDocStores = true;

      // If the segment is referencing the current "live"
      // doc store outputs then we must merge
      if (si.getDocStoreOffset() != -1 && currentDocStoreSegment != null && si.getDocStoreSegment().equals(currentDocStoreSegment))
        doFlushDocStore = true;
    }

    final int docStoreOffset;
    final String docStoreSegment;
    final boolean docStoreIsCompoundFile;

    if (mergeDocStores) {
      docStoreOffset = -1;
      docStoreSegment = null;
      docStoreIsCompoundFile = false;
    } else {
      SegmentInfo si = sourceSegments.info(0);       
      docStoreOffset = si.getDocStoreOffset();
      docStoreSegment = si.getDocStoreSegment();
      docStoreIsCompoundFile = si.getDocStoreIsCompoundFile();
    }

    if (mergeDocStores && doFlushDocStore) {
      // SegmentMerger intends to merge the doc stores
      // (stored fields, vectors), and at least one of the
      // segments to be merged refers to the currently
      // live doc stores.

      // TODO: if we know we are about to merge away these
      // newly flushed doc store files then we should not
      // make compound file out of them...
      if (infoStream != null)
        message("flush at merge");
      flush(false, true);
    }

    // We must take a full copy at this point so that we can
    // properly merge deletes in commitMerge()
    merge.segmentsClone = (SegmentInfos) merge.segments.clone();

    for (int i = 0; i < end; i++) {
      SegmentInfo si = merge.segmentsClone.info(i);

      // IncRef all files for this segment info to make sure
      // they are not removed while we are trying to merge.
      if (si.dir == directory)
        deleter.incRef(si.files());
    }

    merge.increfDone = true;

    merge.mergeDocStores = mergeDocStores;

    // Bind a new segment name here so even with
    // ConcurrentMergePolicy we keep deterministic segment
    // names.
    merge.info = new SegmentInfo(newSegmentName(), 0,
                                 directory, false, true,
                                 docStoreOffset,
                                 docStoreSegment,
                                 docStoreIsCompoundFile);
    // Also enroll the merged segment into mergingSegments;
View Full Code Here
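
mergeInit() decides whether the shared doc stores must be merged. Of the several conditions checked above, the contiguity rule is the least obvious: the shared store can be kept only if each segment's docStoreOffset begins exactly where the previous segment's store ended. A sketch of just that test (the Seg record is a hypothetical stand-in for SegmentInfo; the other conditions, such as deletions and private stores, are omitted):

import java.util.List;

// Sketch of the contiguity test above: the shared doc store can be kept
// only if each segment's docStoreOffset continues exactly where the
// previous one ended.
public class DocStoreContiguitySketch {
  record Seg(int docStoreOffset, int docCount) {}

  static boolean mustMergeDocStores(List<Seg> segs) {
    int next = -1;
    for (Seg si : segs) {
      if (next == -1)
        next = si.docStoreOffset() + si.docCount();
      else if (next != si.docStoreOffset())
        return true;   // gap or overlap: stores are not contiguous
      else
        next = si.docStoreOffset() + si.docCount();
    }
    return false;
  }

  public static void main(String[] args) {
    // Contiguous: offsets 0 and 10 with docCounts 10 and 5.
    System.out.println(mustMergeDocStores(List.of(new Seg(0, 10), new Seg(10, 5))));  // false
    // Gap: the second segment starts at 12 instead of 10.
    System.out.println(mustMergeDocStores(List.of(new Seg(0, 10), new Seg(12, 5))));  // true
  }
}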

    try {
      int totDocCount = 0;

      for (int i = 0; i < numSegments; i++) {
        SegmentInfo si = sourceSegmentsClone.info(i);
        IndexReader reader = SegmentReader.get(si, MERGE_READ_BUFFER_SIZE, merge.mergeDocStores); // no need to set deleter (yet)
        merger.add(reader);
        totDocCount += reader.numDocs();
      }
      if (infoStream != null) {
View Full Code Here
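
The last example opens one SegmentReader per source SegmentInfo and tallies totDocCount from reader.numDocs(), which counts only live (non-deleted) documents. A trivial sketch of that tally with a hypothetical stub reader:

import java.util.List;

// Sketch of the totDocCount tally above: each merged reader contributes its
// live (non-deleted) document count. The Reader record is a stand-in.
public class TotalDocCountSketch {
  record Reader(int maxDoc, int deletedDocs) {
    int numDocs() { return maxDoc - deletedDocs; }   // live docs only
  }

  public static void main(String[] args) {
    List<Reader> readers = List.of(new Reader(100, 3), new Reader(50, 0));
    int totDocCount = 0;
    for (Reader r : readers)
      totDocCount += r.numDocs();
    System.out.println("merged segment will have " + totDocCount + " docs"); // 147
  }
}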
