Package org.apache.lucene.codecs

Examples of org.apache.lucene.codecs.Codec
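
Before the excerpts, a minimal sketch of the pattern most of them share: obtain a Codec (by name, or the default) and install it on the IndexWriterConfig before opening the writer. This sketch is not taken from any excerpt below; the index path and analyzer are placeholder assumptions for a Lucene 4.9-style setup.

    import java.io.File;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.codecs.Codec;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;

    // Hypothetical setup: pick a codec and hand it to the writer config.
    Directory dir = FSDirectory.open(new File("/tmp/codec-demo"));   // placeholder path
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_CURRENT,
        new StandardAnalyzer(Version.LUCENE_CURRENT));               // or a concrete version constant
    iwc.setCodec(Codec.forName("Lucene49"));                         // or Codec.getDefault()
    IndexWriter writer = new IndexWriter(dir, iwc);
    // ... add documents, then commit/close as in the excerpts below ...
    writer.close();
    dir.close();

The codec set here only affects how new segments are written; each existing segment is read with the codec recorded when it was written.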


    writer.commit();
    writer.close();

    assertQuery(new Term("content", "ccc"), dir, 10);
    assertQuery(new Term("content", "aaa"), dir, 10);
    Codec codec = iwconf.getCodec();

    iwconf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
        .setOpenMode(OpenMode.APPEND).setCodec(codec);
    //((LogMergePolicy) iwconf.getMergePolicy()).setNoCFSRatio(0.0);
    //((LogMergePolicy) iwconf.getMergePolicy()).setMergeFactor(10);
View Full Code Here


    }
    dir.close();
  }
 
  public void testSameCodecDifferentInstance() throws Exception {
    Codec codec = new Lucene49Codec() {
      @Override
      public PostingsFormat getPostingsFormatForField(String field) {
        if ("id".equals(field)) {
          return new Pulsing41PostingsFormat(1);
        } else if ("date".equals(field)) {
View Full Code Here

    };
    doTestMixedPostings(codec);
  }
 
  public void testSameCodecDifferentParams() throws Exception {
    Codec codec = new Lucene49Codec() {
      @Override
      public PostingsFormat getPostingsFormatForField(String field) {
        if ("id".equals(field)) {
          return new Pulsing41PostingsFormat(1);
        } else if ("date".equals(field)) {
View Full Code Here
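
Both excerpts above are truncated inside getPostingsFormatForField. A self-contained sketch of the same per-field pattern follows; the "id" mapping mirrors the excerpts, while the fallback to the codec's default is an assumption (the original tests also special-case a "date" field, elided here).

    // Sketch, modeled on the truncated tests above: choose postings per field.
    Codec codec = new Lucene49Codec() {
      @Override
      public PostingsFormat getPostingsFormatForField(String field) {
        if ("id".equals(field)) {
          // pulse a primary-key-like field (freq cutoff 1), as in the excerpts
          return new Pulsing41PostingsFormat(1);
        }
        // every other field keeps this codec's default postings format
        return super.getPostingsFormatForField(field);
      }
    };
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer);  // analyzer assumed
    conf.setCodec(conf.getCodec() == codec ? codec : codec);  // i.e. conf.setCodec(codec)

Per-field selection only applies at write time; the chosen format's name is recorded for each field so the same format is used again when the segment is read.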

      }
    };
   
    // create lots of aborting exceptions with a broken codec
    // we don't need a random codec, as we aren't trying to find bugs in the codec here.
    Codec inner = RANDOM_MULTIPLIER > 1 ? Codec.getDefault() : new AssertingCodec();
    Codec codec = new CrankyCodec(inner, new Random(random().nextLong()));
   
    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    // just for now, try to keep this test reproducible
    conf.setMergeScheduler(new SerialMergeScheduler());
    conf.setCodec(codec);
View Full Code Here
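
CrankyCodec and AssertingCodec in the excerpt above are wrapping codecs: each delegates to an inner Codec and changes one aspect of its behaviour. A minimal sketch of the same idea using FilterCodec follows; the codec name, the delegate, and the swapped-in postings format are illustrative assumptions, not taken from the excerpt.

    import org.apache.lucene.codecs.Codec;
    import org.apache.lucene.codecs.FilterCodec;
    import org.apache.lucene.codecs.PostingsFormat;
    import org.apache.lucene.codecs.pulsing.Pulsing41PostingsFormat;

    // Sketch: delegate everything to Lucene49, but swap the postings format.
    public class WrappedCodec extends FilterCodec {
      public WrappedCodec() {
        super("WrappedCodec", Codec.forName("Lucene49"));   // hypothetical name + delegate
      }

      @Override
      public PostingsFormat postingsFormat() {
        // use pulsing postings for every field instead of the delegate's default
        return new Pulsing41PostingsFormat(1);
      }
    }

Because the codec name is written into each segment, a custom codec like this must be registered through the Codec SPI (a META-INF/services entry) if the index is to be opened again by name.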

    if (termsIndexDivisor == 0) {
      throw new IllegalArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)");
    }
   
    final Codec codec = si.info.getCodec();
    final Directory cfsDir; // confusing name: if (cfs) it's the cfsDir, otherwise it's the segment's directory.

    boolean success = false;
   
    try {
      if (si.info.getUseCompoundFile()) {
        cfsDir = cfsReader = new CompoundFileDirectory(dir, IndexFileNames.segmentFileName(si.info.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false);
      } else {
        cfsReader = null;
        cfsDir = dir;
      }

      final FieldInfos fieldInfos = owner.fieldInfos;
     
      this.termsIndexDivisor = termsIndexDivisor;
      final PostingsFormat format = codec.postingsFormat();
      final SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.info, fieldInfos, context, termsIndexDivisor);
      // Ask codec for its Fields
      fields = format.fieldsProducer(segmentReadState);
      assert fields != null;
      // ask codec for its Norms:
      // TODO: since we don't write any norms file if there are no norms,
      // kinda janky to assume the codec handles the case of no norms file at all gracefully?!

      if (fieldInfos.hasNorms()) {
        normsProducer = codec.normsFormat().normsProducer(segmentReadState);
        assert normsProducer != null;
      } else {
        normsProducer = null;
      }
 
View Full Code Here

  private FieldInfos currentFieldInfos;

  // maxAllowed = the "highest" we can index, but we will still
  // randomly index at lower IndexOption
  private FieldsProducer buildIndex(Directory dir, IndexOptions maxAllowed, boolean allowPayloads, boolean alwaysTestMax) throws IOException {
    Codec codec = getCodec();
    SegmentInfo segmentInfo = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, "_0", maxDoc, false, codec, null);

    int maxIndexOption = Arrays.asList(IndexOptions.values()).indexOf(maxAllowed);
    if (VERBOSE) {
      System.out.println("\nTEST: now build index");
    }

    int maxIndexOptionNoOffsets = Arrays.asList(IndexOptions.values()).indexOf(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);

    // TODO use allowPayloads

    FieldInfo[] newFieldInfoArray = new FieldInfo[fields.size()];
    for(int fieldUpto=0;fieldUpto<fields.size();fieldUpto++) {
      FieldInfo oldFieldInfo = fieldInfos.fieldInfo(fieldUpto);

      String pf = TestUtil.getPostingsFormat(codec, oldFieldInfo.name);
      int fieldMaxIndexOption;
      if (doesntSupportOffsets.contains(pf)) {
        fieldMaxIndexOption = Math.min(maxIndexOptionNoOffsets, maxIndexOption);
      } else {
        fieldMaxIndexOption = maxIndexOption;
      }
   
      // Randomly pick the IndexOptions to index this
      // field with:
      IndexOptions indexOptions = IndexOptions.values()[alwaysTestMax ? fieldMaxIndexOption : random().nextInt(1+fieldMaxIndexOption)];
      boolean doPayloads = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 && allowPayloads;

      newFieldInfoArray[fieldUpto] = new FieldInfo(oldFieldInfo.name,
                                                   true,
                                                   fieldUpto,
                                                   false,
                                                   false,
                                                   doPayloads,
                                                   indexOptions,
                                                   null,
                                                   DocValuesType.NUMERIC,
                                                   -1,
                                                   null);
    }

    FieldInfos newFieldInfos = new FieldInfos(newFieldInfoArray);

    // Estimate that flushed segment size will be 25% of
    // what we use in RAM:
    long bytes =  totalPostings * 8 + totalPayloadBytes;

    SegmentWriteState writeState = new SegmentWriteState(null, dir,
                                                         segmentInfo, newFieldInfos,
                                                         32, null, new IOContext(new FlushInfo(maxDoc, bytes)));
    FieldsConsumer fieldsConsumer = codec.postingsFormat().fieldsConsumer(writeState);

    for(Map.Entry<String,Map<BytesRef,Long>> fieldEnt : fields.entrySet()) {
      String field = fieldEnt.getKey();
      Map<BytesRef,Long> terms = fieldEnt.getValue();

      FieldInfo fieldInfo = newFieldInfos.fieldInfo(field);

      IndexOptions indexOptions = fieldInfo.getIndexOptions();

      if (VERBOSE) {
        System.out.println("field=" + field + " indexOtions=" + indexOptions);
      }

      boolean doFreq = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
      boolean doPos = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
      boolean doPayloads = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 && allowPayloads;
      boolean doOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
     
      TermsConsumer termsConsumer = fieldsConsumer.addField(fieldInfo);
      long sumTotalTF = 0;
      long sumDF = 0;
      FixedBitSet seenDocs = new FixedBitSet(maxDoc);
      for(Map.Entry<BytesRef,Long> termEnt : terms.entrySet()) {
        BytesRef term = termEnt.getKey();
        SeedPostings postings = getSeedPostings(term.utf8ToString(), termEnt.getValue(), false, maxAllowed);
        if (VERBOSE) {
          System.out.println("  term=" + field + ":" + term.utf8ToString() + " docFreq=" + postings.docFreq + " seed=" + termEnt.getValue());
        }
       
        PostingsConsumer postingsConsumer = termsConsumer.startTerm(term);
        long totalTF = 0;
        int docID = 0;
        while((docID = postings.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
          final int freq = postings.freq();
          if (VERBOSE) {
            System.out.println("    " + postings.upto + ": docID=" + docID + " freq=" + postings.freq);
          }
          postingsConsumer.startDoc(docID, doFreq ? postings.freq : -1);
          seenDocs.set(docID);
          if (doPos) {
            totalTF += postings.freq;
            for(int posUpto=0;posUpto<freq;posUpto++) {
              int pos = postings.nextPosition();
              BytesRef payload = postings.getPayload();

              if (VERBOSE) {
                if (doPayloads) {
                  System.out.println("      pos=" + pos + " payload=" + (payload == null ? "null" : payload.length + " bytes"));
                } else {
                  System.out.println("      pos=" + pos);
                }
              }
              postingsConsumer.addPosition(pos, doPayloads ? payload : null,
                                           doOffsets ? postings.startOffset() : -1,
                                           doOffsets ? postings.endOffset() : -1);
            }
          } else if (doFreq) {
            totalTF += freq;
          } else {
            totalTF++;
          }
          postingsConsumer.finishDoc();
        }
        termsConsumer.finishTerm(term, new TermStats(postings.docFreq, doFreq ? totalTF : -1));
        sumTotalTF += totalTF;
        sumDF += postings.docFreq;
      }

      termsConsumer.finish(doFreq ? sumTotalTF : -1, sumDF, seenDocs.cardinality());
    }

    fieldsConsumer.close();

    if (VERBOSE) {
      System.out.println("TEST: after indexing: files=");
      for(String file : dir.listAll()) {
        System.out.println("  " + file + ": " + dir.fileLength(file) + " bytes");
      }
    }

    currentFieldInfos = newFieldInfos;

    SegmentReadState readState = new SegmentReadState(dir, segmentInfo, newFieldInfos, IOContext.READ, 1);

    return codec.postingsFormat().fieldsProducer(readState);
  }
View Full Code Here

    return result;
  }
 
  public void testWriteReadMerge() throws IOException {
    // get another codec, other than the default: so we are merging segments across different codecs
    final Codec otherCodec;
    if ("SimpleText".equals(Codec.getDefault().getName())) {
      otherCodec = new Lucene49Codec();
    } else {
      otherCodec = new SimpleTextCodec();
    }
View Full Code Here

        // shortly-to-be-opened SegmentReader and let it
        // carry the changes; there's no reason to use
        // filesystem as intermediary here.
         
        SegmentCommitInfo info = flushedSegment.segmentInfo;
        Codec codec = info.info.getCodec();
        codec.liveDocsFormat().writeLiveDocs(flushedSegment.liveDocs, directory, info, delCount, context);
        newSegment.setDelCount(delCount);
        newSegment.advanceDelGen();
      }

      success = true;
View Full Code Here

    // We must rewrite the SI file because it references
    // segment name (its own name, if it's 3.x, and doc
    // store segment name):
    TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory);
    final Codec currentCodec = newInfo.getCodec();
    try {
      currentCodec.segmentInfoFormat().getSegmentInfoWriter().write(trackingDir, newInfo, fis, context);
    } catch (UnsupportedOperationException uoe) {
      if (currentCodec instanceof Lucene3xCodec) {
        // OK: 3x codec cannot write a new SI file;
        // SegmentInfos will write this on commit
      } else {
View Full Code Here

  public void testDocsStuckInRAMForever() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    iwc.setRAMBufferSizeMB(.2);
    Codec codec = Codec.forName("Lucene49");
    iwc.setCodec(codec);
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    final IndexWriter w = new IndexWriter(dir, iwc);
    final CountDownLatch startingGun = new CountDownLatch(1);
    Thread[] threads = new Thread[2];
View Full Code Here
