Package org.apache.lucene.index

Examples of org.apache.lucene.index.AtomicReader
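
AtomicReader is Lucene 4.x's single-segment reader: it gives direct access to one segment's terms, postings, doc values and live docs. There are two common ways to obtain one, shown in the minimal sketch below (which assumes an index already exists in a Directory named dir; all types are from org.apache.lucene.index and org.apache.lucene.util):

    DirectoryReader directoryReader = DirectoryReader.open(dir);
    try {
      // 1) per-segment access via the leaf contexts (the preferred way):
      for (AtomicReaderContext ctx : directoryReader.leaves()) {
        AtomicReader leaf = ctx.reader();
        System.out.println("segment with " + leaf.maxDoc() + " docs");
      }
      // 2) a merged single-reader view, for APIs that require one AtomicReader
      //    (convenient but costly, hence the class name):
      AtomicReader merged = SlowCompositeReaderWrapper.wrap(directoryReader);
      System.out.println("total docs: " + merged.maxDoc());
    } finally {
      directoryReader.close();
    }

The examples that follow are taken from the Lucene codebase.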


  protected void checkCorrectClassification(Classifier<T> classifier, String inputDoc, T expectedResult, Analyzer analyzer, String textFieldName, String classFieldName) throws Exception {
    checkCorrectClassification(classifier, inputDoc, expectedResult, analyzer, textFieldName, classFieldName, null);
  }

  protected void checkCorrectClassification(Classifier<T> classifier, String inputDoc, T expectedResult, Analyzer analyzer, String textFieldName, String classFieldName, Query query) throws Exception {
    AtomicReader atomicReader = null;
    try {
      populateSampleIndex(analyzer);
      // training requires a single AtomicReader, so wrap the composite NRT reader
      atomicReader = SlowCompositeReaderWrapper.wrap(indexWriter.getReader());
      classifier.train(atomicReader, textFieldName, classFieldName, analyzer, query);
      ClassificationResult<T> classificationResult = classifier.assignClass(inputDoc);
      assertNotNull(classificationResult.getAssignedClass());
      assertEquals("got an unexpected assigned class of " + classificationResult.getAssignedClass(), expectedResult, classificationResult.getAssignedClass());
      assertTrue("got a non-positive score " + classificationResult.getScore(), classificationResult.getScore() > 0);
    } finally {
      if (atomicReader != null)
        atomicReader.close();
    }
  }


  protected void checkOnlineClassification(Classifier<T> classifier, String inputDoc, T expectedResult, Analyzer analyzer, String textFieldName, String classFieldName) throws Exception {
    checkOnlineClassification(classifier, inputDoc, expectedResult, analyzer, textFieldName, classFieldName, null);
  }

  protected void checkOnlineClassification(Classifier<T> classifier, String inputDoc, T expectedResult, Analyzer analyzer, String textFieldName, String classFieldName, Query query) throws Exception {
    AtomicReader atomicReader = null;
    try {
      populateSampleIndex(analyzer);
      atomicReader = SlowCompositeReaderWrapper.wrap(indexWriter.getReader());
      classifier.train(atomicReader, textFieldName, classFieldName, analyzer, query);
      ClassificationResult<T> classificationResult = classifier.assignClass(inputDoc);
      assertNotNull(classificationResult.getAssignedClass());
      assertEquals("got an unexpected assigned class of " + classificationResult.getAssignedClass(), expectedResult, classificationResult.getAssignedClass());
      assertTrue("got a non-positive score " + classificationResult.getScore(), classificationResult.getScore() > 0);
      // update the index and verify that the already-trained classifier is unaffected
      updateSampleIndex(analyzer);
      ClassificationResult<T> secondClassificationResult = classifier.assignClass(inputDoc);
      assertEquals(classificationResult.getAssignedClass(), secondClassificationResult.getAssignedClass());
      assertEquals(Double.valueOf(classificationResult.getScore()), Double.valueOf(secondClassificationResult.getScore()));

    } finally {
      if (atomicReader != null)
        atomicReader.close();
    }
  }
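
Outside a test, the same training flow looks roughly like this sketch; the field names, analyzer and the choice of SimpleNaiveBayesClassifier are illustrative assumptions, not part of the helpers above:

    AtomicReader reader = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir));
    try {
      Classifier<BytesRef> classifier = new SimpleNaiveBayesClassifier();
      // assumed field names; any 4.x Version constant works for the analyzer
      classifier.train(reader, "text", "category", new StandardAnalyzer(Version.LUCENE_47));
      ClassificationResult<BytesRef> result = classifier.assignClass("some text to classify");
      System.out.println(result.getAssignedClass().utf8ToString()
          + " (score=" + result.getScore() + ")");
    } finally {
      reader.close();
    }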


  protected void checkPerformance(Classifier<T> classifier, Analyzer analyzer, String classFieldName) throws Exception {
    AtomicReader atomicReader = null;
    long trainStart = System.currentTimeMillis(); // note: the timer also covers index population
    try {
      populatePerformanceIndex(analyzer);
      atomicReader = SlowCompositeReaderWrapper.wrap(indexWriter.getReader());
      classifier.train(atomicReader, textFieldName, classFieldName, analyzer);
      long trainEnd = System.currentTimeMillis();
      long trainTime = trainEnd - trainStart;
      assertTrue("training took more than 2 mins: " + trainTime / 1000 + "s", trainTime < 120000);
    } finally {
      if (atomicReader != null)
        atomicReader.close();
    }
  }

    @Override
    public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
        boolean topScorer, Bits acceptDocs) throws IOException {
      assert !terms.isEmpty();
      final AtomicReader reader = context.reader();
      final Bits liveDocs = acceptDocs;
      PostingsAndFreq[] postingsFreqs = new PostingsAndFreq[terms.size()];

      final Terms fieldTerms = reader.terms(field);
      if (fieldTerms == null) {
        return null;
      }

      // Reuse single TermsEnum below:
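
The fragment stops just before the per-term lookups; for each phrase term, the postings are pulled from the segment's AtomicReader roughly as in this sketch (the term value is made up):

    TermsEnum te = fieldTerms.iterator(null);
    if (te.seekExact(new BytesRef("lucene"))) {     // assumed term
      DocsAndPositionsEnum postings = te.docsAndPositions(liveDocs, null);
      // postings == null here would mean the field was indexed without positions
    }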

      ordinalMap.setSize(size);
      int base = 0;
      TermsEnum te = null;
      DocsEnum docs = null;
      for (final AtomicReaderContext ctx : r.leaves()) {
        final AtomicReader ar = ctx.reader();
        final Terms terms = ar.terms(Consts.FULL);
        te = terms.iterator(te);
        while (te.next() != null) {
          FacetLabel cp = new FacetLabel(FacetsConfig.stringToPath(te.term().utf8ToString()));
          final int ordinal = addCategory(cp);
          docs = te.docs(null, docs, DocsEnum.FLAG_NONE);
          ordinalMap.addMapping(docs.nextDoc() + base, ordinal);
        }
        base += ar.maxDoc(); // no deletions, so we're ok
      }
      ordinalMap.addDone();
    } finally {
      r.close();
    }
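
Reduced to its core, the leaf-by-leaf pattern above iterates every term of a field across all segments while tracking a global doc base; a sketch, assuming a composite reader named reader and a made-up field name:

    TermsEnum te = null;
    int docBase = 0;
    for (AtomicReaderContext ctx : reader.leaves()) {
      AtomicReader leaf = ctx.reader();
      Terms terms = leaf.terms("contents");    // assumed field name
      if (terms == null) {
        continue;                              // field absent in this segment
      }
      te = terms.iterator(te);                 // reuse the enum across segments
      BytesRef term;
      while ((term = te.next()) != null) {
        System.out.println(term.utf8ToString() + " docFreq=" + te.docFreq());
      }
      docBase += leaf.maxDoc();                // leaf docID + docBase = top-level docID
    }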

    Directory dirTmp = getDirectory(new File(indexPath.toString() + ".tmp"));

    IndexWriter w = null;
    IndexWriter w2 = null;
    AtomicReader r = null;
    boolean success = false;
    count = 0;
    try {
      Analyzer gramAnalyzer = new AnalyzerWrapper(Analyzer.PER_FIELD_REUSE_STRATEGY) {
          @Override
          protected Analyzer getWrappedAnalyzer(String fieldName) {
            return indexAnalyzer;
          }

          @Override
          protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
            if (fieldName.equals("textgrams") && minPrefixChars > 0) {
              return new TokenStreamComponents(components.getTokenizer(),
                                               new EdgeNGramTokenFilter(matchVersion,
                                                                        components.getTokenStream(),
                                                                        1, minPrefixChars));
            } else {
              return components;
            }
          }
        };

      w = new IndexWriter(dirTmp,
                          getIndexWriterConfig(matchVersion, gramAnalyzer));
      BytesRef text;
      Document doc = new Document();
      FieldType ft = getTextFieldType();
      Field textField = new Field(TEXT_FIELD_NAME, "", ft);
      doc.add(textField);

      Field textGramField = new Field("textgrams", "", ft);
      doc.add(textGramField);

      Field textDVField = new BinaryDocValuesField(TEXT_FIELD_NAME, new BytesRef());
      doc.add(textDVField);

      // TODO: use threads...?
      Field weightField = new NumericDocValuesField("weight", 0);
      doc.add(weightField);

      Field payloadField;
      if (iter.hasPayloads()) {
        payloadField = new BinaryDocValuesField("payloads", new BytesRef());
        doc.add(payloadField);
      } else {
        payloadField = null;
      }
      //long t0 = System.nanoTime();
      while ((text = iter.next()) != null) {
        String textString = text.utf8ToString();
        textField.setStringValue(textString);
        textGramField.setStringValue(textString);
        textDVField.setBytesValue(text);
        weightField.setLongValue(iter.weight());
        if (iter.hasPayloads()) {
          payloadField.setBytesValue(iter.payload());
        }
        w.addDocument(doc);
        count++;
      }
      //System.out.println("initial indexing time: " + ((System.nanoTime()-t0)/1000000) + " msec");

      // open an NRT reader over the temp index, then roll back the writer;
      // the open reader keeps the segment files alive until it is closed
      r = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(w, false));
      //long t1 = System.nanoTime();
      w.rollback();

      final int maxDoc = r.maxDoc();

      final NumericDocValues weights = r.getNumericDocValues("weight");

      final Sorter.DocComparator comparator = new Sorter.DocComparator() {
          @Override
          public int compare(int docID1, int docID2) {
            final long v1 = weights.get(docID1);
            final long v2 = weights.get(docID2);
            // Reverse sort (highest weight first);
            // java7 only:
            //return Long.compare(v2, v1);
            if (v1 > v2) {
              return -1;
            } else if (v1 < v2) {
              return 1;
            } else {
              return 0;
            }
          }
        };

      // re-sort the reader by descending weight before writing the final index
      r = SortingAtomicReader.wrap(r, new Sorter() {
          @Override
          public Sorter.DocMap sort(AtomicReader reader) throws IOException {
            return Sorter.sort(maxDoc, comparator);
          }

          @Override
          public String getID() {
            return "Weight";
          }
        });
     
      w2 = new IndexWriter(dir,
                           getIndexWriterConfig(matchVersion, indexAnalyzer));
      w2.addIndexes(new IndexReader[] {r});
      r.close();

      //System.out.println("sort time: " + ((System.nanoTime()-t1)/1000000) + " msec");

      searcher = new IndexSearcher(DirectoryReader.open(w2, false));
      w2.close();


      final Map<Integer, JoinScore> docToJoinScore = new HashMap<Integer, JoinScore>();
      if (multipleValuesPerDocument) {
        if (scoreDocsInOrder) {
          AtomicReader slowCompositeReader = SlowCompositeReaderWrapper.wrap(toSearcher.getIndexReader());
          Terms terms = slowCompositeReader.terms(toField);
          if (terms != null) {
            DocsEnum docsEnum = null;
            TermsEnum termsEnum = null;
            SortedSet<BytesRef> joinValues = new TreeSet<BytesRef>(BytesRef.getUTF8SortedAsUnicodeComparator());
            joinValues.addAll(joinValueToJoinScores.keySet());
            for (BytesRef joinValue : joinValues) {
              termsEnum = terms.iterator(termsEnum);
              if (termsEnum.seekExact(joinValue)) {
                docsEnum = termsEnum.docs(slowCompositeReader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE);
                JoinScore joinScore = joinValueToJoinScores.get(joinValue);

                for (int doc = docsEnum.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docsEnum.nextDoc()) {
                  // First encountered join value determines the score.
                  // Something to keep in mind for many-to-many relations.

        new Filter() {
          @Override
          public DocIdSet getDocIdSet(AtomicReaderContext context,
              Bits acceptDocs) throws IOException {
            final boolean nullBitset = random().nextInt(10) == 5;
            final AtomicReader reader = context.reader();
            DocsEnum termDocsEnum = reader.termDocsEnum(new Term("field", "0"));
            if (termDocsEnum == null) {
              return null; // no docs -- return null
            }
            final BitSet bitSet = new BitSet(reader.maxDoc());
            int d;
            while ((d = termDocsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
              bitSet.set(d, true);
            }
            return new DocIdSet() {
             
              @Override
              public Bits bits() throws IOException {
                if (nullBitset) {
                  return null;
                }
                return new Bits() {
                 
                  @Override
                  public boolean get(int index) {
                    assertTrue("filter was called for a non-matching doc",
                        bitSet.get(index));
                    return bitSet.get(index);
                  }
                 
                  @Override
                  public int length() {
                    return bitSet.length();
                  }
                 
                };
              }
             
              @Override
              public DocIdSetIterator iterator() throws IOException {
                assertTrue(
                    "iterator should not be called if bitset is present",
                    nullBitset);
                return reader.termDocsEnum(new Term("field", "0"));
              }
             
            };
          }
        }, FilteredQuery.QUERY_FIRST_FILTER_STRATEGY);
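
Stripped of the test assertions, the same pattern in non-test code would typically cache the matching documents in a FixedBitSet, which itself implements DocIdSet in Lucene 4.x; a sketch with illustrative field/term values:

    Filter singleTermFilter = new Filter() {
      @Override
      public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
        AtomicReader reader = context.reader();
        DocsEnum docs = reader.termDocsEnum(new Term("field", "0")); // acceptDocs ignored for brevity
        if (docs == null) {
          return null; // the term does not occur in this segment
        }
        FixedBitSet bits = new FixedBitSet(reader.maxDoc());
        int doc;
        while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
          bits.set(doc);
        }
        return bits;
      }
    };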

      bi.setText(content);
      int doc = docids[i];
      int leaf = ReaderUtil.subIndex(doc, leaves);
      AtomicReaderContext subContext = leaves.get(leaf);
      AtomicReader r = subContext.reader();
      Terms t = r.terms(field);
      if (t == null) {
        continue; // nothing to do
      }
      if (leaf != lastLeaf) {
        termsEnum = t.iterator(null);
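
The ReaderUtil.subIndex call above maps a top-level docID to the segment that holds it; subtracting the segment's docBase then yields the segment-local docID. A minimal sketch, assuming an IndexSearcher named searcher and a top-level docID named globalDocID:

    List<AtomicReaderContext> leaves = searcher.getIndexReader().leaves();
    int leaf = ReaderUtil.subIndex(globalDocID, leaves);   // which segment?
    AtomicReaderContext ctx = leaves.get(leaf);
    int localDocID = globalDocID - ctx.docBase;            // segment-local docID
    Document doc = ctx.reader().document(localDocID);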

    IOUtils.close(tr, ir, iw, tw, indexDir, taxoDir);
  }

  private void assertOrdinalsExist(String field, IndexReader ir) throws IOException {
    for (AtomicReaderContext context : ir.leaves()) {
      AtomicReader r = context.reader();
      if (r.getBinaryDocValues(field) != null) {
        return; // not every segment needs to have these doc values
      }
    }
    fail("no ordinals found for " + field);
  }
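
Reading the values that assertOrdinalsExist probes for is likewise a per-leaf operation; in Lucene 4.x, BinaryDocValues.get fills a caller-supplied BytesRef. A sketch (the field name is an assumption; "$facets" is merely the facet module's default):

    BinaryDocValues dv = leafReader.getBinaryDocValues("$facets"); // assumed field name
    if (dv != null) {
      BytesRef scratch = new BytesRef();
      dv.get(docID, scratch);   // scratch now holds this document's ordinals payload
    }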
