Package org.apache.lucene.util

Examples of org.apache.lucene.util.OfflineSorter


      }
      writer.close();

      // We don't know the distribution of scores and we need to bucket them, so we'll sort
      // and divide into equal buckets.
      SortInfo info = new OfflineSorter().sort(tempInput, tempSorted);
      tempInput.delete();
      FSTCompletionBuilder builder = new FSTCompletionBuilder(
          buckets, sorter = new ExternalRefSorter(new OfflineSorter()), sharedTailLength);

      final int inputLines = info.lines;
      reader = new OfflineSorter.ByteSequencesReader(tempSorted);
      long line = 0;
      int previousBucket = 0;
View Full Code Here


        count++;
      }
      writer.close();

      // Sort all input/output pairs (required by FST.Builder):
      new OfflineSorter(new AnalyzingComparator(hasPayloads)).sort(tempInput, tempSorted);

      // Free disk space:
      tempInput.delete();

      reader = new OfflineSorter.ByteSequencesReader(tempSorted);
View Full Code Here

        IOUtils.closeWhileHandlingException(writer);
      }
    }
    File sorted = File.createTempFile("sorted", "dat", tempDir);
   
    OfflineSorter sorter = new OfflineSorter(new Comparator<BytesRef>() {
      BytesRef scratch1 = new BytesRef();
      BytesRef scratch2 = new BytesRef();
     
      @Override
      public int compare(BytesRef o1, BytesRef o2) {
        scratch1.bytes = o1.bytes;
        scratch1.offset = o1.offset;
        scratch1.length = o1.length;
       
        for (int i = scratch1.length - 1; i >= 0; i--) {
          if (scratch1.bytes[scratch1.offset + i] == FLAG_SEPARATOR) {
            scratch1.length = i;
            break;
          }
        }
       
        scratch2.bytes = o2.bytes;
        scratch2.offset = o2.offset;
        scratch2.length = o2.length;
       
        for (int i = scratch2.length - 1; i >= 0; i--) {
          if (scratch2.bytes[scratch2.offset + i] == FLAG_SEPARATOR) {
            scratch2.length = i;
            break;
          }
        }
       
        int cmp = scratch1.compareTo(scratch2);
        if (cmp == 0) {
          // tie break on whole row
          return o1.compareTo(o2);
        } else {
          return cmp;
        }
      }
    });
    sorter.sort(unsorted, sorted);
    unsorted.delete();
   
    ByteSequencesReader reader = new ByteSequencesReader(sorted);
    BytesRef scratchLine = new BytesRef();
   
View Full Code Here

        IOUtils.closeWhileHandlingException(writer);
      }
    }
    File sorted = File.createTempFile("sorted", "dat", tempDir);
   
    OfflineSorter sorter = new OfflineSorter(new Comparator<BytesRef>() {
      BytesRef scratch1 = new BytesRef();
      BytesRef scratch2 = new BytesRef();
     
      @Override
      public int compare(BytesRef o1, BytesRef o2) {
        scratch1.bytes = o1.bytes;
        scratch1.offset = o1.offset;
        scratch1.length = o1.length;
       
        for (int i = scratch1.length - 1; i >= 0; i--) {
          if (scratch1.bytes[scratch1.offset + i] == FLAG_SEPARATOR) {
            scratch1.length = i;
            break;
          }
        }
       
        scratch2.bytes = o2.bytes;
        scratch2.offset = o2.offset;
        scratch2.length = o2.length;
       
        for (int i = scratch2.length - 1; i >= 0; i--) {
          if (scratch2.bytes[scratch2.offset + i] == FLAG_SEPARATOR) {
            scratch2.length = i;
            break;
          }
        }
       
        int cmp = scratch1.compareTo(scratch2);
        if (cmp == 0) {
          // tie break on whole row
          return o1.compareTo(o2);
        } else {
          return cmp;
        }
      }
    });
    sorter.sort(unsorted, sorted);
    unsorted.delete();
   
    ByteSequencesReader reader = new ByteSequencesReader(sorted);
    BytesRef scratchLine = new BytesRef();
   
View Full Code Here

      }
      writer.close();

      // We don't know the distribution of scores and we need to bucket them, so we'll sort
      // and divide into equal buckets.
      SortInfo info = new OfflineSorter().sort(tempInput, tempSorted);
      tempInput.delete();
      FSTCompletionBuilder builder = new FSTCompletionBuilder(
          buckets, sorter = new ExternalRefSorter(new OfflineSorter()), sharedTailLength);

      final int inputLines = info.lines;
      reader = new OfflineSorter.ByteSequencesReader(tempSorted);
      long line = 0;
      int previousBucket = 0;
View Full Code Here

    File input = new File("/home/dweiss/tmp/shuffled.dict");

    int buckets = 20;
    int shareMaxTail = 10;

    ExternalRefSorter sorter = new ExternalRefSorter(new OfflineSorter());
    FSTCompletionBuilder builder = new FSTCompletionBuilder(buckets, sorter, shareMaxTail);

    BufferedReader reader = new BufferedReader(
        new InputStreamReader(
            new FileInputStream(input), StandardCharsets.UTF_8));
View Full Code Here

      TestUtil.rm(tempDir);
    super.tearDown();
  }

  public void testEmpty() throws Exception {
    checkSort(new OfflineSorter(), new byte [][] {});
  }
View Full Code Here

  public void testEmpty() throws Exception {
    checkSort(new OfflineSorter(), new byte [][] {});
  }

  public void testSingleLine() throws Exception {
    checkSort(new OfflineSorter(), new byte [][] {
        "Single line only.".getBytes(StandardCharsets.UTF_8)
    });
  }
View Full Code Here

    });
  }

  public void testIntermediateMerges() throws Exception {
    // Sort 20 mb worth of data with 1mb buffer, binary merging.
    SortInfo info = checkSort(new OfflineSorter(OfflineSorter.DEFAULT_COMPARATOR, BufferSize.megabytes(1), OfflineSorter.defaultTempDir(), 2),
        generateRandom((int)OfflineSorter.MB * 20));
    assertTrue(info.mergeRounds > 10);
  }
View Full Code Here

    assertTrue(info.mergeRounds > 10);
  }

  public void testSmallRandom() throws Exception {
    // Sort 20 mb worth of data with 1mb buffer.
    SortInfo sortInfo = checkSort(new OfflineSorter(OfflineSorter.DEFAULT_COMPARATOR, BufferSize.megabytes(1), OfflineSorter.defaultTempDir(), OfflineSorter.MAX_TEMPFILES),
        generateRandom((int)OfflineSorter.MB * 20));
    assertEquals(1, sortInfo.mergeRounds);
  }
View Full Code Here

TOP

Related Classes of org.apache.lucene.util.OfflineSorter

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.