Package org.apache.lucene.util.OfflineSorter

Examples of org.apache.lucene.util.OfflineSorter.ByteSequencesWriter


    IntsRef scratchInts = new IntsRef();
   
    StringBuilder sb = new StringBuilder();
   
    File unsorted = File.createTempFile("unsorted", "dat", tempDir);
    ByteSequencesWriter writer = new ByteSequencesWriter(unsorted);
    boolean success = false;
    try {
      for (InputStream dictionary : dictionaries) {
        BufferedReader lines = new BufferedReader(new InputStreamReader(dictionary, decoder));
        String line = lines.readLine(); // first line is number of entries (approximately, sometimes)
       
        while ((line = lines.readLine()) != null) {
          line = unescapeEntry(line);
          if (needsInputCleaning) {
            int flagSep = line.lastIndexOf(FLAG_SEPARATOR);
            if (flagSep == -1) {
              CharSequence cleansed = cleanInput(line, sb);
              writer.write(cleansed.toString().getBytes(StandardCharsets.UTF_8));
            } else {
              String text = line.substring(0, flagSep);
              CharSequence cleansed = cleanInput(text, sb);
              if (cleansed != sb) {
                sb.setLength(0);
                sb.append(cleansed);
              }
              sb.append(line.substring(flagSep));
              writer.write(sb.toString().getBytes(StandardCharsets.UTF_8));
            }
          } else {
            writer.write(line.getBytes(StandardCharsets.UTF_8));
          }
        }
      }
      success = true;
    } finally {
View Full Code Here


    IntsRef scratchInts = new IntsRef();
   
    StringBuilder sb = new StringBuilder();
   
    File unsorted = File.createTempFile("unsorted", "dat", tempDir);
    ByteSequencesWriter writer = new ByteSequencesWriter(unsorted);
    boolean success = false;
    try {
      for (InputStream dictionary : dictionaries) {
        BufferedReader lines = new BufferedReader(new InputStreamReader(dictionary, decoder));
        String line = lines.readLine(); // first line is number of entries (approximately, sometimes)
       
        while ((line = lines.readLine()) != null) {
          line = unescapeEntry(line);
          if (needsInputCleaning) {
            int flagSep = line.lastIndexOf(FLAG_SEPARATOR);
            if (flagSep == -1) {
              CharSequence cleansed = cleanInput(line, sb);
              writer.write(cleansed.toString().getBytes(StandardCharsets.UTF_8));
            } else {
              String text = line.substring(0, flagSep);
              CharSequence cleansed = cleanInput(text, sb);
              if (cleansed != sb) {
                sb.setLength(0);
                sb.append(cleansed);
              }
              sb.append(line.substring(flagSep));
              writer.write(sb.toString().getBytes(StandardCharsets.UTF_8));
            }
          } else {
            writer.write(line.getBytes(StandardCharsets.UTF_8));
          }
        }
      }
      success = true;
    } finally {
View Full Code Here

    IOUtils.close(is1, is2);
  }

  private File writeAll(String name, byte[][] data) throws IOException {
    File file = new File(tempDir, name);
    ByteSequencesWriter w = new OfflineSorter.ByteSequencesWriter(file);
    for (byte [] datum : data) {
      w.write(datum);
    }
    w.close();
    return file;
  }
View Full Code Here

    String prefix = getClass().getSimpleName();
    File directory = OfflineSorter.defaultTempDir();
    tempInput = File.createTempFile(prefix, ".input", directory);
    tempSorted = File.createTempFile(prefix, ".sorted", directory);
   
    final ByteSequencesWriter writer = new ByteSequencesWriter(tempInput);
    boolean success = false;
    try {
      BytesRef spare;
      byte[] buffer = new byte[0];
      ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);

      while ((spare = source.next()) != null) {
        encode(writer, output, buffer, spare, source.weight());
      }
      writer.close();
      new OfflineSorter(tieBreakByCostComparator).sort(tempInput, tempSorted);
      ByteSequencesReader reader = new ByteSequencesReader(tempSorted);
      success = true;
      return reader;
     
View Full Code Here

TOP

Related Classes of org.apache.lucene.util.OfflineSorter.ByteSequencesWriter

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle Corporation. Contact coftware#gmail.com.