Package org.apache.lucene.codecs.simpletext

Source Code of org.apache.lucene.codecs.simpletext.SimpleTextStoredFieldsReader

package org.apache.lucene.codecs.simpletext;

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import java.io.IOException;
import java.nio.charset.StandardCharsets;

import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.BufferedChecksumIndexInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.UnicodeUtil;

import static org.apache.lucene.codecs.simpletext.SimpleTextStoredFieldsWriter.*;

/**
* reads plaintext stored fields
* <p>
* <b><font color="red">FOR RECREATIONAL USE ONLY</font></B>
* @lucene.experimental
*/
public class SimpleTextStoredFieldsReader extends StoredFieldsReader {

  private static final long BASE_RAM_BYTES_USED =
        RamUsageEstimator.shallowSizeOfInstance(SimpleTextStoredFieldsReader.class)
      + RamUsageEstimator.shallowSizeOfInstance(BytesRef.class)
      + RamUsageEstimator.shallowSizeOfInstance(CharsRef.class);

  private long offsets[]; /* docid -> offset in .fld file */
  private IndexInput in;
  private BytesRef scratch = new BytesRef();
  private CharsRef scratchUTF16 = new CharsRef();
  private final FieldInfos fieldInfos;

  public SimpleTextStoredFieldsReader(Directory directory, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException {
    this.fieldInfos = fn;
    boolean success = false;
    try {
      in = directory.openInput(IndexFileNames.segmentFileName(si.name, "", SimpleTextStoredFieldsWriter.FIELDS_EXTENSION), context);
      success = true;
    } finally {
      if (!success) {
        try {
          close();
        } catch (Throwable t) {} // ensure we throw our original exception
      }
    }
    readIndex(si.getDocCount());
  }
 
  // used by clone
  SimpleTextStoredFieldsReader(long offsets[], IndexInput in, FieldInfos fieldInfos) {
    this.offsets = offsets;
    this.in = in;
    this.fieldInfos = fieldInfos;
  }
 
  // we don't actually write a .fdx-like index, instead we read the
  // stored fields file in entirety up-front and save the offsets
  // so we can seek to the documents later.
  private void readIndex(int size) throws IOException {
    ChecksumIndexInput input = new BufferedChecksumIndexInput(in);
    offsets = new long[size];
    int upto = 0;
    while (!scratch.equals(END)) {
      SimpleTextUtil.readLine(input, scratch);
      if (StringHelper.startsWith(scratch, DOC)) {
        offsets[upto] = input.getFilePointer();
        upto++;
      }
    }
    SimpleTextUtil.checkFooter(input);
    assert upto == offsets.length;
  }
 
  @Override
  public void visitDocument(int n, StoredFieldVisitor visitor) throws IOException {
    in.seek(offsets[n]);
   
    while (true) {
      readLine();
      if (StringHelper.startsWith(scratch, FIELD) == false) {
        break;
      }
      int fieldNumber = parseIntAt(FIELD.length);
      FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
      readLine();
      assert StringHelper.startsWith(scratch, NAME);
      readLine();
      assert StringHelper.startsWith(scratch, TYPE);
     
      final BytesRef type;
      if (equalsAt(TYPE_STRING, scratch, TYPE.length)) {
        type = TYPE_STRING;
      } else if (equalsAt(TYPE_BINARY, scratch, TYPE.length)) {
        type = TYPE_BINARY;
      } else if (equalsAt(TYPE_INT, scratch, TYPE.length)) {
        type = TYPE_INT;
      } else if (equalsAt(TYPE_LONG, scratch, TYPE.length)) {
        type = TYPE_LONG;
      } else if (equalsAt(TYPE_FLOAT, scratch, TYPE.length)) {
        type = TYPE_FLOAT;
      } else if (equalsAt(TYPE_DOUBLE, scratch, TYPE.length)) {
        type = TYPE_DOUBLE;
      } else {
        throw new RuntimeException("unknown field type");
      }
     
      switch (visitor.needsField(fieldInfo)) {
        case YES: 
          readField(type, fieldInfo, visitor);
          break;
        case NO:  
          readLine();
          assert StringHelper.startsWith(scratch, VALUE);
          break;
        case STOP: return;
      }
    }
  }
 
  private void readField(BytesRef type, FieldInfo fieldInfo, StoredFieldVisitor visitor) throws IOException {
    readLine();
    assert StringHelper.startsWith(scratch, VALUE);
    if (type == TYPE_STRING) {
      visitor.stringField(fieldInfo, new String(scratch.bytes, scratch.offset+VALUE.length, scratch.length-VALUE.length, StandardCharsets.UTF_8));
    } else if (type == TYPE_BINARY) {
      byte[] copy = new byte[scratch.length-VALUE.length];
      System.arraycopy(scratch.bytes, scratch.offset+VALUE.length, copy, 0, copy.length);
      visitor.binaryField(fieldInfo, copy);
    } else if (type == TYPE_INT) {
      UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+VALUE.length, scratch.length-VALUE.length, scratchUTF16);
      visitor.intField(fieldInfo, Integer.parseInt(scratchUTF16.toString()));
    } else if (type == TYPE_LONG) {
      UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+VALUE.length, scratch.length-VALUE.length, scratchUTF16);
      visitor.longField(fieldInfo, Long.parseLong(scratchUTF16.toString()));
    } else if (type == TYPE_FLOAT) {
      UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+VALUE.length, scratch.length-VALUE.length, scratchUTF16);
      visitor.floatField(fieldInfo, Float.parseFloat(scratchUTF16.toString()));
    } else if (type == TYPE_DOUBLE) {
      UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+VALUE.length, scratch.length-VALUE.length, scratchUTF16);
      visitor.doubleField(fieldInfo, Double.parseDouble(scratchUTF16.toString()));
    }
  }

  @Override
  public StoredFieldsReader clone() {
    if (in == null) {
      throw new AlreadyClosedException("this FieldsReader is closed");
    }
    return new SimpleTextStoredFieldsReader(offsets, in.clone(), fieldInfos);
  }
 
  @Override
  public void close() throws IOException {
    try {
      IOUtils.close(in);
    } finally {
      in = null;
      offsets = null;
    }
  }
 
  private void readLine() throws IOException {
    SimpleTextUtil.readLine(in, scratch);
  }
 
  private int parseIntAt(int offset) {
    UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+offset, scratch.length-offset, scratchUTF16);
    return ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
  }
 
  private boolean equalsAt(BytesRef a, BytesRef b, int bOffset) {
    return a.length == b.length - bOffset &&
        ArrayUtil.equals(a.bytes, a.offset, b.bytes, b.offset + bOffset, b.length - bOffset);
  }

  @Override
  public long ramBytesUsed() {
    return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(offsets)
        + RamUsageEstimator.sizeOf(scratch.bytes) + RamUsageEstimator.sizeOf(scratchUTF16.chars);
  }

  @Override
  public void checkIntegrity() throws IOException {}
}
TOP

Related Classes of org.apache.lucene.codecs.simpletext.SimpleTextStoredFieldsReader

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.