Package cleo.search.bootstrap

Source Code of cleo.search.bootstrap.TypeaheadElementStoreBootstrap

/*
* Copyright (c) 2011 LinkedIn, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/

package cleo.search.bootstrap;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;

import cleo.search.ElementFactory;
import cleo.search.ElementSerializer;
import cleo.search.SimpleTypeaheadElementFactory;
import cleo.search.TypeaheadElement;
import cleo.search.TypeaheadElementSerializer;
import cleo.search.store.ArrayStoreElement;
import cleo.search.store.StoreFactory;
import cleo.search.util.CompositeTermsHandler;
import cleo.search.util.TermsDedup;
import cleo.search.util.TermsScanner;

import krati.core.segment.MemorySegmentFactory;
import krati.core.segment.SegmentFactory;
import krati.util.Chronos;

/**
* TypeaheadElementStoreBootstrap
*
* @author jwu
* @since 02/05, 2011
*/
public class TypeaheadElementStoreBootstrap extends ArrayStoreElementBootstrap<TypeaheadElement> {
  private final int indexStart;
  private final int indexEnd;
  private String[] line1Data;
  private String[] line2Data;
  private String[] line3Data;
  private String[] mediaData;
 
  public TypeaheadElementStoreBootstrap(ArrayStoreElement<TypeaheadElement> elementStore,
                                        ElementFactory<TypeaheadElement> elementFactory) throws Exception {
    super(elementStore, elementFactory);
    indexStart = elementStore.getIndexStart();
    indexEnd = indexStart + elementStore.capacity();
    line1Data = new String[elementStore.capacity()];
    line2Data = new String[elementStore.capacity()];
    line3Data = new String[elementStore.capacity()];
    mediaData = new String[elementStore.capacity()];
  }
 
  protected void loadData(File dataDir, String[] dataArray) {
    Chronos c = new Chronos();
    for(File f : dataDir.listFiles()) {
      if(f.isFile()) {
        try {
          loadDataFile(f, dataArray);
          System.out.printf("%s loaded in %d ms%n", f.getAbsolutePath(), c.tick());
        } catch (Exception e) {
          e.printStackTrace();
        }
      }
    }
  }
 
  protected void loadDataFile(File dataFile, String[] dataArray) throws IOException {
    final BufferedReader r = new BufferedReader(new FileReader(dataFile.getAbsolutePath()));
   
    String line = null;
    while((line = r.readLine()) != null) {
      line = line.trim().replaceAll("\\s+", " ");
      int ind = line.indexOf(' ');
      if(ind > 0) {
        try {
          int source = Integer.parseInt(line.substring(0, ind));
          if(indexStart <= source && source < indexEnd) {
            String rest = line.substring(ind + 1).trim();
            if(rest.length() > 0) {
              dataArray[source - indexStart] = rest;
            }
          }
        } catch(Exception e) {
          System.err.println(line);
          e.printStackTrace();
        }
      }
    }
   
    r.close();
  }
 
  public void loadLine1(File dataDir) throws IOException {
    if(dataDir.exists()) {
      loadData(dataDir, line1Data);
    }
  }
 
  public void loadLine2(File dataDir) throws IOException {
    if(dataDir.exists()) {
      loadData(dataDir, line2Data);
    }
  }

  public void loadLine3(File dataDir) throws IOException {
    if(dataDir.exists()) {
      loadData(dataDir, line3Data);
    }
  }
 
  public void loadMedia(File dataDir) throws IOException {
    if(dataDir.exists()) {
      loadData(dataDir, mediaData);
    }
  }
 
  @Override
  protected void customize(TypeaheadElement e) {
    int index = e.getElementId();
    if(indexStart <= index && index < indexEnd) {
      e.setLine1(line1Data[index - indexStart]);
      e.setLine2(line2Data[index - indexStart]);
      e.setLine3(line3Data[index - indexStart]);
      e.setMedia(mediaData[index - indexStart]);
    }
  }
 
  /**
   * <pre>
   * java TypeaheadElementStoreBootstrap -server -Xms2G -Xmx8G \
   *      elementStorePath idStart idCount elementStoreSegmentSizeMB \
   *      termsDir line1Dir line2Dir line3Dir mediaDir
   *     
   * java TypeaheadElementStoreBootstrap -server -Xms2G -Xmx8G \
   *      bootstrap/i001/question/typeahead/element-store 0 5000000 32 \
   *      bootstrap/i001/question/terms \
   *      bootstrap/i001/question/line1 \
   *      bootstrap/i001/question/line2 \
   *      bootstrap/i001/question/line3 \
   *      bootstrap/i001/question/media
   * </pre>
   *
   * @param args
   * @throws Exception
   */
  public static void main(String args[]) throws Exception {
    File elementStoreDir = new File(args[0]);
    int idStart = Integer.parseInt(args[1]);
    int idCount = Integer.parseInt(args[2]);
    int elementStoreSegMB = Integer.parseInt(args[3]);
    File termsDir = new File(args[4]);
    File line1Dir = new File(args[5]);
    File line2Dir = new File(args[6]);
    File line3Dir = new File(args[7]);
    File mediaDir = new File(args[8]);
   
    Chronos c = new Chronos();
   
    SegmentFactory elementStoreSegFactory = new MemorySegmentFactory();
    ElementFactory<TypeaheadElement> elementFactory = new SimpleTypeaheadElementFactory();
    ElementSerializer<TypeaheadElement> elementSerializer = new TypeaheadElementSerializer();
    ArrayStoreElement<TypeaheadElement> elementStore =
      StoreFactory.createElementStorePartition(elementStoreDir, idStart, idCount, elementStoreSegFactory, elementStoreSegMB, elementSerializer);
    TypeaheadElementStoreBootstrap elementStoreBootstrap =
      new TypeaheadElementStoreBootstrap(elementStore, elementFactory);
   
    elementStoreBootstrap.loadLine1(line1Dir);
    elementStoreBootstrap.loadLine2(line2Dir);
    elementStoreBootstrap.loadLine3(line3Dir);
    elementStoreBootstrap.loadMedia(mediaDir);
   
    TermsScanner scanner = new TermsScanner(termsDir);
    scanner.scan(new CompositeTermsHandler().add(new TermsDedup()).add(elementStoreBootstrap));
    elementStore.sync();
   
    System.out.printf("Bootstrap done in %d ms%n", c.tick());
  }
}
TOP

Related Classes of cleo.search.bootstrap.TypeaheadElementStoreBootstrap

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.