Package com.winvector.db

Source Code of com.winvector.db.LoadFFF$FFReader$FFIt

package com.winvector.db;

import java.io.File;
import java.io.IOException;
import java.io.LineNumberReader;
import java.net.URI;
import java.sql.Connection;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Date;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Properties;
import java.util.Random;

import com.winvector.db.DBUtil.DBHandle;
import com.winvector.util.BurstMap;
import com.winvector.util.TrivialReader;

public final class LoadFFF {
  public static final class FixedFieldDef {
    public final int l;
    public final int r;
    public final String name;
   
    public FixedFieldDef(final int l,
       final int r,
       final String name) {
      this.l = l;
      this.r = r;
      this.name = name;
    }
   
    @Override
    public String toString() {
      return name + "_[" + l + "," + r + ")";
    }
  }
 
  public static final class FFReader implements Iterable<BurstMap> {
    private final ArrayList<FixedFieldDef> fields;
    private final URI source;
   
    public FFReader(final ArrayList<FixedFieldDef> fields, final URI source) {
      this.fields = fields;
      this.source = source;
    }
   
    private final class FFIt implements Iterator<BurstMap> {
      private BurstMap next = null;
      private LineNumberReader rdr = null;
     
      public FFIt() throws IOException {
        rdr = TrivialReader.openBufferedReader(source,null);
        advance();
      }
     
      private void advance() throws IOException {
        next = null;
        while((next==null)&&(null!=rdr)) {
          final String line = rdr.readLine();
          if(null==line) {
            rdr.close();
            rdr = null;
          } else {
            if(line.trim().length()>0) {
              final Map<String,Object> map = new LinkedHashMap<String,Object>();
              final int lineLen = line.length();
              boolean gotOne = false;
              for(final FixedFieldDef fi: fields) {
                // numbering from zero
                if((fi.l<lineLen)&&(fi.r>fi.l)) {
                  final String val = line.substring(fi.l,Math.min(fi.r,lineLen)).trim();
                  gotOne |= val.length()>0;
                  map.put(fi.name,val);
                } else {
                  map.put(fi.name,null);
                }
              }
              if(gotOne) {
                next = new BurstMap(line,map);
              }
            }
          }
        }
      }
     
      @Override
      public boolean hasNext() {
        return null!=next;
      }

      @Override
      public BurstMap next() {
        if(!hasNext()) {
          throw new NoSuchElementException();
        }
        final BurstMap r = next;
        try {
          advance();
        } catch (IOException ex) {
          throw new RuntimeException(ex);
        }
        return r;
      }

      @Override
      public void remove() {
        throw new UnsupportedOperationException();
      }

      @Override
      public String toString() {
        return source.toString()
      }     
    }

    @Override
    public Iterator<BurstMap> iterator() {
      try {
        return new FFIt();
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }
   
    @Override
    public String toString() {
      return source.toString();
    }
  }
 
  private static ArrayList<FixedFieldDef> readFieldDefs(final Iterable<BurstMap> defSource) {
    final ArrayList<FixedFieldDef> fieldDefs = new ArrayList<FixedFieldDef>();
    for(final BurstMap di: defSource) {
      final String posStr = di.getAsString("Position");
      final String fieldName = di.getAsString("Field");
      final String[] range = posStr.split("-",2);
      final int l;
      final int r;
      // external numbering starts form 1 and is inclusive
      if(range.length>1) {
        l = Integer.parseInt(range[0])-1;
        r = Integer.parseInt(range[1]);
      } else {
        l = Integer.parseInt(posStr)-1;
        r = l + 1;
      }
      fieldDefs.add(new FixedFieldDef(l,r,fieldName));
    }
    return fieldDefs;
  }
 
  private static ArrayList<FixedFieldDef> notCoveredBySingles(ArrayList<FixedFieldDef> defs) {
    final ArrayList<FixedFieldDef> r = new ArrayList<FixedFieldDef>();
    final int defsSize = defs.size();
    final BitSet take = new BitSet(defsSize);
    final BitSet covered = new BitSet();
    for(int i=0;i<defsSize;++i) {
      final FixedFieldDef di = defs.get(i);
      if(di.r==di.l+1) {
        if(!covered.get(di.l)) {
          covered.set(di.l);
          take.set(i);
        }
      }
    }
    for(int i=0;i<defsSize;++i) {
      final FixedFieldDef di = defs.get(i);
      if(di.r>di.l+1) {
        boolean mis = false;
        for(int j=di.l;(!mis)&&(j<di.r);++j) {
          if(!covered.get(j)) {
            mis = true;
          }
        }
        if(mis) {
          take.set(i);
        }
      }
    }
    for(int i=0;i<defsSize;++i) {
      if(take.get(i)) {
        final FixedFieldDef di = defs.get(i);
        r.add(di);
      }
    }
    return r;
  }
 
  public static void main(final String[] args) throws Exception {
    final Date now = new Date();
    System.out.println("start LoadFFF\t" + now);
    System.out.println("\tfor details see: http://www.win-vector.com/blog/2011/01/sql-screwdriver/");
    System.out.println("\tfor latest version see: https://github.com/WinVector/SQLScrewdriver");
    final int firstSourceArg = 3;
    if(args.length<4) {
      throw new Exception("use: LoadFFF dbProbsURI fieldSpecURI.tsv tableName inputURI+");
    }
    final URI propsURI = new URI(args[0]);
    final URI fieldSpecURI = new URI(args[1]);
    final String tableName = args[2];
    System.out.println("\tcwd: " + (new File(".")).getAbsolutePath());
    System.out.println("\tDBProperties XML:\t" + propsURI.toString());
    System.out.println("\ttableName:\t" + tableName);
    final Properties props = DBUtil.loadProps(propsURI);
    final DBHandle handle = DBUtil.buildConnection(propsURI.toString(),props,false);
    System.out.println("\tdb:\t" + handle);
    final ArrayList<FixedFieldDef> origFieldDefs = readFieldDefs(new TrivialReader(fieldSpecURI,'\t',null,true,null, false));
    final ArrayList<FixedFieldDef> fieldDefs = notCoveredBySingles(origFieldDefs);
    try {
      handle.conn.setTransactionIsolation(Connection.TRANSACTION_READ_UNCOMMITTED);
    } catch (Exception ex) {
    }
    try {
      handle.conn.setAutoCommit(false);
    } catch (Exception ex) {
    }
    final TableControl tableControl = new TableControl(props,tableName);
    final ArrayList<Iterable<BurstMap>> sources = new ArrayList<Iterable<BurstMap>>();
    for(int argi=firstSourceArg;argi<args.length;++argi) {
      final URI inURI = new URI(args[argi]);
      final Iterable<BurstMap> source = new FFReader(fieldDefs,inURI);
      sources.add(source);
    }
    for(final Iterable<BurstMap> source: sources) {
      System.out.println("\tscan source URI:\t" + source.toString());
      tableControl.scanForDefs(source.toString(),source, null);
    }
    tableControl.buildSQLStatements();
    tableControl.createTable(handle);
    final Random rand = new Random(42);
    long nInserted = 0;
    for(final Iterable<BurstMap> source: sources) {
      System.out.println("\tload source URI:\t" + source);
      final long nI = tableControl.loadData(source.toString(),now,rand,source, null, handle);
      System.out.println("\tinserted " + nI + " rows");
      nInserted += nI;
    }
    handle.conn.close();
    System.out.println("\tdone, wrote " + nInserted + " rows\t" + new Date());
    System.out.println("done LoadFFF\t" + new Date());
  }
}
TOP

Related Classes of com.winvector.db.LoadFFF$FFReader$FFIt

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.