Package water.fvec

Examples of water.fvec.Vec$CollectDomain


      throw new IllegalArgumentException("vpredict cannot be class labels, expect probabilities.");
  }

  public void execImpl() {
    init();
    Vec va = null, vp;
    try {
      va = vactual.toEnum(); // always returns TransfVec
      vp = vpredict;
      // The vectors are from different groups => align them, but properly delete it after computation
      if (!va.group().equals(vp.group())) {
        vp = va.align(vp);
      }
      // compute thresholds, if not user-given
      if (thresholds != null) {
        sort(thresholds);
        if (ArrayUtils.minValue(thresholds) < 0) throw new IllegalArgumentException("Minimum threshold cannot be negative.");
        if (ArrayUtils.maxValue(thresholds) > 1) throw new IllegalArgumentException("Maximum threshold cannot be greater than 1.");
      } else {
        HashSet hs = new HashSet();
        final int bins = (int)Math.min(vpredict.length(), 200l);
        final long stride = Math.max(vpredict.length() / bins, 1);
        for( int i=0; i<bins; ++i) hs.add(new Float(vpredict.at(i*stride))); //data-driven thresholds TODO: use percentiles (from Summary2?)
        for (int i=0;i<51;++i) hs.add(new Float(i/50.)); //always add 0.02-spaced thresholds from 0 to 1

        // created sorted vector of unique thresholds
        thresholds = new float[hs.size()];
        int i=0;
        for (Object h : hs) {thresholds[i++] = (Float)h; }
        sort(thresholds);
      }
      // compute CMs
      aucdata = new AUCData().compute(new AUCTask(thresholds,va.mean()).doAll(va,vp).getCMs(), thresholds, va.factors(), threshold_criterion);
    } finally {       // Delete adaptation vectors
      if (va!=null) DKV.remove(va._key);
    }
  }
View Full Code Here


      throw new IllegalArgumentException("Predicted vector cannot be categorical for regression scoring.");
  }

  public void execImpl() {
    init();
    Vec va = null,vp = null, avp = null;
    try {
      if (classification) {
        // Create a new vectors - it is cheap since vector are only adaptation vectors
        va = vactual .toEnum(); // always returns TransfVec
        actual_domain = va.factors();
        vp = vpredict.toEnum(); // always returns TransfVec
        predicted_domain = vp.factors();
        if (!Arrays.equals(actual_domain, predicted_domain)) {
          domain = ArrayUtils.domainUnion(actual_domain, predicted_domain);
          int[][] vamap = Model.getDomainMapping(domain, actual_domain, true);
          va = TransfVec.compose( (TransfVec) va, vamap, domain, false ); // delete original va
          int[][] vpmap = Model.getDomainMapping(domain, predicted_domain, true);
          vp = TransfVec.compose( (TransfVec) vp, vpmap, domain, false ); // delete original vp
        } else domain = actual_domain;
        // The vectors are from different groups => align them, but properly delete it after computation
        if (!va.group().equals(vp.group())) {
          avp = vp;
          vp = va.align(vp);
        }
        cm = new CM(domain.length).doAll(va,vp)._cm;
      } else {
        mse = new CM(1).doAll(vactual,vpredict).mse();
      }
View Full Code Here

    if( !val.isFrame() ) throw new IllegalArgumentException("Not a Frame");
    Frame fr = val.get();

    // Peel out an optional column; restrict to this column
    if( column != null ) {
      Vec vec = fr.vec(column);
      if( vec==null ) throw new IllegalArgumentException("Column "+column+" not found in frame "+key);
      fr = new Frame(new String[]{column}, new Vec[]{vec});
    }

    f._fr = fr;
View Full Code Here

  /** Return a single column from the frame. */
  protected Schema column(int version, Frames f) { // TODO: should return a Vec schema
    Frame frame = getFromDKV(f.key);

    // TODO: We really want to return a different schema here!
    Vec vec = frame.vec(f.column);
    if (null == vec)
      throw new IllegalArgumentException("Did not find column: " + f.column + " in frame: " + f.key.toString());

    Vec[] vecs = { vec };
    String[] names = { f.column };
View Full Code Here

    return this.schema(version).fillFromImpl(f);
  }

  protected FramesBase columnSummary(int version, Frames frames) {
    Frame frame = getFromDKV(frames.key);
    Vec vec = frame.vec(frames.column);
    if (null == vec)
      throw new IllegalArgumentException("Did not find column: " + frames.column + " in frame: " + frames.key.toString());

    // Compute second pass of rollups: the histograms.  Side-effects the Vec.
    // TODO: side effects, ugh.
View Full Code Here

    _adaptedValidation = av[0];
//    gtrash(av[1]); // delete this after computation
    if (_fromValid2CM!=null) {
      assert classification : "Validation response transformation should be declared only for classification!";
      assert _fromModel2CM != null : "Model response transformation should exist if validation response transformation exists!";
      Vec tmp = _validResponse.toEnum();
      _adaptedValidationResponse = tmp.makeTransf(_fromValid2CM, getCMDomain()); // Add an original response adapted to CM domain
//      gtrash(_adaptedValidationResponse); // Add the created vector to a clean-up list
//      gtrash(tmp);
    }
  }
View Full Code Here

   * @param adaptFrm
   * @return
   */
  private Frame scoreImpl(Frame adaptFrm) {
    int ridx = adaptFrm.find(_output.responseName());
    Vec vecs[] = adaptFrm.vecs();
    assert ridx == -1 : "Adapted frame should not contain response in scoring method!";
    assert _output.nfeatures() == adaptFrm.numCols() : "Number of model features " + _output.nfeatures() + " != number of test set columns: " + adaptFrm.numCols();
    assert vecs.length == _output._names.length-1 : "Scoring data set contains wrong number of columns: " + vecs.length  + " instead of " + (_output._names.length-1);

    // Create a new vector for response
    // If the model produces a classification/enum, copy the domain into the
    // result vector.
    Vec v = adaptFrm.anyVec().makeZero(_output.classNames());
    adaptFrm.add("predict",v);
    if( _output.nclasses() > 1 ) {
      String prefix = "";
      for( int c=0; c<_output.nclasses(); c++ ) // if any class is the same as column name in frame, then prefix all classnames
        if (ArrayUtils.contains(adaptFrm._names, _output.classNames()[c])) { prefix = "class_"; break; }
View Full Code Here

    ArrayList<Vec> avecs = new ArrayList<>(); // adapted vectors
    ArrayList<String> anames = new ArrayList<>(); // names for adapted vector

    for( int c=0; c<map.length; c++ ) // Iterate over columns
      if(map[c] != null) { // Column needs adaptation
        Vec adaptedVec;
        if (toEnum[c]) { // Vector was flipped to column already, compose transformation
          adaptedVec = TransfVec.compose((TransfVec) frvecs[c], map[c], vfr.domains()[c], false);
        } else adaptedVec = frvecs[c].makeTransf(map[c], vfr.domains()[c]);
        avecs.add(frvecs[c] = adaptedVec);
        anames.add(names[c]); // Collect right names
View Full Code Here

  }

//  @Override
  public void execImpl() {
    init();
    Vec va = null;
    try {
      va = vactual.toEnum(); // always returns TransfVec
      actual_domain = va.factors();
      if (max_k > predict.numCols()-1) {
        Log.warn("Reducing Hitratio Top-K value to maximum value allowed: " + String.format("%,d", predict.numCols() - 1));
        max_k = predict.numCols() - 1;
      }
      final Frame actual_predict = new Frame(predict.names().clone(), predict.vecs().clone());
View Full Code Here

   * @param adaptFrm
   * @return
   */
  private Frame scoreImpl(Frame adaptFrm) {
    int ridx = adaptFrm.find(responseName());
    Vec vecs[] = adaptFrm.vecs();
    assert ridx == -1 : "Adapted frame should not contain response in scoring method!";
    assert nfeatures() == adaptFrm.numCols() : "Number of model features " + nfeatures() + " != number of test set columns: " + adaptFrm.numCols();
    assert vecs.length == _names.length-1 : "Scoring data set contains wrong number of columns: " + vecs.length  + " instead of " + (_names.length-1);

    // Create a new vector for response
    // If the model produces a classification/enum, copy the domain into the
    // result vector.
    Vec v = adaptFrm.anyVec().makeZero(classNames());
    adaptFrm.add("predict",v);
    if( nclasses() > 1 ) {
      String prefix = "";
      for( int c=0; c<nclasses(); c++ ) // if any class is the same as column name in frame, then prefix all classnames
        if (ArrayUtils.contains(adaptFrm._names, classNames()[c])) { prefix = "class_"; break; }
View Full Code Here

TOP

Related Classes of water.fvec.Vec$CollectDomain

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.