Package edu.umd.cloud9.webgraph.driver

Examples of edu.umd.cloud9.webgraph.driver.SortWebGraph$Partition


  public Partition execute(Partition iPart)
      throws WindowingException
  {
    PartitionIterator<Object> pItr = iPart.iterator();
    RuntimeUtils.connectLeadLagFunctionsToPartition(qDef, pItr);
    Partition outP = new Partition(getPartitionClass(),
        getPartitionMemSize(), tDef.getSerde(), OI);
    execute(pItr, outP);
    return outP;
  }
View Full Code Here


 
  protected void processInputPartition() throws HiveException
  {
    try
    {
      Partition outPart = Executor.executeChain(qDef, inputPart);
      Executor.executeSelectList(qDef, outPart, new ForwardPTF());
    }
    catch (WindowingException we)
    {
      throw new HiveException("Cannot close PTFOperator.", we);
View Full Code Here

  protected void processMapFunction() throws HiveException
  {
    try
    {
      TableFuncDef tDef = RuntimeUtils.getFirstTableFunction(qDef);
      Partition outPart = tDef.getFunction().transformRawInput(inputPart);
      PartitionIterator<Object> pItr = outPart.iterator();
      while (pItr.hasNext())
      {
        Object oRow = pItr.next();
        forward(oRow, outputObjInspector);
      }
View Full Code Here

    try
    {
      SerDe serDe = (SerDe) wIn.getDeserializer();
      StructObjectInspector oI = (StructObjectInspector) serDe
          .getObjectInspector();
      Partition p = new Partition(partitionClass, partitionMemSize,
          serDe, oI);
      Writable w = wIn.createRow();
      while( wIn.next(w) != -1)
      {
        p.append(w);
      }
      return p;
    }
    catch (WindowingException we)
    {
View Full Code Here

  @SuppressWarnings({ "unchecked", "rawtypes" })
  @Override
  public void execute(PartitionIterator<Object> pItr, Partition outP) throws WindowingException
  {
    ArrayList<List<?>> oColumns = new ArrayList<List<?>>();
    Partition iPart = pItr.getPartition();
    StructObjectInspector inputOI;
    try {
      inputOI = (StructObjectInspector) iPart.getSerDe().getObjectInspector();
    } catch (SerDeException se) {
      throw new WindowingException(se);
    }
   
    try
    {
      for(WindowFunctionDef wFn : wFnDefs)
      {
        boolean processWindow = wFn.getWindow() != null;
        pItr.reset();
        if ( !processWindow )
        {
          GenericUDAFEvaluator fEval = wFn.getEvaluator();
          Object[] args = new Object[wFn.getArgs().size()];
          AggregationBuffer aggBuffer = fEval.getNewAggregationBuffer();
          while(pItr.hasNext())
          {
            Object row = pItr.next();
            int i =0;
            for(ArgDef arg : wFn.getArgs())
            {
              args[i++] = arg.getExprEvaluator().evaluate(row);
            }
            fEval.aggregate(aggBuffer, args);
          }
          Object out = fEval.evaluate(aggBuffer);
          WindowFunctionInfo wFnInfo = FunctionRegistry.getWindowFunctionInfo(wFn.getSpec().getName());
          if ( !wFnInfo.isPivotResult())
          {
            out = new SameList(iPart.size(), out);
          }
          oColumns.add((List<?>)out);
        }
        else
        {
          oColumns.add(executeFnwithWindow(getQueryDef(), wFn, iPart));
        }
      }
     
      for(int i=0; i < iPart.size(); i++)
      {
        ArrayList oRow = new ArrayList();
        Object iRow = iPart.getAt(i);
       
        for(StructField f : inputOI.getAllStructFieldRefs())
        {
          oRow.add(inputOI.getStructFieldData(iRow, f));
        }
View Full Code Here

  public DocnoMapping getDocnoMapping() throws IOException {
    return loadDocnoMapping(indexPath, fs);
  }

  public static DocnoMapping loadDocnoMapping(String indexPath, FileSystem fs) throws IOException {
    DocnoMapping mDocMapping = null;
    // load the docid to docno mappings
    try {
      LOG.info("Loading DocnoMapping file...");
      RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);

      String className = env.readDocnoMappingClass();
      LOG.info(" - Class name: " + className);
      mDocMapping = (DocnoMapping) Class.forName(className).newInstance();

      Path mappingFile = env.getDocnoMappingData();
      LOG.info(" - File name: " + mappingFile);
      mDocMapping.loadMapping(mappingFile, fs);
      LOG.info("Done!");
    } catch (Exception e) {
      throw new IOException("Error initializing DocnoMapping!");
    }
    return mDocMapping;
View Full Code Here

      return -1;
    }

    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
    Path mappingFile = env.getDocnoMappingData();
    new ClueWarcDocnoMappingBuilder().build(new Path(collection), mappingFile, conf);

    conf.set(Constants.CollectionName, "ClueWeb:English:Segment" + segment);
    conf.set(Constants.CollectionPath, collection);
    conf.set(Constants.IndexPath, indexPath);
    conf.set(Constants.InputFormat, SequenceFileInputFormat.class.getCanonicalName());
View Full Code Here

    // (sequentially-number integer). If it doesn't exist create it.
    Path mappingFile = env.getDocnoMappingData();
    if (!fs.exists(mappingFile)) {
      sLogger.info(mappingFile + " doesn't exist, creating...");
      String[] arr = new String[] { collection, indexPath + "/medline-docid-tmp",  mappingFile.toString(), new Integer(numMappers).toString() };
      NumberMedlineCitations tool = new NumberMedlineCitations();
      tool.setConf(conf);
      tool.run(arr);

      fs.delete(new Path(indexPath + "/medline-docid-tmp"), true);
    }

    // Now we're ready to start the preprocessing pipeline... set
View Full Code Here

    if (!fs.exists(mappingFile)) {
      LOG.info("docno-mapping.dat doesn't exist, creating...");
      String[] arr = new String[] { collection, mappingDir.toString(),
              mappingFile.toString() };
      NumberTrecDocuments2 tool = new NumberTrecDocuments2();
      tool.setConf(conf);
      tool.run(arr);

      fs.delete(mappingDir, true);
    }

    conf.set(Constants.CollectionName, "TREC_vol45");
View Full Code Here

    if (!fs.exists(mappingFile)) {
      LOG.info("docno-mapping.dat doesn't exist, creating...");
      String[] arr = new String[] { collection, mappingDir.toString(),
          mappingFile.toString(), "100" };
      NumberTrecWebDocuments tool = new NumberTrecWebDocuments();
      tool.setConf(conf);
      tool.run(arr);

      fs.delete(mappingDir, true);
    }

    conf.set(Constants.CollectionName, "Wt10g");
View Full Code Here

TOP

Related Classes of edu.umd.cloud9.webgraph.driver.SortWebGraph$Partition

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.