Package org.apache.hadoop.hbase.master

Examples of org.apache.hadoop.hbase.master.AssignmentManager$RegionsOnDeadServer


  @Override
  public void process() throws IOException {
    boolean gotException = true;
    try {
      AssignmentManager am = this.services.getAssignmentManager();
      try {
        if (this.shouldSplitHlog) {
          LOG.info("Splitting hbase:meta logs for " + serverName);
          if (this.distributedLogReplay) {
            Set<HRegionInfo> regions = new HashSet<HRegionInfo>();
            regions.add(HRegionInfo.FIRST_META_REGIONINFO);
            this.services.getMasterFileSystem().prepareLogReplay(serverName, regions);
          } else {
            this.services.getMasterFileSystem().splitMetaLog(serverName);
          }
          am.getRegionStates().logSplit(HRegionInfo.FIRST_META_REGIONINFO);
        }
      } catch (IOException ioe) {
        this.services.getExecutorService().submit(this);
        this.deadServers.add(serverName);
        throw new IOException("failed log splitting for " + serverName + ", will retry", ioe);
      }
 
      // Assign meta if we were carrying it.
      // Check again: region may be assigned to other where because of RIT
      // timeout
      if (am.isCarryingMeta(serverName)) {
        LOG.info("Server " + serverName + " was carrying META. Trying to assign.");
        am.regionOffline(HRegionInfo.FIRST_META_REGIONINFO);
        verifyAndAssignMetaWithRetries();
      } else if (!this.services.getCatalogTracker().isMetaLocationAvailable()) {
        // the meta location as per master is null. This could happen in case when meta assignment
        // in previous run failed, while meta znode has been updated to null. We should try to
        // assign the meta again.
        verifyAndAssignMetaWithRetries();
      } else {
        LOG.info("META has been assigned to otherwhere, skip assigning.");
      }

      try {
        if (this.shouldSplitHlog && this.distributedLogReplay) {
          if (!am.waitOnRegionToClearRegionsInTransition(HRegionInfo.FIRST_META_REGIONINFO,
            regionAssignmentWaitTimeout)) {
            // Wait here is to avoid log replay hits current dead server and incur a RPC timeout
            // when replay happens before region assignment completes.
            LOG.warn("Region " + HRegionInfo.FIRST_META_REGIONINFO.getEncodedName()
                + " didn't complete assignment in time");
View Full Code Here


    if (cpHost != null) {
      cpHost.preDeleteTableHandler(this.tableName);
    }

    // 1. Wait because of region in transition
    AssignmentManager am = this.masterServices.getAssignmentManager();
    RegionStates states = am.getRegionStates();
    long waitTime = server.getConfiguration().
      getLong("hbase.master.wait.on.region", 5 * 60 * 1000);
    for (HRegionInfo region : regions) {
      long done = System.currentTimeMillis() + waitTime;
      while (System.currentTimeMillis() < done) {
        if (states.isRegionInState(region, State.FAILED_OPEN)) {
          am.regionOffline(region);
        }
        if (!states.isRegionInTransition(region)) break;
        Threads.sleep(waitingTimeForEvents);
        LOG.debug("Waiting on region to clear regions in transition; "
          + am.getRegionStates().getRegionTransitionState(region));
      }
      if (states.isRegionInTransition(region)) {
        throw new IOException("Waited hbase.master.wait.on.region (" +
          waitTime + "ms) for region to leave region " +
          region.getRegionNameAsString() + " in transitions");
      }
    }

    // 2. Remove regions from META
    LOG.debug("Deleting regions from META");
    MetaEditor.deleteRegions(this.server.getCatalogTracker(), regions);

    // 3. Move the table in /hbase/.tmp
    MasterFileSystem mfs = this.masterServices.getMasterFileSystem();
    Path tempTableDir = mfs.moveTableToTemp(tableName);

    try {
      // 4. Delete regions from FS (temp directory)
      FileSystem fs = mfs.getFileSystem();
      for (HRegionInfo hri: regions) {
        LOG.debug("Archiving region " + hri.getRegionNameAsString() + " from FS");
        HFileArchiver.archiveRegion(fs, mfs.getRootDir(),
            tempTableDir, new Path(tempTableDir, hri.getEncodedName()));
      }

      // 5. Delete table from FS (temp directory)
      if (!fs.delete(tempTableDir, true)) {
        LOG.error("Couldn't delete " + tempTableDir);
      }

      LOG.debug("Table '" + tableName + "' archived!");
    } finally {
      // 6. Update table descriptor cache
      LOG.debug("Removing '" + tableName + "' descriptor.");
      this.masterServices.getTableDescriptors().remove(tableName);

      // 7. If entry for this table in zk, and up in AssignmentManager, remove it.
      LOG.debug("Marking '" + tableName + "' as deleted.");
      am.getZKTable().setDeletedTable(tableName);
    }

    if (cpHost != null) {
      cpHost.postDeleteTableHandler(this.tableName);
    }
View Full Code Here

      // executor pool is always available.
      //
      // If AssignmentManager hasn't finished rebuilding user regions,
      // we are not ready to assign dead regions either. So we re-queue up
      // the dead server for further processing too.
      AssignmentManager am = services.getAssignmentManager();
      if (isCarryingMeta() // hbase:meta
          || !am.isFailoverCleanupDone()) {
        this.services.getServerManager().processDeadServer(serverName, this.shouldSplitHlog);
        return;
      }

      // Wait on meta to come online; we need it to progress.
      // TODO: Best way to hold strictly here?  We should build this retry logic
      // into the MetaReader operations themselves.
      // TODO: Is the reading of hbase:meta necessary when the Master has state of
      // cluster in its head?  It should be possible to do without reading hbase:meta
      // in all but one case. On split, the RS updates the hbase:meta
      // table and THEN informs the master of the split via zk nodes in
      // 'unassigned' dir.  Currently the RS puts ephemeral nodes into zk so if
      // the regionserver dies, these nodes do not stick around and this server
      // shutdown processing does fixup (see the fixupDaughters method below).
      // If we wanted to skip the hbase:meta scan, we'd have to change at least the
      // final SPLIT message to be permanent in zk so in here we'd know a SPLIT
      // completed (zk is updated after edits to hbase:meta have gone in).  See
      // {@link SplitTransaction}.  We'd also have to be figure another way for
      // doing the below hbase:meta daughters fixup.
      NavigableMap<HRegionInfo, Result> hris = null;
      while (!this.server.isStopped()) {
        try {
          this.server.getCatalogTracker().waitForMeta();
          hris = MetaReader.getServerUserRegions(this.server.getCatalogTracker(),
            this.serverName);
          break;
        } catch (InterruptedException e) {
          Thread.currentThread().interrupt();
          throw new IOException("Interrupted", e);
        } catch (IOException ioe) {
          LOG.info("Received exception accessing hbase:meta during server shutdown of " +
            serverName + ", retrying hbase:meta read", ioe);
        }
      }
      if (this.server.isStopped()) {
        throw new IOException("Server is stopped");
      }

      try {
        if (this.shouldSplitHlog) {
          LOG.info("Splitting logs for " + serverName + " before assignment.");
          if (this.distributedLogReplay) {
            LOG.info("Mark regions in recovery before assignment.");
            Set<ServerName> serverNames = new HashSet<ServerName>();
            serverNames.add(serverName);
            this.services.getMasterFileSystem().prepareLogReplay(serverNames);
          } else {
            this.services.getMasterFileSystem().splitLog(serverName);
          }
          am.getRegionStates().logSplit(serverName);
        } else {
          LOG.info("Skipping log splitting for " + serverName);
        }
      } catch (IOException ioe) {
        resubmit(serverName, ioe);
      }

      // Clean out anything in regions in transition.  Being conservative and
      // doing after log splitting.  Could do some states before -- OPENING?
      // OFFLINE? -- and then others after like CLOSING that depend on log
      // splitting.
      List<HRegionInfo> regionsInTransition = am.processServerShutdown(serverName);
      LOG.info("Reassigning " + ((hris == null)? 0: hris.size()) +
        " region(s) that " + (serverName == null? "null": serverName+
        " was carrying (and " + regionsInTransition.size() +
        " regions(s) that were opening on this server)");

      List<HRegionInfo> toAssignRegions = new ArrayList<HRegionInfo>();
      toAssignRegions.addAll(regionsInTransition);

      // Iterate regions that were on this server and assign them
      if (hris != null) {
        RegionStates regionStates = am.getRegionStates();
        for (Map.Entry<HRegionInfo, Result> e: hris.entrySet()) {
          HRegionInfo hri = e.getKey();
          if (regionsInTransition.contains(hri)) {
            continue;
          }
          String encodedName = hri.getEncodedName();
          Lock lock = am.acquireRegionLock(encodedName);
          try {
            RegionState rit = regionStates.getRegionTransitionState(hri);
            if (processDeadRegion(hri, e.getValue(), am, server.getCatalogTracker())) {
              ServerName addressFromAM = regionStates.getRegionServerOfRegion(hri);
              if (addressFromAM != null && !addressFromAM.equals(this.serverName)) {
                // If this region is in transition on the dead server, it must be
                // opening or pending_open, which should have been covered by AM#processServerShutdown
                LOG.info("Skip assigning region " + hri.getRegionNameAsString()
                  + " because it has been opened in " + addressFromAM.getServerName());
                continue;
              }
              if (rit != null) {
                if (rit.getServerName() != null && !rit.isOnServer(serverName)) {
                  // Skip regions that are in transition on other server
                  LOG.info("Skip assigning region in transition on other server" + rit);
                  continue;
                }
                try{
                  //clean zk node
                  LOG.info("Reassigning region with rs = " + rit + " and deleting zk node if exists");
                  ZKAssign.deleteNodeFailSilent(services.getZooKeeper(), hri);
                  regionStates.updateRegionState(hri, State.OFFLINE);
                } catch (KeeperException ke) {
                  this.server.abort("Unexpected ZK exception deleting unassigned node " + hri, ke);
                  return;
                }
              } else if (regionStates.isRegionInState(
                  hri, State.SPLITTING_NEW, State.MERGING_NEW)) {
                regionStates.regionOffline(hri);
              }
              toAssignRegions.add(hri);
            } else if (rit != null) {
              if (rit.isPendingCloseOrClosing()
                  && am.getZKTable().isDisablingOrDisabledTable(hri.getTable())) {
                // If the table was partially disabled and the RS went down, we should clear the RIT
                // and remove the node for the region.
                // The rit that we use may be stale in case the table was in DISABLING state
                // but though we did assign we will not be clearing the znode in CLOSING state.
                // Doing this will have no harm. See HBASE-5927
                regionStates.updateRegionState(hri, State.OFFLINE);
                am.deleteClosingOrClosedNode(hri, rit.getServerName());
                am.offlineDisabledRegion(hri);
              } else {
                LOG.warn("THIS SHOULD NOT HAPPEN: unexpected region in transition "
                  + rit + " not to be assigned by SSH of server " + serverName);
              }
            }
          } finally {
            lock.unlock();
          }
        }
      }

      try {
        am.assign(toAssignRegions);
      } catch (InterruptedException ie) {
        LOG.error("Caught " + ie + " during round-robin assignment");
        throw new IOException(ie);
      }

      if (this.shouldSplitHlog && this.distributedLogReplay) {
        // wait for region assignment completes
        for (HRegionInfo hri : toAssignRegions) {
          try {
            if (!am.waitOnRegionToClearRegionsInTransition(hri, regionAssignmentWaitTimeout)) {
              // Wait here is to avoid log replay hits current dead server and incur a RPC timeout
              // when replay happens before region assignment completes.
              LOG.warn("Region " + hri.getEncodedName()
                  + " didn't complete assignment in time");
            }
View Full Code Here

      cpHost.preSnapshot(snapshot, desc);
    }

    // if the table is enabled, then have the RS run actually the snapshot work
    TableName snapshotTable = TableName.valueOf(snapshot.getTable());
    AssignmentManager assignmentMgr = master.getAssignmentManager();
    if (assignmentMgr.getZKTable().isEnabledTable(snapshotTable)) {
      LOG.debug("Table enabled, starting distributed snapshot.");
      snapshotEnabledTable(snapshot);
      LOG.debug("Started snapshot: " + ClientSnapshotDescriptionUtils.toString(snapshot));
    }
    // For disabled table, snapshot is created by the master
    else if (assignmentMgr.getZKTable().isDisabledTable(snapshotTable)) {
      LOG.debug("Table is disabled, running snapshot entirely on master.");
      snapshotDisabledTable(snapshot);
      LOG.debug("Started snapshot: " + ClientSnapshotDescriptionUtils.toString(snapshot));
    } else {
      LOG.error("Can't snapshot table '" + snapshot.getTable()
View Full Code Here

    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
    HMaster master = cluster.getMaster();
    HBaseAdmin localAdmin = createTable(tableName);
    List<HRegionInfo> tableRegions = localAdmin.getTableRegions(tableName);
    HRegionInfo hri = tableRegions.get(0);
    AssignmentManager am = master.getAssignmentManager();
    assertTrue("Region " + hri.getRegionNameAsString()
      + " should be assigned properly", am.waitForAssignment(hri));
    ServerName server = am.getRegionStates().getRegionServerOfRegion(hri);
    localAdmin.move(hri.getEncodedNameAsBytes(), Bytes.toBytes(server.getServerName()));
    assertEquals("Current region server and region server before move should be same.", server,
      am.getRegionStates().getRegionServerOfRegion(hri));
  }
View Full Code Here

    TEST_UTIL.startMiniCluster(3);

    executorService = new ThreadPoolExecutor(1, Integer.MAX_VALUE, 60, TimeUnit.SECONDS,
        new SynchronousQueue<Runnable>(), Threads.newDaemonThreadFactory("testhbck"));

    AssignmentManager assignmentManager =
      TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
    regionStates = assignmentManager.getRegionStates();
  }
View Full Code Here

  }

  @Override
  protected void handleTableOperation(List<HRegionInfo> regions)
  throws IOException, KeeperException {
    AssignmentManager am = this.masterServices.getAssignmentManager();
    long waitTime = server.getConfiguration().
      getLong("hbase.master.wait.on.region", 5 * 60 * 1000);
    for (HRegionInfo region : regions) {
      long done = System.currentTimeMillis() + waitTime;
      while (System.currentTimeMillis() < done) {
        AssignmentManager.RegionState rs = am.isRegionInTransition(region);
        if (rs == null) break;
        Threads.sleep(waitingTimeForEvents);
        LOG.debug("Waiting on  region to clear regions in transition; " + rs);
      }
      if (am.isRegionInTransition(region) != null) {
        throw new IOException("Waited hbase.master.wait.on.region (" +
          waitTime + "ms) for region to leave region " +
          region.getRegionNameAsString() + " in transitions");
      }
      LOG.debug("Deleting region " + region.getRegionNameAsString() +
        " from META and FS");
      // Remove region from META
      MetaEditor.deleteRegion(this.server.getCatalogTracker(), region);
      // Delete region from FS
      this.masterServices.getMasterFileSystem().deleteRegion(region);
    }
    // Delete table from FS
    this.masterServices.getMasterFileSystem().deleteTable(tableName);
    // Update table descriptor cache
    this.masterServices.getTableDescriptors().remove(Bytes.toString(tableName));

    // If entry for this table in zk, and up in AssignmentManager, remove it.
    // Call to undisableTable does this. TODO: Make a more formal purge table.
    am.getZKTable().setEnabledTable(Bytes.toString(tableName));
  }
View Full Code Here

    }
  }

  protected void verifyRoundRobinDistribution(HTable ht, int expectedRegions) throws IOException {
    MasterServices services = TEST_UTIL.getMiniHBaseCluster().getMaster();
    AssignmentManager am = services.getAssignmentManager();
    Map<HRegionInfo,HServerAddress> regions = ht.getRegionsInfo();
    for (HRegionInfo regionInfo : regions.keySet()) {
      try {
        am.waitForAssignment(regionInfo);
      } catch (InterruptedException e) {
        LOG.info("Interrupted waiting for region to be assigned during " +
            "create table call", e);
        Thread.currentThread().interrupt();
        return;
View Full Code Here

    }
  }

  protected void verifyRoundRobinDistribution(HTable ht, int expectedRegions) throws IOException {
    MasterServices services = TEST_UTIL.getMiniHBaseCluster().getMaster();
    AssignmentManager am = services.getAssignmentManager();
    Map<HRegionInfo,HServerAddress> regions = ht.getRegionsInfo();
    for (HRegionInfo regionInfo : regions.keySet()) {
      try {
        am.waitForAssignment(regionInfo);
      } catch (InterruptedException e) {
        LOG.info("Interrupted waiting for region to be assigned during " +
            "create table call", e);
        Thread.currentThread().interrupt();
        return;
View Full Code Here

  }

  @Override
  protected void handleTableOperation(List<HRegionInfo> regions)
  throws IOException, KeeperException {
    AssignmentManager am = this.masterServices.getAssignmentManager();
    long waitTime = server.getConfiguration().
      getLong("hbase.master.wait.on.region", 5 * 60 * 1000);
    for (HRegionInfo region : regions) {
      long done = System.currentTimeMillis() + waitTime;
      while (System.currentTimeMillis() < done) {
        AssignmentManager.RegionState rs = am.isRegionInTransition(region);
        if (rs == null) break;
        Threads.sleep(waitingTimeForEvents);
        LOG.debug("Waiting on  region to clear regions in transition; " + rs);
      }
      if (am.isRegionInTransition(region) != null) {
        throw new IOException("Waited hbase.master.wait.on.region (" +
          waitTime + "ms) for region to leave region " +
          region.getRegionNameAsString() + " in transitions");
      }
      LOG.debug("Deleting region " + region.getRegionNameAsString() +
        " from META and FS");
      // Remove region from META
      MetaEditor.deleteRegion(this.server.getCatalogTracker(), region);
      // Delete region from FS
      this.masterServices.getMasterFileSystem().deleteRegion(region);
    }
    // Delete table from FS
    this.masterServices.getMasterFileSystem().deleteTable(tableName);

    // If entry for this table in zk, and up in AssignmentManager, remove it.
    // Call to undisableTable does this. TODO: Make a more formal purge table.
    am.getZKTable().setEnabledTable(Bytes.toString(tableName));
  }
View Full Code Here

TOP

Related Classes of org.apache.hadoop.hbase.master.AssignmentManager$RegionsOnDeadServer

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.