Package org.apache.hadoop.hbase.master

Examples of org.apache.hadoop.hbase.master.AssignmentManager$TimeoutMonitor


    // setup the snapshot
    prepareToTakeSnapshot(snapshot);

    // if the table is enabled, then have the RS run actually the snapshot work
    AssignmentManager assignmentMgr = master.getAssignmentManager();
    if (assignmentMgr.getZKTable().isEnabledTable(snapshot.getTable())) {
      LOG.debug("Table enabled, starting distributed snapshot.");
      snapshotEnabledTable(snapshot);
      LOG.debug("Started snapshot: " + SnapshotDescriptionUtils.toString(snapshot));
    }
    // For disabled table, snapshot is created by the master
    else if (assignmentMgr.getZKTable().isDisabledTable(snapshot.getTable())) {
      LOG.debug("Table is disabled, running snapshot entirely on master.");
      snapshotDisabledTable(snapshot);
      LOG.debug("Started snapshot: " + SnapshotDescriptionUtils.toString(snapshot));
    } else {
      LOG.error("Can't snapshot table '" + snapshot.getTable()
View Full Code Here


    }

    for (Map.Entry<HRegionInfo, Result> e : metaHRIs.entrySet()) {
      RegionState rit = services.getAssignmentManager().getRegionsInTransition().get(
          e.getKey().getEncodedName());
      AssignmentManager assignmentManager = this.services.getAssignmentManager();
      if (processDeadRegion(e.getKey(), e.getValue(), assignmentManager,
          this.server.getCatalogTracker())) {
        ServerName addressFromAM = assignmentManager.getRegionServerOfRegion(e.getKey());
        if (rit != null && !rit.isClosing() && !rit.isPendingClose() && !rit.isSplitting()
            && !ritsGoingToServer.contains(e.getKey())) {
          // Skip regions that were in transition unless CLOSING or
          // PENDING_CLOSE
          LOG.info("Skip assigning region " + rit.toString());
        } else if (addressFromAM != null && !addressFromAM.equals(this.serverName)) {
          LOG.debug("Skip assigning region " + e.getKey().getRegionNameAsString()
              + " because it has been opened in " + addressFromAM.getServerName());
          ritsGoingToServer.remove(e.getKey());
        } else {
          if (rit != null) {
            // clean zk node
            try {
              LOG.info("Reassigning region with rs =" + rit + " and deleting zk node if exists");
              ZKAssign.deleteNodeFailSilent(services.getZooKeeper(), e.getKey());
            } catch (KeeperException ke) {
              this.server.abort("Unexpected ZK exception deleting unassigned node " + e.getKey(),
                  ke);
              return null;
            }
          }
          toAssign.add(e.getKey());
        }
      } else if (rit != null && (rit.isSplitting() || rit.isSplit())) {
        // This will happen when the RS went down and the call back for the SPLIITING or SPLIT
        // has not yet happened for node Deleted event. In that case if the region was actually
        // split but the RS had gone down before completing the split process then will not try
        // to assign the parent region again. In that case we should make the region offline
        // and also delete the region from RIT.
        HRegionInfo region = rit.getRegion();
        AssignmentManager am = assignmentManager;
        am.regionOffline(region);
        ritsGoingToServer.remove(region);
      }
      // If the table was partially disabled and the RS went down, we should clear the RIT
      // and remove the node for the region. The rit that we use may be stale in case the table
      // was in DISABLING state but though we did assign we will not be clearing the znode in
View Full Code Here

    if (!assignmentManager.getZKTable().isDisablingOrDisabledTable(
        rit.getRegion().getTableNameAsString())) {
      return toAssign;
    }
    HRegionInfo hri = rit.getRegion();
    AssignmentManager am = assignmentManager;
    am.deleteClosingOrClosedNode(hri);
    am.regionOffline(hri);
    // To avoid region assignment if table is in disabling or disabled state.
    toAssign.remove(hri);
    regionsFromRIT.remove(hri);
    return toAssign;
  }
View Full Code Here

  @Override
  protected void handleTableOperation(List<HRegionInfo> regions)
  throws IOException, KeeperException {
    // 1. Wait because of region in transition
    AssignmentManager am = this.masterServices.getAssignmentManager();
    long waitTime = server.getConfiguration().
      getLong("hbase.master.wait.on.region", 5 * 60 * 1000);
    for (HRegionInfo region : regions) {
      long done = System.currentTimeMillis() + waitTime;
      while (System.currentTimeMillis() < done) {
        AssignmentManager.RegionState rs = am.isRegionInTransition(region);
        if (rs == null) break;
        Threads.sleep(waitingTimeForEvents);
        LOG.debug("Waiting on  region to clear regions in transition; " + rs);
      }
      if (am.isRegionInTransition(region) != null) {
        throw new IOException("Waited hbase.master.wait.on.region (" +
          waitTime + "ms) for region to leave region " +
          region.getRegionNameAsString() + " in transitions");
      }
    }

    // 2. Remove regions from META
    LOG.debug("Deleting regions from META");
    MetaEditor.deleteRegions(this.server.getCatalogTracker(), regions);

    // 3. Move the table in /hbase/.tmp
    LOG.debug("Moving table directory to a temp directory");
    MasterFileSystem mfs = this.masterServices.getMasterFileSystem();
    Path tempTableDir = mfs.moveTableToTemp(tableName);

    try {
      // 4. Delete regions from FS (temp directory)
      FileSystem fs = mfs.getFileSystem();
      for (HRegionInfo hri: regions) {
        LOG.debug("Archiving region " + hri.getRegionNameAsString() + " from FS");
        HFileArchiver.archiveRegion(fs, mfs.getRootDir(),
            tempTableDir, new Path(tempTableDir, hri.getEncodedName()));
      }

      // 5. Delete table from FS (temp directory)
      if (!fs.delete(tempTableDir, true)) {
        LOG.error("Couldn't delete " + tempTableDir);
      }
    } finally {
      // 6. Update table descriptor cache
      this.masterServices.getTableDescriptors().remove(Bytes.toString(tableName));

      // 7. If entry for this table in zk, and up in AssignmentManager, remove it.
      am.getZKTable().setDeletedTable(Bytes.toString(tableName));
    }
  }
View Full Code Here

    }
  }

  private void waitForRITtoBeZero(HMaster master) throws IOException {
    // wait for assignments to finish
    AssignmentManager mgr = master.getAssignmentManager();
    Collection<AssignmentManager.RegionState> transRegions =
        mgr.getRegionsInTransition().values();
    for (AssignmentManager.RegionState state : transRegions) {
      mgr.waitOnRegionToClearRegionsInTransition(state.getRegion());
    }
  }
View Full Code Here

    TEST_UTIL.startMiniCluster(3);

    executorService = new ThreadPoolExecutor(1, Integer.MAX_VALUE, 60, TimeUnit.SECONDS,
        new SynchronousQueue<Runnable>(), Threads.newDaemonThreadFactory("testhbck"));

    AssignmentManager assignmentManager =
      TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
    regionStates = assignmentManager.getRegionStates();
  }
View Full Code Here

    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
    HMaster master = cluster.getMaster();
    HBaseAdmin localAdmin = createTable(tableName);
    List<HRegionInfo> tableRegions = localAdmin.getTableRegions(tableName);
    HRegionInfo hri = tableRegions.get(0);
    AssignmentManager am = master.getAssignmentManager();
    assertTrue("Region " + hri.getRegionNameAsString()
      + " should be assigned properly", am.waitForAssignment(hri));
    ServerName server = am.getRegionStates().getRegionServerOfRegion(hri);
    localAdmin.move(hri.getEncodedNameAsBytes(), Bytes.toBytes(server.getServerName()));
    assertEquals("Current region server and region server before move should be same.", server,
      am.getRegionStates().getRegionServerOfRegion(hri));
  }
View Full Code Here

      List<HRegion> daughters = cluster.getRegions(tableName);
      assertTrue(daughters.size() == regions.size() + 1);

      HRegionInfo hri = region.getRegionInfo(); // split parent
      AssignmentManager am = cluster.getMaster().getAssignmentManager();
      RegionStates regionStates = am.getRegionStates();
      long start = EnvironmentEdgeManager.currentTimeMillis();
      while (!regionStates.isRegionInState(hri, State.SPLIT)) {
        assertFalse("Timed out in waiting split parent to be in state SPLIT",
          EnvironmentEdgeManager.currentTimeMillis() - start > 60000);
        Thread.sleep(500);
      }

      // We should not be able to assign it again
      am.assign(hri, true, true);
      assertFalse("Split region can't be assigned",
        regionStates.isRegionInTransition(hri));
      assertTrue(regionStates.isRegionInState(hri, State.SPLIT));

      // We should not be able to unassign it either
      am.unassign(hri, true, null);
      assertFalse("Split region can't be unassigned",
        regionStates.isRegionInTransition(hri));
      assertTrue(regionStates.isRegionInState(hri, State.SPLIT));
    } finally {
      admin.setBalancerRunning(true, false);
View Full Code Here

      // executor pool is always available.
      //
      // If AssignmentManager hasn't finished rebuilding user regions,
      // we are not ready to assign dead regions either. So we re-queue up
      // the dead server for further processing too.
      AssignmentManager am = services.getAssignmentManager();
      if (isCarryingMeta() // hbase:meta
          || !am.isFailoverCleanupDone()) {
        this.services.getServerManager().processDeadServer(serverName, this.shouldSplitHlog);
        return;
      }

      // Wait on meta to come online; we need it to progress.
      // TODO: Best way to hold strictly here?  We should build this retry logic
      // into the MetaReader operations themselves.
      // TODO: Is the reading of hbase:meta necessary when the Master has state of
      // cluster in its head?  It should be possible to do without reading hbase:meta
      // in all but one case. On split, the RS updates the hbase:meta
      // table and THEN informs the master of the split via zk nodes in
      // 'unassigned' dir.  Currently the RS puts ephemeral nodes into zk so if
      // the regionserver dies, these nodes do not stick around and this server
      // shutdown processing does fixup (see the fixupDaughters method below).
      // If we wanted to skip the hbase:meta scan, we'd have to change at least the
      // final SPLIT message to be permanent in zk so in here we'd know a SPLIT
      // completed (zk is updated after edits to hbase:meta have gone in).  See
      // {@link SplitTransaction}.  We'd also have to be figure another way for
      // doing the below hbase:meta daughters fixup.
      NavigableMap<HRegionInfo, Result> hris = null;
      while (!this.server.isStopped()) {
        try {
          this.server.getCatalogTracker().waitForMeta();
          hris = MetaReader.getServerUserRegions(this.server.getCatalogTracker(),
            this.serverName);
          break;
        } catch (InterruptedException e) {
          Thread.currentThread().interrupt();
          throw new IOException("Interrupted", e);
        } catch (IOException ioe) {
          LOG.info("Received exception accessing hbase:meta during server shutdown of " +
            serverName + ", retrying hbase:meta read", ioe);
        }
      }
      if (this.server.isStopped()) {
        throw new IOException("Server is stopped");
      }

      try {
        if (this.shouldSplitHlog) {
          LOG.info("Splitting logs for " + serverName + " before assignment.");
          if (this.distributedLogReplay) {
            LOG.info("Mark regions in recovery before assignment.");
            Set<ServerName> serverNames = new HashSet<ServerName>();
            serverNames.add(serverName);
            this.services.getMasterFileSystem().prepareLogReplay(serverNames);
          } else {
            this.services.getMasterFileSystem().splitLog(serverName);
          }
          am.getRegionStates().logSplit(serverName);
        } else {
          LOG.info("Skipping log splitting for " + serverName);
        }
      } catch (IOException ioe) {
        resubmit(serverName, ioe);
      }

      // Clean out anything in regions in transition.  Being conservative and
      // doing after log splitting.  Could do some states before -- OPENING?
      // OFFLINE? -- and then others after like CLOSING that depend on log
      // splitting.
      List<HRegionInfo> regionsInTransition = am.processServerShutdown(serverName);
      LOG.info("Reassigning " + ((hris == null)? 0: hris.size()) +
        " region(s) that " + (serverName == null? "null": serverName+
        " was carrying (and " + regionsInTransition.size() +
        " regions(s) that were opening on this server)");

      List<HRegionInfo> toAssignRegions = new ArrayList<HRegionInfo>();
      toAssignRegions.addAll(regionsInTransition);

      // Iterate regions that were on this server and assign them
      if (hris != null) {
        RegionStates regionStates = am.getRegionStates();
        for (Map.Entry<HRegionInfo, Result> e: hris.entrySet()) {
          HRegionInfo hri = e.getKey();
          if (regionsInTransition.contains(hri)) {
            continue;
          }
          String encodedName = hri.getEncodedName();
          Lock lock = am.acquireRegionLock(encodedName);
          try {
            RegionState rit = regionStates.getRegionTransitionState(hri);
            if (processDeadRegion(hri, e.getValue(), am, server.getCatalogTracker())) {
              ServerName addressFromAM = regionStates.getRegionServerOfRegion(hri);
              if (addressFromAM != null && !addressFromAM.equals(this.serverName)) {
                // If this region is in transition on the dead server, it must be
                // opening or pending_open, which should have been covered by AM#processServerShutdown
                LOG.info("Skip assigning region " + hri.getRegionNameAsString()
                  + " because it has been opened in " + addressFromAM.getServerName());
                continue;
              }
              if (rit != null) {
                if (rit.getServerName() != null && !rit.isOnServer(serverName)) {
                  // Skip regions that are in transition on other server
                  LOG.info("Skip assigning region in transition on other server" + rit);
                  continue;
                }
                try{
                  //clean zk node
                  LOG.info("Reassigning region with rs = " + rit + " and deleting zk node if exists");
                  ZKAssign.deleteNodeFailSilent(services.getZooKeeper(), hri);
                  regionStates.updateRegionState(hri, State.OFFLINE);
                } catch (KeeperException ke) {
                  this.server.abort("Unexpected ZK exception deleting unassigned node " + hri, ke);
                  return;
                }
              } else if (regionStates.isRegionInState(
                  hri, State.SPLITTING_NEW, State.MERGING_NEW)) {
                regionStates.regionOffline(hri);
              }
              toAssignRegions.add(hri);
            } else if (rit != null) {
              if (rit.isPendingCloseOrClosing()
                  && am.getZKTable().isDisablingOrDisabledTable(hri.getTable())) {
                // If the table was partially disabled and the RS went down, we should clear the RIT
                // and remove the node for the region.
                // The rit that we use may be stale in case the table was in DISABLING state
                // but though we did assign we will not be clearing the znode in CLOSING state.
                // Doing this will have no harm. See HBASE-5927
                regionStates.updateRegionState(hri, State.OFFLINE);
                am.deleteClosingOrClosedNode(hri);
                am.offlineDisabledRegion(hri);
              } else {
                LOG.warn("THIS SHOULD NOT HAPPEN: unexpected region in transition "
                  + rit + " not to be assigned by SSH of server " + serverName);
              }
            }
          } finally {
            lock.unlock();
          }
        }
      }

      try {
        am.assign(toAssignRegions);
      } catch (InterruptedException ie) {
        LOG.error("Caught " + ie + " during round-robin assignment");
        throw new IOException(ie);
      }

      if (this.shouldSplitHlog && this.distributedLogReplay) {
        // wait for region assignment completes
        for (HRegionInfo hri : toAssignRegions) {
          try {
            if (!am.waitOnRegionToClearRegionsInTransition(hri, regionAssignmentWaitTimeout)) {
              // Wait here is to avoid log replay hits current dead server and incur a RPC timeout
              // when replay happens before region assignment completes.
              LOG.warn("Region " + hri.getEncodedName()
                  + " didn't complete assignment in time");
            }
View Full Code Here

    TEST_UTIL.startMiniCluster(3);

    executorService = new ThreadPoolExecutor(1, Integer.MAX_VALUE, 60, TimeUnit.SECONDS,
        new SynchronousQueue<Runnable>(), Threads.newDaemonThreadFactory("testhbck"));

    AssignmentManager assignmentManager =
      TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
    regionStates = assignmentManager.getRegionStates();
  }
View Full Code Here

TOP

Related Classes of org.apache.hadoop.hbase.master.AssignmentManager$TimeoutMonitor

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.