Package org.apache.hadoop.hbase.master

Examples of org.apache.hadoop.hbase.master.AssignmentManager$RegionsOnDeadServer


            + " from list of regions to assign because region state: " + rs.getState());
        metaHRIs.remove(rs.getRegion());
      }
    }

    AssignmentManager assignmentManager = this.services.getAssignmentManager();
    for (Map.Entry<HRegionInfo, Result> e : metaHRIs.entrySet()) {
      RegionState rit =
          assignmentManager.getRegionsInTransition().get(e.getKey().getEncodedName());

      if (processDeadRegion(e.getKey(), e.getValue(), assignmentManager,
        this.server.getCatalogTracker())) {
        ServerName addressFromAM = assignmentManager.getRegionServerOfRegion(e.getKey());
        if (rit != null && !rit.isClosing() && !rit.isPendingClose() && !rit.isSplitting()
            && !ritsGoingToServer.contains(e.getKey())) {
          // Skip regions that were in transition unless CLOSING or
          // PENDING_CLOSE
          LOG.info("Skip assigning region " + rit.toString());
        } else if (addressFromAM != null && !addressFromAM.equals(this.serverName)) {
          LOG.debug("Skip assigning region " + e.getKey().getRegionNameAsString()
              + " because it has been opened in " + addressFromAM.getServerName());
          ritsGoingToServer.remove(e.getKey());
        } else {
          if (rit != null) {
            // clean zk node
            try {
              LOG.info("Reassigning region with rs =" + rit + " and deleting zk node if exists");
              ZKAssign.deleteNodeFailSilent(services.getZooKeeper(), e.getKey());
            } catch (KeeperException ke) {
              this.server.abort("Unexpected ZK exception deleting unassigned node " + e.getKey(),
                ke);
              return null;
            }
          }
          toAssign.add(e.getKey());
        }
      } else if (rit != null && (rit.isSplitting() || rit.isSplit())) {
        // This happens when the RS went down and the node-deleted callback for the SPLITTING or
        // SPLIT state has not yet been processed. In that case, if the region was actually split
        // but the RS went down before completing the split process, we will not try to assign
        // the parent region again. Instead we should take the region offline and also delete
        // the region from RIT.
        HRegionInfo region = rit.getRegion();
        AssignmentManager am = assignmentManager;
        am.regionOffline(region);
        ritsGoingToServer.remove(region);
      }
      // If the table was partially disabled and the RS went down, we should clear the RIT
      // and remove the node for the region. The rit that we use may be stale in case the table
      // was in DISABLING state but though we did assign we will not be clearing the znode in
View Full Code Here


    } catch (IOException e) {
      String node = ZKAssign.getNodeName(regionServer.getZooKeeper(), region
          .getRegionInfo().getEncodedName());

      assertFalse(ZKUtil.checkExists(regionServer.getZooKeeper(), node) == -1);
      AssignmentManager am = cluster.getMaster().getAssignmentManager();
      for (int i = 0; !am.getRegionsInTransition().containsKey(
          region.getRegionInfo().getEncodedName())
          && i < 100; i++) {
        Thread.sleep(200);
      }
      assertTrue("region is not in transition "+region,
          am.getRegionsInTransition().containsKey(region.getRegionInfo().getEncodedName()));
      RegionState regionState = am.getRegionsInTransition().get(region.getRegionInfo()
          .getEncodedName());
      assertTrue(regionState.getState() == RegionState.State.SPLITTING);
      assertTrue(st.rollback(regionServer, regionServer));
      assertTrue(ZKUtil.checkExists(regionServer.getZooKeeper(), node) == -1);
      for (int i=0; am.getRegionsInTransition().containsKey(region.getRegionInfo().getEncodedName()) && i<100; i++) {
        // Just in case the nodeDeleted event did not get executed.
        Thread.sleep(200);
      }
      assertFalse("region is still in transition",
          am.getRegionsInTransition().containsKey(region.getRegionInfo().getEncodedName()));
    }
    if (admin.isTableAvailable(tableName) && admin.isTableEnabled(tableName)) {
      admin.disableTable(tableName);
      admin.deleteTable(tableName);
      admin.close();
View Full Code Here

  @Override
  public void process() throws IOException {
    boolean gotException = true;
    try {
      AssignmentManager am = this.services.getAssignmentManager();
      try {
        if (this.shouldSplitHlog) {
          LOG.info("Splitting hbase:meta logs for " + serverName);
          if (this.distributedLogReplay) {
            Set<HRegionInfo> regions = new HashSet<HRegionInfo>();
            regions.add(HRegionInfo.FIRST_META_REGIONINFO);
            this.services.getMasterFileSystem().prepareLogReplay(serverName, regions);
          } else {
            this.services.getMasterFileSystem().splitMetaLog(serverName);
          }
          am.getRegionStates().logSplit(HRegionInfo.FIRST_META_REGIONINFO);
        }
      } catch (IOException ioe) {
        this.services.getExecutorService().submit(this);
        this.deadServers.add(serverName);
        throw new IOException("failed log splitting for " + serverName + ", will retry", ioe);
      }
 
      // Assign meta if we were carrying it.
      // Check again: the region may have been assigned elsewhere because of a RIT
      // timeout
      if (am.isCarryingMeta(serverName)) {
        LOG.info("Server " + serverName + " was carrying META. Trying to assign.");
        am.regionOffline(HRegionInfo.FIRST_META_REGIONINFO);
        verifyAndAssignMetaWithRetries();
      } else if (!this.services.getCatalogTracker().isMetaLocationAvailable()) {
        // the meta location as per master is null. This could happen in case when meta assignment
        // in previous run failed, while meta znode has been updated to null. We should try to
        // assign the meta again.
        verifyAndAssignMetaWithRetries();
      } else {
        LOG.info("META has been assigned to otherwhere, skip assigning.");
      }

      try {
        if (this.shouldSplitHlog && this.distributedLogReplay) {
          if (!am.waitOnRegionToClearRegionsInTransition(HRegionInfo.FIRST_META_REGIONINFO,
            regionAssignmentWaitTimeout)) {
            // Wait here is to avoid log replay hits current dead server and incur a RPC timeout
            // when replay happens before region assignment completes.
            LOG.warn("Region " + HRegionInfo.FIRST_META_REGIONINFO.getEncodedName()
                + " didn't complete assignment in time");
View Full Code Here

      cpHost.preSnapshot(snapshot, desc);
    }

    // If the table is enabled, have the RS actually run the snapshot work
    TableName snapshotTable = TableName.valueOf(snapshot.getTable());
    AssignmentManager assignmentMgr = master.getAssignmentManager();
    if (assignmentMgr.getZKTable().isEnabledTable(snapshotTable)) {
      LOG.debug("Table enabled, starting distributed snapshot.");
      snapshotEnabledTable(snapshot);
      LOG.debug("Started snapshot: " + ClientSnapshotDescriptionUtils.toString(snapshot));
    }
    // For disabled table, snapshot is created by the master
    else if (assignmentMgr.getZKTable().isDisabledTable(snapshotTable)) {
      LOG.debug("Table is disabled, running snapshot entirely on master.");
      snapshotDisabledTable(snapshot);
      LOG.debug("Started snapshot: " + ClientSnapshotDescriptionUtils.toString(snapshot));
    } else {
      LOG.error("Can't snapshot table '" + snapshot.getTable()
View Full Code Here

      // executor pool is always available.
      //
      // If AssignmentManager hasn't finished rebuilding user regions,
      // we are not ready to assign dead regions either. So we re-queue up
      // the dead server for further processing too.
      AssignmentManager am = services.getAssignmentManager();
      if (isCarryingMeta() // hbase:meta
          || !am.isFailoverCleanupDone()) {
        this.services.getServerManager().processDeadServer(serverName, this.shouldSplitHlog);
        return;
      }

      // Wait on meta to come online; we need it to progress.
      // TODO: Best way to hold strictly here?  We should build this retry logic
      // into the MetaReader operations themselves.
      // TODO: Is the reading of hbase:meta necessary when the Master has state of
      // cluster in its head?  It should be possible to do without reading hbase:meta
      // in all but one case. On split, the RS updates the hbase:meta
      // table and THEN informs the master of the split via zk nodes in
      // 'unassigned' dir.  Currently the RS puts ephemeral nodes into zk so if
      // the regionserver dies, these nodes do not stick around and this server
      // shutdown processing does fixup (see the fixupDaughters method below).
      // If we wanted to skip the hbase:meta scan, we'd have to change at least the
      // final SPLIT message to be permanent in zk so in here we'd know a SPLIT
      // completed (zk is updated after edits to hbase:meta have gone in).  See
      // {@link SplitTransaction}.  We'd also have to figure out another way of
      // doing the hbase:meta daughters fixup below.
      NavigableMap<HRegionInfo, Result> hris = null;
      while (!this.server.isStopped()) {
        try {
          this.server.getCatalogTracker().waitForMeta();
          hris = MetaReader.getServerUserRegions(this.server.getCatalogTracker(),
            this.serverName);
          break;
        } catch (InterruptedException e) {
          Thread.currentThread().interrupt();
          throw (InterruptedIOException)new InterruptedIOException().initCause(e);
        } catch (IOException ioe) {
          LOG.info("Received exception accessing hbase:meta during server shutdown of " +
            serverName + ", retrying hbase:meta read", ioe);
        }
      }
      if (this.server.isStopped()) {
        throw new IOException("Server is stopped");
      }

      try {
        if (this.shouldSplitHlog) {
          LOG.info("Splitting logs for " + serverName + " before assignment.");
          if (this.distributedLogReplay) {
            LOG.info("Mark regions in recovery before assignment.");
            Set<ServerName> serverNames = new HashSet<ServerName>();
            serverNames.add(serverName);
            this.services.getMasterFileSystem().prepareLogReplay(serverNames);
          } else {
            this.services.getMasterFileSystem().splitLog(serverName);
          }
          am.getRegionStates().logSplit(serverName);
        } else {
          LOG.info("Skipping log splitting for " + serverName);
        }
      } catch (IOException ioe) {
        resubmit(serverName, ioe);
      }

      // Clean out anything in regions in transition.  Being conservative and
      // doing after log splitting.  Could do some states before -- OPENING?
      // OFFLINE? -- and then others after like CLOSING that depend on log
      // splitting.
      List<HRegionInfo> regionsInTransition = am.processServerShutdown(serverName);
      LOG.info("Reassigning " + ((hris == null)? 0: hris.size()) +
        " region(s) that " + (serverName == null? "null": serverName+
        " was carrying (and " + regionsInTransition.size() +
        " regions(s) that were opening on this server)");

      List<HRegionInfo> toAssignRegions = new ArrayList<HRegionInfo>();
      toAssignRegions.addAll(regionsInTransition);

      // Iterate regions that were on this server and assign them
      if (hris != null) {
        RegionStates regionStates = am.getRegionStates();
        for (Map.Entry<HRegionInfo, Result> e: hris.entrySet()) {
          HRegionInfo hri = e.getKey();
          if (regionsInTransition.contains(hri)) {
            continue;
          }
          String encodedName = hri.getEncodedName();
          Lock lock = am.acquireRegionLock(encodedName);
          try {
            RegionState rit = regionStates.getRegionTransitionState(hri);
            if (processDeadRegion(hri, e.getValue(), am, server.getCatalogTracker())) {
              ServerName addressFromAM = regionStates.getRegionServerOfRegion(hri);
              if (addressFromAM != null && !addressFromAM.equals(this.serverName)) {
                // If this region is in transition on the dead server, it must be
                // opening or pending_open, which should have been covered by AM#processServerShutdown
                LOG.info("Skip assigning region " + hri.getRegionNameAsString()
                  + " because it has been opened in " + addressFromAM.getServerName());
                continue;
              }
              if (rit != null) {
                if (rit.getServerName() != null && !rit.isOnServer(serverName)) {
                  // Skip regions that are in transition on other server
                  LOG.info("Skip assigning region in transition on other server" + rit);
                  continue;
                }
                try{
                  //clean zk node
                  LOG.info("Reassigning region with rs = " + rit + " and deleting zk node if exists");
                  ZKAssign.deleteNodeFailSilent(services.getZooKeeper(), hri);
                  regionStates.updateRegionState(hri, State.OFFLINE);
                } catch (KeeperException ke) {
                  this.server.abort("Unexpected ZK exception deleting unassigned node " + hri, ke);
                  return;
                }
              } else if (regionStates.isRegionInState(
                  hri, State.SPLITTING_NEW, State.MERGING_NEW)) {
                regionStates.regionOffline(hri);
              }
              toAssignRegions.add(hri);
            } else if (rit != null) {
              if (rit.isPendingCloseOrClosing()
                  && am.getZKTable().isDisablingOrDisabledTable(hri.getTable())) {
                // If the table was partially disabled and the RS went down, we should clear the RIT
                // and remove the node for the region.
                // The rit that we use may be stale in case the table was in DISABLING state
                // but though we did assign we will not be clearing the znode in CLOSING state.
                // Doing this will have no harm. See HBASE-5927
                regionStates.updateRegionState(hri, State.OFFLINE);
                am.deleteClosingOrClosedNode(hri, rit.getServerName());
                am.offlineDisabledRegion(hri);
              } else {
                LOG.warn("THIS SHOULD NOT HAPPEN: unexpected region in transition "
                  + rit + " not to be assigned by SSH of server " + serverName);
              }
            }
          } finally {
            lock.unlock();
          }
        }
      }

      try {
        am.assign(toAssignRegions);
      } catch (InterruptedException ie) {
        LOG.error("Caught " + ie + " during round-robin assignment");
        throw (InterruptedIOException)new InterruptedIOException().initCause(ie);
      }

      if (this.shouldSplitHlog && this.distributedLogReplay) {
        // wait for region assignment completes
        for (HRegionInfo hri : toAssignRegions) {
          try {
            if (!am.waitOnRegionToClearRegionsInTransition(hri, regionAssignmentWaitTimeout)) {
              // Wait here is to avoid log replay hits current dead server and incur a RPC timeout
              // when replay happens before region assignment completes.
              LOG.warn("Region " + hri.getEncodedName()
                  + " didn't complete assignment in time");
            }
View Full Code Here

          FSUtils.getTableStoreFilePathMap(null, fs, rootDir, tableName);
      assertEquals("Expected nothing but found " + storefilesAfter.toString(),
          storefilesAfter.size(), 0);

      hri = region.getRegionInfo(); // split parent
      AssignmentManager am = cluster.getMaster().getAssignmentManager();
      RegionStates regionStates = am.getRegionStates();
      long start = EnvironmentEdgeManager.currentTimeMillis();
      while (!regionStates.isRegionInState(hri, State.SPLIT)) {
        assertFalse("Timed out in waiting split parent to be in state SPLIT",
          EnvironmentEdgeManager.currentTimeMillis() - start > 60000);
        Thread.sleep(500);
      }

      // We should not be able to assign it again
      am.assign(hri, true, true);
      assertFalse("Split region can't be assigned",
        regionStates.isRegionInTransition(hri));
      assertTrue(regionStates.isRegionInState(hri, State.SPLIT));

      // We should not be able to unassign it either
      am.unassign(hri, true, null);
      assertFalse("Split region can't be unassigned",
        regionStates.isRegionInTransition(hri));
      assertTrue(regionStates.isRegionInState(hri, State.SPLIT));
    } finally {
      admin.setBalancerRunning(true, false);
View Full Code Here

    } catch (IOException e) {
      String node = ZKAssign.getNodeName(regionServer.getZooKeeper(), region
          .getRegionInfo().getEncodedName());

      assertFalse(ZKUtil.checkExists(regionServer.getZooKeeper(), node) == -1);
      AssignmentManager am = cluster.getMaster().getAssignmentManager();
      for (int i = 0; !am.getRegionsInTransition().containsKey(
          region.getRegionInfo().getEncodedName())
          && i < 100; i++) {
        Thread.sleep(200);
      }
      assertTrue("region is not in transition "+region,
          am.getRegionsInTransition().containsKey(region.getRegionInfo().getEncodedName()));
      RegionState regionState = am.getRegionsInTransition().get(region.getRegionInfo()
          .getEncodedName());
      assertTrue(regionState.getState() == RegionState.State.SPLITTING);
      assertTrue(st.rollback(regionServer, regionServer));
      assertTrue(ZKUtil.checkExists(regionServer.getZooKeeper(), node) == -1);
      for (int i=0; am.getRegionsInTransition().containsKey(region.getRegionInfo().getEncodedName()) && i<100; i++) {
        // Just in case the nodeDeleted event did not get executed.
        Thread.sleep(200);
      }
      assertFalse("region is still in transition",
          am.getRegionsInTransition().containsKey(region.getRegionInfo().getEncodedName()));
    }
    if (admin.isTableAvailable(tableName) && admin.isTableEnabled(tableName)) {
      admin.disableTable(tableName);
      admin.deleteTable(tableName);
      admin.close();
View Full Code Here

        cp.wasBalanceCalled());
  }

  private void waitForRITtoBeZero(HMaster master) throws IOException {
    // wait for assignments to finish
    AssignmentManager mgr = master.getAssignmentManager();
    Collection<AssignmentManager.RegionState> transRegions =
        mgr.getRegionsInTransition().values();
    for (AssignmentManager.RegionState state : transRegions) {
      mgr.waitOnRegionToClearRegionsInTransition(state.getRegion());
    }
  }
View Full Code Here

            // has not yet happened for node Deleted event.  In that case if the region was actually split
            // but the RS had gone down before completing the split process then will not try to
            // assign the parent region again. In that case we should make the region offline and
            // also delete the region from RIT.
            HRegionInfo region = rit.getRegion();
            AssignmentManager am = this.services.getAssignmentManager();
            am.regionOffline(region);
          }
          // If the table was partially disabled and the RS went down, we should clear the RIT
          // and remove the node for the region.
          // The rit that we use may be stale in case the table was in DISABLING state
          // but though we did assign we will not be clearing the znode in CLOSING state.
          // Doing this will have no harm. See HBASE-5927
          if (rit != null
              && (rit.isClosing() || rit.isPendingClose())
              && this.services.getAssignmentManager().getZKTable()
                  .isDisablingOrDisabledTable(rit.getRegion().getTableNameAsString())) {
            HRegionInfo hri = rit.getRegion();
            AssignmentManager am = this.services.getAssignmentManager();
            am.deleteClosingOrClosedNode(hri);
            am.regionOffline(hri);
          }
        }
      }
    } finally {
      this.deadServers.finish(serverName);
View Full Code Here

        master.move(openRegions.get(i).getEncodedNameAsBytes(), destRS);
      }
    }

    // wait for assignments to finish
    AssignmentManager mgr = master.getAssignmentManager();
    Collection<AssignmentManager.RegionState> transRegions =
        mgr.getRegionsInTransition().values();
    for (AssignmentManager.RegionState state : transRegions) {
      mgr.waitOnRegionToClearRegionsInTransition(state.getRegion());
    }

    // now trigger a balance
    master.balanceSwitch(true);
    boolean balanceRun = master.balance();
View Full Code Here

TOP

Related Classes of org.apache.hadoop.hbase.master.AssignmentManager$RegionsOnDeadServer

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.