Package org.apache.hadoop.hbase.catalog

Source Code of org.apache.hadoop.hbase.catalog.CatalogTracker

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.catalog;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Abortable;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.NotAllMetaRegionsOnlineException;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.RetriesExhaustedException;
import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService;
import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.zookeeper.MetaRegionTracker;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.hadoop.ipc.RemoteException;

import java.io.EOFException;
import java.io.IOException;
import java.net.ConnectException;
import java.net.NoRouteToHostException;
import java.net.SocketException;
import java.net.SocketTimeoutException;
import java.net.UnknownHostException;

/**
* Tracks the availability of the catalog tables
* <code>hbase:meta</code>.
*
* This class is "read-only" in that the locations of the catalog tables cannot
* be explicitly set.  Instead, ZooKeeper is used to learn of the availability
* and location of <code>hbase:meta</code>.
*
* <p>Call {@link #start()} to start up operation.  Call {@link #stop()}} to
* interrupt waits and close up shop.
*/
@InterfaceAudience.Private
public class CatalogTracker {
  // TODO JDC 11/30 We don't even have ROOT anymore, revisit
  // TODO: This class needs a rethink.  The original intent was that it would be
  // the one-stop-shop for meta locations and that it would get this
  // info from reading and watching zk state.  The class was to be used by
  // servers when they needed to know of meta movement but also by
  // client-side (inside in HTable) so rather than figure meta
  // locations on fault, the client would instead get notifications out of zk.
  //
  // But this original intent is frustrated by the fact that this class has to
  // read an hbase table, the -ROOT- table, to figure out the hbase:meta region
  // location which means we depend on an HConnection.  HConnection will do
  // retrying but also, it has its own mechanism for finding root and meta
  // locations (and for 'verifying'; it tries the location and if it fails, does
  // new lookup, etc.).  So, at least for now, HConnection (or HTable) can't
  // have a CT since CT needs a HConnection (Even then, do want HT to have a CT?
  // For HT keep up a session with ZK?  Rather, shouldn't we do like asynchbase
  // where we'd open a connection to zk, read what we need then let the
  // connection go?).  The 'fix' is make it so both root and meta addresses
  // are wholey up in zk -- not in zk (root) -- and in an hbase table (meta).
  //
  // But even then, this class does 'verification' of the location and it does
  // this by making a call over an HConnection (which will do its own root
  // and meta lookups).  Isn't this verification 'useless' since when we
  // return, whatever is dependent on the result of this call then needs to
  // use HConnection; what we have verified may change in meantime (HConnection
  // uses the CT primitives, the root and meta trackers finding root locations).
  //
  // When meta is moved to zk, this class may make more sense.  In the
  // meantime, it does not cohere.  It should just watch meta and root and not
  // NOT do verification -- let that be out in HConnection since its going to
  // be done there ultimately anyways.
  //
  // This class has spread throughout the codebase.  It needs to be reigned in.
  // This class should be used server-side only, even if we move meta location
  // up into zk.  Currently its used over in the client package. Its used in
  // MetaReader and MetaEditor classes usually just to get the Configuration
  // its using (It does this indirectly by asking its HConnection for its
  // Configuration and even then this is just used to get an HConnection out on
  // the other end). I made https://issues.apache.org/jira/browse/HBASE-4495 for
  // doing CT fixup. St.Ack 09/30/2011.
  //

  // TODO: Timeouts have never been as advertised in here and its worse now
  // with retries; i.e. the HConnection retries and pause goes ahead whatever
  // the passed timeout is.  Fix.
  private static final Log LOG = LogFactory.getLog(CatalogTracker.class);
  private final HConnection connection;
  private final ZooKeeperWatcher zookeeper;
  private final MetaRegionTracker metaRegionTracker;
  private boolean instantiatedzkw = false;
  private Abortable abortable;

  private boolean stopped = false;

  static final byte [] META_REGION_NAME =
    HRegionInfo.FIRST_META_REGIONINFO.getRegionName();

  /**
   * Constructs a catalog tracker. Find current state of catalog tables.
   * Begin active tracking by executing {@link #start()} post construction. Does
   * not timeout.
   *
   * @param conf
   *          the {@link Configuration} from which a {@link HConnection} will be
   *          obtained; if problem, this connections
   *          {@link HConnection#abort(String, Throwable)} will be called.
   * @throws IOException
   */
  public CatalogTracker(final Configuration conf) throws IOException {
    this(null, conf, null);
  }

  /**
   * Constructs the catalog tracker.  Find current state of catalog tables.
   * Begin active tracking by executing {@link #start()} post construction.
   * Does not timeout.
   * @param zk If zk is null, we'll create an instance (and shut it down
   * when {@link #stop()} is called) else we'll use what is passed.
   * @param conf
   * @param abortable If fatal exception we'll call abort on this.  May be null.
   * If it is we'll use the Connection associated with the passed
   * {@link Configuration} as our Abortable.
   * @throws IOException
   */
  public CatalogTracker(final ZooKeeperWatcher zk, final Configuration conf,
      Abortable abortable)
  throws IOException {
    this(zk, conf, HConnectionManager.getConnection(conf), abortable);
  }

  public CatalogTracker(final ZooKeeperWatcher zk, final Configuration conf,
      HConnection connection, Abortable abortable)
  throws IOException {
    this.connection = connection;
    if (abortable == null) {
      // A connection is abortable.
      this.abortable = this.connection;
    }
    Abortable throwableAborter = new Abortable() {

      @Override
      public void abort(String why, Throwable e) {
        throw new RuntimeException(why, e);
      }

      @Override
      public boolean isAborted() {
        return true;
      }

    };
    if (zk == null) {
      // Create our own.  Set flag so we tear it down on stop.
      this.zookeeper =
        new ZooKeeperWatcher(conf, "catalogtracker-on-" + connection.toString(),
          abortable);
      instantiatedzkw = true;
    } else {
      this.zookeeper = zk;
    }
    this.metaRegionTracker = new MetaRegionTracker(zookeeper, throwableAborter);
  }

  /**
   * Starts the catalog tracker.
   * Determines current availability of catalog tables and ensures all further
   * transitions of either region are tracked.
   * @throws IOException
   * @throws InterruptedException
   */
  public void start() throws IOException, InterruptedException {
    LOG.debug("Starting catalog tracker " + this);
    try {
      this.metaRegionTracker.start();
    } catch (RuntimeException e) {
      Throwable t = e.getCause();
      this.abortable.abort(e.getMessage(), t);
      throw new IOException("Attempt to start meta tracker failed.", t);
    }
  }

  /**
   * Stop working.
   * Interrupts any ongoing waits.
   */
  public void stop() {
    if (!this.stopped) {
      LOG.debug("Stopping catalog tracker " + this);
      this.stopped = true;
      this.metaRegionTracker.stop();
      try {
        if (this.connection != null) {
          this.connection.close();
        }
      } catch (IOException e) {
        // Although the {@link Closeable} interface throws an {@link
        // IOException}, in reality, the implementation would never do that.
        LOG.error("Attempt to close catalog tracker's connection failed.", e);
      }
      if (this.instantiatedzkw) {
        this.zookeeper.close();
      }
    }
  }

  /**
   * Gets the current location for <code>hbase:meta</code> or null if location is
   * not currently available.
   * @return {@link ServerName} for server hosting <code>hbase:meta</code> or null
   * if none available
   * @throws InterruptedException
   */
  public ServerName getMetaLocation() throws InterruptedException {
    return this.metaRegionTracker.getMetaRegionLocation();
  }

  /**
   * Checks whether meta regionserver znode has some non null data.
   * @return true if data is not null, false otherwise.
   */
  public boolean isMetaLocationAvailable() {
    return this.metaRegionTracker.isLocationAvailable();
  }
  /**
   * Gets the current location for <code>hbase:meta</code> if available and waits
   * for up to the specified timeout if not immediately available.  Returns null
   * if the timeout elapses before root is available.
   * @param timeout maximum time to wait for root availability, in milliseconds
   * @return {@link ServerName} for server hosting <code>hbase:meta</code> or null
   * if none available
   * @throws InterruptedException if interrupted while waiting
   * @throws NotAllMetaRegionsOnlineException if meta not available before
   * timeout
   */
  public ServerName waitForMeta(final long timeout)
  throws InterruptedException, NotAllMetaRegionsOnlineException {
    ServerName sn = metaRegionTracker.waitMetaRegionLocation(timeout);
    if (sn == null) {
      throw new NotAllMetaRegionsOnlineException("Timed out; " + timeout + "ms");
    }
    return sn;
  }

  /**
   * Gets a connection to the server hosting meta, as reported by ZooKeeper,
   * waiting up to the specified timeout for availability.
   * @param timeout How long to wait on meta location
   * @see #waitForMeta for additional information
   * @return connection to server hosting meta
   * @throws InterruptedException
   * @throws NotAllMetaRegionsOnlineException if timed out waiting
   * @throws IOException
   * @deprecated Use #getMetaServerConnection(long)
   */
  public AdminService.BlockingInterface waitForMetaServerConnection(long timeout)
  throws InterruptedException, NotAllMetaRegionsOnlineException, IOException {
    return getMetaServerConnection(timeout);
  }

  /**
   * Gets a connection to the server hosting meta, as reported by ZooKeeper,
   * waiting up to the specified timeout for availability.
   * <p>WARNING: Does not retry.  Use an {@link HTable} instead.
   * @param timeout How long to wait on meta location
   * @see #waitForMeta for additional information
   * @return connection to server hosting meta
   * @throws InterruptedException
   * @throws NotAllMetaRegionsOnlineException if timed out waiting
   * @throws IOException
   */
  AdminService.BlockingInterface getMetaServerConnection(long timeout)
  throws InterruptedException, NotAllMetaRegionsOnlineException, IOException {
    return getCachedConnection(waitForMeta(timeout));
  }

  /**
   * Waits indefinitely for availability of <code>hbase:meta</code>.  Used during
   * cluster startup.  Does not verify meta, just that something has been
   * set up in zk.
   * @see #waitForMeta(long)
   * @throws InterruptedException if interrupted while waiting
   */
  public void waitForMeta() throws InterruptedException {
    while (!this.stopped) {
      try {
        if (waitForMeta(100) != null) break;
      } catch (NotAllMetaRegionsOnlineException e) {
        if (LOG.isTraceEnabled()) {
          LOG.trace("hbase:meta still not available, sleeping and retrying." +
          " Reason: " + e.getMessage());
        }
      }
    }
  }

  /**
   * @param sn ServerName to get a connection against.
   * @return The AdminProtocol we got when we connected to <code>sn</code>
   * May have come from cache, may not be good, may have been setup by this
   * invocation, or may be null.
   * @throws IOException
   */
  private AdminService.BlockingInterface getCachedConnection(ServerName sn)
  throws IOException {
    if (sn == null) {
      return null;
    }
    AdminService.BlockingInterface service = null;
    try {
      service = connection.getAdmin(sn);
    } catch (RetriesExhaustedException e) {
      if (e.getCause() != null && e.getCause() instanceof ConnectException) {
        // Catch this; presume it means the cached connection has gone bad.
      } else {
        throw e;
      }
    } catch (SocketTimeoutException e) {
      LOG.debug("Timed out connecting to " + sn);
    } catch (NoRouteToHostException e) {
      LOG.debug("Connecting to " + sn, e);
    } catch (SocketException e) {
      LOG.debug("Exception connecting to " + sn);
    } catch (UnknownHostException e) {
      LOG.debug("Unknown host exception connecting to  " + sn);
    } catch (IOException ioe) {
      Throwable cause = ioe.getCause();
      if (ioe instanceof ConnectException) {
        // Catch. Connect refused.
      } else if (cause != null && cause instanceof EOFException) {
        // Catch. Other end disconnected us.
      } else if (cause != null && cause.getMessage() != null &&
        cause.getMessage().toLowerCase().contains("connection reset")) {
        // Catch. Connection reset.
      } else {
        throw ioe;
      }

    }
    return service;
  }

  /**
   * Verify we can connect to <code>hostingServer</code> and that its carrying
   * <code>regionName</code>.
   * @param hostingServer Interface to the server hosting <code>regionName</code>
   * @param address The servername that goes with the <code>metaServer</code>
   * Interface.  Used logging.
   * @param regionName The regionname we are interested in.
   * @return True if we were able to verify the region located at other side of
   * the Interface.
   * @throws IOException
   */
  // TODO: We should be able to get the ServerName from the AdminProtocol
  // rather than have to pass it in.  Its made awkward by the fact that the
  // HRI is likely a proxy against remote server so the getServerName needs
  // to be fixed to go to a local method or to a cache before we can do this.
  private boolean verifyRegionLocation(AdminService.BlockingInterface hostingServer,
      final ServerName address, final byte [] regionName)
  throws IOException {
    if (hostingServer == null) {
      LOG.info("Passed hostingServer is null");
      return false;
    }
    Throwable t = null;
    try {
      // Try and get regioninfo from the hosting server.
      return ProtobufUtil.getRegionInfo(hostingServer, regionName) != null;
    } catch (ConnectException e) {
      t = e;
    } catch (RetriesExhaustedException e) {
      t = e;
    } catch (RemoteException e) {
      IOException ioe = e.unwrapRemoteException();
      t = ioe;
    } catch (IOException e) {
      Throwable cause = e.getCause();
      if (cause != null && cause instanceof EOFException) {
        t = cause;
      } else if (cause != null && cause.getMessage() != null
          && cause.getMessage().contains("Connection reset")) {
        t = cause;
      } else {
        t = e;
      }
    }
    LOG.info("Failed verification of " + Bytes.toStringBinary(regionName) +
      " at address=" + address + ", exception=" + t);
    return false;
  }

  /**
   * Verify <code>hbase:meta</code> is deployed and accessible.
   * @param timeout How long to wait on zk for meta address (passed through to
   * the internal call to {@link #waitForMetaServerConnection(long)}.
   * @return True if the <code>hbase:meta</code> location is healthy.
   * @throws IOException
   * @throws InterruptedException
   */
  public boolean verifyMetaRegionLocation(final long timeout)
  throws InterruptedException, IOException {
    AdminService.BlockingInterface service = null;
    try {
      service = waitForMetaServerConnection(timeout);
    } catch (NotAllMetaRegionsOnlineException e) {
      // Pass
    } catch (ServerNotRunningYetException e) {
      // Pass -- remote server is not up so can't be carrying root
    } catch (UnknownHostException e) {
      // Pass -- server name doesn't resolve so it can't be assigned anything.
    } catch (RegionServerStoppedException e) {
      // Pass -- server name sends us to a server that is dying or already dead.
    }
    return (service == null)? false:
      verifyRegionLocation(service,
          this.metaRegionTracker.getMetaRegionLocation(), META_REGION_NAME);
  }

  public HConnection getConnection() {
    return this.connection;
  }
}
TOP

Related Classes of org.apache.hadoop.hbase.catalog.CatalogTracker

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.