Package bitronix.tm.recovery

Source Code of bitronix.tm.recovery.Recoverer$AtomicBoolean

/*
* Bitronix Transaction Manager
*
* Copyright (c) 2010, Bitronix Software.
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package bitronix.tm.recovery;

import bitronix.tm.BitronixXid;
import bitronix.tm.TransactionManagerServices;
import bitronix.tm.utils.Decoder;
import bitronix.tm.utils.ManagementRegistrar;
import bitronix.tm.utils.Uid;
import bitronix.tm.utils.Service;
import bitronix.tm.internal.*;
import bitronix.tm.journal.TransactionLogRecord;
import bitronix.tm.resource.ResourceLoader;
import bitronix.tm.resource.ResourceRegistrar;
import bitronix.tm.resource.common.XAResourceProducer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.transaction.Status;
import javax.transaction.xa.XAException;
import javax.transaction.xa.XAResource;
import javax.transaction.xa.Xid;
import java.io.IOException;
import java.util.*;

/**
* Recovery process implementation. Here is Mike Spille's description of XA recovery:
* <p>
* Straight Line Recovery:
* <ul>
*   <li>1. Find transactions that the TM considers dangling and unresolved</li>
*   <li>2. Find and reconstitute any {@link XAResource}s which were being used when chunk blowing occurred.</li>
*   <li>3. Call the <code>recover()</code> method on each of these {@link XAResource}s.</li>
*   <li>4. Throw out any {@link Xid}s in the {@link XAResource}s' recover lists which are not owned by this TM.</li>
*   <li>5. Correlate {@link Xid}'s that the TM knows about with remaining {@link Xid}'s that the {@link XAResource}s
*          reported.</li>
*   <li>6. For {@link XAResource} {@link Xid}'s that match the global transaction ID which the TM found dangling with
*          a "Committing..." record, call <code>commit()</code> on those {@link XAResource}s for those {@link Xid}s.</li>
*   <li>7. For {@link XAResource} {@link Xid}'s that do not match any dangling "Committing..." records, call
*          <code>rollback()</code> on those {@link XAResource}s for those {@link Xid}s.</li>
* </ul>
* Exceptional conditions:
* <ul>
*   <li>1. For any <code>rollback()</code> calls from step 7 which reported a Heuristic Commit, you are in danger or
*          doubt, so run in circles, scream and shout.</li>
*   <li>2. For any <code>commit()</code> calls from step 6 which reported a Heuristic Rollback, you are in danger or
*          doubt, so run in circles, scream and shout.</li>
*   <li>3. For any resource you can't reconstitute in step #2, or who fails on recover in step #3, or who reports
*          anything like an XAER_RMFAILURE in step 6 or step 7, keep trying to contact them in some implementation
*          defined manner.</li>
*   <li>4. For any heuristic outcome you see reported from an XAResource, call <code>forget()</code> for that
*          {@link XAResource}/{@link Xid} pair so that the resource can stop holding onto a reference to that transaction</li>
* </ul>
* </p>
* <p>To achieve this, {@link Recoverer} must have access to all previously used resources, even if the journal contains
* no trace of some of them. There are two ways of achieving this: either use the {@link ResourceLoader} to configure
* all your resources, in which case everything works automatically, or make sure resources are re-created and re-registered.</p>
* <p>Those are the three steps of the Bitronix implementation:
* <ul>
*   <li>call <code>recover()</code> on all known resources (Mike's steps 1 to 5)</li>
*   <li>commit dangling COMMITTING transactions (Mike's step 6)</li>
*   <li>rollback any remaining recovered transaction (Mike's step 7)</li>
* </ul></p>
*
* @author lorban
*/
public class Recoverer implements Runnable, Service, RecovererMBean {

    // Logger shared by all Recoverer instances.
    private final static Logger log = LoggerFactory.getLogger(Recoverer.class);

    // uniqueName (String) -> XAResourceProducer. Filled from ResourceRegistrar at the start of run(), pruned of
    // resources that fail to recover, and cleared when run() ends.
    private final Map registeredResources = new HashMap();
    // uniqueName (String) -> Set of BitronixXid recovered from that resource during the current run.
    // Cleared when run() ends.
    private final Map recoveredXidSets = new HashMap();

    // Exception that made the last completed run fail, or null when that run succeeded.
    private volatile Exception completionException;
    // Number of GTRIDs committed by the last run (reset to 0 when a run starts).
    private volatile int committedCount;
    // Number of branches rolled back by the last run (reset to 0 when a run starts).
    private volatile int rolledbackCount;
    // Number of runs that went past the "already running" check, incremented when each of them ends.
    private volatile int executionsCount;
    // Guard making sure only one run() executes at a time.
    private final AtomicBoolean isRunning = new AtomicBoolean(false);
    // Name under which this instance is registered with ManagementRegistrar.
    private final String jmxName;


    public Recoverer() {
        String serverId = TransactionManagerServices.getConfiguration().getServerId();
        if (serverId == null) serverId = "";
        this.jmxName = "bitronix.tm:type=Recoverer,ServerId=" + ManagementRegistrar.makeValidName(serverId);
        ManagementRegistrar.register(jmxName, this);
    }

    public void shutdown() {
        ManagementRegistrar.unregister(jmxName);
    }

    /**
     * Run the recovery process. This method is automatically called by the transaction manager, you should never
     * call it manually.
     */
    public void run() {
        if (!isRunning.compareAndSet(false, true)) {
            log.info("recoverer is already running, abandoning this recovery request");
            return;
        }

        try {
            committedCount = 0;
            rolledbackCount = 0;
            long oldestTransactionTimestamp = Long.MAX_VALUE;

            // Query resources from ResourceRegistrar
            synchronized (ResourceRegistrar.class) {
                Iterator it = ResourceRegistrar.getResourcesUniqueNames().iterator();
                while (it.hasNext()) {
                    String name = (String) it.next();
                    registeredResources.put(name, ResourceRegistrar.get(name));
                }

                if (TransactionManagerServices.isTransactionManagerRunning()) {
                    oldestTransactionTimestamp = TransactionManagerServices.getTransactionManager().getOldestInFlightTransactionTimestamp();
                }
            }

            Map danglingRecords = TransactionManagerServices.getJournal().collectDanglingRecords();

            // 1. call recover on all known resources
            recoverAllResources();

            // 2. commit dangling COMMITTING transactions
            Set committedGtrids = commitDanglingTransactions(oldestTransactionTimestamp, danglingRecords);
            committedCount = committedGtrids.size();

            // 3. rollback any remaining recovered transaction
            rolledbackCount = rollbackAbortedTransactions(oldestTransactionTimestamp, committedGtrids);

            if (executionsCount == 0 || committedCount > 0 || rolledbackCount > 0) {
                log.info("recovery committed " + committedCount + " dangling transaction(s) and rolled back " + rolledbackCount +
                        " aborted transaction(s) on " + registeredResources.size() + " resource(s) [" + getRegisteredResourcesUniqueNames() + "]" +
                        ((TransactionManagerServices.getConfiguration().isCurrentNodeOnlyRecovery()) ? " (restricted to serverId '" + TransactionManagerServices.getConfiguration().getServerId() + "')" : ""));
            }
            else if (log.isDebugEnabled()) {
                log.debug("recovery committed " + committedCount + " dangling transaction(s) and rolled back " + rolledbackCount +
                        " aborted transaction(s) on " + registeredResources.size() + " resource(s) [" + getRegisteredResourcesUniqueNames() + "]" +
                        ((TransactionManagerServices.getConfiguration().isCurrentNodeOnlyRecovery()) ? " (restricted to serverId '" + TransactionManagerServices.getConfiguration().getServerId() + "')" : ""));               
            }
            this.completionException = null;
        } catch (Exception ex) {
            this.completionException = ex;
            log.warn("recovery failed, registered resource(s): " + getRegisteredResourcesUniqueNames(), ex);
        }
        finally {
            recoveredXidSets.clear();
            registeredResources.clear();
            executionsCount++;
            isRunning.set(false);
        }
    }

    /**
     * Get the exception reported when recovery failed.
     * @return the exception that made recovery fail or null if last recovery execution was successful.
     */
    public Exception getCompletionException() {
        return completionException;
    }

    /**
     * Get the amount of transactions committed during the last recovery run.
     * @return the amount of committed transactions.
     */
    public int getCommittedCount() {
        return committedCount;
    }

    /**
     * Get the amount of transactions rolled back during the last recovery run.
     * @return the amount of rolled back transactions.
     */
    public int getRolledbackCount() {
        return rolledbackCount;
    }

    /**
     * Get how many times the recoverer has run since the transaction manager started.
     * @return how many times the recoverer has run since the transaction manager started.
     */
    public int getExecutionsCount() {
        return executionsCount;
    }

    /**
     * Check if the recoverer currently is running.
     * @return true if the recoverer currently is running, false otherwise.
     */
    public boolean isRunning() {
        return isRunning.get();
    }

    /**
     * Recover all configured resources and fill the <code>recoveredXidSets</code> with all recovered XIDs.
     * Step 1.
     */
    private void recoverAllResources() {
        Iterator it = new HashMap(registeredResources).entrySet().iterator(); // a cloned registeredResources Map must be iterated as the original one can be modified in the loop
        while (it.hasNext()) {
            Map.Entry entry = (Map.Entry) it.next();
            String uniqueName = (String) entry.getKey();
            XAResourceProducer producer = (XAResourceProducer) entry.getValue();

            try {
                if (log.isDebugEnabled()) log.debug("performing recovery on " + uniqueName);
                Set xids = recover(producer);
                if (log.isDebugEnabled()) log.debug("recovered " + xids.size() + " XID(s) from resource " + uniqueName);
                recoveredXidSets.put(uniqueName, xids);
                producer.setFailed(false);
            } catch (XAException ex) {
                producer.setFailed(true);
                registeredResources.remove(uniqueName);
                log.warn("error running recovery on resource '" + uniqueName + "', resource marked as failed (background recoverer will retry recovery) (error=" + Decoder.decodeXAExceptionErrorCode(ex) + ")", ex);
            } catch (Exception ex) {
                producer.setFailed(true);
                registeredResources.remove(uniqueName);
                log.warn("error running recovery on resource '" + uniqueName + "', resource marked as failed (background recoverer will retry recovery)", ex);
            }
        }
    }

    /**
     * Run the recovery process on the target resource.
     * Step 1.
     * @return a Set of BitronixXids.
     * @param producer the {@link XAResourceProducer} to recover.
     * @throws javax.transaction.xa.XAException if {@link XAResource#recover(int)} call fails.
     * @throws RecoveryException if an error preventing recovery happened.
     */
    private Set recover(XAResourceProducer producer) throws XAException, RecoveryException {
        if (producer == null)
            throw new IllegalArgumentException("recoverable resource cannot be null");

        try {
            if (log.isDebugEnabled()) log.debug("running recovery on " + producer);
            XAResourceHolderState xaResourceHolderState = producer.startRecovery();
            return RecoveryHelper.recover(xaResourceHolderState);
        } finally {
            producer.endRecovery();
        }
    }

    /**
     * Commit transactions that have a dangling COMMITTING record in the journal.
     * Step 2.
     * @param oldestTransactionTimestamp the timestamp of the oldest transaction still in-flight.
     * @param danglingRecords a Map using Uid objects GTRID as key and {@link TransactionLogRecord} as value.
     * @return a Set of all committed GTRIDs encoded as strings.
     * @throws java.io.IOException if there is an I/O error reading the journal.
     * @throws RecoveryException if an error preventing recovery happened.
     */
    private Set commitDanglingTransactions(long oldestTransactionTimestamp, Map danglingRecords) throws IOException, RecoveryException {
        Set committedGtrids = new HashSet();

        if (log.isDebugEnabled()) log.debug("found " + danglingRecords.size() + " dangling record(s) in journal");
        Iterator it = danglingRecords.entrySet().iterator();
        while (it.hasNext()) {
            Map.Entry entry = (Map.Entry) it.next();
            Uid gtrid = (Uid) entry.getKey();
            TransactionLogRecord tlog = (TransactionLogRecord) entry.getValue();

            Set uniqueNames = tlog.getUniqueNames();
            Set danglingTransactions = getDanglingTransactionsInRecoveredXids(uniqueNames, tlog.getGtrid());

            long txTimestamp = gtrid.extractTimestamp();
            if (log.isDebugEnabled()) log.debug("recovered XID timestamp: " + txTimestamp + " - oldest in-flight TX timestamp: " + oldestTransactionTimestamp);

            if (txTimestamp < oldestTransactionTimestamp) {
                if (log.isDebugEnabled()) log.debug("committing dangling transaction with GTRID " + gtrid);
                commit(danglingTransactions);
                if (log.isDebugEnabled()) log.debug("committed dangling transaction with GTRID " + gtrid);
                committedGtrids.add(gtrid);

                Set participatingUniqueNames = filterParticipatingUniqueNamesInRecoveredXids(uniqueNames);

                if (participatingUniqueNames.size() > 0) {
                    if (log.isDebugEnabled()) log.debug("updating journal's transaction with GTRID " + gtrid + " status to COMMITTED for names [" + buildUniqueNamesString(participatingUniqueNames) + "]");
                    TransactionManagerServices.getJournal().log(Status.STATUS_COMMITTED, tlog.getGtrid(), participatingUniqueNames);
                }
                else {
                    if (log.isDebugEnabled()) log.debug("not updating journal's transaction with GTRID " + gtrid + " status to COMMITTED as no resource could be found (incremental recovery will need to clean this)");
                    committedGtrids.remove(gtrid);
                }
            }
            else {
                if (log.isDebugEnabled()) log.debug("skipping in-flight transaction with GTRID " + gtrid);
            }
        }
        if (log.isDebugEnabled()) log.debug("committed " + committedGtrids.size() + " dangling transaction(s)");
        return committedGtrids;
    }

    /**
     * Return {@link DanglingTransaction}s with {@link Xid}s corresponding to the GTRID parameter found in resources
     * specified by their <code>uniqueName</code>s.
     * <code>recoverAllResources</code> must have been called before or else the returned list will always be empty.
     * Step 2.
     * @param uniqueNames a set of <code>uniqueName</code>s.
     * @param gtrid the GTRID to look for.
     * @return a set of {@link DanglingTransaction}s.
     */
    private Set getDanglingTransactionsInRecoveredXids(Set uniqueNames, Uid gtrid) {
        Set danglingTransactions = new HashSet();

        Iterator it = uniqueNames.iterator();
        while (it.hasNext()) {
            String uniqueName = (String) it.next();
            if (log.isDebugEnabled()) log.debug("finding dangling transaction(s) in recovered XID(s) of resource " + uniqueName);
            Set recoveredXids = (Set) recoveredXidSets.get(uniqueName);
            if (recoveredXids == null) {
                if (log.isDebugEnabled()) log.debug("resource " + uniqueName + " did not recover, skipping commit");
                continue;
            }

            Iterator it2 = recoveredXids.iterator();
            while (it2.hasNext()) {
                BitronixXid recoveredXid = (BitronixXid) it2.next();
                if (gtrid.equals(recoveredXid.getGlobalTransactionIdUid())) {
                    if (log.isDebugEnabled()) log.debug("found a recovered XID matching dangling log's GTRID " + gtrid + " in resource " + uniqueName);
                    danglingTransactions.add(new DanglingTransaction(uniqueName, recoveredXid));
                }
            } // while it2.hasNext()
        }

        return danglingTransactions;
    }

    private Set filterParticipatingUniqueNamesInRecoveredXids(Set uniqueNames) {
        Set recoveredUniqueNames = new HashSet();

        Iterator it = uniqueNames.iterator();
        while (it.hasNext()) {
            String uniqueName = (String) it.next();
            if (log.isDebugEnabled()) log.debug("finding dangling transaction(s) in recovered XID(s) of resource " + uniqueName);
            Set recoveredXids = (Set) recoveredXidSets.get(uniqueName);
            if (recoveredXids == null) {
                if (log.isDebugEnabled()) log.debug("cannot find resource '" + uniqueName + "' present in the journal, leaving it for incremental recovery");
            }
            else {
                recoveredUniqueNames.add(uniqueName);
            }
        }

        return recoveredUniqueNames;
    }

    /**
     * Commit all branches of a dangling transaction.
     * Step 2.
     * @param danglingTransactions a set of {@link DanglingTransaction}s to commit.
     * @throws RecoveryException if an error preventing recovery happened.
     */
    private void commit(Set danglingTransactions) throws RecoveryException {
        if (log.isDebugEnabled()) log.debug(danglingTransactions.size() + " branch(es) to commit");

        Iterator it = danglingTransactions.iterator();
        while (it.hasNext()) {
            DanglingTransaction danglingTransaction = (DanglingTransaction) it.next();
            Xid xid = danglingTransaction.getXid();
            String uniqueName = danglingTransaction.getUniqueName();

            if (log.isDebugEnabled()) log.debug("committing branch with XID " + xid + " on " + uniqueName);
            commit(uniqueName, xid);
        }
    }

    /**
     * Commit the specified branch of a dangling transaction.
     * Step 2.
     * @param uniqueName the unique name of the resource on which the commit should be done.
     * @param xid the {@link Xid} to commit.
     * @return true when commit was successful.
     * @throws RecoveryException if an error preventing recovery happened.
     */
    private boolean commit(String uniqueName, Xid xid) throws RecoveryException {
        XAResourceProducer producer = (XAResourceProducer) registeredResources.get(uniqueName);
        try {
            XAResourceHolderState xaResourceHolderState = producer.startRecovery();
            return RecoveryHelper.commit(xaResourceHolderState, xid);
        } finally {
            producer.endRecovery();
        }
    }

    /**
     * Rollback branches whose {@link Xid} has been recovered on the resource but hasn't been committed.
     * Those are the 'aborted' transactions of the Presumed Abort protocol.
     * Step 3.
     * @param oldestTransactionTimestamp the timestamp of the oldest transaction still in-flight.
     * @param committedGtrids a set of {@link Uid}s already committed on this resource.
     * @return the rolled back branches count.
     * @throws RecoveryException if an error preventing recovery happened.
     */
    private int rollbackAbortedTransactions(long oldestTransactionTimestamp, Set committedGtrids) throws RecoveryException {
        if (log.isDebugEnabled()) log.debug("rolling back aborted branch(es)");
        int rollbackCount = 0;
        Iterator it = recoveredXidSets.entrySet().iterator();
        while (it.hasNext()) {
            Map.Entry entry = (Map.Entry) it.next();
            String uniqueName = (String) entry.getKey();
            Set recoveredXids = (Set) entry.getValue();

            if (log.isDebugEnabled()) log.debug("checking " + recoveredXids.size() + " branch(es) on " + uniqueName + " for rollback");
            int count = rollbackAbortedBranchesOfResource(oldestTransactionTimestamp, uniqueName, recoveredXids, committedGtrids);
            if (log.isDebugEnabled()) log.debug("checked " + recoveredXids.size() + " branch(es) on " + uniqueName + " for rollback");
            rollbackCount += count;
        }

        if (log.isDebugEnabled()) log.debug("rolled back " + rollbackCount + " aborted branch(es)");
        return rollbackCount;
    }

    /**
     * Rollback aborted branches of the resource specified by uniqueName.
     * Step 3.
     * @param oldestTransactionTimestamp the timestamp of the oldest transaction still in-flight.
     * @param uniqueName the unique name of the resource on which to rollback branches.
     * @param recoveredXids a set of {@link BitronixXid} recovered on the reource.
     * @param committedGtrids a set of {@link Uid}s already committed on the resource.
     * @return the rolled back branches count.
     * @throws RecoveryException if an error preventing recovery happened.
     */
    private int rollbackAbortedBranchesOfResource(long oldestTransactionTimestamp, String uniqueName, Set recoveredXids, Set committedGtrids) throws RecoveryException {
        int abortedCount = 0;
        Iterator it = recoveredXids.iterator();
        while (it.hasNext()) {
            BitronixXid recoveredXid = (BitronixXid) it.next();
            if (committedGtrids.contains(recoveredXid.getGlobalTransactionIdUid())) {
                if (log.isDebugEnabled()) log.debug("XID has been committed, skipping rollback: " + recoveredXid + " on " + uniqueName);
                continue;
            }

            long txTimestamp = recoveredXid.getGlobalTransactionIdUid().extractTimestamp();
            if (log.isDebugEnabled()) log.debug("recovered XID timestamp: " + txTimestamp + " - oldest in-flight TX timestamp: " + oldestTransactionTimestamp);
            if (txTimestamp >= oldestTransactionTimestamp) {
                if (log.isDebugEnabled()) log.debug("skipping XID of in-flight transaction: " + recoveredXid);
                continue;
            }

            if (log.isDebugEnabled()) log.debug("rolling back in-doubt branch with XID " + recoveredXid + " on " + uniqueName);
            boolean success = rollback(uniqueName, recoveredXid);
            if (success)
                abortedCount++;
        }
        return abortedCount;
    }

    /**
     * Rollback the specified branch of a dangling transaction.
     * Step 3.
     * @param uniqueName the unique name of the resource on which to rollback branches.
     * @param xid the {@link Xid} to rollback.
     * @return true when rollback was successful.
     * @throws RecoveryException if an error preventing recovery happened.
     */
    private boolean rollback(String uniqueName, Xid xid) throws RecoveryException {
        XAResourceProducer producer = (XAResourceProducer) registeredResources.get(uniqueName);
        if (producer == null) {
            if (log.isDebugEnabled()) log.debug("resource " + uniqueName + " has not recovered, skipping rollback");
            return false;
        }

        try {
            XAResourceHolderState xaResourceHolderState = producer.startRecovery();
            return RecoveryHelper.rollback(xaResourceHolderState, xid);
        } finally {
            producer.endRecovery();
        }
    }

    /**
     * Build a string with comma-separated resources unique names.
     * @return the string.
     */
    private String getRegisteredResourcesUniqueNames() {
        return buildUniqueNamesString(registeredResources.keySet());
    }

    private static String buildUniqueNamesString(Set uniqueNames) {
        StringBuffer resourcesUniqueNames = new StringBuffer();
        Iterator it = uniqueNames.iterator();
        while (it.hasNext()) {
            String uniqueName = (String) it.next();
            resourcesUniqueNames.append(uniqueName);
            if (it.hasNext())
                resourcesUniqueNames.append(", ");
        }
        return resourcesUniqueNames.toString();
    }

    /**
     * A boolean value that may be updated atomically. This is a simplified subset of the JDK 1.5+
     * java.util.concurrent.atomic.AtomicBoolean class.
     */
    private static class AtomicBoolean {
        private boolean value;

        public AtomicBoolean(boolean value) {
            this.value = value;
        }

        public synchronized boolean get() {
            return value;
        }

        public synchronized void set(boolean value) {
            this.value = value;
        }

        /**
         * Atomically sets the value to the given updated value if the current value == the expected value.
         *
         * @param expect the expected value.
         * @param update the new value.
         * @return true if successful. False return indicates that the actual value was not equal to the expected value.
         */
        public synchronized boolean compareAndSet(boolean expect, boolean update) {
            if (this.value == expect) {
                this.value = update;
                return true;
            }
            return false;
        }

    }

}
TOP

Related Classes of bitronix.tm.recovery.Recoverer$AtomicBoolean

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.