Package org.apache.jackrabbit.oak.plugins.blob

Source Code of org.apache.jackrabbit.oak.plugins.blob.MarkSweepGarbageCollector$Sweeper

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jackrabbit.oak.plugins.blob;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.sql.Timestamp;
import java.util.ArrayDeque;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;

import com.google.common.base.Charsets;
import com.google.common.base.Joiner;
import com.google.common.base.StandardSystemProperty;
import com.google.common.base.Stopwatch;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.common.io.Closeables;
import com.google.common.io.Files;
import com.google.common.util.concurrent.ListenableFutureTask;
import com.google.common.util.concurrent.MoreExecutors;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.LineIterator;
import org.apache.jackrabbit.oak.commons.IOUtils;
import org.apache.jackrabbit.oak.spi.blob.GarbageCollectableBlobStore;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Mark and sweep garbage collector.
*
* Uses the file system to store internal state while in process to account for huge data.
* This class is not thread safe.
*
*/
public class MarkSweepGarbageCollector implements BlobGarbageCollector {

    public static final Logger LOG = LoggerFactory.getLogger(MarkSweepGarbageCollector.class);

    public static final String NEWLINE = StandardSystemProperty.LINE_SEPARATOR.value();

    public static final String TEMP_DIR = StandardSystemProperty.JAVA_IO_TMPDIR.value();

    public static final int DEFAULT_BATCH_COUNT = 2048;

    public static enum State {NOT_RUNNING, MARKING, SWEEPING}

    /** The last modified time before current time of blobs to consider for garbage collection. */
    private final long maxLastModifiedInterval;

    /** Run concurrently when possible. */
    private final boolean runConcurrently;

    /** The blob store to be garbage collected. */
    private final GarbageCollectableBlobStore blobStore;

    /** Helper used to retrieve the blob references during the mark phase. **/
    private final BlobReferenceRetriever marker;
   
    /** The garbage collector file state */
    private final GarbageCollectorFileState fs;

    private final Executor executor;

    /** The batch count. */
    private final int batchCount;

    /** Flag to indicate the state of the gc **/
    private State state = State.NOT_RUNNING;

    /**
     * Creates an instance of MarkSweepGarbageCollector
     *
     * @param marker BlobReferenceRetriever instanced used to fetch refereedd blob entries
     * @param blobStore
     * @param root the root absolute path of directory under which temporary
     *             files would be created
     * @param batchCount batch sized used for saving intermediate state
     * @param runBackendConcurrently - run the backend iterate concurrently
     * @param maxLastModifiedInterval - lastModifiedTime in millis. Only files with time
     *                                less than this time would be considered for GC
     * @throws IOException Signals that an I/O exception has occurred.
     */
    public MarkSweepGarbageCollector(
            BlobReferenceRetriever marker,
            GarbageCollectableBlobStore blobStore,
            Executor executor,
            String root,
            int batchCount,
            boolean runBackendConcurrently,
            long maxLastModifiedInterval)
            throws IOException {
        this.executor = executor;
        this.blobStore = blobStore;
        this.marker = marker;
        this.batchCount = batchCount;
        this.runConcurrently = runBackendConcurrently;
        this.maxLastModifiedInterval = maxLastModifiedInterval;
        fs = new GarbageCollectorFileState(root);       
    }

    /**
     * Instantiates a new blob garbage collector with the default settings:
     * temporary files below {@link #TEMP_DIR}, a batch count of
     * {@link #DEFAULT_BATCH_COUNT}, concurrent retrieval of the blob ids enabled
     * and a max last modified interval of 24 hours.
     *
     * @param marker BlobReferenceRetriever instance used to fetch the referred blob entries
     * @param blobStore the blob store to be garbage collected
     * @param executor executor on which the background tasks of the collector are run
     * @throws IOException Signals that an I/O exception has occurred.
     */
    public MarkSweepGarbageCollector(
            BlobReferenceRetriever marker,
            GarbageCollectableBlobStore blobStore,
            Executor executor)
            throws IOException {
        this(marker, blobStore, executor, TEMP_DIR, DEFAULT_BATCH_COUNT, true, TimeUnit.HOURS.toMillis(24));
    }

    /**
     * Instantiates a new blob garbage collector with a custom max last modified
     * interval; temporary files are created below {@link #TEMP_DIR}, the batch
     * count is {@link #DEFAULT_BATCH_COUNT} and concurrent retrieval of the blob
     * ids is enabled.
     *
     * @param marker BlobReferenceRetriever instance used to fetch the referred blob entries
     * @param blobStore the blob store to be garbage collected
     * @param executor executor on which the background tasks of the collector are run
     * @param maxLastModifiedInterval interval in millis which is subtracted from the current
     *             time to obtain the last modified limit handed to the blob store
     * @throws IOException Signals that an I/O exception has occurred.
     */
    public MarkSweepGarbageCollector(
            BlobReferenceRetriever marker,
            GarbageCollectableBlobStore blobStore,
            Executor executor,
            long maxLastModifiedInterval)
            throws IOException {
        this(marker, blobStore, executor, TEMP_DIR, DEFAULT_BATCH_COUNT, true, maxLastModifiedInterval);
    }

    /**
     * Runs one garbage collection cycle by delegating to {@link #markAndSweep()}.
     *
     * @throws Exception any failure raised by the mark or the sweep phase
     */
    @Override
    public void collectGarbage() throws Exception {
        markAndSweep();
    }

    /**
     * Gets the state of the gc process.
     *
     * @return {@link State#MARKING} or {@link State#SWEEPING} while the respective
     *         phase is running, {@link State#NOT_RUNNING} otherwise
     */
    public State getState() {
        return state;
    }

    /**
     * Mark and sweep. Main method for GC.
     *
     * @throws Exception
     *             the exception
     */
    private void markAndSweep() throws IOException, InterruptedException {
        boolean threw = true;
        try {
            Stopwatch sw = Stopwatch.createStarted();
            LOG.info("Starting Blob garbage collection");

            mark();
            int deleteCount = sweep();
            threw = false;

            LOG.info("Blob garbage collection completed in {}. Number of blobs " +
                    "deleted [{}]", sw.toString(), deleteCount);
        } finally {
            Closeables.close(fs, threw);
            state = State.NOT_RUNNING;
        }
    }

    /**
     * Mark phase of the GC.
     */
    private void mark() throws IOException, InterruptedException {
        state = State.MARKING;
        LOG.debug("Starting mark phase of the garbage collector");

        // Find all blobs available in the blob store
        ListenableFutureTask<Integer> blobIdRetriever = ListenableFutureTask.create(new BlobIdRetriever());
        if (runConcurrently) {
            executor.execute(blobIdRetriever);
        } else {
            MoreExecutors.sameThreadExecutor().execute(blobIdRetriever);
        }

        // Find all blob references after iterating over the whole repository
        iterateNodeTree();

        try {
            blobIdRetriever.get();
        } catch (ExecutionException e) {
           LOG.warn("Error occurred while fetching all the blobIds from the BlobStore. GC would " +
                   "continue with the blobIds retrieved so far", e.getCause());
        }

        difference();
        LOG.debug("Ending mark phase of the garbage collector");
    }

    /**
     * Difference phase where the GC candidates are identified.
     *
     * @throws IOException
     *             Signals that an I/O exception has occurred.
     */
    private void difference() throws IOException {
        LOG.debug("Starting difference phase of the garbage collector");

        FileLineDifferenceIterator iter = new FileLineDifferenceIterator(
                fs.getMarkedRefs(),
                fs.getAvailableRefs(), batchCount);

        BufferedWriter bufferWriter = null;
        try {
            bufferWriter = Files.newWriter(fs.getGcCandidates(), Charsets.UTF_8);
            List<String> expiredSet = Lists.newArrayList();

            int numCandidates = 0;
            while (iter.hasNext()) {
                expiredSet.add(iter.next());
                if (expiredSet.size() > getBatchCount()) {
                    numCandidates += expiredSet.size();
                    saveBatchToFile(expiredSet, bufferWriter);
                }
            }

            if (!expiredSet.isEmpty()) {
                numCandidates += expiredSet.size();
                saveBatchToFile(expiredSet, bufferWriter);
            }
            LOG.debug("Found GC candidates - " + numCandidates);
        } finally {
            IOUtils.closeQuietly(bufferWriter);
        }

        LOG.debug("Ending difference phase of the garbage collector");
    }

    /**
     * Sweep phase of gc candidate deletion.
     *
     * @throws IOException
     *             Signals that an I/O exception has occurred.
     */
    private int sweep() throws IOException {
        int count = 0;
        state = State.SWEEPING;
        LOG.debug("Starting sweep phase of the garbage collector");

        ConcurrentLinkedQueue<String> exceptionQueue = new ConcurrentLinkedQueue<String>();

        LineIterator iterator =
                FileUtils.lineIterator(fs.getGcCandidates(), Charsets.UTF_8.name());
        List<String> ids = Lists.newArrayList();

        while (iterator.hasNext()) {
            ids.add(iterator.next());

            if (ids.size() > getBatchCount()) {
                count += ids.size();
                executor.execute(new Sweeper(ids, exceptionQueue));
                ids = Lists.newArrayList();
            }
        }
        if (!ids.isEmpty()) {
            count += ids.size();
            executor.execute(new Sweeper(ids, exceptionQueue));
        }

        count -= exceptionQueue.size();
        BufferedWriter writer = null;
        try {
            if (!exceptionQueue.isEmpty()) {
                writer = Files.newWriter(fs.getGarbage(), Charsets.UTF_8);
                saveBatchToFile(Lists.newArrayList(exceptionQueue), writer);
            }
        } finally {
            LineIterator.closeQuietly(iterator);
            IOUtils.closeQuietly(writer);
        }
        if(!exceptionQueue.isEmpty()) {
            LOG.warn("Unable to delete some blob entries from the blob store. Details around such blob entries " +
                    "can be found in [{}]", fs.getGarbage().getAbsolutePath());
        }
        LOG.debug("Ending sweep phase of the garbage collector");
        return count;
    }

    /**
     * Gets the batch count used when saving intermediate state.
     *
     * @return the batch count given at construction time
     */
    private int getBatchCount() {
        return batchCount;
    }

    private long getLastMaxModifiedTime(){
        return maxLastModifiedInterval > 0 ?
                System.currentTimeMillis() - maxLastModifiedInterval : 0;

    }

    /**
     * Save batch to file.
     */
    static void saveBatchToFile(List<String> ids, BufferedWriter writer) throws IOException {
        writer.append(Joiner.on(NEWLINE).join(ids));
        writer.append(NEWLINE);
        ids.clear();
        writer.flush();
    }

    /**
     * Sweeper thread.
     */
    class Sweeper implements Runnable {

        /** The exception queue. */
        private final ConcurrentLinkedQueue<String> exceptionQueue;

        /** The ids to sweep. */
        private final List<String> ids;

        public Sweeper(List<String> ids, ConcurrentLinkedQueue<String> exceptionQueue) {
            this.exceptionQueue = exceptionQueue;
            this.ids = ids;
        }

        @Override
        public void run() {
            try {
                LOG.debug("Blob ids to be deleted {}", ids);
                boolean deleted = blobStore.deleteChunks(ids,getLastMaxModifiedTime());
                if (!deleted) {
                    exceptionQueue.addAll(ids);
                }
            } catch (Exception e) {
                LOG.warn("Error occurred while deleting blob with ids [{}]", ids, e);
                exceptionQueue.addAll(ids);
            }
        }
    }

    /**
     * Iterates the complete node tree and collect all blob references
     */
    private void iterateNodeTree() throws IOException {
        final BufferedWriter writer = Files.newWriter(fs.getMarkedRefs(), Charsets.UTF_8);
        final AtomicInteger count = new AtomicInteger();
        try {
            marker.collectReferences(
                    new ReferenceCollector() {
                        private final List<String> idBatch = Lists.newArrayListWithCapacity(getBatchCount());

                        private final boolean debugMode = LOG.isTraceEnabled();

                        @Override
                        public void addReference(String blobId) {
                            if (debugMode) {
                                LOG.trace("BlobId : {}",blobId);
                            }

                            try {
                                Iterator<String> idIter = blobStore.resolveChunks(blobId);
                                while (idIter.hasNext()) {
                                    String id = idIter.next();
                                    idBatch.add(id);

                                    if (idBatch.size() >= getBatchCount()) {
                                        saveBatchToFile(idBatch, writer);
                                        idBatch.clear();
                                    }

                                    if (debugMode) {
                                        LOG.trace("chunkId : {}",id);
                                    }
                                    count.getAndIncrement();
                                }

                                if (!idBatch.isEmpty()) {
                                    saveBatchToFile(idBatch, writer);
                                    idBatch.clear();
                                }
                            } catch (Exception e) {
                                throw new RuntimeException("Error in retrieving references", e);
                            }
                        }
                    }
            );
            LOG.info("Number of valid blob references marked under mark phase of " +
                    "Blob garbage collection [{}]",count.get());
            // sort the marked references
            fs.sort(fs.getMarkedRefs());
        } finally {
            IOUtils.closeQuietly(writer);
        }
    }


    /**
     * BlobIdRetriever class to retrieve all blob ids.
     */
    private class BlobIdRetriever implements Callable<Integer> {
        @Override
        public Integer call() throws Exception {
            LOG.debug("Starting retrieve of all blobs");
            BufferedWriter bufferWriter = null;
            int blobsCount = 0;
            try {
                bufferWriter = new BufferedWriter(
                        new FileWriter(fs.getAvailableRefs()));
                Iterator<String> idsIter = blobStore.getAllChunkIds(getLastMaxModifiedTime());
               
                List<String> ids = Lists.newArrayList();

                while (idsIter.hasNext()) {
                    ids.add(idsIter.next());
                    if (ids.size() > getBatchCount()) {
                        blobsCount += ids.size();
                        saveBatchToFile(ids, bufferWriter);
                    }
                }

                if (!ids.isEmpty()) {
                    blobsCount += ids.size();
                    saveBatchToFile(ids, bufferWriter);
                }

                // sort the file
                fs.sort(fs.getAvailableRefs());
                LOG.debug("Number of blobs present in BlobStore : [{}] which have " +
                        "been last modified before [{}]", blobsCount, timestampToString(getLastMaxModifiedTime()));
            } finally {
                IOUtils.closeQuietly(bufferWriter);
            }
            return blobsCount;
        }


    }

    /**
     * FileLineDifferenceIterator class which iterates over the difference of 2 files line by line.
     */
    static class FileLineDifferenceIterator implements Iterator<String> {

        /** The marked references iterator. */
        private final LineIterator markedIter;

        /** The available references iter. */
        private final LineIterator allIter;

        private final ArrayDeque<String> queue;

        private final int batchSize;

        private boolean done;

        /** Temporary buffer. */
        private TreeSet<String> markedBuffer;

        /**
         * Instantiates a new file line difference iterator.
         */
        public FileLineDifferenceIterator(File marked, File available, int batchSize) throws IOException {
            this.markedIter = FileUtils.lineIterator(marked);
            this.allIter = FileUtils.lineIterator(available);
            this.batchSize = batchSize;
            queue = new ArrayDeque<String>(batchSize);
            markedBuffer = Sets.newTreeSet();

        }

        /**
         * Close.
         */
        private void close() {
            LineIterator.closeQuietly(markedIter);
            LineIterator.closeQuietly(allIter);
        }

        @Override
        public boolean hasNext() {
            if (!queue.isEmpty()) {
                return true;
            } else if (done) {
                return false;
            } else {
                if (!markedIter.hasNext() && !allIter.hasNext()) {
                    done = true;
                    close();
                    return false;
                } else {
                    queue.addAll(difference());
                    if (!queue.isEmpty()) {
                        return true;
                    } else {
                        done = true;
                        close();
                    }
                }
            }

            return false;
        }

        @Override
        public String next() {
            return nextDifference();
        }

        /**
         * Next difference.
         *
         * @return the string
         */
        public String nextDifference() {
            if (!hasNext()) {
                throw new NoSuchElementException("No more difference");
            }
            return queue.remove();
        }

        /**
         * Difference.
         *
         * @return the sets the
         */
        protected Set<String> difference() {
            TreeSet<String> gcSet = new TreeSet<String>();

            // Iterate till the gc candidate set is at least SAVE_BATCH_COUNT or
            // the
            // blob id set iteration is complete
            while (allIter.hasNext() &&
                    gcSet.size() < batchSize) {
                TreeSet<String> allBuffer = new TreeSet<String>();

                while (markedIter.hasNext() &&
                        markedBuffer.size() < batchSize) {
                    String stre = markedIter.next();
                    markedBuffer.add(stre);
                }
                while (allIter.hasNext() &&
                        allBuffer.size() < batchSize) {
                    String stre = allIter.next();
                    allBuffer.add(stre);
                }

                if (markedBuffer.isEmpty()) {
                    gcSet = allBuffer;
                } else {
                    gcSet.addAll(
                            Sets.difference(allBuffer, markedBuffer));

                    if (allBuffer.last().compareTo(markedBuffer.last()) < 0) {
                        // filling markedLeftoverBuffer
                        TreeSet<String> markedLeftoverBuffer = Sets.newTreeSet();
                        markedLeftoverBuffer.addAll(markedBuffer.tailSet(allBuffer.last(), false));
                        markedBuffer = markedLeftoverBuffer;
                        markedLeftoverBuffer = null;
                    } else {
                        markedBuffer.clear();
                    }
                }
            }

            return gcSet;
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    }

    /**
     * Provides a readable string for given timestamp
     */
    private static String timestampToString(long timestamp){
        return (new Timestamp(timestamp) + "00").substring(0, 23);
    }
}
TOP

Related Classes of org.apache.jackrabbit.oak.plugins.blob.MarkSweepGarbageCollector$Sweeper

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.