Package org.apache.jackrabbit.oak.plugins.blob

Source Code of org.apache.jackrabbit.oak.plugins.blob.MarkSweepGarbageCollector$FileLineDifferenceIterator

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jackrabbit.oak.plugins.blob;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;

import com.google.common.base.Charsets;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.base.StandardSystemProperty;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.common.io.Files;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.LineIterator;
import org.apache.jackrabbit.oak.spi.blob.GarbageCollectableBlobStore;
import org.apache.jackrabbit.oak.commons.IOUtils;
import org.apache.jackrabbit.oak.api.Blob;
import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStore;
import org.apache.jackrabbit.oak.spi.state.NodeStore;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Mark and sweep garbage collector.
*
* Uses the file system to store internal state while in process to account for huge data.
*
*/
public class MarkSweepGarbageCollector implements BlobGarbageCollector {

    public static final Logger LOG = LoggerFactory.getLogger(MarkSweepGarbageCollector.class);

    public static final String NEWLINE = StandardSystemProperty.LINE_SEPARATOR.value();

    public static final String TEMP_DIR = StandardSystemProperty.JAVA_IO_TMPDIR.value();

    public static final int DEFAULT_BATCH_COUNT = 2048;

    public static final String NOT_RUNNING = "NotRunning";

    public static final String MARKING = "Running-Marking";

    public static final String SWEEPING = "Running-Sweeping";

    /** The max last modified time of blobs to consider for garbage collection. */
    private long maxLastModifiedTime;

    /** Run concurrently when possible. */
    private boolean runConcurrently = true;

    /** The number of sweeper threads to use. */
    private int numSweepers = 1;

    /** The node store. */
    private DocumentNodeStore nodeStore;
   
    /** The garbage collector file state */
    private GarbageCollectorFileState fs;

    /** The configured root to store gc process files. */
    private String root = TEMP_DIR;

    /** The batch count. */
    private int batchCount = DEFAULT_BATCH_COUNT;

    /** Flag to indicate whether to run in a debug mode **/
    private boolean debugMode = Boolean.getBoolean("debugModeGC") | LOG.isDebugEnabled();

    /** Flag to indicate the state of the gc **/
    private String state;

    /**
     * Gets the max last modified time considered for garbage collection.
     *
     * @return the max last modified time
     */
    protected long getMaxLastModifiedTime() {
        return maxLastModifiedTime;
    }

    /**
     * Sets the max last modified time considered for garbage collection.
     *
     * @param maxLastModifiedTime the new max last modified time
     */
    protected void setMaxLastModifiedTime(long maxLastModifiedTime) {
        this.maxLastModifiedTime = maxLastModifiedTime;
    }
   
    /**
     * Gets the root.
     *
     * @return the root
     */
    protected String getRoot() {
        return root;
    }

    /**
     * Gets the batch count.
     *
     * @return the batch count
     */
    protected int getBatchCount() {
        return batchCount;
    }

    /**
     * Checks if run concurrently.
     *
     * @return true, if is run concurrently
     */
    protected boolean isRunConcurrently() {
        return runConcurrently;
    }

    /**
     * Gets the number sweepers.
     *
     * @return the number sweepers
     */
    protected int getNumSweepers() {
        return numSweepers;
    }

    /**
     * Gets the state of the gc process.
     *
     * @return the state
     */
    protected String getState() {
        return state;
    }

    /**
     * @param nodeStore the node store
     * @param root the root
     * @param batchCount the batch count
     * @param runBackendConcurrently - run the backend iterate concurrently
     * @param maxSweeperThreads the max sweeper threads
     * @param maxLastModifiedTime the max last modified time
     * @throws IOException Signals that an I/O exception has occurred.
     */
    public void init(
            NodeStore nodeStore,
            String root,
            int batchCount,
            boolean runBackendConcurrently,
            int maxSweeperThreads,
            long maxLastModifiedTime)
            throws IOException {
        this.batchCount = batchCount;
        this.root = root;
        this.runConcurrently = runBackendConcurrently;
        this.numSweepers = maxSweeperThreads;
        this.maxLastModifiedTime = maxLastModifiedTime;       
        init(nodeStore);
    }

    /**
     * Instantiates a new blob garbage collector.
     *
     * @param nodeStore
     *            the node store
     * @throws IOException
     *             Signals that an I/O exception has occurred.
     */
    public void init(NodeStore nodeStore) throws IOException {
        Preconditions.checkState(!Strings.isNullOrEmpty(root));
        this.nodeStore = (DocumentNodeStore) nodeStore;
        fs = new GarbageCollectorFileState(root);
    }

    @Override
    public void collectGarbage() throws Exception {
        markAndSweep();
    }

    /**
     * Mark and sweep. Main method for GC.
     *
     * @throws Exception
     *             the exception
     */
    protected void markAndSweep() throws Exception {
        try {
            LOG.debug("Starting garbage collector");

            mark();
            difference();
            sweep();

            LOG.debug("garbage collector finished");
        } finally {
            fs.complete();
            state = NOT_RUNNING;
        }
    }

    /**
     * Mark phase of the GC.
     *
     * @throws Exception
     *             the exception
     */
    protected void mark() throws Exception {
        state = MARKING;
        LOG.debug("Starting mark phase of the garbage collector");

        // Find all blobs available in the blob store
        Thread blobIdRetrieverThread = null;
        if (runConcurrently) {
            blobIdRetrieverThread = new Thread(new BlobIdRetriever(),
                    this.getClass().getSimpleName() + "-MarkThread");
            blobIdRetrieverThread.setDaemon(true);
            blobIdRetrieverThread.start();
        } else {
            (new BlobIdRetriever()).retrieve();
        }

        // Find all blob references after iterating over the whole repository
        iterateNodeTree();

        if (runConcurrently) {
            if (blobIdRetrieverThread.isAlive()) {
                blobIdRetrieverThread.join();
            }
        }

        LOG.debug("Ending mark phase of the garbage collector");
    }

    /**
     * Difference phase where the GC candidates are identified.
     *
     * @throws IOException
     *             Signals that an I/O exception has occurred.
     */
    protected void difference() throws IOException {
        LOG.debug("Starting difference phase of the garbage collector");

        FileLineDifferenceIterator<String> iter = new FileLineDifferenceIterator<String>(
                fs.getMarkedRefs(),
                fs.getAvailableRefs());

        BufferedWriter bufferWriter = null;
        try {
            bufferWriter = Files.newWriter(fs.getGcCandidates(), Charsets.UTF_8);
            List<String> expiredSet = Lists.newArrayList();

            int numCandidates = 0;
            while (iter.hasNext()) {
                expiredSet.add(iter.next());
                if (expiredSet.size() > getBatchCount()) {
                    numCandidates += expiredSet.size();
                    saveBatchToFile(expiredSet, bufferWriter);
                }
            }

            if (!expiredSet.isEmpty()) {
                numCandidates += expiredSet.size();
                saveBatchToFile(expiredSet, bufferWriter);
            }
            LOG.debug("Found GC candidates - " + numCandidates);
        } finally {
            IOUtils.closeQuietly(bufferWriter);
        }

        LOG.debug("Ending difference phase of the garbage collector");
    }

    /**
     * Sweep phase of gc candidate deletion.
     *
     * @throws IOException
     *             Signals that an I/O exception has occurred.
     */
    protected void sweep() throws IOException {
        state = SWEEPING;       
        LOG.debug("Starting sweep phase of the garbage collector");

        ConcurrentLinkedQueue<String> exceptionQueue = new ConcurrentLinkedQueue<String>();
        ExecutorService executorService =
                new ThreadPoolExecutor(getNumSweepers(), getNumSweepers(), 1,
                        TimeUnit.MINUTES,
                        new LinkedBlockingQueue<Runnable>(),
                        new ThreadFactory() {
                            private final AtomicInteger threadCounter = new AtomicInteger();

                            private String getName() {
                                return "MarkSweepGarbageCollector-Sweeper-" + threadCounter.getAndIncrement();
                            }

                            @Override
                            public Thread newThread(Runnable r) {
                                Thread thread = new Thread(r, getName());
                                thread.setDaemon(true);
                                return thread;
                            }
                        });

        LineIterator iterator = FileUtils.lineIterator(fs.getGcCandidates(), Charsets.UTF_8.name());
        List<String> ids = Lists.newArrayList();
        int count = 0;
        while (iterator.hasNext()) {
            ids.add(iterator.next());

            if (ids.size() > getBatchCount()) {
                count += ids.size();
                executorService.execute(new Sweeper(ids, exceptionQueue));
                ids = Lists.newArrayList();
            }
        }
        if (!ids.isEmpty()) {
            count += ids.size();
            executorService.execute(new Sweeper(ids, exceptionQueue));
        }

        try {
            executorService.shutdown();
            executorService.awaitTermination(100, TimeUnit.MINUTES);
        } catch (InterruptedException e) {
            e.printStackTrace();
        }

        count -= exceptionQueue.size();
        BufferedWriter writer = null;
        try {
            if (!exceptionQueue.isEmpty()) {
                writer = Files.newWriter(fs.getGarbage(), Charsets.UTF_8);
                saveBatchToFile(Lists.newArrayList(exceptionQueue), writer);
            }
        } finally {
            LineIterator.closeQuietly(iterator);
            IOUtils.closeQuietly(writer);
        }

        LOG.debug("Blobs deleted count - " + count);
        LOG.debug("Ending sweep phase of the garbage collector");
    }

    /**
     * Save batch to file.
     *
     * @param ids
     *            the ids
     * @param writer
     *            the writer
     * @throws IOException
     *             Signals that an I/O exception has occurred.
     */
    static void saveBatchToFile(List<String> ids, BufferedWriter writer) throws IOException {
        writer.append(Joiner.on(NEWLINE).join(ids));
        writer.append(NEWLINE);
        ids.clear();
        writer.flush();
    }

    /**
     * Sweeper thread.
     */
    class Sweeper implements Runnable {

        /** The exception queue. */
        private ConcurrentLinkedQueue<String> exceptionQueue;

        /** The ids to sweep. */
        private List<String> ids;

        /**
         * Instantiates a new sweeper.
         *
         * @param ids
         *            the ids
         * @param exceptionQueue
         *            the exception queue
         */
        public Sweeper(List<String> ids, ConcurrentLinkedQueue<String> exceptionQueue) {
            this.exceptionQueue = exceptionQueue;
            this.ids = ids;
        }

        @Override
        public void run() {
            try {
                boolean deleted = ((GarbageCollectableBlobStore) nodeStore.getBlobStore())
                        .deleteChunks(ids, maxLastModifiedTime);
                if (!deleted) {
                    exceptionQueue.addAll(ids);
                }
            } catch (Exception e) {
                e.printStackTrace();
                exceptionQueue.addAll(ids);
            }
        }
    }

    /**
     * Iterates the complete node tree.
     *
     * @return the list
     * @throws Exception
     *             the exception
     */
    private List<String> iterateNodeTree() throws Exception {
        ArrayList<String> referencedBlobs = Lists.newArrayList();
        BufferedWriter writer = null;
        try {
            writer = Files.newWriter(fs.getMarkedRefs(), Charsets.UTF_8);

            fs.sort(fs.getMarkedRefs());

            Iterator<Blob> blobIterator = nodeStore.getReferencedBlobsIterator();
            referencedBlobs.ensureCapacity(getBatchCount());

            int referencesFound = 0;
            while (blobIterator.hasNext()) {
                Blob blob = blobIterator.next();

                if (debugMode) {
                    LOG.debug("BlobId : " + blob.toString());
                }

                if (blob.toString().length() != 0) {
                    Iterator<String> idIter = ((GarbageCollectableBlobStore) nodeStore
                            .getBlobStore())
                            .resolveChunks(blob.toString());
                    while (idIter.hasNext()) {
                        String id = idIter.next();
                        referencedBlobs.add(id);
                        if (debugMode) {
                            LOG.debug("chunkId : " + id);
                        }
                    }
                }

                if (referencedBlobs.size() >= getBatchCount()) {
                    referencesFound += referencedBlobs.size();
                    saveBatchToFile(referencedBlobs, writer);
                }
            }

            if (!referencedBlobs.isEmpty()) {
                referencesFound += referencedBlobs.size();
                saveBatchToFile(referencedBlobs, writer);
            }
            fs.sort(fs.getMarkedRefs());

            LOG.debug("Blob references found (including chunk resolution) " + referencesFound);
        } finally {
            IOUtils.closeQuietly(writer);
        }
        return referencedBlobs;
    }

    /**
     * BlobIdRetriever class to retrieve all blob ids.
     */
    class BlobIdRetriever implements Runnable {
        @Override
        public void run() {
            retrieve();
        }

        /**
         * Retrieve.
         */
        protected void retrieve() {
            LOG.debug("Starting retrieve of all blobs");

            BufferedWriter bufferWriter = null;
            try {
                bufferWriter = new BufferedWriter(
                        new FileWriter(fs.getAvailableRefs()));
                Iterator<String> idsIter = ((GarbageCollectableBlobStore) nodeStore.getBlobStore())
                        .getAllChunkIds(maxLastModifiedTime);
                List<String> ids = Lists.newArrayList();
                int blobsCount = 0;
                while (idsIter.hasNext()) {
                    ids.add(idsIter.next());
                    if (ids.size() > getBatchCount()) {
                        blobsCount += ids.size();
                        saveBatchToFile(ids, bufferWriter);
                    }
                }

                if (!ids.isEmpty()) {
                    blobsCount += ids.size();
                    saveBatchToFile(ids, bufferWriter);
                }

                // sort the file
                fs.sort(fs.getAvailableRefs());
                LOG.debug("Ending retrieve of all blobs : " + blobsCount);
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                IOUtils.closeQuietly(bufferWriter);
            }
        }
    }

    /**
     * FileLineDifferenceIterator class which iterates over the difference of 2 files line by line.
     *
     * @param <T>
     *            the generic type
     */
    class FileLineDifferenceIterator<T> implements Iterator<String> {

        /** The marked references iterator. */
        private LineIterator markedIter;

        /** The available references iter. */
        private LineIterator allIter;

        private ArrayDeque<String> queue;

        private boolean done;

        /** Temporary buffer. */
        private TreeSet<String> markedBuffer;

        /**
         * Instantiates a new file line difference iterator.
         *
         * @param marked
         *            the marked
         * @param available
         *            the available
         * @throws IOException
         *             Signals that an I/O exception has occurred.
         */
        public FileLineDifferenceIterator(File marked, File available) throws IOException {
            this.markedIter = FileUtils.lineIterator(marked);
            this.allIter = FileUtils.lineIterator(available);
            queue = new ArrayDeque<String>(getBatchCount());
            markedBuffer = Sets.newTreeSet();
        }

        /**
         * Close.
         */
        private void close() {
            LineIterator.closeQuietly(markedIter);
            LineIterator.closeQuietly(allIter);
        }

        @Override
        public boolean hasNext() {
            if (!queue.isEmpty()) {
                return true;
            } else if (done) {
                return false;
            } else {
                if (!markedIter.hasNext() && !allIter.hasNext()) {
                    done = true;
                    close();
                    return false;
                } else {
                    queue.addAll(difference());
                    if (!queue.isEmpty()) {
                        return true;
                    } else {
                        done = true;
                        close();
                    }
                }
            }

            return false;
        }

        @Override
        public String next() {
            return nextDifference();
        }

        /**
         * Next difference.
         *
         * @return the string
         */
        public String nextDifference() {
            if (!hasNext()) {
                throw new NoSuchElementException("No more difference");
            }
            return queue.remove();
        }

        /**
         * Difference.
         *
         * @return the sets the
         */
        protected Set<String> difference() {
            TreeSet<String> gcSet = new TreeSet<String>();

            // Iterate till the gc candidate set is at least SAVE_BATCH_COUNT or
            // the
            // blob id set iteration is complete
            while (allIter.hasNext() &&
                    gcSet.size() < getBatchCount()) {
                TreeSet<String> allBuffer = new TreeSet<String>();

                while (markedIter.hasNext() &&
                        markedBuffer.size() < getBatchCount()) {
                    String stre = markedIter.next();
                    markedBuffer.add(stre);
                }
                while (allIter.hasNext() &&
                        allBuffer.size() < getBatchCount()) {
                    String stre = allIter.next();
                    allBuffer.add(stre);
                }

                if (markedBuffer.isEmpty()) {
                    gcSet = allBuffer;
                } else {
                    gcSet.addAll(
                            Sets.difference(allBuffer, markedBuffer));

                    if (allBuffer.last().compareTo(markedBuffer.last()) < 0) {
                        // filling markedLeftoverBuffer
                        TreeSet<String> markedLeftoverBuffer = Sets.newTreeSet();
                        markedLeftoverBuffer.addAll(markedBuffer.tailSet(allBuffer.last(), false));
                        markedBuffer = markedLeftoverBuffer;
                        markedLeftoverBuffer = null;
                    } else {
                        markedBuffer.clear();
                    }
                }
            }

            return gcSet;
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    }
}
TOP

Related Classes of org.apache.jackrabbit.oak.plugins.blob.MarkSweepGarbageCollector$FileLineDifferenceIterator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.