Package org.voltdb.sysprocs.saverestore

Source Code of org.voltdb.sysprocs.saverestore.SnapshotUtil$SnapshotFilter

/* This file is part of VoltDB.
* Copyright (C) 2008-2010 VoltDB Inc.
*
* VoltDB is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* VoltDB is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with VoltDB.  If not, see <http://www.gnu.org/licenses/>.
*/

package org.voltdb.sysprocs.saverestore;

import java.io.BufferedInputStream;
import java.io.CharArrayWriter;
import java.io.EOFException;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.zip.CRC32;

import org.voltdb.catalog.*;
import org.voltdb.utils.Pair;
import org.voltdb.utils.DBBPool.BBContainer;

import edu.brown.catalog.CatalogUtil;
import edu.brown.hstore.PartitionExecutor.SystemProcedureExecutionContext;

public class SnapshotUtil {

    /**
     * Create a digest for a snapshot containing the time of the snapshot and the list of tables included.
     * The first item in the comma separated list is the time in milliseconds as a string.
     * @param snapshotTime
     * @param path
     * @param nonce
     * @param tables
     * @throws IOException
     */
    public static void
        recordSnapshotTableList(
            long snapshotTime,
            String path,
            String nonce,
            List<Table> tables) throws IOException {
        final File f = new File(path, constructDigestFilenameForNonce(nonce));
        if (f.exists()) {
            if (!f.delete()) {
                throw new IOException("Unable to write table list file " + f);
            }
        }
        FileOutputStream fos = new FileOutputStream(f);
        StringWriter sw = new StringWriter();
        sw.append(Long.toString(snapshotTime));
        if (!tables.isEmpty()) {
            sw.append(',');
        }
        for (int ii = 0; ii < tables.size(); ii++) {
            sw.append(tables.get(ii).getTypeName());
            if (!(ii == (tables.size() - 1))) {
                sw.append(',');
            } else {
                sw.append('\n');
            }
        }

        final byte tableListBytes[] = sw.getBuffer().toString().getBytes("UTF-8");
        final CRC32 crc = new CRC32();
        crc.update(tableListBytes);
        ByteBuffer fileBuffer = ByteBuffer.allocate(tableListBytes.length + 4);
        fileBuffer.putInt((int)crc.getValue());
        fileBuffer.put(tableListBytes);
        fileBuffer.flip();
        fos.getChannel().write(fileBuffer);
        fos.getFD().sync();
    }

    public static List<String> retrieveRelevantTableNames(String path,
            String nonce) throws Exception {
        return retrieveRelevantTableNamesAndTime(new File(path, constructDigestFilenameForNonce(nonce))).getSecond();
    }

    /**
     * Retrieve a list of tables from a digest. Doesn't return the snapshot time
     * value that is stashed in the digest file.
     * @param f
     * @throws Exception
     */
    public static Pair<Long, List<String>> retrieveRelevantTableNamesAndTime(File f) throws Exception {
        String tableList = CRCCheck(f);
        String tableNames[] = tableList.split(",");
        String actualTableNames[] = new String[tableNames.length - 1];
        System.arraycopy( tableNames, 1, actualTableNames, 0, tableNames.length - 1);
        return Pair.of(Long.valueOf(tableNames[0]),
                       java.util.Arrays.asList(actualTableNames));
    }

    /**
     * Check if the CRC of the snapshot file matches the digest.
     * @param f The snapshot file object
     * @return The table list as a string
     * @throws IOException If CRC does not match
     */
    public static String CRCCheck(File f) throws IOException {
        final FileInputStream fis = new FileInputStream(f);
        try {
            final BufferedInputStream bis = new BufferedInputStream(fis);
            ByteBuffer crcBuffer = ByteBuffer.allocate(4);
            if (4 != bis.read(crcBuffer.array())) {
                throw new EOFException("EOF while attempting to read CRC from snapshot digest");
            }
            final int crc = crcBuffer.getInt();
            final InputStreamReader isr = new InputStreamReader(bis, "UTF-8");
            CharArrayWriter caw = new CharArrayWriter();
            while (true) {
                int nextChar = isr.read();
                if (nextChar == -1) {
                    throw new EOFException("EOF while reading snapshot digest");
                }
                if (nextChar == '\n') {
                    break;
                }
                caw.write(nextChar);
            }
            String tableList = caw.toString();
            byte tableListBytes[] = tableList.getBytes("UTF-8");
            CRC32 tableListCRC = new CRC32();
            tableListCRC.update(tableListBytes);
            tableListCRC.update("\n".getBytes("UTF-8"));
            final int calculatedValue = (int)tableListCRC.getValue();
            if (crc != calculatedValue) {
                throw new IOException("CRC of snapshot digest did not match digest contents");
            }

            return tableList;
        } finally {
            try {
                if (fis != null)
                    fis.close();
            } catch (IOException e) {}
        }
    }

    /**
     * Storage for information about files that are part of a specific snapshot
     */
    public static class Snapshot {
        public final List<File> m_digests = new ArrayList<File>();
        public final List<Set<String>> m_digestTables = new ArrayList<Set<String>>();
        public final Map<String, TableFiles> m_tableFiles = new TreeMap<String, TableFiles>();
    }

    /**
     * Description of all the files for a specific table that is part of a specific snapshot
     */
    public static class TableFiles {
        public final boolean m_isReplicated;
        TableFiles(boolean isReplicated) {
            m_isReplicated = isReplicated;
        }

        public final List<File> m_files = new ArrayList<File>();
        public final List<Boolean> m_completed = new ArrayList<Boolean>();
        public final List<Set<Integer>> m_validPartitionIds = new ArrayList<Set<Integer>>();
        public final List<Set<Integer>> m_corruptParititionIds = new ArrayList<Set<Integer>>();
        public final List<Integer> m_totalPartitionCounts = new ArrayList<Integer>();
    }

    /**
     * Simple filter that includes directories and files that end in .digest or .vpt
     */
    public static class SnapshotFilter implements FileFilter {
        @Override
        public boolean accept(File pathname) {
            if (pathname.isDirectory()) {
                return true;
            }
            if (pathname.getName().endsWith(".digest") || pathname.getName().endsWith(".vpt")) {
                return true;
            }
            return false;
        }
    };

    /**
     * Filter that looks for files related to a specific snapshot.
     */
    public static class SpecificSnapshotFilter extends SnapshotFilter {
        private final Set<String> snapshotNames;

        public SpecificSnapshotFilter(Set<String> snapshotNames) {
            this.snapshotNames = snapshotNames;
        }

        @Override
        public boolean accept(File pathname) {
            if (!super.accept(pathname)) {
                return false;
            }

            if (pathname.isDirectory()) {
                return true;
            }

            for (String snapshotName : snapshotNames) {
                if (pathname.getName().startsWith(snapshotName + "-") ||
                        pathname.getName().equals(constructDigestFilenameForNonce(snapshotName))) {
                    return true;
                }
            }

            return false;
        }
    }

    /**
     * Spider the provided directory applying the provided FileFilter. Optionally validate snapshot
     * files. Return a summary of partition counts, partition information, files, digests etc.
     * that can be used to determine if a valid restore plan exists.
     * @param directory
     * @param snapshots
     * @param filter
     * @param recursion
     * @param validate
     */
    public static void retrieveSnapshotFiles(
            File directory,
            Map<Long, Snapshot> snapshots,
            FileFilter filter,
            int recursion,
            boolean validate) {
        if (recursion == 32) {
            return;
        }
        if (!directory.exists()) {
            System.err.println("Error: Directory " + directory.getPath() + " doesn't exist");
            return;
        }
        if (!directory.canRead()) {
            System.err.println("Error: Directory " + directory.getPath() + " is not readable");
            return;
        }
        if (!directory.canExecute()) {
            System.err.println("Error: Directory " + directory.getPath() + " is not executable");
            return;
        }

        //System.out.println("Processing Dir :"+directory);
        //sleep(1);
        for (File f : directory.listFiles(filter)) {
      //System.out.println("Processing File :"+f);
     
            if (f.isDirectory()) {
                if (!f.canRead() || !f.canExecute()) {
                    System.err.println("Warning: Skipping directory " + f.getPath()
                            + " due to lack of read permission");
                } else {
                    retrieveSnapshotFiles( f, snapshots, filter, recursion++, validate);
                }
                continue;
            }
            if (!f.canRead()) {
                System.err.println("Warning: " + f.getPath() + " is not readable");
                continue;
            }

            FileInputStream fis = null;
            try {
                fis = new FileInputStream(f);
            } catch (FileNotFoundException e1) {
                System.err.println(e1.getMessage());
                continue;
            }

            try {
                if (f.getName().endsWith(".digest")) {
                    Pair<Long, List<String>> result = null;
                    try {
                        result = retrieveRelevantTableNamesAndTime(f);
                    } catch (Exception e) {
                        System.err.println(e.getMessage());
                        System.err.println("Error: Unable to process digest " + f.getPath());
                        continue;
                    }
                    Long snapshotTime = result.getFirst();
                    Snapshot s = snapshots.get(snapshotTime);
                    if (s == null) {
                        s = new Snapshot();
                        snapshots.put(snapshotTime, s);
                    }
                    TreeSet<String> tableSet = new TreeSet<String>();
                    tableSet.addAll(result.getSecond());
                    s.m_digestTables.add(tableSet);
                    s.m_digests.add(f);
                } else {
                    HashSet<Integer> partitionIds = new HashSet<Integer>();
                    TableSaveFile saveFile = new TableSaveFile(fis.getChannel(), 1, null, true);
                    try {
                        for (Integer partitionId : saveFile.getPartitionIds()) {
                            partitionIds.add(partitionId);
                        }
                        if (validate && saveFile.getCompleted()) {
                            while (saveFile.hasMoreChunks()) {
                                BBContainer cont = saveFile.getNextChunk();
                                if (cont != null) {
                                    cont.discard();
                                }
                            }
                        }
                        partitionIds.removeAll(saveFile.getCorruptedPartitionIds());
                        Snapshot s = snapshots.get(saveFile.getCreateTime());
                        if (s == null) {
                            s = new Snapshot();
                            snapshots.put(saveFile.getCreateTime(), s);
                        }

                        TableFiles tableFiles = s.m_tableFiles.get(saveFile.getTableName());
                        if (tableFiles == null) {
                            tableFiles = new TableFiles(saveFile.isReplicated());
                            s.m_tableFiles.put(saveFile.getTableName(), tableFiles);
                        }
                        tableFiles.m_files.add(f);
                        tableFiles.m_completed.add(saveFile.getCompleted());
                        tableFiles.m_validPartitionIds.add(partitionIds);
                        tableFiles.m_corruptParititionIds.add(saveFile.getCorruptedPartitionIds());
                        tableFiles.m_totalPartitionCounts.add(saveFile.getTotalPartitions());
                    } finally {
                        saveFile.close();
                    }
                }
            } catch (IOException e) {
    e.printStackTrace();
                System.err.println(e.getMessage());
                System.err.println("Error: Unable to process " + f.getPath());
            } finally {
                try {
                    if (fis != null) {
                        fis.close();
                    }
                } catch (IOException e) {
                }
            }
        }
    }

    /**
     * Returns a detailed report and a boolean indicating whether the snapshot can be successfully loaded
     * @param snapshotTime
     * @param snapshot
     */
    public static Pair<Boolean, String> generateSnapshotReport(Long snapshotTime, Snapshot snapshot) {
        CharArrayWriter caw = new CharArrayWriter();
        PrintWriter pw = new PrintWriter(caw);
        boolean snapshotConsistent = true;
        String indentString = "";
        pw.println(indentString + "Date: " + new Date(snapshotTime));
        pw.println(indentString + "Digests:");
        indentString = "\t";
        TreeSet<String> digestTablesSeen = new TreeSet<String>();

        if (snapshot.m_digests.isEmpty()) {
            pw.println(indentString + "No digests found.");
            snapshotConsistent = false;
        } else {
            boolean inconsistent = false;

            /*
             * Iterate over the digests and ensure that they all contain the same list of tables
             */
            Map<Integer, List<Integer>> inconsistentDigests = new HashMap<Integer, List<Integer>>();
            for (int ii = 0; ii < snapshot.m_digests.size(); ii++) {
                inconsistentDigests.put( ii, new ArrayList<Integer>());
                Set<String> tables = snapshot.m_digestTables.get(ii);
                for (int zz = 0; zz < snapshot.m_digests.size(); zz++) {
                    if (zz == ii) {
                        continue;
                    }
                    if (!tables.equals(snapshot.m_digestTables.get(zz))) {
                        snapshotConsistent = false;
                        inconsistent = true;
                        inconsistentDigests.get(ii).add(zz);
                    }
                }
            }

            /*
             * Summarize what was inconsistent/consistent
             */
            if (!inconsistent) {
                for (int ii = 0; ii < snapshot.m_digests.size(); ii++) {
                    pw.println(indentString + snapshot.m_digests.get(ii).getPath());
                }
            } else {
                pw.println(indentString + "Not all digests are consistent");
                indentString = indentString + "\t";
                for (Map.Entry<Integer, List<Integer>> entry : inconsistentDigests.entrySet()) {
                    File left = snapshot.m_digests.get(entry.getKey());
                    pw.println(indentString + left.getPath() + " is inconsistent with:");
                    indentString = indentString + "\t";
                    for (Integer id : entry.getValue()) {
                        File right = snapshot.m_digests.get(id);
                        pw.println(indentString + right.getPath());
                    }
                    indentString = indentString.substring(1);
                }
            }

            /*
             * Print the list of tables found in the digests
             */
            indentString = indentString.substring(1);
            pw.print(indentString + "Tables: ");
            int ii = 0;

            for (int jj = 0; jj < snapshot.m_digestTables.size(); jj++) {
                for (String table : snapshot.m_digestTables.get(jj)) {
                    digestTablesSeen.add(table);
                }
            }

            for (String table : digestTablesSeen) {
                if (ii != 0) {
                    pw.print(", ");
                }
                ii++;
                pw.print(table);
            }
            pw.print("\n");
        }

        /*
         * Check that the total partition count is the same in every table file
         */
        Integer totalPartitionCount = null;
        indentString = indentString + "\t";
        for (Map.Entry<String, TableFiles> entry : snapshot.m_tableFiles.entrySet()) {
            if (entry.getValue().m_isReplicated) {
                continue;
            }
            for (Integer partitionCount : entry.getValue().m_totalPartitionCounts){
                if (totalPartitionCount == null) {
                    totalPartitionCount = partitionCount;
                } else if (totalPartitionCount != partitionCount) {
                    snapshotConsistent = false;
                    pw.println(indentString + "Partition count is not consistent throughout snapshot files for "
                            + entry.getKey() + ". Saw "
                            + partitionCount + " and " + totalPartitionCount);
                }
            }
        }

        /*
         * Now check that each individual table has enough information to be restored.
         * It is possible for a valid partition set to be available and still have a restore
         * fail because the restore plan loads a save file with a corrupt partition.
         */
        TreeSet<String> consistentTablesSeen = new TreeSet<String>();
        for (Map.Entry<String, TableFiles> entry : snapshot.m_tableFiles.entrySet()) {
            TableFiles tableFiles = entry.getValue();

            /*
             * Calculate the set of visible partitions not corrupted partitions
             */
            TreeSet<Integer> partitionsAvailable = new TreeSet<Integer>();
            int kk = 0;
            for (Set<Integer> validPartitionIds : tableFiles.m_validPartitionIds) {
                if (tableFiles.m_completed.get(kk++)) {
                    partitionsAvailable.addAll(validPartitionIds);
                }
            }

            /*
             * Ensure the correct range of partition ids is present
             */
            boolean partitionsPresent = false;
            if ((partitionsAvailable.size() == (tableFiles.m_isReplicated ? 1 : totalPartitionCount)) &&
                (partitionsAvailable.first() == 0) &&
                (partitionsAvailable.last() == (tableFiles.m_isReplicated ? 1 : totalPartitionCount) - 1)) {
                partitionsPresent = true;
            }

            /*
             * Report if any of the files have corrupt partitions
             */
            boolean hasCorruptPartitions = false;
            for (Set<Integer> corruptIds : tableFiles.m_corruptParititionIds) {
                if (!corruptIds.isEmpty()) {
                    hasCorruptPartitions = true;
                    snapshotConsistent = false;
                }
            }

            pw.println(indentString + "Table name: " + entry.getKey());
            indentString = indentString + "\t";
            pw.println(indentString + "Replicated: " + entry.getValue().m_isReplicated);
            pw.println(indentString + "Valid partition set available: " + partitionsPresent);
            pw.println(indentString + "Corrupt partitions present: " + hasCorruptPartitions);

            /*
             * Print information about individual files such as the partitions present and whether
             * they are corrupted
             */
            pw.println(indentString + "Files: ");
            indentString = indentString + "\t";
            for (int ii = 0; ii < tableFiles.m_files.size(); ii++) {
                String corruptPartitionIdString = "";
                int zz = 0;
                for (Integer partitionId : tableFiles.m_corruptParititionIds.get(ii)) {
                    if (zz != 0) {
                        corruptPartitionIdString = corruptPartitionIdString + ", ";
                    }
                    zz++;
                    corruptPartitionIdString = corruptPartitionIdString + partitionId;
                }

                String validPartitionIdString = "";
                zz = 0;
                for (Integer partitionId : tableFiles.m_validPartitionIds.get(ii)) {
                    if (zz != 0) {
                        validPartitionIdString = validPartitionIdString + ", ";
                    }
                    zz++;
                    validPartitionIdString = validPartitionIdString + partitionId;
                }

                if (corruptPartitionIdString.isEmpty()) {
                    consistentTablesSeen.add(entry.getKey());
                    pw.println(indentString + tableFiles.m_files.get(ii).getPath() +
                            " Completed: " + tableFiles.m_completed.get(ii) + " Partitions: " +
                            validPartitionIdString);
                } else {
                    pw.println(indentString + tableFiles.m_files.get(ii).getPath() +
                            " Completed: " + tableFiles.m_completed.get(ii) +
                            " Valid Partitions: " +
                            validPartitionIdString +
                            " Corrupt Partitions: " +
                            corruptPartitionIdString);
                }
            }
            indentString = indentString.substring(2);
        }
        indentString = indentString.substring(1);

        StringBuilder missingTables = new StringBuilder(8192);
        if (!consistentTablesSeen.containsAll(digestTablesSeen)) {
            snapshotConsistent = false;
            missingTables.append("Missing tables: ");
            Set<String> missingTablesSet = new TreeSet<String>(digestTablesSeen);
            missingTablesSet.removeAll(consistentTablesSeen);
            int hh = 0;
            for (String tableName : missingTablesSet) {
                if (hh > 0) {
                    missingTables.append(", ");
                }
                missingTables.append(tableName);
                hh++;
            }
            missingTables.append('\n');
        }

        /*
         * Tack on a summary at the beginning to indicate whether a restore is guaranteed to succede
         * with this file set.
         */
        if (snapshotConsistent) {
            return Pair.of( true, "Snapshot valid\n" + caw.toString());
        } else {
            StringBuilder sb = new StringBuilder(8192);
            sb.append("Snapshot corrupted\n").append(missingTables).append(caw.toCharArray());
            return Pair.of( false,  sb.toString());
        }
    }

    /**
     * Generates a Filename to the snapshot file for the given table.
     * @param table
     * @param fileNonce
     * @param hostId
     */
    public static final String constructFilenameForTable(Table table,
                                                         String fileNonce,
                                                         String hostId,
                                                         String siteId,
                                                         String partitionId)
    {
        StringBuilder filename_builder = new StringBuilder(fileNonce);
        filename_builder.append("-");
        filename_builder.append(table.getTypeName());
        if (!table.getIsreplicated())
        {
            filename_builder.append("-host_");
            filename_builder.append(hostId);
            filename_builder.append("-site_");
            filename_builder.append(siteId);
            filename_builder.append("-partition_");
            filename_builder.append(partitionId);
        }
        filename_builder.append(".vpt");//Volt partitioned table
        return filename_builder.toString();
    }

    public static final File constructFileForTable(Table table,
            String filePath,
            String fileNonce,
            String hostId,
            String siteId,
            String partitionId)
    {
        return new File(filePath, SnapshotUtil.constructFilenameForTable(
            table, fileNonce, hostId, siteId, partitionId));
    }

    /**
     * Generates the digest filename for the given nonce.
     * @param nonce
     */
    public static final String constructDigestFilenameForNonce(String nonce) {
        return (nonce + ".digest");
    }

    public static final List<Table> getTablesToSave(Database database)
    {
        ArrayList<Table> my_tables = new ArrayList<Table>();
        for (Table table : database.getTables())
        {
            if (table.getSystable() || table.getMapreduce()) continue;
           
//            // Make a list of all non-materialized, non-export only tables
//            if ((table.getMaterializer() != null) ||
//                    (CatalogUtil.isTableExportOnly(database, table)))
//            {
//                continue;
//            }
            my_tables.add(table);
        }
        return my_tables;
    }

    public static final int[] getPartitionsOnHost(
            SystemProcedureExecutionContext c, Host h) {
        Collection<Partition> results = CatalogUtil.getPartitionsForHost(h);
        final int retval[] = new int[results.size()];
        int ii = 0;
        for (final Partition p : results) {
            retval[ii++] = Integer.parseInt(p.getTypeName());
        }
        return retval;
    }
}
TOP

Related Classes of org.voltdb.sysprocs.saverestore.SnapshotUtil$SnapshotFilter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.
-20639858-1', 'auto'); ga('send', 'pageview');