Package org.socialmusicdiscovery.server.database.sampledata

Source Code of org.socialmusicdiscovery.server.database.sampledata.DiscogsLargeDatabaseSampleCreator

/*
*  Copyright 2010-2011, Social Music Discovery project
*  All rights reserved.
*
*  Redistribution and use in source and binary forms, with or without
*  modification, are permitted provided that the following conditions are met:
*      * Redistributions of source code must retain the above copyright
*        notice, this list of conditions and the following disclaimer.
*      * Redistributions in binary form must reproduce the above copyright
*        notice, this list of conditions and the following disclaimer in the
*        documentation and/or other materials provided with the distribution.
*      * Neither the name of Social Music Discovery project nor the
*        names of its contributors may be used to endorse or promote products
*        derived from this software without specific prior written permission.
*
*  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
*  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
*  DISCLAIMED. IN NO EVENT SHALL SOCIAL MUSIC DISCOVERY PROJECT BE LIABLE FOR ANY
*  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
*  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
*  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
*  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
*  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
*  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

package org.socialmusicdiscovery.server.database.sampledata;

import liquibase.Liquibase;
import liquibase.database.jvm.JdbcConnection;
import liquibase.exception.LiquibaseException;
import liquibase.resource.ClassLoaderResourceAccessor;
import org.socialmusicdiscovery.server.api.mediaimport.ProcessingStatusCallback;
import org.socialmusicdiscovery.server.business.logic.InjectHelper;
import org.socialmusicdiscovery.server.business.logic.SearchRelationPostProcessor;
import org.socialmusicdiscovery.server.business.logic.injections.database.DatabaseProvider;
import org.socialmusicdiscovery.server.business.model.core.Contributor;
import org.testng.annotations.Test;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;

import javax.xml.parsers.DocumentBuilderFactory;
import java.io.*;
import java.sql.Connection;
import java.sql.SQLException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;

public class DiscogsLargeDatabaseSampleCreator extends SampleCreator {
    /**
     * Roles to import from discogs
     */
    private static final Map<String, String> importedRoles = new HashMap<String, String>();

    static {
        importedRoles.put("Conductor", Contributor.CONDUCTOR);
        importedRoles.put("Written-By", Contributor.COMPOSER);
        importedRoles.put("Composed By", Contributor.COMPOSER);
        importedRoles.put("Trumpet", Contributor.PERFORMER);
        importedRoles.put("Vocals", Contributor.PERFORMER);
        importedRoles.put("Lead Vocals", Contributor.PERFORMER);
        importedRoles.put("Saxophone", Contributor.PERFORMER);
        importedRoles.put("Orchestra", Contributor.PERFORMER);
    }

    @Test(groups = {"manual"})
    public void createLargeDiscogsSample() throws Exception {
        final Map<String, List<String>> result = new HashMap<String, List<String>>();
        final Map<String, String> artistCache = new HashMap<String, String>();

        String discogsFile = System.getProperty("org.socialmusicdiscovery.server.sampledata.discogsfile");
        if (discogsFile == null) {
            throw new RuntimeException("You need to specify a releases dump from discogs, for example with -Dorg.socialmusicdiscovery.server.sampledata.discogsfile=/tmp/discogs_20101004_releases.xml");
        }
        if (System.getProperty("org.socialmusicdiscovery.server.database.directory") == null) {
            System.setProperty("org.socialmusicdiscovery.server.database.directory", getTargetDirectory());
        }
        BufferedReader reader = new BufferedReader(new FileReader(discogsFile));

        Long noOfTracks = 10000L;
        Long minTracksPerRelease = 8L;
        if (System.getProperty("org.socialmusicdiscovery.server.sampledata.nooftracks") != null) {
            noOfTracks = Long.parseLong(System.getProperty("org.socialmusicdiscovery.server.sampledata.nooftracks"));
        }
        if (System.getProperty("org.socialmusicdiscovery.server.sampledata.mintracksperrelease") != null) {
            minTracksPerRelease = Long.parseLong(System.getProperty("org.socialmusicdiscovery.server.sampledata.mintracksperrelease"));
        }

        System.out.println("Start parsing discogs dump...");
        StringBuffer sb = new StringBuffer();
        String line;
        int i = 0;
        while ((line = reader.readLine()) != null) {
            i++;
            sb.append(line);
            if (sb.indexOf("</release>") >= 0) {
                String data = sb.substring(0, sb.indexOf("</release>") + 10);
                sb.delete(0, sb.indexOf("</release>") + 10);
                if (importRelease(data, new HashMap<String, List<String>>(), new HashMap<String, String>(), minTracksPerRelease)) {
                    importRelease(data, result, artistCache, minTracksPerRelease);
                }
                if (result.get("tracks") != null && result.get("tracks").size() > noOfTracks) {
                    break;
                }
                if (result.get("tracks") != null && (i == 1 || i % 1000 == 0)) {
                    System.out.println("Found " + result.get("tracks").size() + " of " + noOfTracks + " tracks");
                }
            }
        }

        try {
            System.out.println("Finished parsing discogs dump, starting to load data into database...");
            String directory = getTestClassesDirectory() + File.separator + "org" + File.separator + "socialmusicdiscovery" + File.separator + "server" + File.separator + "database" + File.separator + "sampledata" + File.separator + "large";
            new File(directory).mkdir();
            for (Map.Entry<String, List<String>> entry : result.entrySet()) {
                FileWriter writer = new FileWriter(directory + File.separator + entry.getKey() + ".csv");
                for (String value : entry.getValue()) {
                    writer.write(value);
                    writer.write("\n");
                }
                writer.close();
            }

            DatabaseProvider provider = null;
            String database = InjectHelper.instanceWithName(String.class, "org.socialmusicdiscovery.server.database");
            if (database != null) {
                provider = InjectHelper.instanceWithName(DatabaseProvider.class, database);
                if (provider == null) {
                    throw new RuntimeException("No database provider exists for: " + database);
                }
            } else {
                throw new RuntimeException("No database provider configured");
            }
            provider.start();
            Connection connection = provider.getConnection();
            Liquibase liquibase = new Liquibase("org/socialmusicdiscovery/server/database/smd-database.changelog.xml", new
                    ClassLoaderResourceAccessor(),
                    new JdbcConnection(connection));
            if (System.getProperty("liquibase") == null || !System.getProperty("liquibase").equals("false")) {
                liquibase.update("");
            }
            liquibase = new Liquibase("org/socialmusicdiscovery/server/database/sampledata/large/large.xml", new
                    ClassLoaderResourceAccessor(),
                    new JdbcConnection(connection));
            liquibase.update("");

            // Ensure that we don't delete the database contents
            System.setProperty("hibernate.hbm2ddl.auto", "validate");
            System.out.println("Starting to update search relations...");
            SearchRelationPostProcessor searchRelationPostProcessor = new SearchRelationPostProcessor();
            searchRelationPostProcessor.init(null);
            searchRelationPostProcessor.execute(new ProcessingStatusCallback() {
                public void progress(String module, String currentDescription, Long currentNo, Long totalNo) {
                    System.out.println(currentNo + " of " + totalNo + ": " + currentDescription);
                }

                public void failed(String module, String error) {
                    System.err.println("Failed with error: " + error);
                }

                public void finished(String module) {
                    System.out.println("Finish updating search relations");
                }

                public void aborted(String module) {
                }
            });
        } catch (IOException e) {
            throw new RuntimeException(e);
        } catch (SQLException e) {
            throw new RuntimeException(e);
        } catch (LiquibaseException e) {
            throw new RuntimeException(e);
        }
    }

    private boolean importRelease(String data, Map<String, List<String>> result, Map<String, String> artistCache, Long minTracksPerRelease) throws Exception {

        Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new ByteArrayInputStream(data.getBytes()));
        NodeList releases = doc.getElementsByTagName("release");
        for (int i = 0; i < releases.getLength(); i++) {
            Element release = (Element) releases.item(i);
            String releaseTitle = getChildrenByTagName(release, "title").get(0).getTextContent();
            String releaseId = UUID.randomUUID().toString();

            if (releaseTitle.contains("<") || releaseTitle.contains(">")) {
                // Skip releases with problematic characters
                return false;
            }
            addRelease(result, releaseId, releaseTitle);

            List<Element> albumArtistsElement = getChildrenByTagName(release, "artists");
            if (albumArtistsElement.size() > 0) {
                List<Element> albumArtists = getChildrenByTagName(albumArtistsElement.get(0), "artist");
                for (Element albumArtist : albumArtists) {
                    String name = getChildrenByTagName(albumArtist, "name").get(0).getTextContent();
                    if (name.contains("<") || name.contains(">")) {
                        // Skip releases with strange artist names
                        return false;
                    }
                    if (!name.equals("Various")) {
                        String id = artistCache.get(name);
                        if (id == null) {
                            id = UUID.randomUUID().toString();
                            artistCache.put(name, id);
                            addArtist(result, id, name);
                        }
                        addReleaseContributor(result, releaseId, id, Contributor.PERFORMER);
                    }
                }
            }
            List<Element> extraArtistsElement = getChildrenByTagName(release, "extraartists");
            if (extraArtistsElement.size() > 0) {
                List<Element> artists = getChildrenByTagName(extraArtistsElement.get(0), "artist");
                for (Element artist : artists) {
                    String role = getChildrenByTagName(artist, "role").get(0).getTextContent();
                    if (importedRoles.containsKey(role)) {
                        String name = getChildrenByTagName(artist, "name").get(0).getTextContent();
                        String id = artistCache.get(name);
                        if (name.contains("<") || name.contains(">")) {
                            // Skip releases with problematic characters
                            return false;
                        }
                        if (id == null) {
                            id = UUID.randomUUID().toString();
                            artistCache.put(name, id);
                            addArtist(result, id, name);
                        }
                        addReleaseContributor(result, releaseId, id, importedRoles.get(role));
                    }
                }
            }

            List<Element> tracklist = getChildrenByTagName(release, "tracklist");
            if (tracklist.size() > 0) {
                Map<String, String> mediumCache = new HashMap<String, String>();
                List<Element> tracks = getChildrenByTagName(tracklist.get(0), "track");
                if (tracks.size() < minTracksPerRelease) {
                    // Skip releases with few tracks
                    return false;
                }
                for (Element track : tracks) {
                    String trackNumber = getChildrenByTagName(track, "position").get(0).getTextContent();
                    String trackTitle = getChildrenByTagName(track, "title").get(0).getTextContent();
                    String trackId = UUID.randomUUID().toString();
                    String recordingId = UUID.randomUUID().toString();
                    String workId = UUID.randomUUID().toString();

                    if (trackTitle.contains("<") || trackTitle.contains(">")) {
                        // Skip releases with problematic characters
                        return false;
                    }

                    addWork(result, workId, trackTitle);
                    addRecording(result, recordingId, workId, "NULL");

                    if (trackNumber.toString().matches("^[A-Z][0-9]+$")) {
                        String diskId = mediumCache.get(trackNumber.substring(0, 1));
                        if (diskId == null) {
                            diskId = UUID.randomUUID().toString();
                            mediumCache.put(trackNumber.substring(0, 1), diskId);
                            addMedium(result, releaseId, diskId, trackNumber.substring(0, 1));
                        }
                        addTrack(result, releaseId, recordingId, trackId, diskId, Integer.parseInt(trackNumber.substring(1)));
                    } else if (trackNumber.toString().matches("^[0-9]-[0-9]+$")) {
                        String diskId = mediumCache.get(trackNumber.substring(0, 1));
                        if (diskId == null) {
                            diskId = UUID.randomUUID().toString();
                            mediumCache.put(trackNumber.substring(0, 1), diskId);
                            addMedium(result, releaseId, diskId, trackNumber.substring(0, 1));
                        }
                        addTrack(result, releaseId, recordingId, trackId, diskId, Integer.parseInt(trackNumber.substring(2)));
                    } else if (trackNumber.toString().matches("^[0-9]+$")) {
                        addTrack(result, releaseId, recordingId, trackId, Integer.parseInt(trackNumber));
                    } else {
                        addTrack(result, releaseId, recordingId, trackId);
                    }

                    List<Element> artistsElement = getChildrenByTagName(track, "artists");
                    if (artistsElement.size() > 0) {
                        List<Element> artists = getChildrenByTagName(artistsElement.get(0), "artist");
                        for (Element artist : artists) {
                            String name = getChildrenByTagName(artist, "name").get(0).getTextContent();
                            if (name.contains("<") || name.contains(">")) {
                                // Skip releases with problematic characters
                                return false;
                            }
                            String id = artistCache.get(name);
                            if (id == null) {
                                id = UUID.randomUUID().toString();
                                artistCache.put(name, id);
                                addArtist(result, id, name);
                            }
                            addRecordingContributor(result, recordingId, id, Contributor.PERFORMER);
                        }
                    }

                    extraArtistsElement = getChildrenByTagName(track, "extraartists");
                    if (extraArtistsElement.size() > 0) {
                        List<Element> artists = getChildrenByTagName(extraArtistsElement.get(0), "artist");
                        for (Element artist : artists) {
                            String role = getChildrenByTagName(artist, "role").get(0).getTextContent();
                            if (importedRoles.containsKey(role)) {
                                String name = getChildrenByTagName(artist, "name").get(0).getTextContent();
                                String id = artistCache.get(name);
                                if (name.contains("<") || name.contains(">")) {
                                    // Skip releases with problematic characters
                                    return false;
                                }
                                if (id == null) {
                                    id = UUID.randomUUID().toString();
                                    artistCache.put(name, id);
                                    addArtist(result, id, name);
                                }
                                addRecordingContributor(result, recordingId, id, importedRoles.get(role));
                            }
                        }
                    }
                }
            }
        }
        return true;
    }

    public static String getParentDirectory(String path) {
        String parentDir = "/";
        int lastIndex;

        if (path != null && path.trim().length() > 0) {
            path = path.trim();

            if (path.endsWith("/") && path.length() > 1) {
                path = path.substring(0, path.length() - 1);
            }

            if (path.length() > 1) {
                lastIndex = path.lastIndexOf("/");

                if (lastIndex > 0) {
                    parentDir = path.substring(0, lastIndex);
                }
            }
        }

        return parentDir;
    }

    public String getTestClassesDirectory() {
        String path = getClass().getResource("/META-INF/persistence.xml").getPath();
        if (path != null) {
            path = getParentDirectory(path);
            if (path != null) {
                path = getParentDirectory(path);
            }
        }
        return path;
    }

    public String getTargetDirectory() {
        String path = getClass().getResource("/META-INF/persistence.xml").getPath();
        if (path != null) {
            path = getParentDirectory(path);
            if (path != null) {
                path = getParentDirectory(path);
            }
            if (path != null) {
                path = getParentDirectory(path);
            }
        }
        return path;
    }

    @Override
    protected String getChangedBy() {
        return "discogs";
    }
    @Override
    protected String getChangedTime() {
        return "2011-01-01T00:00:00";
    }
}
TOP

Related Classes of org.socialmusicdiscovery.server.database.sampledata.DiscogsLargeDatabaseSampleCreator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.