}
private void findDuplicates() {
status.setText(I18N.translate("searching"));
for (int i = 0; i < datasets.size() && running; i++) {
Dataset dataset = (Dataset) datasets.get(i);
int percent = (int) ((double) i / (double) datasets.size() * 100);
progress.setValue(percent);
for (int j = 0; j < datasets.size() && running; j++) {
Dataset dataset2 = (Dataset) datasets.get(j);
if (i == j) {
continue;
}
// find duplicates via md5
if (DoubleFinder.getProperty("use_md5").equals("true")) {
if (dataset.getMd5sum().equals(dataset2.getMd5sum())) {
Duplicate d;
if (dataset.getDuplicate() != null) {
d = dataset.getDuplicate();
} else if (dataset2.getDuplicate() != null) {
d = dataset2.getDuplicate();
} else {
d = new Duplicate();
d.setPercentageOfReliability(100);
dataset2.setDuplicate(d);
d.addDuplicate(dataset2);
}
dataset.setDuplicate(d);
d.addDuplicate(dataset);
duplicates.add(d);
}
}
// find duplicates via tag
if (DoubleFinder.getProperty("use_tag").equals("true")) {
if( !dataset.getArtist().equals("") ) {
String artist1 = dataset.getArtist().toLowerCase();
String artist2 = dataset2.getArtist().toLowerCase();
int eqArtist = Utilities.percentageOfEquality(artist1, artist2);
String title1 = dataset.getTitle().toLowerCase();
String title2 = dataset2.getTitle().toLowerCase();
int eqTitle = Utilities.percentageOfEquality(title1, title2);
if( eqArtist > 80 && eqTitle > 80) {
// soundex analysis
int difference = -1;
int length = 0;
RefinedSoundex soundex = new RefinedSoundex();
try {
difference = soundex.difference(title1, title2);
length = Math.max(soundex.encode(title1).length(), soundex.encode(title2).length());
} catch (Exception e) {}
double diff = (double)difference / (double)length;
if(diff >= 0.75) {
Duplicate d;
if (dataset.getDuplicate() != null) {
d = dataset.getDuplicate();
} else if (dataset2.getDuplicate() != null) {
d = dataset2.getDuplicate();
} else {
d = new Duplicate();
d.setPercentageOfReliability( (int)(diff * 100) );
dataset2.setDuplicate(d);
d.addDuplicate(dataset2);
}
dataset.setDuplicate(d);
d.addDuplicate(dataset);
duplicates.add(d);
}
}
}
}
// find duplicates via filename (Levenshtein distance)
if (DoubleFinder.getProperty("use_filename").equals("true")) {
String filename1 = dataset.getFilename().toLowerCase();
String filename2 = dataset2.getFilename().toLowerCase();
int equality = Utilities.percentageOfEquality(filename1, filename2);
if(equality >= 80) {
Duplicate d;
if (dataset.getDuplicate() != null) {
d = dataset.getDuplicate();
} else if (dataset2.getDuplicate() != null) {
d = dataset2.getDuplicate();
} else {
d = new Duplicate();
d.setPercentageOfReliability(equality);
dataset2.setDuplicate(d);
d.addDuplicate(dataset2);
}
dataset.setDuplicate(d);
d.addDuplicate(dataset);
duplicates.add(d);