// Package org.rstudio.core.client

// Source Code of org.rstudio.core.client.DuplicateHelper$DuplicationInfo

/*
* DuplicateHelper.java
*
* Copyright (C) 2009-12 by RStudio, Inc.
*
* Unless you have received this program directly from RStudio pursuant
* to the terms of a commercial license agreement with RStudio, then
* this program is licensed to you under the terms of version 3 of the
* GNU Affero General Public License. This program is distributed WITHOUT
* ANY EXPRESS OR IMPLIED WARRANTY, INCLUDING THOSE OF NON-INFRINGEMENT,
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Please refer to the
* AGPL (http://www.gnu.org/licenses/agpl-3.0.txt) for more details.
*
*/
package org.rstudio.core.client;

import org.rstudio.core.client.files.FileSystemItem;
import java.util.*;

public class DuplicateHelper
{
   /**
    * Provides information about duplicates in the list that was tested.
    */
   public static class DuplicationInfo<T>
   {
      public DuplicationInfo(Comparator<T> comparator)
      {
         comparator_ = comparator;
      }

      /**
       * For a given value, return how many times it appears in the original
       * value list (or 0 if never).
       */
      public int occurrences(T value)
      {
         for (Pair<T, Integer> count : valueCounts_)
            if (0 == comparator_.compare(value, count.first))
               return count.second;
         return 0;
      }

      /**
       * Returns a list, each element of which is a list of indices of elements
       * in the original value list whose values are duplicates.
       *
       * For example:
       *
       * a = ["foo", "bar", "bar", "bar", "foo"]
       * dupeInfo = detectDupes(a)
       * dupeInfo.dupes()  ==> [ [0,4], [1,2,3] ]
       */
      public ArrayList<ArrayList<Integer>> dupes()
      {
         return dupes_;
      }

      void addDupeInfo(T value, ArrayList<Integer> indices)
      {
         valueCounts_.add(new Pair<T, Integer>(value, indices.size()));
         if (indices.size() > 1)
            dupes_.add(indices);
      }

      private final Comparator<T> comparator_;
      private ArrayList<ArrayList<Integer>> dupes_ =
            new ArrayList<ArrayList<Integer>>();
      private ArrayList<Pair<T, Integer>> valueCounts_ =
            new ArrayList<Pair<T, Integer>>();
   }

   private static class CaseInsensitiveStringComparator implements Comparator<String>
   {
      public int compare(String s1, String s2)
      {
         return s1.compareToIgnoreCase(s2);
      }
   }

   public static <T> int dedupeSortedList(ArrayList<T> list)
   {
      int removedCount = 0;

      for (int i = list.size() - 1; i > 0; i--)
      {
         T x = list.get(i-1);
         T y = list.get(i);
         if (((x == null) == (y == null)) &&
             ((x == null) || x.equals(y)))
         {
            list.remove(i);
            removedCount++;
         }
      }

      return removedCount;
   }

   /**
    * Detect duplicates and calculate frequency information in the given
    * list, according to the given comparator's definition of equality.
    * The comparator must correctly support not only equality but also
    * comparisons, since the duplicate detection algorithm relies on sorting.
    */
   public static <T> DuplicationInfo<T> detectDupes(List<T> list,
                                                    final Comparator<T> comparator)
   {
      ArrayList<Pair<Integer, T>> sorted = new ArrayList<Pair<Integer, T>>();
      for (int i = 0; i < list.size(); i++)
      {
         sorted.add(new Pair<Integer, T>(i, list.get(i)));
      }

      // Sort our copy of the list, so dupes are right next to each other
      Collections.sort(sorted, new Comparator<Pair<Integer, T>>()
      {
         public int compare(Pair<Integer, T> left,
                            Pair<Integer, T> right)
         {
            return comparator.compare(left.second, right.second);
         }
      });

      DuplicationInfo<T> dupeInfo = new DuplicationInfo<T>(comparator);
      ArrayList<Integer> currentDupes = new ArrayList<Integer>();
      T lastSeenValue = null;
      for (Pair<Integer, T> value : sorted)
      {
         if (lastSeenValue == null ||
             comparator.compare(lastSeenValue, value.second) != 0)
         {
            // This value isn't the same as the previous one. If we've got
            // dupes in our list, then add them to the results. Then start
            // a new list.
            if (currentDupes.size() > 0)
               dupeInfo.addDupeInfo(lastSeenValue, currentDupes);
            currentDupes = new ArrayList<Integer>();
         }

         // Add ourselves to the current list
         currentDupes.add(value.first);
         lastSeenValue = value.second;
      }

      if (currentDupes.size() > 0)
         dupeInfo.addDupeInfo(lastSeenValue, currentDupes);

      return dupeInfo;
   }

   /**
    * Use Mac OS X style prettifying of paths. Display the filename,
    * and if there are multiple entries with the same filename, append
    * a disambiguating folder to those filenames.
    */
   public static ArrayList<String> getPathLabels(ArrayList<String> paths,
                                                 boolean includeExtension)
   {
      ArrayList<String> labels = new ArrayList<String>();
      for (String entry : paths)
      {
         if (includeExtension)
            labels.add(FileSystemItem.getNameFromPath(entry));
         else
            labels.add(FileSystemItem.createFile(entry).getStem());
      }

      DuplicationInfo<String> dupeInfo = DuplicateHelper.detectDupes(
            labels, new CaseInsensitiveStringComparator());

      for (ArrayList<Integer> dupeList : dupeInfo.dupes())
      {
         fixupDupes(paths, dupeList, labels);
      }

      dupeInfo = DuplicateHelper.detectDupes(
            labels, new CaseInsensitiveStringComparator());

      // There are edge cases where we may still end up with dupes at this
      // point. In that case, just disambiguate using the full path.
      // Example:
      // ~/foo/tmp/README
      // ~/bar/tmp/README
      // ~/foo/README
      // ~/bar/README
      for (ArrayList<Integer> dupeList : dupeInfo.dupes())
      {
         for (Integer index : dupeList)
         {
            FileSystemItem fsi = FileSystemItem.createFile(
                  paths.get(index));
            String name = includeExtension ? fsi.getName() : fsi.getStem();
            labels.set(index, disambiguate(name,
                                           fsi.getParentPathString()));
         }
      }


      return labels;
   }

   private static void fixupDupes(ArrayList<String> fullPaths,
                           ArrayList<Integer> indices,
                           ArrayList<String> labels)
   {
      ArrayList<ArrayList<String>> pathElementListList =
            new ArrayList<ArrayList<String>>();

      for (Integer index : indices)
         pathElementListList.add(toPathElements(fullPaths.get(index)));

      while (indices.size() > 0)
      {
         ArrayList<String> lastPathElements = new ArrayList<String>();

         for (int i = 0; i < pathElementListList.size(); i++)
         {
            ArrayList<String> pathElementList = pathElementListList.get(i);

            if (pathElementList.size() == 0)
            {
               int trueIndex = indices.get(i);
               String path = FileSystemItem.createFile(fullPaths.get(trueIndex))
                     .getParentPathString();
               labels.set(trueIndex,
                          disambiguate(labels.get(trueIndex), path));

               indices.remove(i);
               pathElementListList.remove(i);
               i--;
            }
            else
            {
               lastPathElements.add(
                     pathElementList.remove(pathElementList.size() - 1));
            }
         }


         DuplicationInfo<String> dupeInfo = DuplicateHelper.detectDupes(
               lastPathElements,
               new CaseInsensitiveStringComparator());

         for (int i = 0; i < lastPathElements.size(); i++)
         {
            if (1 == dupeInfo.occurrences(lastPathElements.get(i)))
            {
               int trueIndex = indices.get(i);
               labels.set(trueIndex, disambiguate(labels.get(trueIndex),
                                          lastPathElements.get(i)));

               indices.remove(i);
               pathElementListList.remove(i);
               lastPathElements.remove(i);
               i--;
            }
         }

         assert indices.size() == pathElementListList.size();
      }
   }

   private static String disambiguate(String filename, String disambiguatingPath)
   {
      return filename + " \u2014 " + disambiguatingPath;
   }

   private static ArrayList<String> toPathElements(String path)
   {
      FileSystemItem fsi = FileSystemItem.createFile(path);
      return new ArrayList<String>(
            Arrays.asList(fsi.getParentPathString().split("/")));
   }
}
// TOP
//
// Related Classes of org.rstudio.core.client.DuplicateHelper$DuplicationInfo
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.