Package dmt.tools

Examples of dmt.tools.CSVFileReader


  {
    // Body of a method whose signature is not shown in this excerpt.
    // It runs the pre-processor from inCSV into tmpFile, then reads tmpFile
    // row by row, appends the values of every registered Feature to each row,
    // removes the column at index 3 and writes the result to outCSV.
   
    // run the pre-processor first; it writes tmpFile, which is read below
    (new CSVPreProcessor(inCSV, tmpFile)).run();
   
    CSVFileReader in = new CSVFileReader(tmpFile, ',');
    CSVFileWriter out = new CSVFileWriter(outCSV, ',');
   
    // write the header row first
    out.writeFields(getHeaders());

      // NOTE(review): the first row read is treated as data, so tmpFile is
      // presumably header-less -- confirm against CSVPreProcessor.
      Vector<String> fields = in.readFields();
      // row counter; only used by the commented-out debug limit below
      int k=0;
     
      // readFields() returning null ends the loop
      while(fields!=null)
      {
        // update the hashMap: the key is column 0 concatenated with column 1,
        // the value is false when column 2 equals "0" and true otherwise
        hashMap.put(fields.get(0)+fields.get(1), fields.get(2).equalsIgnoreCase("0") ? false : true);
       
        // existing fields are preserved; feature values are appended after them
       
        // compute the features
        Vector<Feature> features = Feature.getFeatures();
      for(int i=0; i < features.size(); i++)
      {
        Object[] values = features.get(i).getValues(fields, hashMap);
        for(int j = 0; j < values.length; j++)
        {
          try
          {
            fields.add(values[j].toString());
          }
          catch(NullPointerException exception)
          {
            // a null feature value is written as "?"
            fields.add("?");
          }
         
        }
      }
      // drop the column at index 3 once the features have been computed
      // NOTE(review): presumably the raw text column -- confirm
      fields.remove(3);
      //fields.set(3, new Integer(fields.get(3).length()).toString());
        out.writeFields(fields);
        fields = in.readFields();
        k++;
        //if(k>1000) break;
       
      }
 
      // NOTE(review): these close() calls are skipped if anything above throws
      in.close();
      out.close();
     
      ElementTypeFeature.showElementTypes();
      NumberOfCharsStemmedFeature.printWords();
  }
View Full Code Here


    Vector<String> headers = new Vector<String>();
    headers.add("Euclidean");
    headers.add("Manhattan");
    out.writeFields(headers);

    CSVFileReader in1 = new CSVFileReader("csv_out.csv", ',');
    // skip the headers
    Vector<String> fields1 = in1.readFields();
    fields1 = in1.readFields();
    int count1 = 0;
    while (fields1 != null)
    {
      String id1 = fields1.get(0) + fields1.get(1);
      Object[] bagOfWordsTfIdf1 = fields1.subList(4, fields1.size())
          .toArray();
      TextInstance instance1 = new TextInstance(bagOfWordsTfIdf1, id1);
      CSVFileReader in2 = new CSVFileReader("csv_out.csv", ',');
      Vector<String> fields2 = in2.readFields();
      fields2 = in2.readFields();
      TextInstance instance2;
      Object[] bagOfWordsTfIdf2;
      int count2=0;
      while (fields2 != null)
      {     
        String id2 = fields2.get(0) + fields2.get(1);
        bagOfWordsTfIdf2 = fields2.subList(4, fields2.size()).toArray();
        instance2 = new TextInstance(bagOfWordsTfIdf2, id2);
        Vector<String> distances = new Vector<String>();
        double d1 = TextInstance.computeEuclideanDistance(
            instance1, instance2);
        distances.add(d1+"");
        distances.add(TextInstance.computeManhattanDistance(instance1, instance2)
            + "");
        out.writeFields(distances);
        fields2 = in2.readFields();
        count2++;
        if(count2==num_instances) break;
      }
      fields1 = in1.readFields();
      count1++;
View Full Code Here

    this.outCSV  = outCSV;
  }
 
  public void run() throws IOException
  {
    CSVFileReader in = new CSVFileReader(inCSV, ',');
    CSVFileWriter out = new CSVFileWriter(outCSV, ',');
   
      Vector<String> fields = in.readFields();
      int k=0;
     
      while(fields!=null)
      {
        //run the prefeatures
        Vector<PreFeature> preFeatures = PreFeature.getFeatures();
      for(int i=0; i < preFeatures.size(); i++)
      {
        preFeatures.get(i).run(fields);
      }
      out.writeFields(fields);
        fields = in.readFields();
        k++;
        //if(k>10) break;
       
      }
      in.close();
      out.close();
  }
View Full Code Here

  private double norm;
  public static Set<TextInstance> textInstancesSet = new HashSet<TextInstance>();

  public static void loadTextInstances() throws IOException
  {
    CSVFileReader in1 = new CSVFileReader("csv_out.csv", ',');
    // skip the headers
    Vector<String> fields1 = in1.readFields();
    fields1 = in1.readFields();
    int no_instances = 1000;
    int k = 0;
    while (fields1 != null)
    {
      String id = fields1.get(0) + fields1.get(1);
      Object[] bagOfWordsTfIdf1 = fields1.subList(4, fields1.size())
          .toArray();
      TextInstance instance1 = new TextInstance(bagOfWordsTfIdf1, id);
      // instance1.clearBagOfWordsTfIdf();
      textInstancesSet.add(instance1);
      fields1 = in1.readFields();
      k++;
      if (no_instances == k) break;
    }
  }
View Full Code Here

 
  public void run() throws IOException
  {
    loadBagOfWords();
   
    CSVFileReader in = new CSVFileReader(inCSV, ',');

      Vector<String> fields = in.readFields();
     
      while(fields!=null)
      { 
        //generate here the id of the element
        fields.set(0, fields.get(0) + fields.get(1));
       
        String text = fields.get(3).toLowerCase();
       
        loadStopWords();
      SnowballStemmer stemmer = new org.tartarus.snowball.ext.englishStemmer();
     
      for(int i=0; i < stopWords.size(); i++)
      {
        text = text.replaceAll("[\\s]"+stopWords.get(i)+"[\\s]", " ");
      }
     
      Pattern p = Pattern.compile("[^a-zA-Z]+");
      String [] words = p.split(text);
      text = "";
      for(int i=0; i < words.length; i++)
      {
        if(words[i].length() < 3)
        {
          continue;
        }
       
        stemmer.setCurrent(words[i]);
        stemmer.stem();
        String stemmed = stemmer.getCurrent();
       
        if(!loadedWords.contains(stemmed))
        {
          continue;
        }
       
        if(stemWordsTotal.containsKey(stemmed))
        {
          stemWordsTotal.put(stemmed, stemWordsTotal.get(stemmed) + 1);
        }
        else
        {
          stemWordsTotal.put(stemmed, 1);
        }
       
        if(text.indexOf(stemmed) >= 0)
        {
          continue;
        }
       
        if(documentsWords.containsKey(stemmed))
        {
          if(documentsWords.get(stemmed).indexOf(fields.get(0)) < 0)
          {
            documentsWords.put(stemmed, documentsWords.get(stemmed) + " " + fields.get(0));
            stemWords.put(stemmed, stemWords.get(stemmed) + 1);
          }
        }
        else
        {
          stemWords.put(stemmed, 1);
          documentsWords.put(stemmed, fields.get(0));
        }
      }
       
        fields = in.readFields();
      }
 
      in.close();
     
      listBagOfWords();
  }
View Full Code Here

  }
 
  private void loadBagOfWords() throws IOException
  {
    loadedWords = new Vector<String>();
    CSVFileReader in = new CSVFileReader(inBagOfWords, ',');

      Vector<String> fields = in.readFields();
     
      while(fields!=null)
      {
        loadedWords.add(fields.get(0));
        fields = in.readFields();
      }
 
      in.close();
  }
View Full Code Here

    if(stopWords != null)
    {
      return;
    }
   
    CSVFileReader in;
    try
    {
      in = new CSVFileReader("datasets/common-english-words-with-contractions.txt", ',');
      stopWords = in.readFields();
      in.close();
    } catch (IOException e)
    {
      e.printStackTrace();
    }
  }
View Full Code Here

  }
 
  public void run() throws IOException
  {
    Vector<String> fields;
    CSVFileReader clusters1In = new CSVFileReader(clusters1,',');
    fields = clusters1In.readFields();
    fields = clusters1In.readFields();
    while(fields!=null)
      {
      hashMap1.put(fields.get(0)+fields.get(1), fields.get(2).substring(7));
      fields = clusters1In.readFields();
      }
   
    clusters1In.close();
   
    CSVFileReader clusters2In = new CSVFileReader(clutsers2,',');
    fields = clusters2In.readFields();
    fields = clusters2In.readFields();
    while(fields!=null)
      {
      hashMap2.put(fields.get(0)+fields.get(1), fields.get(2).substring(7));
      fields = clusters2In.readFields();
      }
   
    clusters2In.close();
   
    CSVFileReader in = new CSVFileReader(inCSV, ',');
    CSVFileWriter out = new CSVFileWriter(outCSV, ',');
   
    //adding headers
      fields = in.readFields();
      fields.add("ClusterVictor");
      fields.add("ClusterMarina");
      String toc = fields.get(2);
      fields.remove(2);
      fields.add(toc);
      out.writeFields(fields);
      fields = in.readFields();
      int k=0;
     
      while(fields!=null)
      {
        if(hashMap1.containsKey(fields.get(0)+fields.get(1)))
        {
          fields.add(hashMap1.get(fields.get(0)+fields.get(1)));
        }
        else
        {
          fields.add(""+no_clusters1);
        }
        if(hashMap2.containsKey(fields.get(0)+fields.get(1)))
        {
          fields.add(hashMap2.get(fields.get(0)+fields.get(1)));
        }
        else
        {
          fields.add(""+no_clusters2);
        }
       
        toc = fields.get(2);
        fields.remove(2);
        fields.add(toc);
       
        out.writeFields(fields);
        fields = in.readFields();
        k++;
       
      }
 
      in.close();
      out.close();
  }
View Full Code Here

  {
    // Body of a method whose signature is not shown in this excerpt.
    // It runs the pre-processor from inCSV into tmpFile, then reads tmpFile
    // row by row, appends the values of a MostFrequentWordsFeature to each
    // row, removes the column at index 3 and writes the result to outCSV.
   
    // run the pre-processor first; it writes tmpFile, which is read below
    (new CSVPreProcessor(inCSV, tmpFile)).run();
   
    CSVFileReader in = new CSVFileReader(tmpFile, ',');
    CSVFileWriter out = new CSVFileWriter(outCSV, ',');
   
    // write the header row first
    out.writeFields(getHeaders());

      Vector<String> fields = in.readFields();
      // row counter; only used by the commented-out debug limit below
      int k=0;
     
      // readFields() returning null ends the loop
      while(fields!=null)
      {
        // update the hashMap: the key is column 0 concatenated with column 1,
        // the value is false when column 2 equals "0" and true otherwise
        hashMap.put(fields.get(0)+fields.get(1), fields.get(2).equalsIgnoreCase("0") ? false : true);
       
        // existing fields are preserved; feature values are appended after them
       
        // compute the features (a new feature object is created for every row)
        Feature bagOfWordsFeature = new MostFrequentWordsFeature();
      Object[] values = bagOfWordsFeature.getValues(fields, hashMap);
      for(int j = 0; j < values.length; j++)
      {
        try
        {
          fields.add(values[j].toString());
        }
        catch(NullPointerException exception)
        {
          // a null feature value is written as "?"
          fields.add("?");
        }
      }
      // drop the column at index 3 once the feature has been computed
      // NOTE(review): presumably the raw text column -- confirm
      fields.remove(3);
      //fields.set(3, new Integer(fields.get(3).length()).toString());
        out.writeFields(fields);
        fields = in.readFields();
        k++;
        //if(k>10) break;
       
      }
 
      // NOTE(review): these close() calls are skipped if anything above throws
      in.close();
      out.close();
     
      ElementTypeFeature.showElementTypes();
      NumberOfCharsStemmedFeature.printWords();
  }
View Full Code Here

  }
 
  public void run() throws IOException
  {
   
    CSVFileReader in = new CSVFileReader(inCSV, ',');
    CSVFileWriter out = new CSVFileWriter(outCSV, ',');

      Vector<String> fields = in.readFields();
      int k=0;
     
      while(fields!=null)
      {
        fields.remove(0);
        fields.remove(0);
        fields.remove(0);
        if (k == 0)
        {
          out.writeFields(fields);
          for(int i=0; i < NO_CLUSTERS; i++)
          {
            clusters.add(new Vector<Double>(fields.size()));
            for(int j=0;  j < fields.size(); j++)
            {
              clusters.get(i).add(0.0);
            }
          }
        }
        else
        {
         
          int cluster = Integer.parseInt(fields.get(fields.size()-1).substring(7));
         
          if(cluster >= NO_CLUSTERS || cluster < 0)
          {
            continue;
          }
         
          clusters.get(cluster).set(fields.size()-1, clusters.get(cluster).get(fields.size()-1) + 1);
         
          for(int i=0; i < fields.size()-1; i++)
          {
            Double word = Double.parseDouble(fields.get(i));
            clusters.get(cluster).set(i, clusters.get(cluster).get(i) + word);
          }
        }
       
        fields = in.readFields();
        k++;
        //if(k>1000) break;
       
      }
     
      for(int i=0; i < NO_CLUSTERS; i++)
      {
        Vector<String> newFields = new Vector<String>();
        Double noInstances = clusters.get(i).get(clusters.get(i).size()-1);
        for(int j=0; j < clusters.get(i).size() - 1; j++)
        {
         
          newFields.add((new Double(
              clusters.get(i).get(j) / noInstances
          )).toString());
        }
        newFields.add((new Double(i)).toString());
        out.writeFields(newFields);
      }
 
      in.close();
      out.close();
     
  }
View Full Code Here

TOP

Related Classes of dmt.tools.CSVFileReader

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.