Package org.apache.lucene.index

Examples of org.apache.lucene.index.TermFreqVector
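
TermFreqVector exposes the terms of a single document's field together with their frequencies, provided the field was indexed with term vectors enabled. The snippets below are collected from several open-source projects and are truncated in the original listing. As a minimal sketch first (assuming Lucene 3.x, an existing index in a directory named "index", and a "contents" field indexed with Field.TermVector.YES; the names are illustrative), retrieving and printing a vector looks like this:

    import java.io.File;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.TermFreqVector;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;

    public class TermVectorDump {
        public static void main(String[] args) throws Exception {
            Directory dir = FSDirectory.open(new File("index")); // assumed index location
            IndexReader reader = IndexReader.open(dir);
            try {
                int docId = 0; // hypothetical document number
                TermFreqVector tfv = reader.getTermFreqVector(docId, "contents");
                if (tfv != null) { // null when the field stored no term vector
                    String[] terms = tfv.getTerms();
                    int[] freqs = tfv.getTermFrequencies();
                    for (int i = 0; i < terms.length; i++) {
                        System.out.println(terms[i] + " -> " + freqs[i]);
                    }
                }
            } finally {
                reader.close();
            }
        }
    }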


Fetching the term vector for a full-text field and building an excerpt when position data is available:

                // ... tail of a loop that concatenates the document's
                // stored text into the `text` buffer ...
                separator = " ";
            }
            TermFreqVector tfv = reader.getTermFreqVector(
                    docNumber, FieldNames.FULLTEXT);
            if (tfv instanceof TermPositionVector) {
                return createExcerpt((TermPositionVector) tfv, text.toString(),
                        maxFragments, maxFragmentSize);
            } else {
                // ... (snippet truncated in the original listing)


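The instanceof check matters because getTermFreqVector only returns a TermPositionVector when positions were stored at index time. A minimal indexing-side sketch (assuming the Lucene 3.x field API; `doc` and `text` come from the surrounding setup, and FieldNames.FULLTEXT is the constant used in the snippet):

    // Positions (and offsets) must be requested when the field is added,
    // or the reader will hand back a plain TermFreqVector.
    doc.add(new Field(FieldNames.FULLTEXT, text,
            Field.Store.NO, Field.Index.ANALYZED,
            Field.TermVector.WITH_POSITIONS_OFFSETS));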

      long sTime, eTime;

      sTime = System.currentTimeMillis();

      int maxid = reader.maxDoc();
      TermFreqVector tv;
      String[] terms;
      String term = "";

      Term t;

      int tfreq = 0;
      float idf;
      float tf;
      float tfidf;
      double inlinkBoost;
      double sum;

      int wikiID;

      int hashInt;

      int numDocs = reader.numDocs();

      TermEnum tnum = reader.terms();
      HashMap<String, Float> idfMap = new HashMap<String, Float>(500000);
      HashMap<String, Float> tfidfMap = new HashMap<String, Float>(5000);
      HashMap<String, Integer> termHash = new HashMap<String, Integer>(500000);

      FileOutputStream fos = new FileOutputStream("vector.txt");
      OutputStreamWriter osw = new OutputStreamWriter(fos, "UTF-8");

      // First pass over the term dictionary: compute an IDF for every term
      // that occurs in at least three documents and assign it a dense id.
      hashInt = 0;
      while (tnum.next()) {
        t = tnum.term();
        term = t.text();

        tfreq = tnum.docFreq(); // document frequency for the term

        // skip rare terms
        if (tfreq < 3) {
          continue;
        }

        // idf = (float) (Math.log(numDocs / (double) (tfreq + 1)) + 1.0);
        idf = (float) Math.log(numDocs / (double) tfreq);
        // idf = (float) (Math.log(numDocs / (double) tfreq) / Math.log(2));

        idfMap.put(term, idf);
        termHash.put(term, hashInt++);
      }

      // Second pass over the documents: pull each live document's term
      // vector and combine its frequencies with the precomputed IDFs.
      // inlinkMap is a field of the enclosing class (not shown here).
      for (int i = 0; i < maxid; i++) {
        if (!reader.isDeleted(i)) {
          wikiID = Integer.valueOf(reader.document(i).getField("id").stringValue());
          inlinkBoost = inlinkMap.get(wikiID);

          tv = reader.getTermFreqVector(i, "contents");
          try {
            terms = tv.getTerms();
            int[] fq = tv.getTermFrequencies();

            sum = 0.0;
            tfidfMap.clear();

            // ... (snippet truncated in the original listing)
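The loop body is cut off in the listing. A plausible continuation (a sketch, not the original code) would weight each term's frequency by the stored IDF and accumulate the squared sum for normalization:

    // Hypothetical continuation: combine raw term frequency with the
    // precomputed IDF and track the vector's squared length.
    for (int j = 0; j < terms.length; j++) {
        Float idfValue = idfMap.get(terms[j]);
        if (idfValue == null) {
            continue; // term was skipped as rare in the first pass
        }
        float weight = fq[j] * idfValue.floatValue();
        tfidfMap.put(terms[j], weight);
        sum += weight * weight;
    }
    double norm = Math.sqrt(sum);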

      // Variant of the pass above: additionally counts the retained terms
      // so the term-to-document matrix can be presized.
      long sTime, eTime;

      sTime = System.currentTimeMillis();

      int maxid = reader.maxDoc();
      TermFreqVector tv;
      String[] terms;
      String term = "";

      Term t;

      int tcount;
      int tfreq = 0;
      float idf;
      float tf;
      float tfidf;
      double inlinkBoost;
      double sum;

      int wikiID;

      int hashInt;

      int numDocs = reader.numDocs();

      TermEnum tnum = reader.terms();
      HashMap<String, Float> idfMap = new HashMap<String, Float>(500000);
      HashMap<String, Float> tfidfMap = new HashMap<String, Float>(5000);
      HashMap<String, Integer> termHash = new HashMap<String, Integer>(500000);

      hashInt = 0;
      tcount = 0;
      while (tnum.next()) {
        t = tnum.term();
        term = t.text();

        tfreq = tnum.docFreq(); // document frequency for the term

        // skip rare terms
        if (tfreq < 3) {
          continue;
        }

        // idf = (float) (Math.log(numDocs / (double) (tfreq + 1)) + 1.0);
        idf = (float) Math.log(numDocs / (double) tfreq);
        // idf = (float) (Math.log(numDocs / (double) tfreq) / Math.log(2));

        idfMap.put(term, idf);
        termHash.put(term, hashInt++);

        tcount++;
      }

      matrix = new HashMap<String, ArrayList<DocScore>>(tcount);

      for (int i = 0; i < maxid; i++) {
        if (!reader.isDeleted(i)) {
          wikiID = Integer.valueOf(reader.document(i).getField("id").stringValue());
          inlinkBoost = inlinkMap.get(wikiID);

          tv = reader.getTermFreqVector(i, "contents");
          try {
            terms = tv.getTerms();
            int[] fq = tv.getTermFrequencies();

            sum = 0.0;
            tfidfMap.clear();

            // ... (snippet truncated in the original listing)
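DocScore itself is not shown in the listing; a minimal holder like the following (hypothetical, written only to fit the HashMap<String, ArrayList<DocScore>> declaration above) would suffice:

    // Hypothetical value type for the term-to-documents matrix: one
    // (document, weight) pair per posting.
    public class DocScore {
        public final int docId;
        public final float score;

        public DocScore(int docId, float score) {
            this.docId = docId;
            this.score = score;
        }
    }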

     
      // verify the vectors fetched so far
      verifyVectors(vectors, docId);

      // time a single stored-term-vector lookup
      start = System.currentTimeMillis();
      TermFreqVector vector = reader.getTermFreqVector(docId, "field");
      timeElapsed += System.currentTimeMillis() - start;

      vectors = new TermFreqVector[1];
      vectors[0] = vector;

      // ... (snippet truncated in the original listing)
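Worth noting: each getTermFreqVector(int, String) call rebuilds the vector from what was stored at index time, so the timed section above measures one stored-vector lookup per call.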

   * @throws IOException if there was an error loading
   */
  public static TokenStream getAnyTokenStream(IndexReader reader, int docId,
      String field, Document doc, Analyzer analyzer) throws IOException {
    TokenStream ts = null;

    TermFreqVector tfv = reader.getTermFreqVector(docId, field);
    if (tfv != null) {
      if (tfv instanceof TermPositionVector) {
        ts = getTokenStream((TermPositionVector) tfv);
        // ... (snippet truncated in the original listing)

   */
  public static TokenStream getAnyTokenStream(IndexReader reader, int docId,
      String field, Analyzer analyzer) throws IOException {
    TokenStream ts = null;

    TermFreqVector tfv = reader.getTermFreqVector(docId, field);
    if (tfv != null) {
      if (tfv instanceof TermPositionVector) {
        ts = getTokenStream((TermPositionVector) tfv);
        // ... (snippet truncated in the original listing)
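A hedged usage sketch for these helpers (assumes the Lucene highlighter contrib from the same release; `query`, `analyzer`, and the stored `text` are supplied by the caller):

    // Build a TokenStream from stored term vectors when possible, falling
    // back to re-analysis, then extract the best-scoring fragment.
    public static String bestFragment(IndexReader reader, int docId,
            String field, String text, Query query, Analyzer analyzer)
            throws Exception {
        TokenStream ts = TokenSources.getAnyTokenStream(reader, docId, field, analyzer);
        Highlighter highlighter = new Highlighter(new QueryScorer(query));
        return highlighter.getBestFragment(ts, text);
    }

In contrast, getTokenStream(IndexReader, int, String) below has no analyzer to fall back on, so it fails fast when the field stored no position data.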

        return new StoredTokenStream(tokensInOriginalOrder);
    }

    public static TokenStream getTokenStream(IndexReader reader, int docId,
        String field) throws IOException {
      TermFreqVector tfv = reader.getTermFreqVector(docId, field);
      if (tfv == null) {
        throw new IllegalArgumentException(field + " in doc #" + docId
            + " does not have any term position data stored");
      }
      // ... (snippet truncated in the original listing)
