Examples of org.apache.lucene.document.Field

org.apache.lucene.document.Field
A field is a section of a Document. Each field has two parts, a name and a value. Values may be free text, provided as a String or as a Reader, or they may be atomic keywords, which are not further processed. Such keywords may be used to represent dates, URLs, etc. Fields are optionally stored in the index, so that they may be returned with hits on the document.

            }
            boolean store = auxiliaryFieldArr[i].isStored();
            boolean index = auxiliaryFieldArr[i].isIndexed();
            boolean token = auxiliaryFieldArr[i].isTokenized();


            doc.add(new Field(fieldName, value,
                store ? Field.Store.YES : Field.Store.NO,
                index ? (token ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED) : Field.Index.NO));
          }
        }
      }
    }
    
    // Add the groups of the document
    if (mCrawlerAccessController != null) {
      String[] groupArr = mCrawlerAccessController.getDocumentGroups(rawDocument);
      
      // Check the Group array
      RegainToolkit.checkGroupArray(mCrawlerAccessController, groupArr);


      // Add the field
      // NOTE: The field "groups" is tokenized, but not stemmed.
      //       See: RegainToolkit.WrapperAnalyzer
      Iterator groupIter = Arrays.asList(groupArr).iterator();
      StringBuilder tokenBuilder = new StringBuilder();
      while (groupIter.hasNext()) {
        tokenBuilder.append((String) groupIter.next());
        tokenBuilder.append(" ");
      }
    
      //doc.add(new Field("groups", new IteratorTokenStream(groupIter)));
      doc.add(new Field("groups", new WhitespaceTokenizer(new StringReader(tokenBuilder.toString()))));
    }


    // Add the URL of the document
    doc.add(new Field("url", url, Field.Store.YES, Field.Index.NOT_ANALYZED));
    
    // Add the file name (without protocol, drive-letter and path)
    String filenameWithVariants = RegainToolkit.urlToWhitespacedFileName(url);
    doc.add(new Field("filename", new WhitespaceTokenizer(new StringReader(filenameWithVariants))));
    PathFilenamePair pfPair = RegainToolkit.fragmentUrl(url);


    // Add the filename field for sorting
    doc.add(new Field("filename_sort", pfPair.getFilename(), Field.Store.YES, Field.Index.NOT_ANALYZED));


    // Add the document's size
    int size = rawDocument.getLength();
    doc.add(new Field("size", Integer.toString(size), Field.Store.YES, Field.Index.NOT_ANALYZED));


    // Add the mime-type
    String mimeType = rawDocument.getMimeType();
    doc.add(new Field("mimetype", mimeType, Field.Store.YES, Field.Index.NOT_ANALYZED));
    
    // Add last modified
    Date lastModified = rawDocument.getLastModified();
    if (lastModified == null) {
      // We don't know when the document was last modified
      // -> Take the current time
      lastModified = new Date();
    }
    doc.add(new Field("last-modified", 
      DateTools.dateToString(lastModified, DateTools.Resolution.DAY), Field.Store.YES,
        Field.Index.NOT_ANALYZED));


    // Write the raw content to an analysis file
    writeContentAnalysisFile(rawDocument);
    
    // Add the additional fields
    if (additionalFieldMap != null) {
      Iterator iter = additionalFieldMap.keySet().iterator();
      while (iter.hasNext()) {
        String fieldName = (String) iter.next();
        String fieldValue = (String) additionalFieldMap.get(fieldName);
        //doc.add(new Field(fieldName, fieldValue, Field.Store.COMPRESS, Field.Index.ANALYZED));
        doc.add(new Field(fieldName, fieldValue, Field.Store.NO, Field.Index.ANALYZED));
        doc.add(new Field(fieldName, CompressionTools.compressString(fieldValue), Field.Store.YES));
      }
    }


    if (hasContent(cleanedContent)) {
      // Write the clean content to an analysis file
      writeAnalysisFile(url, "clean", cleanedContent);


      // Add the cleaned content of the document
      doc.add(new Field("content", cleanedContent, 
        this.storeContentForPreview ? Field.Store.YES : Field.Store.NO, Field.Index.ANALYZED));
    } else {
      // We have no content! This is a substitute document
      // -> Add a "preparation-error"-field
      doc.add(new Field("preparation-error", "true", Field.Store.YES,
          Field.Index.NO));
    }


    // Check whether to use the link text as title
    for (int i = 0; i < mUseLinkTextAsTitleReArr.length; i++) {
      if (mUseLinkTextAsTitleReArr[i].match(url)) {
        String linkText = rawDocument.getSourceLinkText();
        if (linkText != null) {
          title = linkText;
        }
        break;
      }
    }


    // Add the document's title
    if (hasContent(title)) {
      doc.add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED));
      doc.add(new Field("title_sort", title.toLowerCase(), Field.Store.YES, Field.Index.NOT_ANALYZED));
    } else {
      doc.add(new Field("title_sort", "", Field.Store.YES, Field.Index.NOT_ANALYZED));
    }


    // Add the document's summary
    if (! hasContent(summary) && hasContent(cleanedContent)) {
      summary = createSummaryFromContent(cleanedContent);
    }
    if (hasContent(summary)) {
      doc.add(new Field("summary", summary, Field.Store.NO, Field.Index.ANALYZED));
      doc.add(new Field("summary", CompressionTools.compressString(summary), Field.Store.YES));
    }


   // Add the document's metadata
    if (hasContent(metadata)) {
      doc.add(new Field("metadata", metadata, Field.Store.YES, Field.Index.ANALYZED));
    }


    // Add the document's headlines
    if (hasContent(headlines)) {
      doc.add(new Field("headlines", headlines, Field.Store.NO,
          Field.Index.ANALYZED));
    }


    // Add the document's path
    if (pfPair.getPath() != null) {
      //String asString = pathToString(path);
      doc.add(new Field("path", pfPair.getPath(), Field.Store.YES, Field.Index.NO));
      doc.add(new Field("path_sort", pfPair.getPath().toLowerCase(), Field.Store.YES, Field.Index.NOT_ANALYZED));


      // Write the path to an analysis file
      writeAnalysisFile(url, "path", pfPair.getPath());
    } else {
      doc.add(new Field("path_sort", "", Field.Store.YES, Field.Index.NOT_ANALYZED));
    }


    return doc;
  }

View Full Code Here

  public Document[] createDataTwo(){
    ArrayList<Document> dataList=new ArrayList<Document>();
      String color = "red";
      String ID = Integer.toString(10);
      Document d=new Document();
      d.add(new Field("id",ID,Field.Store.YES,Index.NOT_ANALYZED_NO_NORMS));
      d.add(new Field("color",color,Field.Store.YES,Index.NOT_ANALYZED_NO_NORMS));
      d.add(new NumericField("NUM").setIntValue(10));
      dataList.add(d);
      
       color = "green";
       ID = Integer.toString(11);
       d=new Document();
      d.add(new Field("id",ID,Field.Store.YES,Index.NOT_ANALYZED_NO_NORMS));
      d.add(new Field("color",color,Field.Store.YES,Index.NOT_ANALYZED_NO_NORMS));
      d.add(new NumericField("NUM").setIntValue(11));
      dataList.add(d);
      
    
    return dataList.toArray(new Document[dataList.size()]);

View Full Code Here

      for(int i=0; i<_documentSize; i++)
      {
        String color = (i%2 == 0) ? "red" : "green";
        String ID = Integer.toString(i);
        Document d=new Document();
        d.add(new Field("id",ID,Field.Store.YES,Index.NOT_ANALYZED_NO_NORMS));
        d.add(new Field("color",color,Field.Store.YES,Index.NOT_ANALYZED_NO_NORMS));
        dataList.add(d);
      }
      
      return dataList.toArray(new Document[dataList.size()]);
  }

View Full Code Here

      String order = request.getParameter("order");
      //System.out.println("order: " + order);
      if (!(order == null || order.length() == 0 || order.startsWith(SortingOption.RELEVANCE))) {
        String fieldName = order.substring(0, order.lastIndexOf("_"));
        //System.out.println("none standard order. fieldname: " + fieldName);
        Field field = hit.getField(fieldName);
        String fieldContent = null;
        if (field != null) {
          fieldContent = field.stringValue();
        }
        if (fieldContent == null) {
          fieldContent = "not set";
        }
        request.setContextAttribute(ATTR_CURRENT_HIT_SORT_CONTENT,

View Full Code Here

      else if(i%2 ==0) make="rav4";
      else make = "prius";
      
      String ID = Integer.toString(i);
      Document d=new Document();
      d.add(new Field("id",ID,Field.Store.YES,Index.NOT_ANALYZED_NO_NORMS));
      d.add(new Field("color",color,Field.Store.YES,Index.NOT_ANALYZED_NO_NORMS));
      d.add(new Field("make",make,Field.Store.YES,Index.NOT_ANALYZED_NO_NORMS));
      dataList.add(d);
    }
    return dataList.toArray(new Document[dataList.size()]);
}

View Full Code Here

  };
  
  private static void makeCar(Document car,Document doc)
  {


    car.add(new Field("color", doc.get("color"),Store.YES, Index.NOT_ANALYZED));


    car.add(new Field("category", doc.get("category"),Store.YES, Index.NOT_ANALYZED));
    car.add(new Field("city", doc.get("city"),Store.YES, Index.NOT_ANALYZED));
    car.add(new Field("makemodel", doc.get("makemodel"),Store.YES, Index.NOT_ANALYZED));
    car.add(new Field("price", doc.get("price"),Store.YES, Index.NOT_ANALYZED));
    car.add(new Field("year", doc.get("year"),Store.YES, Index.NOT_ANALYZED));
    car.add(new Field("mileage", doc.get("mileage"),Store.YES, Index.NOT_ANALYZED));
  }

View Full Code Here

    String year=rand.nextInt(10)+1993+"";
    
    String price=""+(rand.nextInt(174)+21)*100;
    String mileage=""+(rand.nextInt(80)+101)*100;
    
    car.add(new Field("color", color,Store.YES, Index.NOT_ANALYZED));
    car.add(new Field("category", category,Store.YES, Index.NOT_ANALYZED));
    car.add(new Field("city", city,Store.YES, Index.NOT_ANALYZED));
    car.add(new Field("makemodel", make,Store.YES, Index.NOT_ANALYZED));
    car.add(new Field("price", price,Store.YES, Index.NOT_ANALYZED));
    car.add(new Field("year", year,Store.YES, Index.NOT_ANALYZED));
    car.add(new Field("mileage", mileage,Store.YES, Index.NOT_ANALYZED));    
  }

View Full Code Here

      if (text != null) {
        // Overwrite the content with a shortened summary
        String resSummary = RegainToolkit.createSummaryFromContent(text, 200);
        document.removeField("summary");
        if (resSummary != null) {
          document.add(new Field("summary", resSummary, Field.Store.NO, Field.Index.NOT_ANALYZED));
          document.add(new Field("summary", CompressionTools.compressString(resSummary), Field.Store.YES));
          // write back the transformed document
          setHitDocument(index, document);
        }
      }
    } catch (DataFormatException dataFormatException) {

View Full Code Here

        // Overwrite the content with a shortened summary
        String resSummary = RegainToolkit.createSummaryFromContent(text, 200);
        document.removeField("summary");
        if (resSummary != null) {
          //System.out.println("resSummary " + resSummary);
          document.add(new Field("summary", resSummary, Field.Store.NO, Field.Index.NOT_ANALYZED));
          document.add(new Field("summary", CompressionTools.compressString(resSummary), Field.Store.YES));


        }


        String resHighlSummary = null;
        // Remove 'html', this works the same way as PageResponse.printNoHTML()
        text = RegainToolkit.replace(text, "<", "&lt;");
        text = RegainToolkit.replace(text, ">", "&gt;");


        TokenStream tokenStream = mAnalyzer.tokenStream("content",
                new StringReader(text));
        // Get 3 best fragments and separate with a " ... "
        resHighlSummary = highlighter.getBestFragments(tokenStream, text, 3, " ... ");


        if (resHighlSummary != null) {
          //System.out.println("Highlighted summary: " + resHighlSummary);
          // write the result back to the document in a new field
          document.add(new Field("highlightedSummary", resHighlSummary, Field.Store.NO, Field.Index.NOT_ANALYZED));
          document.add(new Field("highlightedSummary", CompressionTools.compressString(resHighlSummary), Field.Store.YES));
        }
      }
      // Highlight the title
      text = document.get("title");
      String resHighlTitle = null;
      if (text != null) {
        TokenStream tokenStream = mAnalyzer.tokenStream("content",
                new StringReader(text));
        // Get the best fragment
        resHighlTitle = highlighter.getBestFragment(tokenStream, text);
      }


      if (resHighlTitle != null) {
        // write the result back to the document in a new field
        //System.out.println("Highlighted title: " + resHighlTitle);
        document.add(new Field("highlightedTitle", resHighlTitle,
                Field.Store.YES, Field.Index.NOT_ANALYZED));


      }
      // write back the transformed document
      setHitDocument(index, document);

View Full Code Here

      for (int i=0;i<vals.length;++i){
        if (vals[i]!=null && vals[i].length()>0){
          if (format!=null){
            vals[i]=format.format(Double.parseDouble(vals[i]));
          }
          doc.add(new Field(name,vals[i],Field.Store.NO,Field.Index.NOT_ANALYZED,Field.TermVector.NO));
        }
      }  
    }
    
    /*String[] tags=tagsMaker.getTags(prop);

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.apache.lucene.document.Field

com.browseengine.bobo.test.BoboTestCase

com.gentics.cr.lucene.search.query.mocks.SimpleLucene

com.tamingtext.classifier.mlt.TrainMoreLikeThis

com.tamingtext.frankenstein.Frankenstein

it.eng.spagobi.commons.utilities.indexing.LuceneIndexer

org.apache.jackrabbit.core.query.lucene.NodeIndexer

org.apache.lucene.ant.HtmlDocument

org.apache.lucene.codecs.perfield.TestPerFieldPostingsFormat2

org.apache.lucene.document.TestDocument

org.apache.lucene.index.BaseDocValuesFormatTestCase

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.