Examples of org.apache.lucene.document.Document

org.apache.lucene.document.Document
Documents are the unit of indexing and search. A Document is a set of fields. Each field has a name and a textual value. A field may be {@link Fieldable#isStored() stored} with the document, in which case it is returned with search hits on the document. Thus each document should typically contain one or more stored fields which uniquely identify it.
Note that fields which are not {@link Fieldable#isStored() stored} are not available in documents retrieved from the index, e.g. with {@link ScoreDoc#doc}, {@link Searcher#doc(int)} or {@link IndexReader#document(int)}.

      else if(i==2) make="4runner";
      else if(i%2 ==0) make="rav4";
      else make = "prius";
      
      String ID = Integer.toString(i);
      Document d=new Document();
      d.add(new Field("id",ID,Field.Store.YES,Index.NOT_ANALYZED_NO_NORMS));
      d.add(new Field("color",color,Field.Store.YES,Index.NOT_ANALYZED_NO_NORMS));
      d.add(new Field("make",make,Field.Store.YES,Index.NOT_ANALYZED_NO_NORMS));
      dataList.add(d);
    }
    return dataList.toArray(new Document[dataList.size()]);
}

View Full Code Here

      
      for (int i=0;i<numcars;++i){
        if (i!=0 && i%1000==0){
          System.out.println(i+" cars indexed.");
        }
        Document doc=new Document();
        int n=rand.nextInt(10);
        if (n==0){
          makeCar(doc,cars[rand.nextInt(cars.length)]);          
        }
        else{
          Document srcDoc=docCache[rand.nextInt(carcount)];
          makeCar(doc,srcDoc);          
        }
        
        populateDocument(doc,null);                                
        handler.handleDocument(doc);

View Full Code Here

  public Document next() throws IOException{
    while(_reader.isDeleted(curr_docid) && curr_docid<maxdoc){
      curr_docid++;
    }
    if (curr_docid<maxdoc){
      Document doc=_reader.document(curr_docid);
      curr_docid++;
      return doc;
    }
    else{
      return null;

View Full Code Here

   */
  @Override
  public Document getHitDocument(int index) throws RegainException {


    try {
      Document currDoc = (Document) lazyHitList.get(index);
      // The document is empty, so it's created by the factory. Replace it with the real one
      // at this position
      if (currDoc.getFields().isEmpty()) {
        lazyHitList.set(index, mIndexSearcher.doc(hitScoreDocs[index].doc));
      }
    } catch (Exception ex) {
      throw new RegainException("Error while accessing index", ex);
    }

View Full Code Here

   */
  @Override
  public void shortenSummary(int index) throws RegainException {


    try {
      Document document = getHitDocument(index);
      byte[] compressedFieldValue = document.getBinaryValue("summary");
      String text = null;
      if (compressedFieldValue != null) {
        text = CompressionTools.decompressString(compressedFieldValue);
      }


      if (text != null) {
        // Overwrite the content with a shortend summary
        String resSummary = RegainToolkit.createSummaryFromContent(text, 200);
        document.removeField("summary");
        if (resSummary != null) {
          document.add(new Field("summary", resSummary, Field.Store.NO, Field.Index.NOT_ANALYZED));
          document.add(new Field("summary", CompressionTools.compressString(resSummary), Field.Store.YES));
          // write back the transformed document
          setHitDocument(index, document);
        }
      }
    } catch (DataFormatException dataFormatException) {

View Full Code Here

      // We transform this summary into
      // a) a summary matching the search terms (highlighting)
      // b) and a shortend summary (200 characters)
//      int docId = hitScoreDocs[index].doc;


      Document document = getHitDocument(index);
      byte[] compressedFieldValue = document.getBinaryValue("summary");
      String text = null;
      if (compressedFieldValue != null) {
        text = CompressionTools.decompressString(compressedFieldValue);
      }


      if (text != null) {
        // Overwrite the content with a shortend summary
        String resSummary = RegainToolkit.createSummaryFromContent(text, 200);
        document.removeField("summary");
        if (resSummary != null) {
          //System.out.println("resSummary " + resSummary);
          document.add(new Field("summary", resSummary, Field.Store.NO, Field.Index.NOT_ANALYZED));
          document.add(new Field("summary", CompressionTools.compressString(resSummary), Field.Store.YES));


        }


        String resHighlSummary = null;
        // Remove 'html', this works the same way as PageResponse.printNoHTML()
        text = RegainToolkit.replace(text, "<", "&lt;");
        text = RegainToolkit.replace(text, ">", "&gt;");


        TokenStream tokenStream = mAnalyzer.tokenStream("content",
                new StringReader(text));
        // Get 3 best fragments and seperate with a " ... "
        resHighlSummary = highlighter.getBestFragments(tokenStream, text, 3, " ... ");


        if (resHighlSummary != null) {
          //System.out.println("Highlighted summary: " + resHighlSummary);
          // write the result back to the document in a new field
          document.add(new Field("highlightedSummary", resHighlSummary, Field.Store.NO, Field.Index.NOT_ANALYZED));
          document.add(new Field("highlightedSummary", CompressionTools.compressString(resHighlSummary), Field.Store.YES));
        }
      }
      // Highlight the title
      text = document.get("title");
      String resHighlTitle = null;
      if (text != null) {
        TokenStream tokenStream = mAnalyzer.tokenStream("content",
                new StringReader(text));
        // Get the best fragment
        resHighlTitle = highlighter.getBestFragment(tokenStream, text);
      }


      if (resHighlTitle != null) {
        // write the result back to the document in a new field
        //System.out.println("Highlighted title: " + resHighlTitle);
        document.add(new Field("highlightedTitle", resHighlTitle,
                Field.Store.YES, Field.Index.NOT_ANALYZED));


      }
      // write back the transformed document
      setHitDocument(index, document);

View Full Code Here

    format.setMinimumFractionDigits(2);
    formatterHash.put("price",format);
  }
  private static Document makeDocument(Properties prop){


    Document doc=new Document();
    Enumeration nameIter=prop.propertyNames();
    while(nameIter.hasMoreElements()){
      String name=(String)nameIter.nextElement();
      NumberFormat format=formatterHash.get(name);
      String val=prop.getProperty(name);
      
      String[] vals=val.split(",");
      for (int i=0;i<vals.length;++i){
        if (vals[i]!=null && vals[i].length()>0){
          if (format!=null){
            vals[i]=format.format(Double.parseDouble(vals[i]));
          }
          doc.add(new Field(name,vals[i],Field.Store.NO,Field.Index.NOT_ANALYZED,Field.TermVector.NO));
        }
      }  
    }
    
    /*String[] tags=tagsMaker.getTags(prop);

View Full Code Here

        line=reader.readLine();
        if (line==null){
          break;
        }
        if ("<EOD>".equals(line)){    //new record
          Document doc=makeDocument(prop);
          populateDocument(doc,null);
          handler.handleDocument(doc);
          prop=new Properties();
        }
        else{

View Full Code Here

      Queue<Request> batchWork = new LinkedList<Request>();
      boolean cutOff = false;
      boolean isFromTake = true;
      Request req = inputQueue.take();    
      while (req != null && cutOff == false && batchWork.size() < MAX_WORK_SIZE) {        
        Document doc = req.doc;
        if(doc != null){
          if(POISON_DOC.equals(doc) == true){
            cutOff = true;            
          }else{
            String md5Url = doc.get(DocumentCreator.FIELD_URL_MD5);
            if(md5Url != null){
              if(seenMD5Url.contains(md5Url) == true){
                cutOff = true;
              }else{
                seenMD5Url.add(md5Url);

View Full Code Here

          Request aReq = workQueue.poll();
          if(aReq != null && aReq.type != RST){
            writer = createIndexWriter();
          }
          while (aReq != null) {
            Document doc = aReq.doc;
            if (doc != null && POISON_DOC.equals(doc)) {
              logger.debug("Terminate UpdateIndexWorker.");
              stopRunning = true;
            } else if (aReq.type == RST) {
              logger.debug("===================================> Do RESET.");

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.apache.lucene.document.Document

com.bericotech.clavin.gazetteer.query.LuceneGazetteer

com.browseengine.bobo.test.BasicIndexingTest

com.browseengine.bobo.test.BoboTestCase

com.browseengine.bobo.test.FacetNotValuesTest

com.gentics.cr.lucene.autocomplete.Autocompleter

com.gentics.cr.lucene.indexer.index.CRLuceneIndexJob

com.gentics.cr.lucene.indexer.index.LuceneIndexUpdateChecker

com.gentics.cr.lucene.search.CRSearcher

com.gentics.cr.lucene.search.query.mocks.SimpleLucene

com.tamingtext.classifier.mlt.TrainMoreLikeThis

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.