Package org.apache.lucene.document

Examples of org.apache.lucene.document.Field


            }
            boolean store = auxiliaryFieldArr[i].isStored();
            boolean index = auxiliaryFieldArr[i].isIndexed();
            boolean token = auxiliaryFieldArr[i].isTokenized();

            doc.add(new Field(fieldName, value,
                store ? Field.Store.YES : Field.Store.NO,
                index ? (token ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED) : Field.Index.NO));
          }
        }
      }
    }
   
    // Add the groups of the document
    if (mCrawlerAccessController != null) {
      String[] groupArr = mCrawlerAccessController.getDocumentGroups(rawDocument);
     
      // Check the Group array
      RegainToolkit.checkGroupArray(mCrawlerAccessController, groupArr);

      // Add the field
      // NOTE: The field "groups" is tokenized, but not stemmed.
      //       See: RegainToolkit.WrapperAnalyzer
      Iterator groupIter = Arrays.asList(groupArr).iterator();
      StringBuilder tokenBuilder = new StringBuilder();
      while (groupIter.hasNext()) {
        tokenBuilder.append((String) groupIter.next());
        tokenBuilder.append(" ");
      }
   
      //doc.add(new Field("groups", new IteratorTokenStream(groupIter)));
      doc.add(new Field("groups", new WhitespaceTokenizer(new StringReader(tokenBuilder.toString()))));
    }

    // Add the URL of the document
    doc.add(new Field("url", url, Field.Store.YES, Field.Index.NOT_ANALYZED));
   
    // Add the file name (without protocol, drive-letter and path)
    String filenameWithVariants = RegainToolkit.urlToWhitespacedFileName(url);
    doc.add(new Field("filename", new WhitespaceTokenizer(new StringReader(filenameWithVariants))));
    PathFilenamePair pfPair = RegainToolkit.fragmentUrl(url);

    // Add the filename field for sorting
    doc.add(new Field("filename_sort", pfPair.getFilename(), Field.Store.YES, Field.Index.NOT_ANALYZED));

    // Add the document's size
    int size = rawDocument.getLength();
    doc.add(new Field("size", Integer.toString(size), Field.Store.YES, Field.Index.NOT_ANALYZED));

    // Add the mime-type
    String mimeType = rawDocument.getMimeType();
    doc.add(new Field("mimetype", mimeType, Field.Store.YES, Field.Index.NOT_ANALYZED));
   
    // Add last modified
    Date lastModified = rawDocument.getLastModified();
    if (lastModified == null) {
      // We don't know when the document was last modified
      // -> Take the current time
      lastModified = new Date();
    }
    doc.add(new Field("last-modified",
      DateTools.dateToString(lastModified, DateTools.Resolution.DAY), Field.Store.YES,
        Field.Index.NOT_ANALYZED));

    // Write the raw content to an analysis file
    writeContentAnalysisFile(rawDocument);
   
    // Add the additional fields
    if (additionalFieldMap != null) {
      Iterator iter = additionalFieldMap.keySet().iterator();
      while (iter.hasNext()) {
        String fieldName = (String) iter.next();
        String fieldValue = (String) additionalFieldMap.get(fieldName);
        //doc.add(new Field(fieldName, fieldValue, Field.Store.COMPRESS, Field.Index.ANALYZED));
        doc.add(new Field(fieldName, fieldValue, Field.Store.NO, Field.Index.ANALYZED));
        doc.add(new Field(fieldName, CompressionTools.compressString(fieldValue), Field.Store.YES));
      }
    }

    if (hasContent(cleanedContent)) {
      // Write the clean content to an analysis file
      writeAnalysisFile(url, "clean", cleanedContent);

      // Add the cleaned content of the document
      doc.add(new Field("content", cleanedContent,
        this.storeContentForPreview ? Field.Store.YES : Field.Store.NO, Field.Index.ANALYZED));
    } else {
      // We have no content! This is a substitute document
      // -> Add a "preparation-error"-field
      doc.add(new Field("preparation-error", "true", Field.Store.YES,
          Field.Index.NO));
    }

    // Check whether to use the link text as title
    for (int i = 0; i < mUseLinkTextAsTitleReArr.length; i++) {
      if (mUseLinkTextAsTitleReArr[i].match(url)) {
        String linkText = rawDocument.getSourceLinkText();
        if (linkText != null) {
          title = linkText;
        }
        break;
      }
    }

    // Add the document's title
    if (hasContent(title)) {
      doc.add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED));
      doc.add(new Field("title_sort", title.toLowerCase(), Field.Store.YES, Field.Index.NOT_ANALYZED));
    } else {
      doc.add(new Field("title_sort", "", Field.Store.YES, Field.Index.NOT_ANALYZED));
    }

    // Add the document's summary
    if (! hasContent(summary) && hasContent(cleanedContent)) {
      summary = createSummaryFromContent(cleanedContent);
    }
    if (hasContent(summary)) {
      doc.add(new Field("summary", summary, Field.Store.NO, Field.Index.ANALYZED));
      doc.add(new Field("summary", CompressionTools.compressString(summary), Field.Store.YES));
    }

   // Add the document's metadata
    if (hasContent(metadata)) {
      doc.add(new Field("metadata", metadata, Field.Store.YES, Field.Index.ANALYZED));
    }

    // Add the document's headlines
    if (hasContent(headlines)) {
      doc.add(new Field("headlines", headlines, Field.Store.NO,
          Field.Index.ANALYZED));
    }

    // Add the document's path
    if (pfPair.getPath() != null) {
      //String asString = pathToString(path);
      doc.add(new Field("path", pfPair.getPath(), Field.Store.YES, Field.Index.NO));
      doc.add(new Field("path_sort", pfPair.getPath().toLowerCase(), Field.Store.YES, Field.Index.NOT_ANALYZED));

      // Write the path to an analysis file
      writeAnalysisFile(url, "path", pfPair.getPath());
    } else {
      doc.add(new Field("path_sort", "", Field.Store.YES, Field.Index.NOT_ANALYZED));
    }

    return doc;
  }
View Full Code Here


  public Document[] createDataTwo(){
    ArrayList<Document> dataList=new ArrayList<Document>();
      String color = "red";
      String ID = Integer.toString(10);
      Document d=new Document();
      d.add(new Field("id",ID,Field.Store.YES,Index.NOT_ANALYZED_NO_NORMS));
      d.add(new Field("color",color,Field.Store.YES,Index.NOT_ANALYZED_NO_NORMS));
      d.add(new NumericField("NUM").setIntValue(10));
      dataList.add(d);
     
       color = "green";
       ID = Integer.toString(11);
       d=new Document();
      d.add(new Field("id",ID,Field.Store.YES,Index.NOT_ANALYZED_NO_NORMS));
      d.add(new Field("color",color,Field.Store.YES,Index.NOT_ANALYZED_NO_NORMS));
      d.add(new NumericField("NUM").setIntValue(11));
      dataList.add(d);
     
   
    return dataList.toArray(new Document[dataList.size()]);
View Full Code Here

      for(int i=0; i<_documentSize; i++)
      {
        String color = (i%2 == 0) ? "red" : "green";
        String ID = Integer.toString(i);
        Document d=new Document();
        d.add(new Field("id",ID,Field.Store.YES,Index.NOT_ANALYZED_NO_NORMS));
        d.add(new Field("color",color,Field.Store.YES,Index.NOT_ANALYZED_NO_NORMS));
        dataList.add(d);
      }
     
      return dataList.toArray(new Document[dataList.size()]);
  }
View Full Code Here

      String order = request.getParameter("order");
      //System.out.println("order: " + order);
      if (!(order == null || order.length() == 0 || order.startsWith(SortingOption.RELEVANCE))) {
        String fieldName = order.substring(0, order.lastIndexOf("_"));
        //System.out.println("none standard order. fieldname: " + fieldName);
        Field field = hit.getField(fieldName);
        String fieldContent = null;
        if (field != null) {
          fieldContent = field.stringValue();
        }
        if (fieldContent == null) {
          fieldContent = "not set";
        }
        request.setContextAttribute(ATTR_CURRENT_HIT_SORT_CONTENT,
View Full Code Here

      else if(i%2 ==0) make="rav4";
      else make = "prius";
     
      String ID = Integer.toString(i);
      Document d=new Document();
      d.add(new Field("id",ID,Field.Store.YES,Index.NOT_ANALYZED_NO_NORMS));
      d.add(new Field("color",color,Field.Store.YES,Index.NOT_ANALYZED_NO_NORMS));
      d.add(new Field("make",make,Field.Store.YES,Index.NOT_ANALYZED_NO_NORMS));
      dataList.add(d);
    }
    return dataList.toArray(new Document[dataList.size()]);
}
View Full Code Here

  };
 
  private static void makeCar(Document car,Document doc)
  {

    car.add(new Field("color", doc.get("color"),Store.YES, Index.NOT_ANALYZED));

    car.add(new Field("category", doc.get("category"),Store.YES, Index.NOT_ANALYZED));
    car.add(new Field("city", doc.get("city"),Store.YES, Index.NOT_ANALYZED));
    car.add(new Field("makemodel", doc.get("makemodel"),Store.YES, Index.NOT_ANALYZED));
    car.add(new Field("price", doc.get("price"),Store.YES, Index.NOT_ANALYZED));
    car.add(new Field("year", doc.get("year"),Store.YES, Index.NOT_ANALYZED));
    car.add(new Field("mileage", doc.get("mileage"),Store.YES, Index.NOT_ANALYZED));
  }
View Full Code Here

    String year=rand.nextInt(10)+1993+"";
   
    String price=""+(rand.nextInt(174)+21)*100;
    String mileage=""+(rand.nextInt(80)+101)*100;
   
    car.add(new Field("color", color,Store.YES, Index.NOT_ANALYZED));
    car.add(new Field("category", category,Store.YES, Index.NOT_ANALYZED));
    car.add(new Field("city", city,Store.YES, Index.NOT_ANALYZED));
    car.add(new Field("makemodel", make,Store.YES, Index.NOT_ANALYZED));
    car.add(new Field("price", price,Store.YES, Index.NOT_ANALYZED));
    car.add(new Field("year", year,Store.YES, Index.NOT_ANALYZED));
    car.add(new Field("mileage", mileage,Store.YES, Index.NOT_ANALYZED));   
  }
View Full Code Here

      if (text != null) {
        // Overwrite the content with a shortened summary
        String resSummary = RegainToolkit.createSummaryFromContent(text, 200);
        document.removeField("summary");
        if (resSummary != null) {
          document.add(new Field("summary", resSummary, Field.Store.NO, Field.Index.NOT_ANALYZED));
          document.add(new Field("summary", CompressionTools.compressString(resSummary), Field.Store.YES));
          // write back the transformed document
          setHitDocument(index, document);
        }
      }
    } catch (DataFormatException dataFormatException) {
View Full Code Here

        // Overwrite the content with a shortened summary
        String resSummary = RegainToolkit.createSummaryFromContent(text, 200);
        document.removeField("summary");
        if (resSummary != null) {
          //System.out.println("resSummary " + resSummary);
          document.add(new Field("summary", resSummary, Field.Store.NO, Field.Index.NOT_ANALYZED));
          document.add(new Field("summary", CompressionTools.compressString(resSummary), Field.Store.YES));

        }

        String resHighlSummary = null;
        // Remove 'html', this works the same way as PageResponse.printNoHTML()
        text = RegainToolkit.replace(text, "<", "&lt;");
        text = RegainToolkit.replace(text, ">", "&gt;");

        TokenStream tokenStream = mAnalyzer.tokenStream("content",
                new StringReader(text));
        // Get the 3 best fragments and separate them with a " ... "
        resHighlSummary = highlighter.getBestFragments(tokenStream, text, 3, " ... ");

        if (resHighlSummary != null) {
          //System.out.println("Highlighted summary: " + resHighlSummary);
          // write the result back to the document in a new field
          document.add(new Field("highlightedSummary", resHighlSummary, Field.Store.NO, Field.Index.NOT_ANALYZED));
          document.add(new Field("highlightedSummary", CompressionTools.compressString(resHighlSummary), Field.Store.YES));
        }
      }
      // Highlight the title
      text = document.get("title");
      String resHighlTitle = null;
      if (text != null) {
        TokenStream tokenStream = mAnalyzer.tokenStream("content",
                new StringReader(text));
        // Get the best fragment
        resHighlTitle = highlighter.getBestFragment(tokenStream, text);
      }

      if (resHighlTitle != null) {
        // write the result back to the document in a new field
        //System.out.println("Highlighted title: " + resHighlTitle);
        document.add(new Field("highlightedTitle", resHighlTitle,
                Field.Store.YES, Field.Index.NOT_ANALYZED));

      }
      // write back the transformed document
      setHitDocument(index, document);
View Full Code Here

      for (int i=0;i<vals.length;++i){
        if (vals[i]!=null && vals[i].length()>0){
          if (format!=null){
            vals[i]=format.format(Double.parseDouble(vals[i]));
          }
          doc.add(new Field(name,vals[i],Field.Store.NO,Field.Index.NOT_ANALYZED,Field.TermVector.NO));
        }
      } 
    }
   
    /*String[] tags=tagsMaker.getTags(prop);
View Full Code Here

TOP

Related Classes of org.apache.lucene.document.Field

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.