Package org.apache.lucene.document

Examples of org.apache.lucene.document.Fieldable
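Fieldable is the index-time field abstraction of Lucene 2.x/3.x; Field is its stock implementation, and the interface was replaced by IndexableField in Lucene 4.0. A minimal sketch of creating one and reading it back, assuming the Lucene 3.x API:

    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.Fieldable;

    Document doc = new Document();
    Fieldable f = new Field("title", "Lucene in Action", Field.Store.YES, Field.Index.ANALYZED);
    f.setBoost(2.0f); // per-field boost, folded into the field's norm at index time
    doc.add(f);

    Fieldable back = doc.getFieldable("title");
    System.out.println(back.name() + " = " + back.stringValue());

The snippets below, drawn from Lucene, Solr and Hibernate Search sources, exercise this interface from several angles.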



  private static void addDoc(RandomIndexWriter iw, int i) throws Exception {
    Document d = new Document();
    Fieldable f;
    int scoreAndID = i + 1;

    f = newField(ID_FIELD, id2String(scoreAndID), Field.Store.YES, Field.Index.NOT_ANALYZED); // for debug purposes
    f.setOmitNorms(true);
    d.add(f);

    f = newField(TEXT_FIELD, "text of doc" + scoreAndID + textLine(i), Field.Store.NO, Field.Index.ANALYZED); // for regular search
    f.setOmitNorms(true);
    d.add(f);

    f = newField(INT_FIELD, "" + scoreAndID, Field.Store.NO, Field.Index.NOT_ANALYZED); // for function scoring
    f.setOmitNorms(true);
    d.add(f);

    f = newField(FLOAT_FIELD, scoreAndID + ".000", Field.Store.NO, Field.Index.NOT_ANALYZED); // for function scoring
    f.setOmitNorms(true);
    d.add(f);

    iw.addDocument(d);
    log("added: " + d);
  }
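newField above appears to be the LuceneTestCase test helper, which builds a Field (the stock Fieldable implementation) with randomized extras for test coverage; outside the test framework, the equivalent plain construction would be along these lines:

    Fieldable id = new Field(ID_FIELD, id2String(scoreAndID), Field.Store.YES, Field.Index.NOT_ANALYZED);
    id.setOmitNorms(true); // a single-token key field gains nothing from length norms, so skip the per-doc norm byte
    d.add(id);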


    Collections.sort(ff2, fieldNameComparator);

    assertEquals(ff1 + " : " + ff2, ff1.size(), ff2.size());

    for (int i=0; i<ff1.size(); i++) {
      Fieldable f1 = ff1.get(i);
      Fieldable f2 = ff2.get(i);
      if (f1.isBinary()) {
        assertTrue(f2.isBinary()); // use assertTrue: a bare assert is silently skipped unless -ea is set
      } else {
        String s1 = f1.stringValue();
        String s2 = f2.stringValue();
        assertEquals(ff1 + " : " + ff2, s1, s2);
      }
    }
  }
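The fieldNameComparator used by the sort above is not part of the excerpt; a minimal version consistent with this usage might be:

    import java.util.Comparator;

    Comparator<Fieldable> fieldNameComparator = new Comparator<Fieldable>() {
      public int compare(Fieldable f1, Fieldable f2) {
        return f1.name().compareTo(f2.name()); // order fields by name so the two lists line up
      }
    };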

    doVectors = false;
    doVectorPositions = false;
    doVectorOffsets = false;

    for(int i=0;i<count;i++) {
      Fieldable field = fields[i];
      if (field.isIndexed() && field.isTermVectorStored()) {
        doVectors = true;
        doVectorPositions |= field.isStorePositionWithTermVector();
        doVectorOffsets |= field.isStoreOffsetWithTermVector();
      }
    }

    if (doVectors) {
      if (perThread.doc == null) {
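The term-vector flags read in the loop above are fixed when the field is constructed; for example, with the Lucene 3.x Field constructor (hypothetical field name and value):

    Fieldable f = new Field("body", "some text", Field.Store.NO, Field.Index.ANALYZED,
                            Field.TermVector.WITH_POSITIONS_OFFSETS);
    // f.isTermVectorStored()            == true
    // f.isStorePositionWithTermVector() == true
    // f.isStoreOffsetWithTermVector()   == true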

    Map<Fieldable, LinkedList<Token>> tokensByField = new LinkedHashMap<Fieldable, LinkedList<Token>>(20);

    // tokenize indexed fields.
    for (Iterator<Fieldable> it = (Iterator<Fieldable>) document.getDocument().getFields().iterator(); it.hasNext();) {

      Fieldable field = it.next();

      FieldSetting fieldSetting = fieldSettingsByFieldName.get(field.name());

      if (field.isIndexed()) {

        LinkedList<Token> tokens = new LinkedList<Token>();
        tokensByField.put(field, tokens);

        if (field.isTokenized()) {
          int termCounter = 0;
          final TokenStream tokenStream;
          // todo readerValue(), binaryValue()
          if (field.tokenStreamValue() != null) {
            tokenStream = field.tokenStreamValue();
          } else {
            tokenStream = analyzer.tokenStream(field.name(), new StringReader(field.stringValue()));
          }

          // reset the TokenStream to the first token         
          tokenStream.reset();

          while (tokenStream.incrementToken()) {
            // TODO: this is a simple workaround to keep working with Token instances; it is not
            // very efficient, but as far as I know this writer should get removed soon:
            final Token token = new Token();
            for (Iterator<AttributeImpl> atts = tokenStream.getAttributeImplsIterator(); atts.hasNext();) {
              final AttributeImpl att = atts.next();
              try {
                att.copyTo(token);
              } catch (Exception e) {
                // Ignore unsupported attributes: copyTo may fail if a combined AttributeImpl is
                // used that implements the basic attributes supported by Token together with
                // other custom ones in a single class.
              }
            }
            tokens.add(token); // the vector will be built on commit.
            fieldSetting.fieldLength++;
            if (fieldSetting.fieldLength > maxFieldLength) {
              break;
            }
          }
          tokenStream.end();
          tokenStream.close();
        } else {
          // untokenized
          String fieldVal = field.stringValue();
          Token token = new Token(0, fieldVal.length(), "untokenized");
          token.setTermBuffer(fieldVal);
          tokens.add(token);
          fieldSetting.fieldLength++;
        }
      }

      if (!field.isStored()) {
        it.remove();
      }
    }

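The Token-copy loop above predates attribute-based consumption; with a Lucene version that has CharTermAttribute (3.1+), reading the same stream would normally look like this (a sketch, not the writer's actual code):

    import java.io.IOException;
    import java.io.StringReader;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    void printTokens(Analyzer analyzer, String text) throws IOException {
      TokenStream ts = analyzer.tokenStream("body", new StringReader(text));
      CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
      ts.reset();
      while (ts.incrementToken()) {
        System.out.println(term.toString()); // current term text
      }
      ts.end();
      ts.close();
    }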

        FieldSelector sel = new SetBasedFieldSelector(new HashSet<String>(), lazyFields);
        doc = reader.document(reader.maxDoc() - 1, sel);
        Fieldable[] fieldables = doc.getFieldables("bin1");
        assertNotNull(fieldables);
        assertEquals(1, fieldables.length);
        Fieldable fb1 = fieldables[0];
        assertTrue(fb1.isBinary());
        assertEquals(bin.length, fb1.getBinaryLength());
        data1 = fb1.getBinaryValue();
        assertEquals(bin.length, fb1.getBinaryLength());
        for (int i = 0; i < bin.length; i++) {
          assertEquals(bin[i], data1[i + fb1.getBinaryOffset()]);
        }
        reader.close();
        // force optimize

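The lazyFields set handed to SetBasedFieldSelector is not shown in the excerpt; given that "bin1" is then loaded lazily, it was presumably built along these lines:

    Set<String> lazyFields = new HashSet<String>();
    lazyFields.add("bin1");
    // first argument: fields to load eagerly (none here); second: fields to load lazily
    FieldSelector sel = new SetBasedFieldSelector(new HashSet<String>(), lazyFields);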

    // Also absorb any changes to fields we had already
    // seen before (eg suddenly turning on norms or
    // vectors, etc.):

    for(int i=0;i<numDocFields;i++) {
      Fieldable field = docFields.get(i);
      final String fieldName = field.name();

      // Make sure we have a PerField allocated
      final int hashPos = fieldName.hashCode() & hashMask;
      DocFieldProcessorPerField fp = fieldHash[hashPos];
      while(fp != null && !fp.fieldInfo.name.equals(fieldName))
        fp = fp.next;

      if (fp == null) {

        // TODO FI: we need to genericize the "flags" that a
        // field holds, and, how these flags are merged; it
        // needs to be more "pluggable" such that if I want
        // to have a new "thing" my Fields can do, I can
        // easily add it
        FieldInfo fi = fieldInfos.add(fieldName, field.isIndexed(), field.isTermVectorStored(),
                                      field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
                                      field.getOmitNorms(), false, field.getOmitTermFreqAndPositions());

        fp = new DocFieldProcessorPerField(this, fi);
        fp.next = fieldHash[hashPos];
        fieldHash[hashPos] = fp;
        totalFieldCount++;

        if (totalFieldCount >= fieldHash.length/2)
          rehash();
      } else
        fp.fieldInfo.update(field.isIndexed(), field.isTermVectorStored(),
                            field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
                            field.getOmitNorms(), false, field.getOmitTermFreqAndPositions());

      if (thisFieldGen != fp.lastGen) {

        // First time we're seeing this field for this doc
        fp.fieldCount = 0;

        if (fieldCount == fields.length) {
          final int newSize = fields.length*2;
          DocFieldProcessorPerField newArray[] = new DocFieldProcessorPerField[newSize];
          System.arraycopy(fields, 0, newArray, 0, fieldCount);
          fields = newArray;
        }

        fields[fieldCount++] = fp;
        fp.lastGen = thisFieldGen;
      }

      if (fp.fieldCount == fp.fields.length) {
        Fieldable[] newArray = new Fieldable[fp.fields.length*2];
        System.arraycopy(fp.fields, 0, newArray, 0, fp.fieldCount);
        fp.fields = newArray;
      }

      fp.fields[fp.fieldCount++] = field;
      if (field.isStored()) {
        fieldsWriter.addField(field, fp.fieldInfo);
      }
    }

    // If we are writing vectors then we must visit
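rehash() is not part of the excerpt; from the hashCode() & hashMask arithmetic above it evidently doubles the power-of-two table and re-chains the entries, roughly:

    private void rehash() {
      final int newHashSize = fieldHash.length * 2;             // keep the size a power of two
      DocFieldProcessorPerField[] newHash = new DocFieldProcessorPerField[newHashSize];
      final int newHashMask = newHashSize - 1;
      for (DocFieldProcessorPerField fp : fieldHash) {
        while (fp != null) {
          DocFieldProcessorPerField next = fp.next;             // unlink before re-chaining
          int pos = fp.fieldInfo.name.hashCode() & newHashMask;
          fp.next = newHash[pos];
          newHash[pos] = fp;
          fp = next;
        }
      }
      fieldHash = newHash;
      hashMask = newHashMask;
    }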

      for(int i=0; i<reader.maxDoc(); i++) {
        if (!reader.isDeleted(i)) {
          Document d = reader.document(i);
          if (d.get("content3") != null) continue;
          count++;
          Fieldable compressed = d.getFieldable("compressed");
          if (Integer.parseInt(d.get("id")) % 2 == 0) {
            assertFalse(compressed.isBinary());
            assertEquals("incorrectly decompressed string", TEXT_TO_COMPRESS, compressed.stringValue());
          } else {
            assertTrue(compressed.isBinary());
            assertTrue("incorrectly decompressed binary", Arrays.equals(BINARY_TO_COMPRESS, compressed.getBinaryValue()));
          }
        }
      }
     
      // check if field was decompressed after optimize
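The index under test was presumably written with the pre-3.0 compression support, where Field.Store.COMPRESS compressed the stored value transparently; a sketch of what the writing side might have looked like (Lucene 2.x API):

    // even ids: compressed string; odd ids: compressed binary (matching the assertions above)
    doc.add(new Field("compressed", TEXT_TO_COMPRESS, Field.Store.COMPRESS, Field.Index.NOT_ANALYZED));
    doc.add(new Field("compressed", BINARY_TO_COMPRESS, Field.Store.COMPRESS));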

      value = ReflectionHelper.getMemberValue( unproxiedInstance, propertiesMetadata.discriminatorGetter );
    }

    // now we give the discriminator the opportunity to specify an analyzer at the field level
    for ( Object o : doc.getFields() ) {
      Fieldable field = (Fieldable) o;
      if ( !processedFieldNames.contains( field.name() ) ) {
        String analyzerName = discriminator.getAnalyzerDefinitionName( value, unproxiedInstance, field.name() );
        if ( analyzerName != null ) {
          fieldToAnalyzerMap.put( field.name(), analyzerName );
        }
        processedFieldNames.add( field.name() );
      }
    }
  }
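discriminator above is a Hibernate Search Discriminator; an implementation compatible with the call would be shaped like this (a hypothetical language-based example):

    public class LanguageDiscriminator implements Discriminator {
      public String getAnalyzerDefinitionName(Object value, Object entity, String field) {
        // value is whatever the configured discriminator getter returned, e.g. a language code
        return value == null ? null : "text_" + value; // name of an analyzer definition to apply
      }
    }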

  /**
   * Given a {@link SchemaField} and an external value, create one or more
   * {@link Fieldable} instances to be indexed.
   *
   * @see #createField(SchemaField, String, float)
   * @see #isPolyField()
   */
  public Fieldable[] createFields(SchemaField field, String externalVal, float boost) {
    Fieldable f = createField( field, externalVal, boost);
    return f==null ? new Fieldable[]{} : new Fieldable[]{f};
  }
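A poly field (one whose isPolyField() returns true) overrides createFields to expand a single external value into several Fieldables; a sketch of such an override, where subField is a hypothetical helper producing the i-th sub-field's SchemaField:

    @Override
    public Fieldable[] createFields(SchemaField field, String externalVal, float boost) {
      String[] parts = externalVal.split(",");
      Fieldable[] out = new Fieldable[parts.length];
      for (int i = 0; i < parts.length; i++) {
        out[i] = createField(subField(field, i), parts[i], boost); // one sub-field per component
      }
      return out;
    }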

    // using global tlst here, so we shouldn't call any other
    // function that uses it until we are done.
    tlst.clear();
    for (Object obj : doc.getFields()) {
      Fieldable ff = (Fieldable)obj;
      // skip this field if it is not a field to be returned.
      if (returnFields!=null && !returnFields.contains(ff.name())) {
        continue;
      }
      tlst.add(ff);
    }
    Collections.sort(tlst, fieldnameComparator);

    int sz = tlst.size();
    int fidx1 = 0, fidx2 = 0;
    while (fidx1 < sz) {
      Fieldable f1 = (Fieldable)tlst.get(fidx1);
      String fname = f1.name();

      // find the end of fields with this name
      fidx2 = fidx1+1;
      while (fidx2 < sz && fname.equals(((Fieldable)tlst.get(fidx2)).name()) ) {
        fidx2++;
      }

      /***
      // more efficient to use getFieldType instead of
      // getField since that way dynamic fields won't have
      // to create a SchemaField on the fly.
      FieldType ft = schema.getFieldType(fname);
      ***/

      SchemaField sf = schema.getFieldOrNull(fname);
      if( sf == null ) {
        sf = new SchemaField( fname, new TextField() );
      }
      if (fidx1+1 == fidx2) {
        // single field value
        if (version>=2100 && sf.multiValued()) {
          startTag("arr",fname,false);
          doIndent=false;
          sf.write(this, null, f1);
          writer.write("</arr>");
          doIndent=defaultIndent;
        } else {
          sf.write(this, f1.name(), f1);
        }
      } else {
        // multiple fields with same name detected

        startTag("arr",fname,false);
