Package: org.apache.solr.schema

Examples of org.apache.solr.schema.IndexSchema


            throw new IOException("Default FST directory exists and "
                    + "is a File. Use #setFstDirectory() to set different one");
        } else if(!fstDirectory.exists()){
            FileUtils.forceMkdir(fstDirectory);
        }
        IndexSchema schema = index.getLatestSchema();
        boolean foundCorpus = false;
        //(0) get basic parameters of the default configuration
        log.debug(" - default config");
        Map<String,String> defaultParams = fstConfig.getDefaultParameters();
        String fstName = defaultParams.get(IndexConfiguration.PARAM_FST);
        String indexField = defaultParams.get(IndexConfiguration.PARAM_FIELD);
        String storeField = defaultParams.get(IndexConfiguration.PARAM_STORE_FIELD);
        if(storeField == null){
            //apply indexField as default if indexField is NOT NULL
            storeField = indexField;
        }
        if(indexField == null){ //apply the defaults if null
            indexField = IndexConfiguration.DEFAULT_FIELD;
        }
        if(fstName == null){ //use default
            fstName = getDefaultFstFileName(indexField);
        }
        final boolean allowCreation;
        String allowCreationString = defaultParams.get(IndexConfiguration.PARAM_RUNTIME_GENERATION);
        if(allowCreationString == null){
            allowCreation = IndexConfiguration.DEFAULT_RUNTIME_GENERATION;
        } else {
            allowCreation = Boolean.parseBoolean(allowCreationString);
        }
        //These are all fields actually present in the index (as distinguished
        //from those defined in the schema). This also includes actual instances
        //of dynamic field definitions in the schema.
        FieldInfos fieldInfos = indexReader.getFieldInfos(); //we need this twice
       
        //(1) in case the fstConfig uses a wildcard we need to search for
        //    languages present in the SolrIndex. For that we use the indexReader
        //    to get the FieldInfos and match them against FST files in the FST
        //    directory and FieldType definitions in the schema of the SolrCore
        //NOTE: this only needs to be done if wildcards are enabled in the fstConfig
        if(fstConfig.useWildcard()){
            //(1.a) search for present FST files in the FST directory
            Map<String,File> presentFstFiles = new HashMap<String,File>();
            WildcardFileFilter fstFilter = new WildcardFileFilter(
                fstName+".*.fst");
            @SuppressWarnings("unchecked")
            Iterator<File> fstFiles = FileUtils.iterateFiles(fstDirectory, fstFilter, null);
            while(fstFiles.hasNext()){
                File fstFile = fstFiles.next();
                String fstFileName = fstFile.getName();
                //files are named such as "{name}.{lang}.fst"
                String language = FilenameUtils.getExtension(
                    FilenameUtils.getBaseName(fstFileName));
                presentFstFiles.put(language, fstFile);
            }
            //(1.b) iterate over the fields in the Solr index and search for
            //      matches against the configured indexField name
            String fieldWildcard = FieldEncodingEnum.encodeLanguage(indexField,
                fieldEncoding, "*");
            for(FieldInfo fieldInfo : fieldInfos){
                //try to match the field names against the wildcard
                if(FilenameUtils.wildcardMatch(fieldInfo.name, fieldWildcard)){
                    //for matches parse the language from the field name
                    String language = FieldEncodingEnum.parseLanguage(
                        fieldInfo.name, fieldEncoding, indexField);
                    if(language != null && //successfully parsed language
                            //is current language is enabled?
                            fstConfig.isLanguage(language) &&
                            //is there no explicit configuration for this language?
                            !fstConfig.getExplicitlyIncluded().contains(language)){
                        //generate the FST file name
                        StringBuilder fstFileName = new StringBuilder(fstName);
                        if(!language.isEmpty()){
                            fstFileName.append('.').append(language);
                        }
                        fstFileName.append(".fst");
                        File fstFile = new File(fstDirectory,fstFileName.toString());
                        //get the FieldType of the field from the Solr schema
                        FieldType fieldType = schema.getFieldTypeNoEx(fieldInfo.name);
                        if(fieldType != null){ //if the fieldType is present
                            if(allowCreation || fstFile.isFile()){ //and FST is present or can be created
                                //we need also to check if the stored field with
                                //the labels is present
                                //get the stored Field and check if it is present!
                                String storeFieldName;
                                if(storeField == null){ //storeField == indexField
                                    storeFieldName = fieldInfo.name;
                                } else { // check that the storeField is present in the index
                                    storeFieldName = FieldEncodingEnum.encodeLanguage(
                                        storeField, fieldEncoding, language);
                                    FieldInfo storedFieldInfos = fieldInfos.fieldInfo(storeFieldName);
                                    if(storedFieldInfos == null){
                                        log.warn(" ... ignore language {} because Stored Field {} "
                                                + "for IndexField {} does not exist! ", new Object[]{
                                                language,storeFieldName,fieldInfo.name});
                                        storeFieldName = null;
                                    }
                                   
                                }
                                if(storeFieldName != null){ // == valid configuration
                                    CorpusInfo fstInfo = new CorpusInfo(language,
                                        fieldInfo.name, storeFieldName, 
                                        fieldType, fstFile, allowCreation);
                                    log.debug(" ... init {} ", fstInfo);
                                    addCorpus(fstInfo);
                                    foundCorpus = true;
                                }
                            } else {
                                log.warn(" ... ignore language {} (field: {}) because "
                                    + "FST file '{}' does not exist and runtime creation "
                                    + "is deactivated!",new Object[]{ language,
                                            fieldInfo.name, fstFile.getAbsolutePath()});
                            }
                        } else {
                            log.warn(" ... ignore language {} becuase unknown fieldtype "
                                + "for SolrFied {}",language,fieldInfo.name);
                        }
                    } //else the field matched the wildcard, but has not passed the
                    //encoding test.
                } //Solr field does not match the field definition in the config
            } // end iterate over all fields in the SolrIndex
        } //else Wildcard not enabled in the fstConfig
       
        //(2) process explicit configuration for configured languages
        for(String language : fstConfig.getExplicitlyIncluded()){
            //(2.a) get the language specific config (with fallback to default)
            Map<String,String> config = fstConfig.getLanguageParams(language);
            String langIndexField = config.get(IndexConfiguration.PARAM_FIELD);
            String langStoreField = config.get(IndexConfiguration.PARAM_STORE_FIELD);
            String langFstFileName = config.get(IndexConfiguration.PARAM_FST);
            final boolean langAllowCreation;
            final String langAllowCreationString = config.get(IndexConfiguration.PARAM_RUNTIME_GENERATION);
            if(langIndexField != null){
                //also consider explicit field names as default for the fst name
                if(langFstFileName == null){
                    StringBuilder fileName = new StringBuilder(
                        getDefaultFstFileName(langIndexField));
                    if(!language.isEmpty()){
                        fileName.append('.').append(language);
                    }
                    fileName.append(".fst");
                    langFstFileName = fileName.toString();
                }
            } else {
                langIndexField = indexField;
            }
            if(langStoreField == null){ //fallbacks
                if(storeField != null){ //first to default store field
                    langStoreField = storeField;
                } else { //else to the lang index field
                    langStoreField = langIndexField;
                }
            }
            if(langFstFileName == null){ //no fstFileName config
                // ... use the default
                langFstFileName = new StringBuilder(fstName).append('.')
                        .append(language).append(".fst").toString();
            }
            if(langAllowCreationString != null){
                langAllowCreation = Boolean.parseBoolean(langAllowCreationString);
            } else {
                langAllowCreation = allowCreation;
            }
            //(2.b) check if the Solr field is present
            String encodedLangIndexField = FieldEncodingEnum.encodeLanguage(
                langIndexField, fieldEncoding, language);
            String encodedLangStoreField = FieldEncodingEnum.encodeLanguage(
                langStoreField, fieldEncoding, language);
            FieldInfo langIndexFieldInfo = fieldInfos.fieldInfo(encodedLangIndexField);
            if(langIndexFieldInfo != null){
                FieldInfo langStoreFieldInfo = fieldInfos.fieldInfo(encodedLangStoreField);
                if(langStoreFieldInfo != null){
                    FieldType fieldType = schema.getFieldTypeNoEx(langIndexFieldInfo.name);
                    if(fieldType != null){
                        //(2.c) check the FST file
                        File langFstFile = new File(fstDirectory,langFstFileName);
                        if(langFstFile.isFile() || langAllowCreation){
                            CorpusInfo langFstInfo = new CorpusInfo(language,
View Full Code Here


        }
        //build the FST models
        if(fstConfigs != null){
            //(1) FST config initialisation
            log.info(" ... init FST configuration(s)");
            IndexSchema schema = core.getLatestSchema();
            File fstDir = new File(new File(core.getDataDir()),"fst");
            if(!fstDir.isDirectory()){
                try {
                    FileUtils.forceMkdir(fstDir);
                } catch (IOException e) {
View Full Code Here

                    config = new SolrConfig(loader, dcore.getConfigName(), null);
                } catch (Exception e) {
                    log.error("Failed to load file {}", new File(instanceDir, dcore.getConfigName()).getAbsolutePath());
                    throw new SolrException(ErrorCode.SERVER_ERROR, "Could not load config for " + dcore.getConfigName(), e);
                }
                IndexSchema schema = null;
                //indexSchemaCache is now protected (Solr 4.4)
                if (indexSchemaCache != null) {
                  final String resourceNameToBeUsed = IndexSchemaFactory.getResourceNameToBeUsed(dcore.getSchemaName(), config);
                  File schemaFile = new File(resourceNameToBeUsed);
                  if (!schemaFile.isAbsolute()) {
View Full Code Here

    try {
      config = new SolrConfig(instanceDir, SolrConfig.DEFAULT_CONF_FILE, null);
      instanceDir = config.getResourceLoader().getInstanceDir();

      // If the Data directory is specified, initialize SolrCore directly
      IndexSchema schema = new IndexSchema(config, instanceDir+"/conf/schema.xml", null);
      core = new SolrCore( null, dataDir, config, schema, null );
      parser = new SolrRequestParsers( config );
    }
    catch (Exception ee) {
      throw new RuntimeException(ee);
View Full Code Here

      dataDir = SolrResourceLoader.normalizeDir(dataDir);

      log.info(logid+"Opening new SolrCore at " + loader.getInstanceDir() + ", dataDir="+dataDir);

      if (schema==null) {
        schema = new IndexSchema(config, IndexSchema.DEFAULT_SCHEMA_FILE, null);
      }
     
      //Initialize JMX
      if (config.jmxConfig.enabled) {
        infoRegistry = new JmxMonitoredMap<String, SolrInfoMBean>(name, config.jmxConfig);
View Full Code Here

    String instanceDir = idir.getPath();
   
    // Initialize the solr config
    SolrResourceLoader solrLoader = new SolrResourceLoader(instanceDir, libLoader, dcore.getCoreProperties());
    SolrConfig config = new SolrConfig(solrLoader, dcore.getConfigName(), null);
    IndexSchema schema = new IndexSchema(config, dcore.getSchemaName(), null);
    SolrCore core = new SolrCore(dcore.getName(), null, config, schema, dcore);
    return core;
  }
View Full Code Here

    */

    // Minimum term docFreq in order to use the filterCache for that term.
    int minDfFilterCache = params.getFieldInt(field, FacetParams.FACET_ENUM_CACHE_MINDF, 0);

    IndexSchema schema = searcher.getSchema();
    IndexReader r = searcher.getReader();
    FieldType ft = schema.getFieldType(field);

    final int maxsize = limit>=0 ? offset+limit : Integer.MAX_VALUE-1;   
    final BoundedTreeSet<CountPair<String,Integer>> queue = sort ? new BoundedTreeSet<CountPair<String,Integer>>(maxsize) : null;
    final NamedList res = new NamedList();

View Full Code Here

    final String[] fields = params.getParams(FacetParams.FACET_DATE);
    final Date NOW = new Date();
   
    if (null == fields || 0 == fields.length) return resOuter;
   
    final IndexSchema schema = searcher.getSchema();
    for (String f : fields) {
      final NamedList resInner = new SimpleOrderedMap();
      resOuter.add(f, resInner);
      final FieldType trash = schema.getFieldType(f);
      if (! (trash instanceof DateField)) {
        throw new SolrException
          (SolrException.ErrorCode.BAD_REQUEST,
           "Can not date facet on a field which is not a DateField: " + f);
      }
View Full Code Here

    SolrParams params = req.getParams();
    if (!isHighlightingEnabled(params))
        return null;
    
     SolrIndexSearcher searcher = req.getSearcher();
     IndexSchema schema = searcher.getSchema();
     NamedList fragments = new SimpleOrderedMap();
     String[] fieldNames = getHighlightFields(query, req, defaultFields);
     Document[] readDocs = new Document[docs.size()];
     {
       // pre-fetch documents using the Searcher's doc cache
       Set<String> fset = new HashSet<String>();
       for(String f : fieldNames) { fset.add(f); }
       // fetch unique key if one exists.
       SchemaField keyField = schema.getUniqueKeyField();
       if(null != keyField)
         fset.add(keyField.getName())
       searcher.readDocs(readDocs, docs, fset);
     }


    // Highlight each document
    DocIterator iterator = docs.iterator();
    for (int i = 0; i < docs.size(); i++) {
       int docId = iterator.nextDoc();
       Document doc = readDocs[i];
       NamedList docSummaries = new SimpleOrderedMap();
       for (String fieldName : fieldNames) {
          fieldName = fieldName.trim();
          String[] docTexts = doc.getValues(fieldName);
          if (docTexts == null) continue;
         
          TokenStream tstream = null;
          int numFragments = getMaxSnippets(fieldName, params);
          boolean mergeContiguousFragments = isMergeContiguousFragments(fieldName, params);

          String[] summaries = null;
          List<TextFragment> frags = new ArrayList<TextFragment>();
          for (int j = 0; j < docTexts.length; j++) {
            // create TokenStream
            try {
              // attempt term vectors
              tstream = TokenSources.getTokenStream(searcher.getReader(), docId, fieldName);
            }
            catch (IllegalArgumentException e) {
              // fall back to analyzer
              tstream = new TokenOrderingFilter(schema.getAnalyzer().tokenStream(fieldName, new StringReader(docTexts[j])), 10);
            }
            
            Highlighter highlighter;
            if (Boolean.valueOf(req.getParams().get(HighlightParams.USE_PHRASE_HIGHLIGHTER))) {
              // wrap CachingTokenFilter around TokenStream for reuse
              tstream = new CachingTokenFilter(tstream);
             
              // get highlighter
              highlighter = getPhraseHighlighter(query, fieldName, req, (CachingTokenFilter) tstream);
              
              // after highlighter initialization, reset tstream since construction of highlighter already used it
              tstream.reset();
            }
            else {
              // use "the old way"
              highlighter = getHighlighter(query, fieldName, req);
            }
           
            int maxCharsToAnalyze = params.getFieldInt(fieldName,
                HighlightParams.MAX_CHARS,
                Highlighter.DEFAULT_MAX_CHARS_TO_ANALYZE);
            if (maxCharsToAnalyze < 0) {
              highlighter.setMaxDocCharsToAnalyze(docTexts[j].length());
            } else {
              highlighter.setMaxDocCharsToAnalyze(maxCharsToAnalyze);
            }
           
            TextFragment[] bestTextFragments = highlighter.getBestTextFragments(tstream, docTexts[j], mergeContiguousFragments, numFragments);
            for (int k = 0; k < bestTextFragments.length; k++) {
              if ((bestTextFragments[k] != null) && (bestTextFragments[k].getScore() > 0)) {
                frags.add(bestTextFragments[k]);
              }
            }
          }
          // sort such that the fragments with the highest score come first
          Collections.sort(frags, new Comparator<TextFragment>() {
            public int compare(TextFragment arg0, TextFragment arg1) {
              return Math.round(arg1.getScore() - arg0.getScore());
            }
          });
         
           // convert fragments back into text
           // TODO: we can include score and position information in output as snippet attributes
          if (frags.size() > 0) {
            ArrayList<String> fragTexts = new ArrayList<String>();
            for (TextFragment fragment: frags) {
              if ((fragment != null) && (fragment.getScore() > 0)) {
                fragTexts.add(fragment.toString());
              }
              if (fragTexts.size() >= numFragments) break;
            }
            summaries = fragTexts.toArray(new String[0]);
            if (summaries.length > 0)
            docSummaries.add(fieldName, summaries);
          }
           // no summaries made, copy text from alternate field
           if (summaries == null || summaries.length == 0) {
              String alternateField = req.getParams().getFieldParam(fieldName, HighlightParams.ALTERNATE_FIELD);
              if (alternateField != null && alternateField.length() > 0) {
                String[] altTexts = doc.getValues(alternateField);
                if (altTexts != null && altTexts.length > 0){
                  int alternateFieldLen = req.getParams().getFieldInt(fieldName, HighlightParams.ALTERNATE_FIELD_LENGTH,0);
                  if( alternateFieldLen <= 0 ){
                    docSummaries.add(fieldName, altTexts);
                  }
                  else{
                    List<String> altList = new ArrayList<String>();
                    int len = 0;
                    for( String altText: altTexts ){
                      altList.add( len + altText.length() > alternateFieldLen ?
                                   altText.substring( 0, alternateFieldLen - len ) : altText );
                      len += altText.length();
                      if( len >= alternateFieldLen ) break;
                    }
                    docSummaries.add(fieldName, altList);
                  }
                }
              }
           }
        }
        String printId = schema.printableUniqueKey(doc);
        fragments.add(printId == null ? null : printId, docSummaries);
     }
     return fragments;
  }
View Full Code Here

  public static final int DEFAULT_COUNT = 10;
 
  @Override
  public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception
  {   
    IndexSchema schema = req.getSchema();
    SolrIndexSearcher searcher = req.getSearcher();
    IndexReader reader = searcher.getReader();
    SolrParams params = req.getParams();
    int numTerms = params.getInt( NUMTERMS, DEFAULT_COUNT );
       
    // Always show the core lucene info
    rsp.add("index", getIndexInfo(reader, numTerms>0 ) );

    Integer docId = params.getInt( DOC_ID );
    if( docId == null && params.get( ID ) != null ) {
      // Look for something with a given solr ID
      SchemaField uniqueKey = schema.getUniqueKeyField();
      String v = uniqueKey.getType().toInternal( params.get(ID) );
      Term t = new Term( uniqueKey.getName(), v );
      docId = searcher.getFirstMatch( t );
      if( docId < 0 ) {
        throw new SolrException( SolrException.ErrorCode.NOT_FOUND, "Can't find document: "+params.get( ID ) );
View Full Code Here

TOP

Related Classes of org.apache.solr.schema.IndexSchema

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact: coftware#gmail.com.