Examples of IndexingConfig


Examples of org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig

        this.loader =  new ResourceLoader(new RdfResourceImporter(indexingDataset), true,true);
        loader.addResource(sourceFileOrDirectory);
    }
    @Override
    public void setConfiguration(Map<String,Object> config) {
        IndexingConfig indexingConfig = (IndexingConfig)config.get(IndexingConfig.KEY_INDEXING_CONFIG);
        //first init the RDF Model
        this.indexingDataset = Utils.getTDBDataset(config);
        //second we need to check if we need to import RDF files to the RDF model
        //create the ResourceLoader
        this.loader =  new ResourceLoader(new RdfResourceImporter(indexingDataset), true);
       
        Object value = config.get(PARAM_IMPORTED_FOLDER);
        String importedFolderName;
        if(value != null && !value.toString().isEmpty()){
            importedFolderName = value.toString();
        } else {
            importedFolderName = DEFAULT_IMPORTED_FOLDER_NAME;
        }
        File importedFolder = new File(indexingConfig.getSourceFolder(),importedFolderName);
        log.info("Imported RDF File Folder: {}",importedFolder);
        this.loader.setImportedDir(importedFolder);
        //check if importing is deactivated
        boolean importSource = true; //default is true
        value = config.get(PARAM_IMPORT_SOURCE);
        if(value != null){
            importSource = Boolean.parseBoolean(value.toString());
        }
        if(importSource){ // if we need to import ... check the source config
            log.info("Importing RDF data from:");
            value = config.get(PARAM_SOURCE_FILE_OR_FOLDER);
            if(value == null){ //if not set use the default
                value = DEFAULT_SOURCE_FOLDER_NAME;
            }
            for(String source : value.toString().split(",")){
                File sourceFileOrDirectory = indexingConfig.getSourceFile(source);
                if(sourceFileOrDirectory.exists()){
                    //register the configured source with the ResourceLoader
                    this.loader.addResource(sourceFileOrDirectory);
                } else {
                    if(FilenameUtils.getExtension(source).isEmpty()){
                        //non existent directory -> create
                        //This is typically the case if this method is called to
                        //initialise the default configuration. So we will try
                        //to create the directory users need to copy the source
                        //RDF files.
                        if(!sourceFileOrDirectory.mkdirs()){
                            log.warn("Unable to create directory {} configured to improt RDF data from. " +
                                "You will need to create this directory manually before copying the" +
                                "RDF files into it.",sourceFileOrDirectory);
                            //this would not be necessary because the directory will
                            //be empty - however I like to be consistent and have
                            //all configured and existent files & dirs added the the
                            //resource loader
                            this.loader.addResource(sourceFileOrDirectory);
                        }
                    } else {
                        log.warn("Unable to find RDF source {} within the indexing Source folder ",source,indexingConfig.getSourceFolder());
                    }
                }
            }
            if(log.isInfoEnabled()){
                for(String registeredSource : loader.getResources(ResourceState.REGISTERED)){
                    log.info(" > "+registeredSource);
                }
            }
        } else {
            log.info("Importing RDF data deactivated by parameer {}={}"+PARAM_IMPORT_SOURCE,value);
        }
        //STANBOL-765: parsed bnode-prefix from parsed configuration.
        value = config.get(PARAM_BNODE_STATE);
        final Boolean bnodeState;
        if(value != null){
            bnodeState = value instanceof Boolean ? (Boolean) value :
                Boolean.parseBoolean(value.toString());
        } else if(config.containsKey(PARAM_BNODE_STATE)){ //support key without value
            bnodeState = true;
        } else {
            bnodeState = null; //undefined
        }
        if(bnodeState == null || bnodeState){ //null or enabled -> consider prefix
            value = config.get(PARAM_BNODE_PREFIX);
            if(value != null){
                try {
                    new URI(value.toString());
                } catch (URISyntaxException e) {
                    throw new IllegalArgumentException("The configured "+PARAM_BNODE_PREFIX+"='"
                        + value.toString() + "' MUST BE a valid URI!");
                }
                bnodePrefix = value.toString();
            } else if(bnodeState != null) { //use default prefix if bnodeState is true
                bnodePrefix = String.format("urn:bnode:%s:",indexingConfig.getName());
            } // else bnodeState == null and no custom prefix -> disable by default
        }
        if(bnodePrefix != null){
            log.info("Indexing of Bnodes enabled (prefix: {}",bnodePrefix);
        } else {
View Full Code Here

Examples of org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig

     * @throws IllegalArgumentException if the config is <code>null</code>; is
     * missing a value for the {@link IndexingConfig#KEY_INDEXING_CONFIG} or
     * {@link #initTDBDataset(File)} throws an IllegalArgumentException
     */
    public static DatasetGraphTDB getTDBDataset(Map<String,Object> config) {
        IndexingConfig indexingConfig = (IndexingConfig)config.get(KEY_INDEXING_CONFIG);
        if(indexingConfig == null){
            throw new IllegalArgumentException("No IndexingConfig object present as value of key '"
                    + KEY_INDEXING_CONFIG+"'!");
        }
        Object value = config.get(PARAM_MODEL_DIRECTORY);
        File modelLocation;
        if(value == null){
            modelLocation = new File(indexingConfig.getSourceFolder(),DEFAULT_MODEL_DIRECTORY);
        } else {
            modelLocation = new File(indexingConfig.getSourceFolder(),value.toString());
        }
        return initTDBDataset(modelLocation);

    }
View Full Code Here

Examples of org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig

        this.loader =  new ResourceLoader(new RdfResourceImporter(indexingDataset), true,true);
        loader.addResource(sourceFileOrDirectory);
    }
    @Override
    public void setConfiguration(Map<String,Object> config) {
        IndexingConfig indexingConfig = (IndexingConfig)config.get(IndexingConfig.KEY_INDEXING_CONFIG);
        //first init the RDF Model
        this.indexingDataset = Utils.getTDBDataset(config);
        //second we need to check if we need to import RDF files to the RDF model
        //create the ResourceLoader
        this.loader =  new ResourceLoader(new RdfResourceImporter(indexingDataset), true);
        //check if importing is deactivated
        boolean importSource = true; //default is true
        Object value = config.get(PARAM_IMPORT_SOURCE);
        if(value != null){
            importSource = Boolean.parseBoolean(value.toString());
        }
        if(importSource){ // if we need to import ... check the source config
            log.info("Importing RDF data from:");
            value = config.get(PARAM_SOURCE_FILE_OR_FOLDER);
            if(value == null){ //if not set use the default
                value = DEFAULT_SOURCE_FOLDER_NAME;
            }
            for(String source : value.toString().split(",")){
                File sourceFileOrDirectory = indexingConfig.getSourceFile(source);
                if(sourceFileOrDirectory.exists()){
                    //register the configured source with the ResourceLoader
                    this.loader.addResource(sourceFileOrDirectory);
                } else {
                    if(FilenameUtils.getExtension(source).isEmpty()){
                        //non existent directory -> create
                        //This is typically the case if this method is called to
                        //initialise the default configuration. So we will try
                        //to create the directory users need to copy the source
                        //RDF files.
                        if(!sourceFileOrDirectory.mkdirs()){
                            log.warn("Unable to create directory {} configured to improt RDF data from. " +
                                "You will need to create this directory manually before copying the" +
                                "RDF files into it.",sourceFileOrDirectory);
                            //this would not be necessary because the directory will
                            //be empty - however I like to be consistent and have
                            //all configured and existent files & dirs added the the
                            //resource loader
                            this.loader.addResource(sourceFileOrDirectory);
                        }
                    } else {
                        log.warn("Unable to find RDF source {} within the indexing Source folder ",source,indexingConfig.getSourceFolder());
                    }
                }
            }
            if(log.isInfoEnabled()){
                for(String registeredSource : loader.getResources(ResourceState.REGISTERED)){
View Full Code Here

Examples of org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig

     * @param classpathOffset
     * @return
     */
    protected Indexer create(String dir,String classpathOffset){
        Indexer indexer;
        IndexingConfig config;
        if(classpathOffset != null){
            config= new IndexingConfig(dir,classpathOffset){};
        } else {
            config= new IndexingConfig(dir);
        }
        //get the mode based on the configured IndexingComponents
        EntityDataIterable dataIterable = config.getDataInterable();
        EntityIterator idIterator = config.getEntityIdIterator();
        EntityDataProvider dataProvider = config.getEntityDataProvider();
        EntityScoreProvider scoreProvider = config.getEntityScoreProvider();
       
       
        IndexingDestination destination = config.getIndexingDestination();
        if(destination == null){
            log.error("The indexing configuration does not provide an " +
                "indexing destination. This needs to be configured by the key " +
                "'{}' in the indexing.properties within the directory {}",
                IndexingConstants.KEY_INDEXING_DESTINATION,config.getConfigFolder());
            throw new IllegalArgumentException("No IndexingDestination present");
        }
        List<EntityProcessor> processors = config.getEntityProcessors();
        if(processors == null){
            log.error("The indexing configuration does not provide an " +
                "entity processor. This needs to be configured by the key " +
                "'{}' in the indexing.properties within the directory {}",
                IndexingConstants.KEY_ENTITY_PROCESSOR,config.getConfigFolder());
        }
        List<EntityProcessor> postProcessors = config.getEntityPostProcessors();
        log.info("Present Source Configuration:");
        log.info(" - EntityDataIterable: {}",dataIterable);
        log.info(" - EntityIterator: {}",idIterator);
        log.info(" - EntityDataProvider: {}",dataProvider);
        log.info(" - EntityScoreProvider: {}",scoreProvider);
        log.info(" - EntityProcessors ({}):",processors.size());
        if(postProcessors != null){
            log.info(" - EntityPostProcessors ({}):",postProcessors.size());
        }
        int i=0;
        for(EntityProcessor processor : processors){
            i++;
            log.info("    {}) {}",i,processor);
        }
        if(dataIterable != null && scoreProvider != null){
            // iterate over data and lookup scores
            indexer = new IndexerImpl(dataIterable, scoreProvider,
                config.getNormaliser(),destination, processors,
                config.getIndexedEntitiesIdsFile(),postProcessors);
        } else if(idIterator != null && dataProvider != null){
            // iterate over id and lookup data
            indexer = new IndexerImpl(idIterator,dataProvider,
                config.getNormaliser(),destination, processors,
                config.getIndexedEntitiesIdsFile(),postProcessors);
        } else if(dataIterable != null && idIterator != null){
            // create an EntityIterator to EntityScoreProvider adapter
            log.info(
                "Create Adapter from the configured EntityIterator '{}' to the " +
                "required EntityScoreProvider as needed together with the " +
              "configured EntityDataIterable '{}'",
              idIterator.getClass(), dataIterable.getClass());
            indexer = new IndexerImpl(dataIterable,
                new EntityIneratorToScoreProviderAdapter(idIterator),
                config.getNormaliser(),destination, processors,
                config.getIndexedEntitiesIdsFile(),postProcessors);
        } else {
            log.error("Invalid Indexing Source configuration: ");
            log.error(" - To iterate over the data and lookup scores one need to " +
                "configure an EntityDataIterable and an EntityScoreProvider ");
            log.error(" - To iterate over the Id and and lookup data one need to " +
View Full Code Here

Examples of org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig

    public boolean needsInitialisation() {
        //unconditionally reports that no initialisation is needed
        return false;
    }
    @Override
    public void setConfiguration(Map<String,Object> config) {
        IndexingConfig indexingConfig = (IndexingConfig)config.get(IndexingConfig.KEY_INDEXING_CONFIG);
        Object value = config.get(PARAM_MAPPINGS);
        if(value == null || value.toString().isEmpty()){
            //use the mappings configured for the Index
            this.mapper = FieldMappingUtils.createDefaultFieldMapper(
                indexingConfig.getIndexFieldConfiguration());
        } else {
            //load (other) mappings based on the provided mappings parameter
            //final File file = new File(indexingConfig.getConfigFolder(),value.toString());
            File mappings = indexingConfig.getConfigFile(value.toString());
            if(mappings != null){
                try {
                    InputStream in = new FileInputStream(mappings);
                    this.mapper = createMapperFormStream(in);
                    IOUtils.closeQuietly(in);
                } catch (IOException e) {
                    throw new IllegalArgumentException("Unable to access FieldMapping file "+
                        value+" not found in configuration directory "+
                        indexingConfig.getConfigFolder());
                }
            } else {
                throw new IllegalArgumentException("FieldMapping file "+
                    value+" not found in configuration directory "+
                    indexingConfig.getConfigFolder());
            }
        }
        //TODO: get the valueFactory form the config (currently an InMemory is
        //create by the default constructor!
    }
View Full Code Here

Examples of org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig

        trimEntityId = DEFAULT_TRIM_ENTITY;
        trimLine = DEFAULT_TRIM_LINE;
    }
    @Override
    public void setConfiguration(Map<String,Object> config) {
        IndexingConfig indexingConfig = (IndexingConfig)config.get(IndexingConfig.KEY_INDEXING_CONFIG);
        log.info("Configure {} :",getClass().getSimpleName());
        Object value = config.get(PARAM_CHARSET);
        if(value != null && value.toString() != null){
            this.charset = value.toString();
            log.info("Set charset to '{}'",charset);
        }
        //parse encode/decode EntityIDs
        value = config.get(PARAM_URL_ENCODE_ENTITY_IDS);
        boolean encodeIds;
        if(value != null){
            encodeIds = Boolean.parseBoolean(value.toString());
        } else if (config.containsKey(PARAM_URL_ENCODE_ENTITY_IDS)){
            encodeIds = true;
        } else {
            encodeIds = false;
        }
        value = config.get(PARAM_URL_DECODE_ENTITY_IDS);
        boolean decodeIds;
        if(value != null){
            decodeIds = Boolean.parseBoolean(value.toString());
        } else if (config.containsKey(PARAM_URL_DECODE_ENTITY_IDS)){
            decodeIds = true;
        } else {
            decodeIds = false;
        }
        if(encodeIds && decodeIds){
            throw new IllegalArgumentException(String.format(
                "One can not enable both Parameters '{}' and '{}'!",
                PARAM_URL_DECODE_ENTITY_IDS,PARAM_URL_DECODE_ENTITY_IDS));
        } else if(encodeIds){
            this.encodeEntityIds = 1;
            log.info("activate URL encoding of Entity IDs");
        } else if(decodeIds){
            this.encodeEntityIds = -1;
            log.info("activate URL decoding of Entity IDs");
        }
        value = config.get(PARAM_ENTITY_SCORE_FILE);
        if(reader == null){
            if(value == null || value.toString().isEmpty()){
                scoreFile = indexingConfig.getSourceFile(DEFAULT_ENTITY_SCORE_FILE);
            } else {
                scoreFile = indexingConfig.getSourceFile(value.toString());
            }
            log.info("Set Source File to '"+this.scoreFile+"'");
        } //else reader parsed in the constructor ... nothing todo
        //now done in the initialise() method
//        try {
View Full Code Here

Examples of org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig

       provider.close();
    }
    @Override
    public void setConfiguration(Map<String,Object> config) {
        //the IndexingConfig is available via the IndexingConfig.KEY_INDEXING_CONFIG key!
        IndexingConfig indexingConfig = (IndexingConfig)config.get(IndexingConfig.KEY_INDEXING_CONFIG);
        //configure first the EntityIterator to adapt
        entityIterator = indexingConfig.getEntityIdIterator();
        if(entityIterator == null){
            throw new IllegalArgumentException("No EntityIterator available via the indexing configuration "+indexingConfig.getName());
        }
    }
View Full Code Here

Examples of org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig

    public static void cleanup(){
        //set the "user.dir" system property to the value held in userDir
        //(presumably the directory saved before the tests changed it - confirm)
        System.setProperty("user.dir", userDir);
    }
    @Test
    public void testEntityIdIteratorConfig(){
        IndexingConfig config = new IndexingConfig();
        EntityIterator iterator = config.getEntityIdIterator();
        ScoreNormaliser normaliser = config.getNormaliser();
        if(iterator.needsInitialisation()){
            iterator.initialise();
        }
        float lastScore = Float.MAX_VALUE;
        float lastNormalisedScore = 1f;
View Full Code Here

Examples of org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig

    }

    @Override
    public void setConfiguration(Map<String,Object> config) {
        //init fields
        IndexingConfig indexingConfig = (IndexingConfig)config.get(IndexingConfig.KEY_INDEXING_CONFIG);
        loader = new ResourceLoader(this, true, false);
        //vcard files are imported from a special folder in the destination dir.
        //this folder needs to be deleted/(re-)created first.
        vcardFileImportFolder = new File(indexingConfig.getDestinationFolder(),"vcard");
        if(vcardFileImportFolder.exists()){
            if(vcardFileImportFolder.isDirectory()){
                try {
                    FileUtils.deleteDirectory(vcardFileImportFolder);
                }catch (IOException e){
                    throw new IllegalStateException("Unable to delete Folder "+
                        vcardFileImportFolder.getAbsolutePath()+" containing the vCard files from a" +
                            "previouse indexing! Please remove this folder manually.",e);
                }
            } else if(!vcardFileImportFolder.delete()){
                throw new IllegalStateException("Unable to delete File "+
                    vcardFileImportFolder.getAbsolutePath()+" containing the vCard data from a" +
                            "previouse indexing! Please remove this File manually.");
            }
        }
        if(!vcardFileImportFolder.mkdirs()){
            throw new IllegalStateException("Unable to delete Folder "+
                vcardFileImportFolder.getAbsolutePath()+" containing the vCard files from a" +
                        "previouse indexing! Please remove this folder manually.");
        }
        //load config
        Object value;
        log.debug("load vcard resources from :");
        value = config.get(PARAM_SOURCE_FILE_OR_FOLDER);
        if(value == null){ //if not set use the default
            value = DEFAULT_SOURCE_FOLDER_NAME;
        }
        for(String source : value.toString().split(",")){
            File sourceFileOrDirectory = indexingConfig.getSourceFile(source);
            if(sourceFileOrDirectory.exists()){
                //register the configured source with the ResourceLoader
                this.loader.addResource(sourceFileOrDirectory);
            } else {
                if(FilenameUtils.getExtension(source).isEmpty()){
                    //non existent directory -> create
                    //This is typically the case if this method is called to
                    //initialise the default configuration. So we will try
                    //to create the directory users need to copy the source
                    //RDF files.
                    if(!sourceFileOrDirectory.mkdirs()){
                        log.warn("Unable to create directory {} configured to improt source data from. " +
                                "You will need to create this directory manually before copying the" +
                                "Source files into it.",sourceFileOrDirectory);
                        //this would not be necessary because the directory will
                        //be empty - however I like to be consistent and have
                        //all configured and existent files & dirs added the the
                        //resource loader
                        this.loader.addResource(sourceFileOrDirectory);
                    }
                } else {
                    log.warn("Unable to find vcard source {} within the indexing Source folder ",source,indexingConfig.getSourceFolder());
                }
            }
        }
        if(log.isDebugEnabled()){
            for(String registeredSource : loader.getResources(ResourceState.REGISTERED)){
View Full Code Here

Examples of org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig

    /**
     * In the test setup there is no default configuration. Creating an
     * {@link IndexingConfig} with the no-argument constructor is therefore
     * expected to fail with an {@link IllegalArgumentException}.
     */
    @Test(expected=IllegalArgumentException.class)
    public void missingDefault(){
        new IndexingConfig(); //there is no indexing folder in the user.dir
    }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.