Examples of org.elasticsearch.hadoop.cfg.Settings

org.elasticsearch.hadoop.cfg.Settings
Holder class containing the various configuration settings used by Elasticsearch Hadoop. Internally handles falling back to default values when an optional setting is undefined.

            if (log.isTraceEnabled()) {
                log.trace(String.format("EsRecordWriter instance [%s] initiating discovery of target shard...",
                        currentInstance));
            }


            Settings settings = SettingsManager.loadFrom(cfg).copy();


            if (log.isTraceEnabled()) {
                log.trace(String.format("Init shard writer from cfg %s", HadoopCfgUtils.asProperties(cfg)));
            }


            InitializationUtils.setValueWriterIfNotSet(settings, WritableValueWriter.class, log);
            InitializationUtils.setBytesConverterIfNeeded(settings, WritableBytesConverter.class, log);
            InitializationUtils.setFieldExtractorIfNotSet(settings, MapWritableFieldExtractor.class, log);
            InitializationUtils.discoverNodesIfNeeded(settings, log);
            InitializationUtils.discoverEsVersion(settings, log);
            // pick the host based on id
            List<String> nodes = SettingsUtils.nodes(settings);
            Collections.rotate(nodes, -currentInstance);
            settings.setProperty(InternalConfigurationOptions.INTERNAL_ES_HOSTS, StringUtils.concatenate(nodes, ","));


            beat = new HeartBeat(progressable, cfg, settings.getHeartBeatLead(), log);
            beat.start();


            resource = new Resource(settings, false);


            // single index vs multi indices
            IndexExtractor iformat = ObjectUtils.instantiate(settings.getMappingIndexExtractorClassName(), settings);
            iformat.compile(resource.toString());
            if (iformat.hasPattern()) {
                initMultiIndices(settings, currentInstance);
            }
            else {

View Full Code Here


    // Note: data written to the JobConf will be silently discarded
    @Override
    public org.apache.hadoop.mapred.InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {


        Settings settings = SettingsManager.loadFrom(job);
        InitializationUtils.discoverNodesIfNeeded(settings, log);
        InitializationUtils.discoverEsVersion(settings, log);


        String savedSettings = settings.save();


        RestRepository client = new RestRepository(settings);
        boolean indexExists = client.indexExists(true);
        Map<Shard, Node> targetShards = null;


        if (!indexExists) {
            if (settings.getIndexReadMissingAsEmpty()) {
                log.info(String.format("Index [%s] missing - treating it as empty", settings.getResourceRead()));
                targetShards = Collections.emptyMap();
            }
            else {
                client.close();
                throw new EsHadoopIllegalArgumentException(
                        String.format("Index [%s] missing and settings [%s] is set to false", settings.getResourceRead(), ConfigurationOptions.ES_FIELD_READ_EMPTY_AS_NULL));
            }
        }
        else {
            targetShards = client.getReadTargetShards();
            if (log.isTraceEnabled()) {
                log.trace("Creating splits for shards " + targetShards);
            }
        }


        Version.logVersion();
        log.info(String.format("Reading from [%s]", settings.getResourceRead()));


        String savedMapping = null;
        if (!targetShards.isEmpty()) {
            Field mapping = client.getMapping();
            log.info(String.format("Discovered mapping {%s} for [%s]", mapping, settings.getResourceRead()));
            // validate if possible
            FieldPresenceValidation validation = settings.getFieldExistanceValidation();
            if (validation.isRequired()) {
                MappingUtils.validateMapping(settings.getScrollFields(), mapping, validation, log);
            }


            //TODO: implement this more efficiently
            savedMapping = IOUtils.serializeToBase64(mapping);

View Full Code Here

    private void lazyInitializeWrite() {
        if (writeInitialized) {
            return;
        }
        writeInitialized = true;
        Settings settings = SettingsManager.loadFrom(tableProperties);


        InitializationUtils.setValueWriterIfNotSet(settings, HiveValueWriter.class, log);
        InitializationUtils.setFieldExtractorIfNotSet(settings, HiveFieldExtractor.class, log);
        InitializationUtils.setBytesConverterIfNeeded(settings, HiveBytesConverter.class, log);
        this.command = BulkCommands.create(settings);

View Full Code Here

            init((ShardInputSplit) split, compatContext.getConfiguration(), compatContext);
        }


        void init(ShardInputSplit esSplit, Configuration cfg, Progressable progressable) {
            // get a copy to override the host/port
            Settings settings = SettingsManager.loadFrom(cfg).copy().load(esSplit.settings);


            if (log.isTraceEnabled()) {
                log.trace(String.format("Init shard reader from cfg %s", HadoopCfgUtils.asProperties(cfg)));
                log.trace(String.format("Init shard reader w/ settings %s", esSplit.settings));
            }


            // override the global settings to communicate directly with the target node
            settings.setHosts(esSplit.nodeIp).setPort(esSplit.httpPort);


            this.esSplit = esSplit;


            // initialize mapping/ scroll reader
            InitializationUtils.setValueReaderIfNotSet(settings, WritableValueReader.class, log);
            ValueReader reader = ObjectUtils.instantiate(settings.getSerializerValueReaderClassName(), settings);


            String mappingData = esSplit.mapping;


            Field mapping = null;


            if (StringUtils.hasText(mappingData)) {
                mapping = IOUtils.deserializeFromBase64(mappingData);
            }
            else {
                log.warn(String.format("No mapping found for [%s] - either no index exists or the split configuration has been corrupted", esSplit));
            }


            scrollReader = new ScrollReader(reader, mapping);


            // heart-beat
            beat = new HeartBeat(progressable, cfg, settings.getHeartBeatLead(), log);


            // initialize REST client
            client = new RestRepository(settings);


            queryBuilder = QueryBuilder.query(settings)
                    .shard(esSplit.shardId)
                    .onlyNode(esSplit.nodeId);


            queryBuilder.fields(settings.getScrollFields());


            this.progressable = progressable;


            if (log.isDebugEnabled()) {
                log.debug(String.format("Initializing RecordReader for [%s]", esSplit));

View Full Code Here

    @Override
    public void sourcePrepare(FlowProcess<Properties> flowProcess, SourceCall<Object[], ScrollQuery> sourceCall) throws IOException {
        super.sourcePrepare(flowProcess, sourceCall);


        Object[] context = new Object[1];
        Settings settings = SettingsManager.loadFrom(flowProcess.getConfigCopy()).merge(props);
        context[0] = CascadingUtils.alias(settings);
        sourceCall.setContext(context);
        IS_ES_10 = SettingsUtils.isEs10(settings);
    }

View Full Code Here

    @Override
    public void sinkPrepare(FlowProcess<Properties> flowProcess, SinkCall<Object[], Object> sinkCall) throws IOException {
        super.sinkPrepare(flowProcess, sinkCall);


        Object[] context = new Object[1];
        Settings settings = SettingsManager.loadFrom(flowProcess.getConfigCopy()).merge(props);
        context[0] = CascadingUtils.fieldToAlias(settings, getSinkFields());
        sinkCall.setContext(context);
    }

View Full Code Here

        InitializationUtils.checkIndexExistence(client);
    }


    private void initClient(Properties props, boolean read) {
        if (client == null) {
            Settings settings = CascadingUtils.addDefaultsToSettings(props, this.props, LogFactory.getLog(EsTap.class));
            CascadingUtils.init(settings, host, port, resource, query, read);
            client = new RestRepository(settings);
        }
    }

View Full Code Here


        Object[] context = new Object[3];
        context[0] = sourceCall.getInput().createKey();
        context[1] = sourceCall.getInput().createValue();
        // as the tuple _might_ vary (some objects might be missing), we use a map rather than a collection
        Settings settings = loadSettings(flowProcess.getConfigCopy(), true);
        context[2] = CascadingUtils.alias(settings);
        sourceCall.setContext(context);
        IS_ES_10 = SettingsUtils.isEs10(settings);
    }

View Full Code Here

    public void sinkPrepare(FlowProcess<JobConf> flowProcess, SinkCall<Object[], OutputCollector> sinkCall) throws IOException {
        super.sinkPrepare(flowProcess, sinkCall);


        Object[] context = new Object[1];
        // the tuple is fixed, so we can just use a collection/index
        Settings settings = loadSettings(flowProcess.getConfigCopy(), false);
        context[0] = CascadingUtils.fieldToAlias(settings, getSinkFields());
        sinkCall.setContext(context);
        IS_ES_10 = SettingsUtils.isEs10(settings);
    }

View Full Code Here

    }


    @Override
    public void sourceConfInit(FlowProcess<JobConf> flowProcess, Tap<JobConf, RecordReader, OutputCollector> tap, JobConf conf) {
        conf.setInputFormat(EsInputFormat.class);
        Settings set = loadSettings(conf, true);


        Collection<String> fields = CascadingUtils.fieldToAlias(set, getSourceFields());
        // load only the necessary fields
        conf.set(InternalConfigurationOptions.INTERNAL_ES_TARGET_FIELDS, StringUtils.concatenate(fields, ","));

View Full Code Here

0 1 2 3 4 5

TOP

Related Classes of org.elasticsearch.hadoop.cfg.Settings

org.elasticsearch.hadoop.cascading.CascadingUtils

org.elasticsearch.hadoop.cascading.EsHadoopScheme

org.elasticsearch.hadoop.cascading.EsLocalScheme

org.elasticsearch.hadoop.cascading.EsLocalTap

org.elasticsearch.hadoop.hive.EsHiveInputFormat

org.elasticsearch.hadoop.hive.EsHiveOutputFormat

org.elasticsearch.hadoop.hive.EsSerDe

org.elasticsearch.hadoop.hive.EsStorageHandler

org.elasticsearch.hadoop.integration.rest.AbstractRestQueryTest

org.elasticsearch.hadoop.mr.EsInputFormat

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.