Package net.yacy.cora.services.federated.solr

Source Code of net.yacy.cora.services.federated.solr.SolrSingleConnector$Worker

/**
*  SolrSingleConnector
*  Copyright 2011 by Michael Peter Christen
*  First released 14.04.2011 at http://yacy.net
*
*  $LastChangedDate: 2011-04-14 22:05:04 +0200 (Do, 14 Apr 2011) $
*  $LastChangedRevision: 7654 $
*  $LastChangedBy: orbiter $
*
*  This library is free software; you can redistribute it and/or
*  modify it under the terms of the GNU Lesser General Public
*  License as published by the Free Software Foundation; either
*  version 2.1 of the License, or (at your option) any later version.
*
*  This library is distributed in the hope that it will be useful,
*  but WITHOUT ANY WARRANTY; without even the implied warranty of
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
*  Lesser General Public License for more details.
*
*  You should have received a copy of the GNU Lesser General Public License
*  along with this program in the file lgpl21.txt
*  If not, see <http://www.gnu.org/licenses/>.
*/

package net.yacy.cora.services.federated.solr;

import java.io.File;
import java.io.IOException;
import java.net.InetAddress;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;

import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.ResponseHeader;
import net.yacy.document.Document;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.UsernamePasswordCredentials;
import org.apache.commons.httpclient.auth.AuthPolicy;
import org.apache.commons.httpclient.auth.AuthScope;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;


public class SolrSingleConnector {

    private final String solrurl, host, solrpath, solraccount, solrpw;
    private final int port;
    private SolrServer server;
    private final SolrScheme scheme;

    private final static int transmissionQueueCount = 4; // allow concurrent http sessions to solr
    private final static int transmissionQueueSize = 50; // number of documents that are collected until a commit is sent
    private final Worker[] transmissionWorker; // the transmission workers to solr
    private final BlockingQueue<SolrInputDocument>[] transmissionQueue; // the queues quere documents are collected
    private int transmissionRoundRobinCounter; // a rount robin counter for the transmission queues

    @SuppressWarnings("unchecked")
    public SolrSingleConnector(final String url, final SolrScheme scheme) throws IOException {
        this.solrurl = url;
        this.scheme = scheme;
        this.transmissionRoundRobinCounter = 0;
        this.transmissionQueue = new ArrayBlockingQueue[transmissionQueueCount];
        for (int i = 0; i < transmissionQueueCount; i++) {
            this.transmissionQueue[i] = new ArrayBlockingQueue<SolrInputDocument>(transmissionQueueSize);
        }

        // connect using authentication
        final MultiProtocolURI u = new MultiProtocolURI(this.solrurl);
        this.host = u.getHost();
        this.port = u.getPort();
        this.solrpath = u.getPath();
        final String userinfo = u.getUserInfo();
        if (userinfo == null || userinfo.length() == 0) {
            this.solraccount = ""; this.solrpw = "";
        } else {
            final int p = userinfo.indexOf(':');
            if (p < 0) {
                this.solraccount = userinfo; this.solrpw = "";
            } else {
                this.solraccount = userinfo.substring(0, p); this.solrpw = userinfo.substring(p + 1);
            }
        }
        if (this.solraccount.length() > 0) {
            final HttpClient client = new HttpClient();
            final AuthScope scope = new AuthScope(AuthScope.ANY_HOST, AuthScope.ANY_PORT, null, null);
            client.getState().setCredentials(scope,new UsernamePasswordCredentials(this.solraccount, this.solrpw));
            final List<String> authPrefs = new ArrayList<String>(2);
            authPrefs.add(AuthPolicy.DIGEST);
            authPrefs.add(AuthPolicy.BASIC);
            // This will exclude the NTLM authentication scheme
            client.getParams().setParameter(AuthPolicy.AUTH_SCHEME_PRIORITY, authPrefs);
            client.getParams().setAuthenticationPreemptive(true);
            try {
                this.server = new CommonsHttpSolrServer("http://" + this.host + ":" + this.port + this.solrpath, client);
            } catch (final MalformedURLException e) {
                throw new IOException("bad auth connector url: " + this.solrurl);
            }
        } else {
            try {
                this.server = new CommonsHttpSolrServer(this.solrurl);
            } catch (final MalformedURLException e) {
                throw new IOException("bad connector url: " + this.solrurl);
            }
        }

        // start worker
        this.transmissionWorker = new Worker[transmissionQueueCount];
        for (int i = 0; i < transmissionQueueCount; i++) {
            this.transmissionWorker[i] = new Worker(i);
            this.transmissionWorker[i].start();
        }
    }

    private class Worker extends Thread {
        boolean shallRun;
        int idx;
        public Worker(final int i) {
            this.idx = i;
            this.shallRun = true;
        }
        public void pleaseStop() {
            this.shallRun = false;
        }
        public void run() {
            while (this.shallRun) {
                if (SolrSingleConnector.this.transmissionQueue[this.idx].size() > 0) {
                    try {
                        flushTransmissionQueue(this.idx);
                    } catch (final IOException e) {
                        Log.logSevere("SolrSingleConnector", "flush Transmission failed in worker:IO", e);
                        continue;
                    } catch (final SolrException e) {
                        Log.logSevere("SolrSingleConnector", "flush Transmission failed in worker:Solr", e);
                        continue;
                    }
                } else {
                    try {Thread.sleep(1000);} catch (final InterruptedException e) {}
                }
            }
            try {
                flushTransmissionQueue(this.idx);
            } catch (final IOException e) {}
        }
    }

    public void close() {
        for (int i = 0; i < transmissionQueueCount; i++) {
            if (this.transmissionWorker[i].isAlive()) {
                this.transmissionWorker[i].pleaseStop();
                try {this.transmissionWorker[i].join();} catch (final InterruptedException e) {}
            }
        }
        for (int i = 0; i < transmissionQueueCount; i++) {
            try {
                flushTransmissionQueue(i);
            } catch (final IOException e) {
                Log.logException(e);
            } catch (final SolrException e) {
                Log.logException(e);
            }

        }
    }

    /**
     * delete everything in the solr index
     * @throws IOException
     */
    public void clear() throws IOException {
        try {
            this.server.deleteByQuery("*:*");
            this.server.commit();
        } catch (final SolrServerException e) {
            throw new IOException(e);
        }
    }

    public void delete(final String id) throws IOException {
        try {
            this.server.deleteById(id);
        } catch (final SolrServerException e) {
            throw new IOException(e);
        }
    }

    public void delete(final List<String> ids) throws IOException {
        try {
            this.server.deleteById(ids);
        } catch (final SolrServerException e) {
            throw new IOException(e);
        }
    }

    public void add(final File file, final String solrId) throws IOException {
        final ContentStreamUpdateRequest up = new ContentStreamUpdateRequest("/update/extract");
        up.addFile(file);
        up.setParam("literal.id", solrId);
        up.setParam("uprefix", "attr_");
        up.setParam("fmap.content", "attr_content");
        //up.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
        try {
            this.server.request(up);
            this.server.commit();
        } catch (final SolrServerException e) {
            throw new IOException(e);
        }
    }

    public void add(final String id, final ResponseHeader header, final Document doc) throws IOException, SolrException {
        add(this.scheme.yacy2solr(id, header, doc));
    }

    protected void add(final SolrInputDocument solrdoc) throws IOException, SolrException {
        int thisrrc = this.transmissionRoundRobinCounter;
        int nextrrc = thisrrc++;
        if (nextrrc >= transmissionQueueCount) nextrrc = 0;
        this.transmissionRoundRobinCounter = nextrrc;
        if (this.transmissionWorker[thisrrc].isAlive()) {
            this.transmissionQueue[thisrrc].offer(solrdoc);
        } else {
            if (this.transmissionQueue[thisrrc].size() > 0) flushTransmissionQueue(thisrrc);
            final Collection<SolrInputDocument> docs = new ArrayList<SolrInputDocument>();
            docs.add(solrdoc);
            addSolr(docs);
        }
    }

    protected void addSolr(final Collection<SolrInputDocument> docs) throws IOException, SolrException {
        try {
            this.server.add(docs);
            this.server.commit();
            /* To immediately commit after adding documents, you could use:
                  UpdateRequest req = new UpdateRequest();
                  req.setAction( UpdateRequest.ACTION.COMMIT, false, false );
                  req.add( docs );
                  UpdateResponse rsp = req.process( server );
             */
        } catch (final SolrServerException e) {
            throw new IOException(e);
        }
    }

    public void err(final DigestURI digestURI, final String failReason, final int httpstatus) throws IOException {

            final SolrInputDocument solrdoc = new SolrInputDocument();
            solrdoc.addField("id", ASCII.String(digestURI.hash()));
            solrdoc.addField("sku", digestURI.toNormalform(true, false), 3.0f);
            final InetAddress address = Domains.dnsResolve(digestURI.getHost());
            if (address != null) solrdoc.addField("ip_s", address.getHostAddress());
            if (digestURI.getHost() != null) solrdoc.addField("host_s", digestURI.getHost());

            // path elements of link
            final String path = digestURI.getPath();
            if (path != null) {
                final String[] paths = path.split("/");
                if (paths.length > 0) solrdoc.addField("attr_paths", paths);
            }

            solrdoc.addField("failreason_t", failReason);
            solrdoc.addField("httpstatus_i", httpstatus);

            add(solrdoc);
    }

    private void flushTransmissionQueue(final int idx) throws IOException, SolrException {
        final Collection<SolrInputDocument> c = new ArrayList<SolrInputDocument>();
        while (this.transmissionQueue[idx].size() > 0) {
            try {
                c.add(this.transmissionQueue[idx].take());
            } catch (final InterruptedException e) {
                continue;
            }
        }
        addSolr(c);
    }


    /**
     * get a query result from solr
     * to get all results set the query String to "*:*"
     * @param querystring
     * @throws IOException
     */
    public SolrDocumentList get(final String querystring, final int offset, final int count) throws IOException {
        // construct query
        final SolrQuery query = new SolrQuery();
        query.setQuery(querystring);
        query.setRows(count);
        query.setStart(offset);
        //query.addSortField( "price", SolrQuery.ORDER.asc );

        // query the server
        //SearchResult result = new SearchResult(count);
        try {
            final QueryResponse rsp = this.server.query( query );
            final SolrDocumentList docs = rsp.getResults();
            return docs;
            // add the docs into the YaCy search result container
            /*
            for (SolrDocument doc: docs) {
                result.put(element)
            }
            */
        } catch (final SolrServerException e) {
            throw new IOException(e);
        }

        //return result;
    }

    public static void main(final String args[]) {
        SolrSingleConnector solr;
        try {
            solr = new SolrSingleConnector("http://127.0.0.1:8983/solr", new SolrScheme());
            solr.clear();
            final File exampleDir = new File("/Data/workspace2/yacy/test/parsertest/");
            long t, t0, a = 0;
            int c = 0;
            for (final String s: exampleDir.list()) {
                if (s.startsWith(".")) continue;
                t = System.currentTimeMillis();
                solr.add(new File(exampleDir, s), s);
                t0 = (System.currentTimeMillis() - t);
                a += t0;
                c++;
                System.out.println("pushed file " + s + " to solr, " + t0 + " milliseconds");
            }
            System.out.println("pushed " + c + " files in " + a + " milliseconds, " + (a / c) + " milliseconds average; " + (60000 / a * c) + " PPM");
        } catch (final IOException e) {
            e.printStackTrace();
        }
    }

}
TOP

Related Classes of net.yacy.cora.services.federated.solr.SolrSingleConnector$Worker

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.