Package it.unimi.dsi.io

Examples of it.unimi.dsi.io.WordReader


    @Override
    public int execute() throws Throwable {
        Gson gson = new Gson();
        Type collectionType = new TypeToken<Collection<Query>>() {
        }.getType();
        WordReader wordReader = wordReaderType.getWordReader();

        final Collection<Query> queries = gson.fromJson(new InputStreamReader(System.in), collectionType);
        MutableString word = new MutableString();
        MutableString delimiter = new MutableString();

        for (Query query : queries) {
            if (query.query == null) throw new RuntimeException("A query has no 'query' field");
            if (query.id == null) throw new RuntimeException("A query has no 'id' field");
            wordReader.setReader(new StringReader(query.query));
            System.out.print(query.id);
            while (wordReader.next(word, delimiter)) {
                System.out.print('\t');
                System.out.print(word);
            }
            System.out.println();
        }
View Full Code Here


        DocumentCollection collection = (DocumentCollection) Scan.getSequence(this.sequence,
                IdentityDocumentFactory.class, new String[]{},
                Scan.DEFAULT_DELIMITER, LOGGER);
        DocumentFactory factory = collection.factory();

        final WordReader wordReader = wordReaderType.getWordReader();
        LOGGER.info(String.format("Term processor class is %s", termProcessor.getClass()));

        // -*- Get the fields
        int[] fields = new int[fieldNames.size()];
        DocumentFactory.FieldType[] types = new DocumentFactory.FieldType[fields.length];
        for (int i = fields.length; --i >= 0; ) {
            fields[i] = factory.fieldIndex(fieldNames.get(i));
            types[i] = factory.fieldType(i);
        }


        final long numberOfDocuments = collection.size();
        MutableString word = new MutableString();
        MutableString delimiter = new MutableString();

        while (true) {
            // Select a starting document
            final int start = (int) (Math.random() * (numberOfDocuments - batchSize));
            try(final DocumentIterator iterator =
                    collection instanceof SegmentedDocumentCollection ?
                            ((SegmentedDocumentCollection) collection).iterator(start)
                            : null) {

                for (int docid = start; docid < start + batchSize; docid++) {
                    final Document document =
                            iterator == null ?
                                    collection.document(docid)
                                    : iterator.nextDocument();

                    System.out.format("%d\t%s", docid, document.uri());
                    for (int i = 0; i < fields.length; i++) {
                        final Object content = document.content(0);
                        switch (types[i]) {
                            case TEXT: {
                                wordReader.setReader((FastBufferedReader) content);
                                while (wordReader.next(word, delimiter)) {
                                    System.out.print('\t');
                                    System.out.print(word);
                                }
                                break;
                            }
View Full Code Here

TOP

Related Classes of it.unimi.dsi.io.WordReader

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.