Package org.apache.stanbol.enhancer.topic

Examples of org.apache.stanbol.enhancer.topic.ClassifierException


            solrServer.request(request);
            // the commit is done by the caller in batch
        } catch (Exception e) {
            String msg = String.format("Error updating topic with id '%s' on Solr Core '%s'", conceptUri,
                solrCoreId);
            throw new ClassifierException(msg, e);
        }
        long stop = System.currentTimeMillis();
        log.debug("Sucessfully updated topic {} in {}s", conceptUri, (double) (stop - start) / 1000.);
    }
View Full Code Here


    }

    public int updatePerformanceEstimates(boolean incremental) throws ClassifierException,
                                                              TrainingSetException {
        if (evaluationFolder != null) {
            throw new ClassifierException("Another evaluation is already running");
        }
        int updatedTopics = 0;
        int cvFoldCount = 3; // 3-folds CV is hardcoded for now
        int cvIterationCount = 1; // only one 3-folds CV iteration
        TopicClassificationEngine classifier = new TopicClassificationEngine();
        classifier.setTrainingSet(trainingSet);
        try {
            // TODO: make the temporary folder path configurable with a property
            evaluationFolder = File.createTempFile("stanbol-classifier-evaluation-", "-solr");
            for (int cvFoldIndex = 0; cvFoldIndex < cvIterationCount; cvFoldIndex++) {
                updatedTopics = performCVFold(classifier, cvFoldIndex, cvFoldCount, cvIterationCount,
                    incremental);
            }
        } catch (ConfigurationException e) {
            throw new ClassifierException(e);
        } catch (IOException e) {
            throw new ClassifierException(e);
        } finally {
            FileUtils.deleteQuietly(evaluationFolder);
            evaluationFolder = null;
        }
        return updatedTopics;
View Full Code Here

        try {
            EmbeddedSolrServer evaluationServer = EmbeddedSolrHelper.makeEmbeddedSolrServer(evaluationFolder,
                "evaluationclassifierserver", "classifier", "classifier");
            classifier.configure(getCanonicalConfiguration(evaluationServer));
        } catch (Exception e) {
            throw new ClassifierException(e);
        }

        // iterate over all the topics to register them in the evaluation classifier
        batchOverTopics(new BatchProcessor<SolrDocument>() {
            @Override
            public int process(List<SolrDocument> batch) throws ClassifierException {
                for (SolrDocument topicEntry : batch) {
                    String conceptId = topicEntry.getFirstValue(conceptUriField).toString();
                    Collection<Object> broader = topicEntry.getFieldValues(broaderField);
                    if (broader == null) {
                        classifier.addConcept(conceptId, null, null);
                    } else {
                        List<String> broaderConcepts = new ArrayList<String>();
                        for (Object broaderConcept : broader) {
                            broaderConcepts.add(broaderConcept.toString());
                        }
                        classifier.addConcept(conceptId, null, broaderConcepts);
                    }
                }
                return batch.size();
            }
        });

        // build the model for the current train CV folds
        classifier.setCrossValidationInfo(cvFoldIndex, cvFoldCount);
        classifier.updateModel(false);

        final int foldCount = cvFoldCount;
        final int foldIndex = cvFoldIndex;

        // iterate over the topics again to compute scores on the test fold
        int updatedTopics = batchOverTopics(new BatchProcessor<SolrDocument>() {

            /**
             * Evaluates the classifier on the test fold for every topic of the batch and stores
             * the resulting statistics with {@code updatePerformanceMetadata}.
             * <p>
             * For each topic the positive examples and then the negative examples of the
             * training set are scanned. An example belongs to the test fold when
             * {@code offset % foldCount == foldIndex}; all other examples are skipped. The ids of
             * some misclassified examples are collected, up to
             * {@code MAX_COLLECTED_EXAMPLES / foldCount} per list. The active Solr server is
             * committed once after all the topics of the batch have been handled.
             *
             * @param batch
             *            the topic metadata entries to evaluate
             * @return the size of the processed batch
             * @throws TrainingSetException
             *             declared for the training set lookups
             * @throws ClassifierException
             *             if the final commit fails (other calls may raise it as well)
             */
            @Override
            public int process(List<SolrDocument> batch) throws TrainingSetException, ClassifierException {
                // number of examples seen so far in the current scan, skipped ones included
                int offset;
                for (SolrDocument topicMetadata : batch) {
                    String topic = topicMetadata.getFirstValue(conceptUriField).toString();
                    List<String> topics = Arrays.asList(topic);
                    List<String> falseNegativeExamples = new ArrayList<String>();
                    int truePositives = 0;
                    int falseNegatives = 0;
                    int positiveSupport = 0;
                    offset = 0;
                    // the empty batch only provides the nextOffset used by the first fetch
                    Batch<Example> examples = Batch.emtpyBatch(Example.class);
                    do {
                        examples = trainingSet.getPositiveExamples(topics, examples.nextOffset);
                        for (Example example : examples.items) {
                            if (!(offset % foldCount == foldIndex)) {
                                // this example is not part of the test fold, skip it
                                offset++;
                                continue;
                            }
                            positiveSupport++;
                            offset++;
                            List<TopicSuggestion> suggestedTopics = classifier
                                    .suggestTopics(example.contents);
                            boolean match = false;
                            // the example counts as a true positive as soon as one suggestion
                            // is the evaluated topic
                            for (TopicSuggestion suggestedTopic : suggestedTopics) {
                                if (topic.equals(suggestedTopic.conceptUri)) {
                                    match = true;
                                    truePositives++;
                                    break;
                                }
                            }
                            if (!match) {
                                falseNegatives++;
                                // only keep a bounded number of example ids
                                if (falseNegativeExamples.size() < MAX_COLLECTED_EXAMPLES / foldCount) {
                                    falseNegativeExamples.add(example.id);
                                }
                            }
                        }
                        // the sample limit is only checked once a fetched batch is fully consumed
                    } while (examples.hasMore && offset < MAX_EVALUATION_SAMPLES);

                    List<String> falsePositiveExamples = new ArrayList<String>();
                    int falsePositives = 0;
                    int negativeSupport = 0;
                    // restart the fold assignment and the paging for the negative examples
                    offset = 0;
                    examples = Batch.emtpyBatch(Example.class);
                    do {
                        examples = trainingSet.getNegativeExamples(topics, examples.nextOffset);
                        for (Example example : examples.items) {
                            if (!(offset % foldCount == foldIndex)) {
                                // this example is not part of the test fold, skip it
                                offset++;
                                continue;
                            }
                            negativeSupport++;
                            offset++;
                            List<TopicSuggestion> suggestedTopics = classifier
                                    .suggestTopics(example.contents);
                            for (TopicSuggestion suggestedTopic : suggestedTopics) {
                                if (topic.equals(suggestedTopic.conceptUri)) {
                                    falsePositives++;
                                    if (falsePositiveExamples.size() < MAX_COLLECTED_EXAMPLES / foldCount) {
                                        falsePositiveExamples.add(example.id);
                                    }
                                    break;
                                }
                            }
                            // we don't need to collect true negatives
                        }
                    } while (examples.hasMore && offset < MAX_EVALUATION_SAMPLES);

                    // compute precision and recall for the current test fold and topic (no f1
                    // score is computed here); both stay at 0 when their denominator would be 0
                    float precision = 0;
                    if (truePositives != 0 || falsePositives != 0) {
                        precision = truePositives / (float) (truePositives + falsePositives);
                    }
                    float recall = 0;
                    if (truePositives != 0 || falseNegatives != 0) {
                        recall = truePositives / (float) (truePositives + falseNegatives);
                    }
                    updatePerformanceMetadata(topic, precision, recall, positiveSupport, negativeSupport,
                        falsePositiveExamples, falseNegativeExamples);
                }
                // one commit for the whole batch
                try {
                    getActiveSolrServer().commit();
                } catch (Exception e) {
                    throw new ClassifierException(e);
                }
                return batch.size();
            }
        });
View Full Code Here

            }
        } catch (Exception e) {
            String msg = String
                    .format("Error updating performance metadata for topic '%s' on Solr Core '%s'",
                        conceptId, solrCoreId);
            throw new ClassifierException(msg, e);
        }
    }
View Full Code Here

        SolrQuery query = new SolrQuery(entryTypeField + ":" + METADATA_ENTRY + " AND " + conceptUriField
                                        + ":" + ClientUtils.escapeQueryChars(conceptId));
        try {
            SolrDocumentList results = solrServer.query(query).getResults();
            if (results.isEmpty()) {
                throw new ClassifierException(String.format("'%s' is not a registered topic", conceptId));
            }
            SolrDocument metadata = results.get(0);
            Float precision = computeMeanValue(metadata, precisionField);
            Float recall = computeMeanValue(metadata, recallField);
            int positiveSupport = computeSumValue(metadata, positiveSupportField);
            int negativeSupport = computeSumValue(metadata, negativeSupportField);
            Date evaluationDate = (Date) metadata.getFirstValue(modelEvaluationDateField);
            boolean uptodate = evaluationDate != null;
            ClassificationReport report = new ClassificationReport(precision, recall, positiveSupport,
                    negativeSupport, uptodate, evaluationDate);
            if (metadata.getFieldValues(falsePositivesField) == null) {
                metadata.setField(falsePositivesField, new ArrayList<Object>());
            }
            for (Object falsePositiveId : metadata.getFieldValues(falsePositivesField)) {
                report.falsePositiveExampleIds.add(falsePositiveId.toString());
            }
            if (metadata.getFieldValues(falseNegativesField) == null) {
                metadata.setField(falseNegativesField, new ArrayList<Object>());
            }
            for (Object falseNegativeId : metadata.getFieldValues(falseNegativesField)) {
                report.falseNegativeExampleIds.add(falseNegativeId.toString());
            }
            return report;
        } catch (SolrServerException e) {
            throw new ClassifierException(String.format("Error fetching the performance report for topic "
                                                        + conceptId));
        }
    }
View Full Code Here

TOP

Related Classes of org.apache.stanbol.enhancer.topic.ClassifierException

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.