Package org.apache.stanbol.enhancer.topic.api

Examples of org.apache.stanbol.enhancer.topic.api.ClassifierException


                        "evaluationclassifierserver", "default-topic-model", "default-topic-model");
                }
                classifier.configure(getCanonicalConfiguration(__evaluationServer,solrCoreConfig));
            }
        } catch (Exception e) {
            throw new ClassifierException(e);
        }

        // clean all previous concepts from the evaluation classifier in case we are reusing an existing solr
        // index from OSGi.
        classifier.removeAllConcepts();

        // iterate over all the topics to register them in the evaluation classifier
        batchOverTopics(new BatchProcessor<SolrDocument>() {
            @Override
            public int process(List<SolrDocument> batch) throws ClassifierException {
                for (SolrDocument topicEntry : batch) {
                    String conceptId = topicEntry.getFirstValue(conceptUriField).toString();
                    Collection<Object> broader = topicEntry.getFieldValues(broaderField);
                    if (broader == null) {
                        classifier.addConcept(conceptId, null, null);
                    } else {
                        List<String> broaderConcepts = new ArrayList<String>();
                        for (Object broaderConcept : broader) {
                            broaderConcepts.add(broaderConcept.toString());
                        }
                        classifier.addConcept(conceptId, null, broaderConcepts);
                    }
                }
                return batch.size();
            }
        });

        // build the model for the current train CV folds
        classifier.setCrossValidationInfo(cvFoldIndex, cvFoldCount);
        // bind our new classifier to the same training set as the parent
        classifier.setTrainingSet(getTrainingSet());
        classifier.updateModel(false);

        final int foldCount = cvFoldCount;
        final int foldIndex = cvFoldIndex;

        // iterate over the topics again to compute scores on the test fold
        int updatedTopics = batchOverTopics(new BatchProcessor<SolrDocument>() {

            @Override
            public int process(List<SolrDocument> batch) throws TrainingSetException, ClassifierException {
                int offset;
                int updated = 0;
                for (SolrDocument topicMetadata : batch) {
                    String topic = topicMetadata.getFirstValue(conceptUriField).toString();
                    List<String> topics = Arrays.asList(topic);
                    List<String> falseNegativeExamples = new ArrayList<String>();
                    int truePositives = 0;
                    int falseNegatives = 0;
                    int positiveSupport = 0;
                    offset = 0;
                    Batch<Example> examples = Batch.emtpyBatch(Example.class);
                    boolean skipTopic = false;
                    do {
                        examples = getTrainingSet().getPositiveExamples(topics, examples.nextOffset);
                        if (offset == 0 && examples.items.size() < MIN_EVALUATION_SAMPLES) {
                            // we need a minimum about of examples otherwise it's really not
                            // worth computing statistics
                            skipTopic = true;
                            break;
                        }
                        for (Example example : examples.items) {
                            if (!(offset % foldCount == foldIndex)) {
                                // this example is not part of the test fold, skip it
                                offset++;
                                continue;
                            }
                            positiveSupport++;
                            offset++;
                            List<TopicSuggestion> suggestedTopics = classifier
                                    .suggestTopics(example.contents);
                            boolean match = false;
                            for (TopicSuggestion suggestedTopic : suggestedTopics) {
                                if (topic.equals(suggestedTopic.conceptUri)) {
                                    match = true;
                                    truePositives++;
                                    break;
                                }
                            }
                            if (!match) {
                                falseNegatives++;
                                if (falseNegativeExamples.size() < MAX_COLLECTED_EXAMPLES / foldCount) {
                                    falseNegativeExamples.add(example.id);
                                }
                            }
                        }
                    } while (!skipTopic && examples.hasMore && offset < MAX_EVALUATION_SAMPLES);

                    List<String> falsePositiveExamples = new ArrayList<String>();
                    int falsePositives = 0;
                    int negativeSupport = 0;
                    offset = 0;
                    examples = Batch.emtpyBatch(Example.class);
                    do {
                        if (skipTopic) {
                            break;
                        }
                        examples = getTrainingSet().getNegativeExamples(topics, examples.nextOffset);
                        for (Example example : examples.items) {
                            if (!(offset % foldCount == foldIndex)) {
                                // this example is not part of the test fold, skip it
                                offset++;
                                continue;
                            }
                            negativeSupport++;
                            offset++;
                            List<TopicSuggestion> suggestedTopics = classifier
                                    .suggestTopics(example.contents);
                            for (TopicSuggestion suggestedTopic : suggestedTopics) {
                                if (topic.equals(suggestedTopic.conceptUri)) {
                                    falsePositives++;
                                    if (falsePositiveExamples.size() < MAX_COLLECTED_EXAMPLES / foldCount) {
                                        falsePositiveExamples.add(example.id);
                                    }
                                    break;
                                }
                            }
                            // we don't need to collect true negatives
                        }
                    } while (examples.hasMore && offset < MAX_EVALUATION_SAMPLES);

                    if (skipTopic) {
                        log.debug("Skipping evaluation of {} because too few positive examples.", topic);
                    } else {
                        // compute precision, recall and f1 score for the current test fold and topic
                        float precision = 0;
                        if (truePositives != 0 || falsePositives != 0) {
                            precision = truePositives / (float) (truePositives + falsePositives);
                        }
                        float recall = 0;
                        if (truePositives != 0 || falseNegatives != 0) {
                            recall = truePositives / (float) (truePositives + falseNegatives);
                        }
                        updatePerformanceMetadata(topic, precision, recall, positiveSupport, negativeSupport,
                            falsePositiveExamples, falseNegativeExamples);
                        updated += 1;
                    }
                }
                try {
                    getActiveSolrServer().commit();
                } catch (Exception e) {
                    throw new ClassifierException(e);
                }
                return updated;
            }
        });
View Full Code Here


                positiveSupport, negativeSupport));
        } catch (Exception e) {
            String msg = String
                    .format("Error updating performance metadata for topic '%s' on Solr Core '%s'",
                        conceptId, solrCoreId);
            throw new ClassifierException(msg, e);
        }
    }
View Full Code Here

        query.addFilterQuery(entryTypeField + ":" + METADATA_ENTRY);
        query.addFilterQuery(conceptUriField + ":" + ClientUtils.escapeQueryChars(conceptId));
        try {
            SolrDocumentList results = solrServer.query(query).getResults();
            if (results.isEmpty()) {
                throw new ClassifierException(String.format("'%s' is not a registered topic", conceptId));
            }
            SolrDocument metadata = results.get(0);
            Float precision = computeMeanValue(metadata, precisionField);
            Float recall = computeMeanValue(metadata, recallField);
            int positiveSupport = computeSumValue(metadata, positiveSupportField);
            int negativeSupport = computeSumValue(metadata, negativeSupportField);
            Date evaluationDate = (Date) metadata.getFirstValue(modelEvaluationDateField);
            boolean uptodate = evaluationDate != null;
            ClassificationReport report = new ClassificationReport(precision, recall, positiveSupport,
                    negativeSupport, uptodate, evaluationDate);
            if (metadata.getFieldValues(falsePositivesField) == null) {
                metadata.setField(falsePositivesField, new ArrayList<Object>());
            }
            for (Object falsePositiveId : metadata.getFieldValues(falsePositivesField)) {
                report.falsePositiveExampleIds.add(falsePositiveId.toString());
            }
            if (metadata.getFieldValues(falseNegativesField) == null) {
                metadata.setField(falseNegativesField, new ArrayList<Object>());
            }
            for (Object falseNegativeId : metadata.getFieldValues(falseNegativesField)) {
                report.falseNegativeExampleIds.add(falseNegativeId.toString());
            }
            return report;
        } catch (SolrServerException e) {
            throw new ClassifierException(String.format("Error fetching the performance report for topic "
                                                        + conceptId));
        }
    }
View Full Code Here

TOP

Related Classes of org.apache.stanbol.enhancer.topic.api.ClassifierException

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.