Package com.bericotech.clavin.gazetteer.query

Examples of com.bericotech.clavin.gazetteer.query.QueryBuilder


    }

    @Before
    public void setUp() throws ClavinException {
        instance = new LuceneGazetteer(INDEX_DIRECTORY);
        queryBuilder = new QueryBuilder().maxResults(1).fuzzyMode(FuzzyMode.OFF);
    }
View Full Code Here


        // did we filter *everything* out?
        if (filteredLocations.isEmpty()) {
            return Collections.EMPTY_LIST;
        }

        QueryBuilder builder = new QueryBuilder()
                .maxResults(maxHitDepth)
                // translate CLAVIN 1.x 'fuzzy' parameter into NO_EXACT or OFF; it isn't
                // necessary, or desirable to support FILL for the CLAVIN resolution algorithm
                .fuzzyMode(fuzzy ? FuzzyMode.NO_EXACT : FuzzyMode.OFF)
                .includeHistorical(true);

        if (maxHitDepth > 1) { // perform context-based heuristic matching
            // stores all possible matches for each location name
            List<List<ResolvedLocation>> allCandidates = new ArrayList<List<ResolvedLocation>>();

            // loop through all the location names
            for (LocationOccurrence location : filteredLocations) {
                // get all possible matches
                List<ResolvedLocation> candidates = gazetteer.getClosestLocations(builder.location(location).build());

                // if we found some possible matches, save them
                if (candidates.size() > 0) {
                    allCandidates.add(candidates);
                }
            }

            // initialize return object
            List<ResolvedLocation> bestCandidates = new ArrayList<ResolvedLocation>();

            // split-up allCandidates into reasonably-sized chunks to
            // limit computational load when heuristically selecting
            // the best matches
            for (List<List<ResolvedLocation>> theseCandidates : ListUtils.chunkifyList(allCandidates, maxContextWindow)) {
                // select the best match for each location name
                // based on heuristics
                bestCandidates.addAll(pickBestCandidates(theseCandidates));
            }

            return bestCandidates;
        } else { // use no heuristics, simply choose matching location with greatest population
            // initialize return object
            List<ResolvedLocation> resolvedLocations = new ArrayList<ResolvedLocation>();

            // stores possible matches for each location name
            List<ResolvedLocation> candidateLocations;

            // loop through all the location names
            for (LocationOccurrence location : filteredLocations) {
                // choose the top-sorted candidate for each individual
                // location name
                candidateLocations = gazetteer.getClosestLocations(builder.location(location).build());

                // if a match was found, add it to the return list
                if (candidateLocations.size() > 0) {
                    resolvedLocations.add(candidateLocations.get(0));
                }
View Full Code Here

     * @throws ClavinException   if an error occurs while resolving locations
     */
    public ResolvedMultipartLocation resolveMultipartLocation(MultipartLocationName location, boolean fuzzy)
            throws ClavinException {
        // find all component locations in the gazetteer
        QueryBuilder queryBuilder = new QueryBuilder()
                // translate CLAVIN 1.x 'fuzzy' parameter into NO_EXACT or OFF; it isn't
                // necessary, or desirable to support FILL for the multi-part resolution algorithm
                .fuzzyMode(fuzzy ? FuzzyMode.NO_EXACT : FuzzyMode.OFF)
                .includeHistorical(true)
                .maxResults(MAX_RESULTS);

        // country query should only include country-like feature codes
        queryBuilder.location(location.getCountry()).addCountryCodes();
        List<ResolvedLocation> countries = gazetteer.getClosestLocations(queryBuilder.build());
        // remove all "countries" that are not considered top-level administrative divisions; this
        // filters out territories that do not contain descendant GeoNames
        Iterator<ResolvedLocation> iter = countries.iterator();
        while (iter.hasNext()) {
            if (!iter.next().getGeoname().isTopLevelAdminDivision()) {
                iter.remove();
            }
        }

        Set<CountryCode> foundCountries = EnumSet.noneOf(CountryCode.class);
        // state query should only include admin-level feature codes with ancestors
        // in the list of located countries
        queryBuilder.location(location.getState()).clearFeatureCodes().addAdminCodes();
        for (ResolvedLocation country : countries) {
            queryBuilder.addParentIds(country.getGeoname().getGeonameID());
            foundCountries.add(country.getGeoname().getPrimaryCountryCode());
        }
        List<ResolvedLocation> states = gazetteer.getClosestLocations(queryBuilder.build());

        // city query should only include city-level feature codes; ancestry is restricted
        // to the discovered states or, if no states were found, the discovered countries or,
        // if neither states nor countries were found, no ancestry restrictions are added and
        // the most populated city will be selected
        queryBuilder.location(location.getCity()).clearFeatureCodes().addCityCodes();
        if (!states.isEmpty()) {
            Set<CountryCode> stateCodes = EnumSet.noneOf(CountryCode.class);
            // only clear the parent ID restrictions if states were found; otherwise
            // we will continue our search based on the existing country restrictions, if any
            queryBuilder.clearParentIds();
            for (ResolvedLocation state : states) {
                // only include the first administrative division found for each target
                // country
                if (!stateCodes.contains(state.getGeoname().getPrimaryCountryCode())) {
                    queryBuilder.addParentIds(state.getGeoname().getGeonameID());
                    stateCodes.add(state.getGeoname().getPrimaryCountryCode());
                }
                // since we are only including one "state" per country, short-circuit
                // the loop if we have added one for each unique country code returned
                // by the countries search
                if (!foundCountries.isEmpty() && foundCountries.equals(stateCodes)) {
                    break;
                }
            }
        }
        List<ResolvedLocation> cities = gazetteer.getClosestLocations(queryBuilder.build());

        // initialize the return object's components
        ResolvedLocation finalCity = null;
        ResolvedLocation finalState = null;
        ResolvedLocation finalCountry = null;
View Full Code Here

            return null;
        }

        Set<MatchedLocation> candidates = new HashSet<MatchedLocation>();
        Deque<SearchResult> matches = new LinkedList<SearchResult>();
        QueryBuilder query = new QueryBuilder()
                .maxResults(MAX_RESULTS)
                // translate CLAVIN 1.x 'fuzzy' parameter into NO_EXACT or OFF; it isn't
                // necessary, or desirable to support FILL for the multi-part resolution algorithm
                .fuzzyMode(fuzzy ? FuzzyMode.NO_EXACT : FuzzyMode.OFF)
                .includeHistorical(true);
View Full Code Here

        // did we filter *everything* out?
        if (filteredLocations.isEmpty()) {
            return Collections.EMPTY_LIST;
        }

        QueryBuilder builder = new QueryBuilder()
                .maxResults(maxHitDepth)
                // translate CLAVIN 1.x 'fuzzy' parameter into NO_EXACT or OFF; it isn't
                // necessary, or desirable to support FILL for the CLAVIN resolution algorithm
                .fuzzyMode(fuzzy ? FuzzyMode.NO_EXACT : FuzzyMode.OFF)
                .includeHistorical(true);

        if (maxHitDepth > 1) { // perform context-based heuristic matching
            // stores all possible matches for each location name
            List<List<ResolvedLocation>> allCandidates = new ArrayList<List<ResolvedLocation>>();

            // loop through all the location names
            for (LocationOccurrence location : filteredLocations) {
                // get all possible matches
                List<ResolvedLocation> candidates = getGazetteer().getClosestLocations(builder.location(location).build());

                // if we found some possible matches, save them
                if (candidates.size() > 0) {
                    allCandidates.add(candidates);
                }
            }

            // initialize return object
            List<ResolvedLocation> bestCandidates = new ArrayList<ResolvedLocation>();

            //RB: use our heuristic disambiguation instead of the CLAVIN default
            bestCandidates = disambiguationStrategy.select(this, allCandidates);
            /*
            // split-up allCandidates into reasonably-sized chunks to
            // limit computational load when heuristically selecting
            // the best matches
            for (List<List<ResolvedLocation>> theseCandidates : ListUtils.chunkifyList(allCandidates, maxContextWindow)) {
                // select the best match for each location name
                // based on heuristics
                bestCandidates.addAll(pickBestCandidates(theseCandidates));
            }
            */

            return bestCandidates;
        } else { // use no heuristics, simply choose matching location with greatest population
            // initialize return object
            List<ResolvedLocation> resolvedLocations = new ArrayList<ResolvedLocation>();

            // stores possible matches for each location name
            List<ResolvedLocation> candidateLocations;

            // loop through all the location names
            for (LocationOccurrence location : filteredLocations) {
                // choose the top-sorted candidate for each individual
                // location name
                candidateLocations = getGazetteer().getClosestLocations(builder.location(location).build());

                // if a match was found, add it to the return list
                if (candidateLocations.size() > 0) {
                    resolvedLocations.add(candidateLocations.get(0));
                }
View Full Code Here

TOP

Related Classes of com.bericotech.clavin.gazetteer.query.QueryBuilder

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.