Examples of org.dbpedia.spotlight.lucene.LuceneManager

org.dbpedia.spotlight.lucene.LuceneManager
This class defines a policy for storing and searching in Lucene. It defines the correct behavior for searching for a Surface Form, a DBpedia Resource, or a Context. Whenever possible, you should use the methods here instead of implementing your own Lucene-specific code. Since the Analyzer and Similarity classes have to be the same for indexing and searching, they are kept here. The case-insensitivity behavior should also be used consistently across classes, so we keep it here as well. TODO: Should this be a singleton to ensure that reader and writer are using the same config? TODO: There should be a generic AnalyzedSurfaceforms that takes a SpotlightConfiguration object, pulls the analyzer and uses it; see the redundancy between N-Gram and Phonetic. The CaseInsensitive variant could be done the same way. @author pablomendes

    public static void main(String[] args) throws IOException {
      String luceneIndexFileName = "data/apple-example/LuceneIndex-apple50_test";
      String resourcePriorsFileName = "data/apple-example/3apples-scores.tsv";


      // Lucene Manager - Controls indexing and searching
      LuceneManager luceneManager = new LuceneManager(FSDirectory.open(new File(luceneIndexFileName)));


        try {
            new CustomScoresDisambiguator(new LuceneCandidateSearcher(luceneManager, false), new DataLoader(new DataLoader.TSVParser(), new File("data/Distinct-surfaceForm-By-uri.grouped")));
        } catch (IOException e) {
            e.printStackTrace();

View Full Code Here

    }


    public static void main(String[] args) throws IOException, SearchException, ItemNotFoundException {
        //String dir = "/home/pablo/workspace/spotlight/output/candidateIndexTitRedDis";
        String dir = "/home/pablo/workspace/spotlight/index/output/candidateIndexTitRedDis";
        LuceneManager luceneManager = new LuceneManager.CaseSensitiveSurfaceForms(FSDirectory.open(new File(dir)));
        CandidateSearcher searcher = new LuceneCandidateSearcher(luceneManager, true);
        System.out.println(searcher.getCandidates(new SurfaceForm("berlin")));
        System.out.println(searcher.getCandidates(new SurfaceForm("Berlin")));
        System.out.println(searcher.getCandidates(new SurfaceForm("sdaf")));
    }

View Full Code Here

        File testFile = new File("E:/dbpa/data/Person_newSurrogates/wikipediaTest."+(new Double(percentageSplit*100)).intValue()+"."+targetType+".amb.tsv");


        // using the next few lines, to create "confusable-with", split in training and testing
        File instancesFile = new File("data/dbpedia/instance_types_en.nt");
        File surrogateIndexDir = new File("data/SurrogateIndex.TitRedDisOcc.lowerCase");
        LuceneManager manager = new LuceneManager.CaseInsensitiveSurfaceForms(FSDirectory.open(surrogateIndexDir));
        LuceneCandidateSearcher surrogateSearcher = new LuceneCandidateSearcher(manager, false);
        Set<String> surfaceForms = getConfusableSurfaceForms(targetType, instancesFile, surrogateSearcher);


        DatasetSplitter splitter = new BySurfaceForm(trainingFile, testFile, minSize, percentageSplit, surfaceForms);
        //DatasetSplitter splitter = new BySize(trainingFile, testFile, minSize, percentageSplit);

View Full Code Here


        try { minCount = Integer.valueOf(args[2]); } catch(ArrayIndexOutOfBoundsException ignored) {}
        try { luceneManagerType = args[3]; } catch(ArrayIndexOutOfBoundsException ignored) {}
        try { shouldOverwrite = args[4].contains("overwrite"); } catch(Exception ignored) {}


        LuceneManager mLucene;
        if (luceneManagerType.contains("case-sensitive")) {
            mLucene = new LuceneManager.CaseSensitiveSurfaceForms(FSDirectory.open(new File(outputDirName)));
        } else if (luceneManagerType.contains("buffered")){
            mLucene = new LuceneManager.BufferedMerging(FSDirectory.open(new File(outputDirName)));
        } else if (luceneManagerType.contains("phonetic")){

View Full Code Here


    public static LuceneManager getSourceManager(String fileName, IndexingConfiguration config) throws IOException {
        File indexFile = new File(fileName);
        if (!indexFile.exists())
            throw new IOException("source index dir "+indexFile+" does not exist; ");
        LuceneManager lucene = new LuceneManager.BufferedMerging(LuceneManager.pickDirectory(indexFile));
        lucene.setDefaultAnalyzer(config.getAnalyzer());
        return lucene;
    }

View Full Code Here

    }
    public static LuceneManager getTargetManager(String fileName, IndexingConfiguration config) throws IOException {
        File indexFile = new File(fileName);
        if (indexFile.exists())
            throw new IOException("target index dir "+indexFile+" exists; I am afraid of overwriting. ");
        LuceneManager lucene = new LuceneManager.BufferedMerging(LuceneManager.pickDirectory(indexFile));
        lucene.setDefaultAnalyzer(config.getAnalyzer());
        return lucene;
    }

View Full Code Here

TOP

Related Classes of org.dbpedia.spotlight.lucene.LuceneManager

org.apache.lucene.analysis.PerFieldAnalyzerWrapper

org.apache.lucene.analysis.standard.StandardAnalyzer

org.apache.lucene.analysis.StopAnalyzer

org.apache.lucene.document.Document

org.apache.lucene.document.Field

org.apache.lucene.index.MultiReader

org.apache.lucene.index.Term

org.apache.lucene.queryParser.QueryParser

org.apache.lucene.search.similar.MoreLikeThis

org.dbpedia.spotlight.disambiguate.CustomScoresDisambiguator

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle Inc. Contact coftware#gmail.com.