Examples of SurfaceForm


Examples of org.dbpedia.spotlight.model.SurfaceForm

     
            //System.out.println("\n\nNo of Kea Keyphrases extracted: " + keaPhrases.size());
      for( KeyPhrase kp: keaPhrases) {
        //System.out.println("KP: " + kp.getPhrase() + " ( Weight: " + kp.getWeight() + " Rank: " + kp.getRank() + ")");
        LOG.debug("Occurrences of kp " + kp.getPhrase() + " are: " + kp.getOffsetslist());
        SurfaceForm surfaceForm = new SurfaceForm(kp.getPhrase());
        for (Integer s: kp.getOffsetslist()) {
          SurfaceFormOccurrence sfocc =  new SurfaceFormOccurrence(surfaceForm, intext, s);
          sfOccurrences.add(sfocc);
        }
      }
View Full Code Here

Examples of org.dbpedia.spotlight.model.SurfaceForm

                    System.out.println("End = " + entEnd);
                    System.out.println("Sentence = " + sentence);
                    System.out.println("Text = " + text);
                    */

                    SurfaceForm surfaceForm = new SurfaceForm(surfaceFormStr);
                    SurfaceFormOccurrence sfocc =  new SurfaceFormOccurrence(surfaceForm, text, entStart);
                    sfocc.features().put("type", new Feature("type",oType.toString()));
                    sfOccurrences.add(sfocc);
                }
            }
View Full Code Here

Examples of org.dbpedia.spotlight.model.SurfaceForm

      String endtknTxt = intext.substring(endtkn_begin,endtkn_end);
      if (isStopWord(endtknTxt)) ignorephrase = true;

      // Only phrases that were not flagged to be ignored produce an occurrence.
      if (!ignorephrase) {               
        NGram ng = new NGram(txtform, begin, end);
        SurfaceForm surfaceForm = new SurfaceForm(ng.getTextform());

                // Evaluated only when assertions are enabled (-ea); note that it runs after the SurfaceForm has already been built.
                assert !ng.getTextform().isEmpty();

        // NOTE(review): the occurrence is anchored in `text`, while the stop-word check just before this
        // block slices `intext` - confirm that both refer to the same content.
        SurfaceFormOccurrence sfocc =  new SurfaceFormOccurrence(surfaceForm, text, ng.getStart());
        // Skip names that are blank after trimming, and occurrences already present in sfOccurrences.
        if (surfaceForm.name().trim().length()>0 && !sfOccurrences.contains(sfocc)) {
          sfOccurrences.add(sfocc);
        }
      }
    }
  }
View Full Code Here

Examples of org.dbpedia.spotlight.model.SurfaceForm

            if (start == 1)
                break;
            int end = textWithMarkedSurfaceForms.indexOf("]]", start);
            if (end == -1)
                break;
            SurfaceForm sf = new SurfaceForm(textWithMarkedSurfaceForms.substring(start, end));
            int offset = start - (sfOccs.size()*4) - 2;
            SurfaceFormOccurrence sfOcc = new SurfaceFormOccurrence(sf, unMarkedUpText, offset);
            sfOccs.add(sfOcc);
            i = end + 2;
        }
View Full Code Here

Examples of org.dbpedia.spotlight.model.SurfaceForm

                        if(isSurfaceFormField) { // Here we set the surface form specific information.
                            surfaceFormTerm = term;                  // Store the surface form
                            maxSf = termCache.cardinality(reader, surfaceFormTerm); // This is the number of documents that contain the surface form (size of surrogate set)
                            sf = maxSf; // setting sf = maxSf generates isf=1, leading to tf*isf = tf
                            promiscuity = termCache.getPromiscuity(reader, new SurfaceForm(term.text()));
                        } else {
                            sf = termCache.cardinality(reader, surfaceFormTerm, term); // This is the number of docs containing sf + term
                        }

                    } catch (IOException e) {
View Full Code Here

Examples of org.dbpedia.spotlight.model.SurfaceForm

        Set<SurfaceForm> surfaceForms = new HashSet<SurfaceForm>();

        // search index for resource, iterate through the results
        for (ScoreDoc hit : getHits(mLucene.getQuery(res))) {
            int docNo = hit.doc;
            SurfaceForm sf = getSurfaceForm(docNo);
            surfaceForms.add(sf);
        }

        LOG.debug("Surrogates for "+res+"("+surfaceForms.size()+"): "+surfaceForms);
View Full Code Here

Examples of org.dbpedia.spotlight.model.SurfaceForm

    public static void main(String[] args) throws IOException, SearchException, ItemNotFoundException {
        //String dir = "/home/pablo/workspace/spotlight/output/candidateIndexTitRedDis";
        String dir = "/home/pablo/workspace/spotlight/index/output/candidateIndexTitRedDis";
        LuceneManager luceneManager = new LuceneManager.CaseSensitiveSurfaceForms(FSDirectory.open(new File(dir)));
        CandidateSearcher searcher = new LuceneCandidateSearcher(luceneManager, true);
        System.out.println(searcher.getCandidates(new SurfaceForm("berlin")));
        System.out.println(searcher.getCandidates(new SurfaceForm("Berlin")));
        System.out.println(searcher.getCandidates(new SurfaceForm("sdaf")));
    }
View Full Code Here

Examples of org.dbpedia.spotlight.model.SurfaceForm

    List<SurfaceFormOccurrence> surfaceFormOccurrences = new LinkedList<SurfaceFormOccurrence>();

    for(TaggedToken taggedToken : taggedTokens) {

      if(!filterPOS.isOnUnigramBlacklist(taggedToken.getPOSTag())) {
        surfaceFormOccurrences.add(new SurfaceFormOccurrence(new SurfaceForm(taggedToken.getToken()), null, taggedToken.getOffset()));
      }

    }

    return surfaceFormOccurrences;
View Full Code Here

Examples of org.dbpedia.spotlight.model.SurfaceForm

        }
        // NOTE(review): String.endsWith performs a literal suffix comparison, not a regex match, so this
        // condition only holds when sf literally ends with the characters [\.\,] - a plain trailing "." or ","
        // is not detected here. Confirm the intent before relying on offsetFromEnd.
        if (sf.toLowerCase().endsWith("[\\.\\,]")) {
            offsetFromEnd = 1;
        }
        // NOTE(review): end is the index of the last character of the surface form name, and substring's end
        // index is exclusive; if sf has the same length as that name, the final character is dropped even when
        // offsetFromEnd is 0 - verify this is intended.
        int end = sfo.surfaceForm().name().length()-1;
        // The variation is the trimmed slice of sf between the two offsets.
        SurfaceForm variation = new SurfaceForm(sf.substring(offsetFromStart, end-offsetFromEnd).trim());
        // Context, provenance and spot probability are copied from the original occurrence; the text offset
        // is shifted by offsetFromStart.
        return new SurfaceFormOccurrence(variation, sfo.context(), sfo.textOffset()+offsetFromStart, sfo.provenance(), sfo.spotProb());
    }
View Full Code Here

Examples of org.dbpedia.spotlight.model.SurfaceForm

        // Parse the surface forms data set entry by entry.
        // NOTE(review): the FileInputStream is created inline and no reference to it is kept, so this code
        // cannot close it - confirm whether NxParser closes the stream, otherwise it leaks.
        NxParser nxParser = new NxParser(new FileInputStream(surfaceFormsDataSet), false);
        while (nxParser.hasNext()) {
            Node[] nodes = nxParser.next();
            // Element 0 becomes the resource identifier after every occurrence of the default namespace string is removed.
            String resourceString = nodes[0].toString().replace(SpotlightConfiguration.DEFAULT_NAMESPACE,"");
            // Element 2 is used verbatim as the surface form text; element 1 is not read.
            String surfaceFormString = nodes[2].toString();
            // Derive a list of surface forms from that text and add them for the resource.
            List<SurfaceForm> surfaceForms = AddSurfaceFormsToIndex.fromTitlesToAlternativesJ(new SurfaceForm(surfaceFormString));
            add(surfaceForms, new DBpediaResource(resourceString));
        }

        LOG.info("Done.");
    }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.