Package de.tud.kom.stringmatching.shinglecloud

Examples of de.tud.kom.stringmatching.shinglecloud.ShingleCloud


      System.exit(1);
    }
   
    // Prepare comparison
    System.out.println("Load Shingle Cloud");
    ShingleCloud shingleCloud = new TEIComparatorShingleCloud(derivedDoc, configuration.getPreprocessingAlgorithm(), configuration.getNGramSize(), configuration.getMinimumNumberOfOnesInMatch(), configuration.getMaximumNumberOfZerosBetweenMatches());
    shingleCloud.setCombineMatchesInGroups(configuration.isCombineMatchesInGroups());
   
    // create file writer
    ComparisonFileWriter cfw = null;
    try {
      cfw = new ComparisonFileWriter(this, configuration);
View Full Code Here


  public SearchResults proposeMatches(String id, int type) {
    DocumentContainer docContainer = DocumentContainer.getInstance();

    // get shingle cloud from context
    ShingleCloud sCloud = SearchManager.getInstance().getShingleCloudForDerived(type);
   
    // get needle
    Element elToMatch = docContainer.getElementFor(id, DocumentContainer.MASTER);
    String needle = elToMatch.getTextContent();
   
    // perform match
    sCloud.match(needle);
   
    // prepare list of matches
    double threshold = PropertiesProvider.getInstance().getRatingThresholdForProposals();
    if(type == SearchManager.TYPE_FUZZY)
      threshold = 0;
    // dirty fix
    int maxNumberOfMatches = 100;
   
    List<MatchProposal> matches = new ArrayList<MatchProposal>();
    for(ShingleCloudMatch sm : sCloud.getMatches()){
      if(sm.hasUpperBound() && maxNumberOfMatches > matches.size()){
        // create proposal
        MatchProposal proposal = new MatchProposal(sm.getUpperBound().getId());
        proposal.setDirectRating(sm.getRating() * 100);
        if(sCloud.getNeedleShingles().size() > 0)
          proposal.setIndirectRating(sm.getContainmentInNeedle());

        // add proposal if rating is good enough
        if(proposal.getIndirectRating() >= threshold){
          String text = docContainer.getElementAsStringFor(sm.getUpperBound().getId(), DocumentContainer.DERIVED);
View Full Code Here

TOP

Related Classes of de.tud.kom.stringmatching.shinglecloud.ShingleCloud

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.