Package org.mediameter.cliff.test.util

Examples of org.mediameter.cliff.test.util.FileSystemCache


       
    public GdeltFocusChecker(){
    }
   
    public void check() throws Exception {
        FileSystemCache cache = new FileSystemCache("gdelt-articles");
        ArrayList<GdeltEvent> events = GdeltCsv.allEvents(BASE_DIR);
        //TODO: run through events grabbing source text, running that through CLIFF, and checking results
        int mentionedSuccesses = 0;
        int mentionedFailures = 0;
        for(GdeltEvent event:events){
            logger.debug("-------------------------------------------------------------------------------------------");
            logger.debug("Checking event "+event);
            try{
                URL url = event.getSourceUrl();
                String text;
                if(cache.contains(url.toString())){
                    text = cache.get(url.toString());
                    logger.debug("  Fetched from cache:"+url.toString());
                } else {
                    HTMLDocument htmlDoc = HTMLFetcher.fetch(url);
                    TextDocument doc = new BoilerpipeSAXInput(htmlDoc.toInputSource()).getTextDocument();
                    text = ArticleExtractor.INSTANCE.getText(doc);
                    cache.put(url.toString(), text);
                    logger.debug("Fetched from web:"+url.toString());
                }
                if(text.length()<100){
                    logger.debug("  Skipping because it is too short");
                    continue; //assume we didn't fetch/extract it right
View Full Code Here

TOP

Related Classes of org.mediameter.cliff.test.util.FileSystemCache

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.