Package net.yacy.kelondro.data.meta

Examples of net.yacy.kelondro.data.meta.DigestURI
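
DigestURI is YaCy's URL class. As an orientation before the excerpts, here is a
minimal sketch that uses only the calls appearing in the snippets below; note
that the constructor throws MalformedURLException, so every caller either
declares or catches it:

    import java.net.MalformedURLException;
    import net.yacy.kelondro.data.meta.DigestURI;

    // Minimal sketch: construct a DigestURI and read its components.
    public final class DigestURIDemo {
        public static void main(final String[] args) {
            try {
                final DigestURI url = new DigestURI("http://localhost:8090/index.html");
                System.out.println("host: " + url.getHost());   // localhost
                System.out.println("port: " + url.getPort());   // 8090
                System.out.println("norm: " + url.toNormalform(false, false));
            } catch (final MalformedURLException e) {
                e.printStackTrace();                            // not a valid URL
            }
        }
    }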


        images.addAll(document.getImages().values()); // iterates images in descending size order!
        // the hashCode() of an htmlFilterImageEntry serves as the measure of image size

        final Iterator<ImageEntry> i = images.iterator();
        ImageEntry ientry;
        DigestURI url;
        String desc;
        final List<MediaSnippet> result = new ArrayList<MediaSnippet>();
        while (i.hasNext()) {
            ientry = i.next();
            url = new DigestURI(ientry.url());
            final String u = url.toString();
            if (u.indexOf(".ico") >= 0 || u.indexOf("favicon") >= 0) continue;
            if (ientry.height() > 0 && ientry.height() < 32) continue;
            if (ientry.width() > 0 && ientry.width() < 32) continue;
            desc = ientry.alt();
            // appcount = number of query-hash hits in the URL plus the description;
            // each removeAppearanceHashes call returns the hashes NOT found in the
            // given text, so both remainders are subtracted from 2 * |queryhashes|
            final int appcount = queryhashes.size() * 2 -
                           removeAppearanceHashes(url.toNormalform(false, false), queryhashes).size() -
                           removeAppearanceHashes(desc, queryhashes).size();
            // widen to long before multiplying: the int product could overflow
            final long ranking = Long.MAX_VALUE - (long) (ientry.height() + 1) * (ientry.width() + 1) * (appcount + 1);
            result.add(new MediaSnippet(ContentDomain.IMAGE, url, MimeTable.url2mime(url), desc, ientry.fileSize(), ientry.width(), ientry.height(), ranking, source));
        }
        return result;
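
The ranking line deserves a note: the product of image area and query-hit count
is subtracted from Long.MAX_VALUE, so larger, better-matching images receive
smaller ranking values and sort first if the results are ordered by ascending
ranking. A minimal sketch of the computation (hypothetical helper name, widened
to long so the product cannot overflow int):

    // Hypothetical helper mirroring the ranking formula above.
    static long imageRanking(final int width, final int height, final int appcount) {
        return Long.MAX_VALUE - (long) (height + 1) * (width + 1) * (appcount + 1);
    }

    // e.g. imageRanking(800, 600, 4) < imageRanking(40, 40, 1):
    // the large, well-matching image gets the smaller (earlier) value.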


                    // URL-repair pass: force an http:// scheme onto the stored URL,
                    // then verify the corrected URL with an HTTP HEAD request
                    int pos = -1;
                    if ((pos = oldUrlStr.indexOf("://")) != -1) {
                        final String newUrlStr = "http://" + oldUrlStr.substring(pos + 3);
                        final DigestURI newUrl = new DigestURI(newUrlStr);

                        if (client.HEADResponse(newUrl.toString()) != null
                            && client.getHttpResponse().getStatusLine().getStatusCode() == 200) {
                            entry.setCol(1, UTF8.getBytes(newUrl.toString()));
                            urlIndexFile.put(entry);
                            if (log.isInfo()) log.logInfo("UrlDB-Entry with urlHash '" + ASCII.String(urlHashBytes) + "' corrected\n\tURL: " + oldUrlStr + " -> " + newUrlStr);
                        } else {
                            remove(urlHashBytes);
                            if (log.isInfo()) log.logInfo("UrlDB-Entry with urlHash '" + ASCII.String(urlHashBytes) + "' removed\n\tURL: " + oldUrlStr + "\n\tConnection Status: " + (client.getHttpResponse() == null ? "null" : client.getHttpResponse().getStatusLine()));
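
The pattern above (send a HEAD request, keep the corrected entry on HTTP 200,
otherwise remove it) can be mirrored with the plain JDK. A minimal sketch using
java.net.HttpURLConnection; the helper name is hypothetical and not YaCy API:

    import java.io.IOException;
    import java.net.HttpURLConnection;
    import java.net.URL;

    // Hypothetical helper: true if a HEAD request answers with HTTP 200.
    static boolean respondsWithOk(final String urlStr) {
        try {
            final HttpURLConnection con = (HttpURLConnection) new URL(urlStr).openConnection();
            con.setRequestMethod("HEAD");   // headers only, no body transfer
            con.setConnectTimeout(5000);
            con.setReadTimeout(5000);
            return con.getResponseCode() == HttpURLConnection.HTTP_OK;
        } catch (final IOException e) {
            return false;                   // unreachable, timed out, or malformed
        }
    }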

        // TODO: this does not work at the moment
        System.out.println("this does not work at the moment");
        System.exit(0);
        final char[] buffer = new char[512];
        try {
            // scrape content relative to a placeholder base URL (YaCy's default local port)
            final ContentScraper scraper = new ContentScraper(new DigestURI("http://localhost:8090"));
            final Transformer transformer = new ContentTransformer();
            final Reader is = new FileReader(args[0]);
            final FileOutputStream fos = new FileOutputStream(new File(args[0] + ".out"));
            final Writer os = new TransformerWriter(fos, UTF8.charset, scraper, transformer, false);
            int i;

        // score each host: sum URL counts per host hash (bytes 6..11 of the URL hash)
        ScoreMap<String> hosthashScore = new ConcurrentScoreMap<String>();
        for (Map.Entry<String, URLHashCounter> e: domainSamples.entrySet()) {
            hosthashScore.inc(ASCII.String(e.getValue().urlhashb, 6, 6), e.getValue().count);
        }
        URIMetadataRow.Components comps;
        DigestURI url;
        for (Map.Entry<String, URLHashCounter> e: domainSamples.entrySet()) {
            urlref = this.load(e.getValue().urlhashb);
            comps = urlref.metadata();
            url = comps.url();
            hostMap.put(e.getKey(), new HostStat(url.getHost(), url.getPort(), e.getKey(), hosthashScore.get(e.getKey())));
        }
        return hostMap;
    }

        String urlhash;
        count += 10; // fetch a few extra entries so deletions do not force a refill too soon
        if (count < 0 || domainScore.sizeSmaller(count)) count = domainScore.size();
        statsDump = new ArrayList<HostStat>();
        URIMetadataRow.Components comps;
        DigestURI url;
        while (j.hasNext()) {
            urlhash = j.next();
            if (urlhash == null) continue;
            urlref = this.load(ASCII.getBytes(urlhash));
            if (urlref == null || urlref.metadata() == null || urlref.metadata().url() == null || urlref.metadata().url().getHost() == null) continue;
            if (statsDump == null) return new ArrayList<HostStat>().iterator(); // some other operation has destroyed the object
            comps = urlref.metadata();
            url = comps.url();
            statsDump.add(new HostStat(url.getHost(), url.getPort(), urlhash.substring(6), domainScore.get(urlhash))); // substring(6) = host-hash part
            count--;
            if (count == 0) break;
        }
        // finally return an iterator for the result array
        return (statsDump == null) ? new ArrayList<HostStat>().iterator() : statsDump.iterator();
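
Both this snippet (urlhash.substring(6)) and the previous one
(ASCII.String(urlhashb, 6, 6)) rely on the same layout assumption: a YaCy URL
hash is a 12-character key whose last six characters encode the host. Spelled
out with illustrative variable names:

    // URL-hash layout as used above: characters 6..11 identify the host.
    final String urlhash  = ASCII.String(urlhashBytes);   // full 12-character hash
    final String hosthash = urlhash.substring(6);         // host part, 6 characters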

        dbreader.start();
        return queue;
    }
   
    protected DCEntry parseResultSet(ResultSet rs) throws SQLException, MalformedURLException {
        DigestURI url;
        int item = rs.getInt("post_id");
        url = new DigestURI(this.urlstub + "/viewtopic.php?t=" + item);
        String subject = rs.getString("post_subject");
        String text = xmlCleaner(rs.getString("post_text"));
        String user = getUser(rs.getInt("poster_id"));
        Date date = new Date(rs.getLong("post_time") * 1000L); // post_time is Unix seconds; Date expects milliseconds
        return new DCEntry(url, date, subject, user, text, 0.0f, 0.0f);
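
A hedged sketch of how such a parser is typically driven, using only standard
JDBC; 'connection' (an open java.sql.Connection) and the exact SELECT are
assumptions, while the column names follow the snippet above:

    // Hypothetical caller: stream phpBB posts through parseResultSet.
    try (final Statement st = connection.createStatement();
         final ResultSet rs = st.executeQuery(
             "SELECT post_id, post_subject, post_text, poster_id, post_time FROM phpbb_posts")) {
        while (rs.next()) {
            final DCEntry entry = parseResultSet(rs); // may throw SQLException or MalformedURLException
            // hand 'entry' to the surrogate writer or indexer here
        }
    }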

        String versioninfo,
        int maxEntriesInFile
    ) {
        try {
            // generate the output file name and attributes; the DigestURI constructor
            // can throw MalformedURLException, handled by this try's catch (not shown)
            String targethost = new DigestURI(this.urlstub).getHost();
            int fc = 0;
            File outputfiletmp = null, outputfile = null;
           
            // write the result from the query concurrently in a file
            OutputStreamWriter osw = null;

    this.loader = loader;
    this.merge = true;
  }

  private Document loadDocument(final String url) {
    DigestURI uri;
    Response response;
    try {
      uri = new DigestURI(url);
    } catch (final MalformedURLException e) {
      Log.logWarning(YMarkTables.BOOKMARKS_LOG, "loadDocument failed due to malformed url: " + url);
      return null;
    }
    try {

        public void run() {
            Log.logInfo("INDEXCLEANER", "IndexCleaner-Thread started");
            ReferenceContainer<WordReference> container = null;
            WordReferenceVars entry = null;
            DigestURI url = null;
            // URL hashes collected for deletion while the cleaner runs
            final HandleSet urlHashs = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0);
            try {
                Iterator<ReferenceContainer<WordReference>> indexContainerIterator = Segment.this.termIndex.references(this.startHash, false, 100, false).iterator();
                while (indexContainerIterator.hasNext() && this.run) {
                    waiter();

  }

  public YMarkMetadata(final Document document) {
    this.document = document;
    try {
      // dc_identifier carries the document's URL; leave uri null if it does not parse
      this.uri = new DigestURI(this.document.dc_identifier());
    } catch (final MalformedURLException e) {
      this.uri = null;
    }
    this.indexSegment = null;
  }
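
The parse-or-null guard in this constructor also appears in loadDocument above;
factored out, it is a one-liner (a hypothetical helper, not part of the YaCy
API):

    // Hypothetical: parse a URL string, yielding null instead of throwing.
    private static DigestURI parseOrNull(final String url) {
        try {
            return new DigestURI(url);
        } catch (final MalformedURLException e) {
            return null;
        }
    }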
