Examples of DCEntry


Examples of net.yacy.document.content.DCEntry

            }
        }
    }
   
    public DCEntry getDCEntry() {
        final DCEntry dc = new DCEntry();
        for (BOOKMARK b : BOOKMARK.values()) {
            if(!b.dc_attrb.isEmpty() && this.containsKey(b.key())) {
                dc.put(b.dc_attrb, this.get(b.key()));
            }
        }
        return dc;
    }
View Full Code Here

Examples of net.yacy.document.content.DCEntry

        url = new DigestURI(this.urlstub + "/viewtopic.php?t=" + item);
        String subject = rs.getString("post_subject");
        String text = xmlCleaner(rs.getString("post_text"));
        String user = getUser(rs.getInt("poster_id"));
        Date date = new Date(rs.getLong("post_time") * 1000L);
        return new DCEntry(url, date, subject, user, text, 0.0f, 0.0f);
    }
View Full Code Here

Examples of net.yacy.document.content.DCEntry

            int fc = 0;
            File outputfiletmp = null, outputfile = null;
           
            // write the result from the query concurrently in a file
            OutputStreamWriter osw = null;
            DCEntry e;
            int c = 0;
            while ((e = queue.take()) != DCEntry.poison) {
                if (osw == null) {
                    outputfiletmp = new File(targetdir, targethost + "." + versioninfo + "." + fc + ".xml.prt");
                    outputfile = new File(targetdir, targethost + "." + versioninfo + "." + fc + ".xml");
                    if (outputfiletmp.exists()) outputfiletmp.delete();
                    if (outputfile.exists()) outputfile.delete();
                    osw = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(outputfiletmp)), "UTF-8");
                    osw.write("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" + SurrogateReader.SURROGATES_MAIN_ELEMENT_OPEN + "\n");
                }
                e.writeXML(osw);
                c++;
                if (c >= maxEntriesInFile) {
                    osw.write("</surrogates>\n");
                    osw.close();
                    outputfiletmp.renameTo(outputfile);
View Full Code Here

Examples of net.yacy.document.content.DCEntry

    public void processSurrogate(final InputStream is, final String name) throws IOException {
        final SurrogateReader reader = new SurrogateReader(is, 100);
        final Thread readerThread = new Thread(reader, name);
        readerThread.start();
        DCEntry surrogate;
        Response response;
        while ((surrogate = reader.take()) != DCEntry.poison) {
            // check if url is in accepted domain
            assert surrogate != null;
            assert this.crawlStacker != null;
            final String urlRejectReason = this.crawlStacker.urlInAcceptedDomain(surrogate.getIdentifier(true));
            if (urlRejectReason != null) {
                this.log.logWarning("Rejected URL '" + surrogate.getIdentifier(true) + "': " + urlRejectReason);
                continue;
            }

            // create a queue entry
            final Document document = surrogate.document();
            final Request request = new Request(
                    ASCII.getBytes(this.peers.mySeed().hash),
                    surrogate.getIdentifier(true),
                    null,
                    "",
                    surrogate.getDate(),
                    this.crawler.defaultSurrogateProfile.handle(),
                    0,
                    0,
                    0,
                    0
View Full Code Here

Examples of net.yacy.document.content.DCEntry

        url = new DigestURI(this.urlstub + "/viewtopic.php?t=" + item);
        String subject = rs.getString("post_subject");
        String text = xmlCleaner(rs.getString("post_text"));
        String user = getUser(rs.getInt("poster_id"));
        Date date = new Date(rs.getLong("post_time") * 1000L);
        return new DCEntry(url, date, subject, user, text, 0.0f, 0.0f);
    }
View Full Code Here

Examples of net.yacy.document.content.DCEntry

            int fc = 0;
            File outputfiletmp = null, outputfile = null;
           
            // write the result from the query concurrently in a file
            OutputStreamWriter osw = null;
            DCEntry e;
            int c = 0;
            while ((e = queue.take()) != DCEntry.poison) {
                if (osw == null) {
                    outputfiletmp = new File(targetdir, targethost + "." + versioninfo + "." + fc + ".xml.prt");
                    outputfile = new File(targetdir, targethost + "." + versioninfo + "." + fc + ".xml");
                    if (outputfiletmp.exists()) outputfiletmp.delete();
                    if (outputfile.exists()) outputfile.delete();
                    osw = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(outputfiletmp)), "UTF-8");
                    osw.write("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" + SurrogateReader.SURROGATES_MAIN_ELEMENT_OPEN + "\n");
                }
                e.writeXML(osw);
                c++;
                if (c >= maxEntriesInFile) {
                    osw.write("</surrogates>\n");
                    osw.close();
                    outputfiletmp.renameTo(outputfile);
View Full Code Here

Examples of net.yacy.document.content.DCEntry

            }
        }
    }
   
    public DCEntry getDCEntry() {
        final DCEntry dc = new DCEntry();
        for (BOOKMARK b : BOOKMARK.values()) {
            if(!b.dc_attrb.isEmpty() && this.containsKey(b.key())) {
                dc.put(b.dc_attrb, this.get(b.key()));
            }
        }
        return dc;
    }
View Full Code Here

Examples of net.yacy.document.content.DCEntry

    public void processSurrogate(final InputStream is, final String name) throws IOException {
        final SurrogateReader reader = new SurrogateReader(is, 100);
        final Thread readerThread = new Thread(reader, name);
        readerThread.start();
        DCEntry surrogate;
        Response response;
        while ((surrogate = reader.take()) != DCEntry.poison) {
            // check if url is in accepted domain
            assert surrogate != null;
            assert this.crawlStacker != null;
            final String urlRejectReason = this.crawlStacker.urlInAcceptedDomain(surrogate.getIdentifier(true));
            if (urlRejectReason != null) {
                this.log.logWarning("Rejected URL '" + surrogate.getIdentifier(true) + "': " + urlRejectReason);
                continue;
            }

            // create a queue entry
            final Document document = surrogate.document();
            final Request request = new Request(
                    ASCII.getBytes(this.peers.mySeed().hash),
                    surrogate.getIdentifier(true),
                    null,
                    "",
                    surrogate.getDate(),
                    this.crawler.defaultSurrogateProfile.handle(),
                    0,
                    0,
                    0,
                    0
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.