Package net.sf.josser.rdf.impl

Source Code of net.sf.josser.rdf.impl.Content

/*
****************************************************************************************
* Copyright © Giovanni Novelli                                            
* All Rights Reserved.                                                                
****************************************************************************************
*
* Title:       JOSSER
*
* Description: JOSSER - A Java tool capable of parsing DMOZ RDF dumps and exporting them to
*              any JDBC-compliant relational database
*              
* Content.java
*
* Created on 22 October 2005, 22.00 by Giovanni Novelli
*
****************************************************************************************
* JOSSER is available under the terms of the GNU General Public License Version 2.   
*                                                                                     
* The author does NOT allow redistribution of modifications of JOSSER under the terms
* of the GNU General Public License Version 3 or any later version.                  
*                                                                                    
* This program is distributed in the hope that it will be useful, but WITHOUT ANY    
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A    
* PARTICULAR PURPOSE.                                                                
*                                                                                    
* For more details read file LICENSE
*****************************************************************************************
*
* $Revision: 20 $
* $Id: Content.java 20 2008-01-17 12:47:41Z gnovelli $
* $HeadURL: https://josser.svn.sourceforge.net/svnroot/josser/trunk/project/src/net/sf/josser/rdf/impl/Content.java $
*
*****************************************************************************************
*/

package net.sf.josser.rdf.impl;

import net.sf.josser.jdbc.impl.Category;
import net.sf.josser.jdbc.impl.ExternalPage;
import net.sf.josser.Josser;
import net.sf.josser.util.Static;

/**
* @author Copyright © Giovanni Novelli. All rights reserved.
*/
public class Content extends Structure {

  private boolean processingExternalPage = false;

  private ExternalPage externalPageRow = null;

  private boolean processingExternalPageDescription = false;

  private String externalPageDescription = null;

  public Content(final String path) {
    super(path);
    this.setCategoryRow(new Category());

    this.setProcessingExternalPage(false);
    this.setExternalPageRow(null);
    this.setProcessingExternalPageDescription(false);
    this.setExternalPageDescription(null);
    this.setPhase(0);
  }

  @Override
  protected void processCategoryStart(final String line) {
    this.setProcessed(true);
    String Topic = null;
    String[] tokens = null;
    tokens = line.split("<Topic r:id=\"");
    if (tokens.length == 2) {
      tokens = tokens[1].split("\">");
      if (tokens.length == 1) {
        Topic = tokens[0];
        this.setProcessingCategory(true);
        this.getCategoryRow().setTopic(Topic);
        Static
            .setFiltermatching(Topic.startsWith(Josser
                .getTopicfilter()));
        if ((this.getPhase() == 0)
            && Topic.startsWith(Josser.getTopicfilter())) {
          this.setPhase(1);
        } else if ((this.getPhase() == 1)
            && !Topic.startsWith(Josser.getTopicfilter())) {
          this.setPhase(2);
        }
      }
    } else {
      this.setProcessed(false);
    }
  }

  @Override
  public void processCategory(final String line) {
    this.setProcessed(true);
    String[] tokens = null;
    if (line.startsWith("  <catid>")) {
      int catid = 0;
      tokens = line.split("  <catid>");
      if (tokens.length == 2) {
        tokens = tokens[1].split("</catid>");
        if (tokens.length == 1) {
          catid = Integer.parseInt(tokens[0]);
          this.getCategoryRow().setCatid(catid);
        }
      }
    } else if (line.startsWith("</Topic>")) {
      this.setProcessingCategory(false);
    } else if (line.startsWith("  <link r:resource=\"")) {
      /*
       * FIXME At the moment parsing is done on nodes of type ExternalPage
       */
    } else if (line.startsWith("  <link1 r:resource=\"")) {
      /*
       * FIXME At the moment parsing is done on nodes of type ExternalPage
       */
    } else if (line.startsWith("  <rss r:resource=\"")) {
      /*
       * FIXME At the moment parsing is done on nodes of type ExternalPage
       */
    } else if (line.startsWith("  <atom r:resource=\"")) {
      /*
       * FIXME At the moment parsing is done on nodes of type ExternalPage
       */
    } else if (line.startsWith("  <rss1 r:resource=\"")) {
      /*
       * FIXME At the moment parsing is done on nodes of type ExternalPage
       */
    } else if (line.startsWith("  <pdf r:resource=\"")) {
      /*
       * FIXME At the moment parsing is done on nodes of type ExternalPage
       */
    } else if (line.startsWith("  <pdf1 r:resource=\"")) {
      /*
       * FIXME At the moment parsing is done on nodes of type ExternalPage
       */
    } else {
      this.setProcessed(false);
    }
  }

  protected void processExternalPage(final String line) {
    this.setProcessed(true);
    String[] tokens = null;
    if (line.startsWith("  <d:Title>")) {
      String Title = null;
      tokens = line.split("  <d:Title>");
      if (tokens.length == 2) {
        tokens = tokens[1].split("</d:Title>");
        if (tokens.length == 1) {
          Title = tokens[0];
          this.getExternalPageRow().setTitle(Title);
        }
      }
    } else if (line.startsWith("</ExternalPage>")) {
      this.setProcessingExternalPage(false);
      this.getExternalPageRow().addBatch();
    } else if (line.startsWith("  <d:Description>")) {
      tokens = line.split("  <d:Description>");
      if (tokens.length == 2) {
        if (tokens[1].endsWith("</d:Description>")) {
          tokens = tokens[1].split("</d:Description>");
          if (tokens.length == 1) {
            this.setExternalPageDescription(tokens[0]);
          } else {
            this.setExternalPageDescription("");
          }
          this.getExternalPageRow().setDescription(
              this.getExternalPageDescription());
        } else {
          this.setProcessingExternalPageDescription(true);
          this.setExternalPageDescription(tokens[1]);
        }
      }
    } else if (line.endsWith("  </d:Description>")) {
      tokens = line.split("  </d:Description>");
      if (tokens.length == 2) {
        this.setExternalPageDescription(this
            .getExternalPageDescription()
            + tokens[0]);
      }
      this.setProcessingExternalPageDescription(false);
      this.getExternalPageRow().setDescription(
          this.getExternalPageDescription());
    } else if (this.isProcessingExternalPageDescription()) {
      this.setExternalPageDescription(this.getExternalPageDescription()
          + line);
    } else if (line.startsWith("  <topic>")) {
      /*
       * FIXME At the moment parsing of Topic is done once in nodes of
       * type Topic and not in nodes of type ExternalPage
       */
    } else if (line.startsWith("  <priority>")) {
      int priority = 0;
      tokens = line.split("  <priority>");
      if (tokens.length == 2) {
        tokens = tokens[1].split("</priority>");
        if (tokens.length == 1) {
          priority = Integer.parseInt(tokens[0]);
          this.getExternalPageRow().setPriority(priority);
        }
      }
    } else if (line.startsWith("  <mediadate>")) {
      String mediadate = null;
      tokens = line.split("  <mediadate>");
      if (tokens.length == 2) {
        tokens = tokens[1].split("</mediadate>");
        if (tokens.length == 1) {
          mediadate = tokens[0];
          this.getExternalPageRow().setMediadate(mediadate);
        }
      }
    } else if (line.startsWith("  <ages>")) {
      String ages = null;
      tokens = line.split("  <ages>");
      if (tokens.length == 2) {
        tokens = tokens[1].split("</ages>");
        if (tokens.length == 1) {
          ages = tokens[0];
          this.getExternalPageRow().setAges(ages);
        }
      }
    } else if (line.startsWith("  <type>")) {
      String type = null;
      tokens = line.split("  <type>");
      if (tokens.length == 2) {
        tokens = tokens[1].split("</type>");
        if (tokens.length == 1) {
          type = tokens[0];
          this.getExternalPageRow().setType(type);
        }
      }
    } else {
      this.setProcessed(false);
    }
  }

  protected void processExternalPageStart(final String line) {
    this.setProcessed(true);
    String about = null;
    String[] tokens = null;
    tokens = line.split("<ExternalPage about=\"");
    if (tokens.length == 2) {
      tokens = tokens[1].split("\">");
      if (tokens.length == 1) {
        about = tokens[0];
        this.setExternalPageRow(new ExternalPage());
        this.getExternalPageRow().setCatid(
            this.getCategoryRow().getCatid());
        this.getExternalPageRow().setLink(about);
        this.setProcessingExternalPage(true);
      } else {
        this.setExternalPageRow(new ExternalPage());
        this.getExternalPageRow().setLink("");
        this.setProcessingExternalPage(true);
      }
    } else {
      this.setProcessed(false);
    }
  }

  @Override
  public void process(final String line) {
    this.setProcessed(false);
    if (this.isProcessingCategory() || this.isProcessingExternalPage()) {
      if (this.isProcessingCategory()) {
        this.processCategory(line);
      } else if (this.isProcessingExternalPage()) {
        this.processExternalPage(line);
      }
      if (!this.isProcessed() && (line.length() > 0)) {
      }
    } else {
      if (!this.isProcessingExternalPage()) {
        if (line.startsWith("<ExternalPage about=\"")) {
          this.processExternalPageStart(line);
        }
      }
      if (!this.isProcessingCategory()) {
        if (line.startsWith("<Topic r:id=\"")) {
          this.processCategoryStart(line);
        }
      }
      if (!this.isProcessed() && (line.length() > 0)) {
      }
    }
  }

  @Override
  public int batchStore() {
    int result = 0;
    result += this.getExternalPageRow().executeBatch();
    return result;
  }

  @Override
  public int batchClear() {
    return this.getExternalPageRow().batchClear();
  }

  /**
   * @param externalPageDescription
   *            The externalPageDescription to set.
   */
  protected void setExternalPageDescription(
      final String externalPageDescription) {
    this.externalPageDescription = externalPageDescription;
  }

  /**
   * @return Returns the externalPageDescription.
   */
  protected String getExternalPageDescription() {
    return this.externalPageDescription;
  }

  /**
   * @param externalPageRow
   *            The externalPageRow to set.
   */
  protected void setExternalPageRow(final ExternalPage externalPageRow) {
    this.externalPageRow = externalPageRow;
  }

  /**
   * @return Returns the externalPageRow.
   */
  protected ExternalPage getExternalPageRow() {
    return this.externalPageRow;
  }

  /**
   * @param processingExternalPage
   *            The processingExternalPage to set.
   */
  protected void setProcessingExternalPage(
      final boolean processingExternalPage) {
    this.processingExternalPage = processingExternalPage;
  }

  /**
   * @return Returns the processingExternalPage.
   */
  protected boolean isProcessingExternalPage() {
    return this.processingExternalPage;
  }

  /**
   * @param processingExternalPageDescription
   *            The processingExternalPageDescription to set.
   */
  protected void setProcessingExternalPageDescription(
      final boolean processingExternalPageDescription) {
    this.processingExternalPageDescription = processingExternalPageDescription;
  }

  /**
   * @return Returns the processingExternalPageDescription.
   */
  protected boolean isProcessingExternalPageDescription() {
    return this.processingExternalPageDescription;
  }
}
TOP

Related Classes of net.sf.josser.rdf.impl.Content

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.