// Package edu.ucla.sspace.graph.io
//
// Source Code of edu.ucla.sspace.graph.io.GraphMLReader$GraphMLParser

/*
* Copyright 2012 David Jurgens
*
* This file is part of the S-Space package and is covered under the terms and
* conditions therein.
*
* The S-Space package is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation and distributed hereunder to you.
*
* THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
* EXPRESS OR IMPLIED ARE MADE.  BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
* NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
* PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
* WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
* RIGHTS.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package edu.ucla.sspace.graph.io;

import edu.ucla.sspace.graph.DirectedEdge;
import edu.ucla.sspace.graph.DirectedGraph;
import edu.ucla.sspace.graph.DirectedMultigraph;
import edu.ucla.sspace.graph.DirectedTypedEdge;
import edu.ucla.sspace.graph.Edge;
import edu.ucla.sspace.graph.Multigraph;
import edu.ucla.sspace.graph.Graph;
import edu.ucla.sspace.graph.SimpleDirectedEdge;
import edu.ucla.sspace.graph.SimpleDirectedTypedEdge;
import edu.ucla.sspace.graph.SimpleEdge;
import edu.ucla.sspace.graph.SimpleTypedEdge;
import edu.ucla.sspace.graph.SimpleWeightedDirectedTypedEdge;
import edu.ucla.sspace.graph.SimpleWeightedEdge;
import edu.ucla.sspace.graph.SparseDirectedGraph;
import edu.ucla.sspace.graph.SparseWeightedGraph;
import edu.ucla.sspace.graph.SparseUndirectedGraph;
import edu.ucla.sspace.graph.TypedEdge;
import edu.ucla.sspace.graph.UndirectedMultigraph;
import edu.ucla.sspace.graph.WeightedDirectedMultigraph;
import edu.ucla.sspace.graph.WeightedEdge;
import edu.ucla.sspace.graph.WeightedGraph;
import edu.ucla.sspace.graph.WeightedDirectedTypedEdge;

import edu.ucla.sspace.util.HashIndexer;
import edu.ucla.sspace.util.Indexer;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;

import java.util.logging.Level;
import java.util.logging.Logger;

import static edu.ucla.sspace.util.LoggerUtil.verbose;
import static edu.ucla.sspace.util.LoggerUtil.veryVerbose;

import javax.xml.parsers.*;
import javax.xml.transform.*;
import javax.xml.transform.dom.*;
import javax.xml.transform.stream.*;
import org.w3c.dom.*;
import org.xml.sax.SAXException;


import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXParseException;
import org.xml.sax.SAXException;

import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.ParserAdapter;



/**
* An {@link GraphReader} implementation that supports reading files in the <a
* href="http://graphml.graphdrawing.org/primer/graphml-primer.html> GraphML
* </a> language.
*/
public class GraphMLReader extends GraphReaderAdapter implements GraphReader {

    /**
     * The logger to which clustering status updates will be written.
     */
    private static final Logger LOGGER =
        Logger.getLogger(GraphMLReader.class.getName());


    public GraphMLReader() { }


    public WeightedDirectedMultigraph<String> readWeightedDirectedMultigraph(
            File f, Indexer<String> vertexLabels) throws IOException {       
        try {
            SAXParserFactory spf = SAXParserFactory.newInstance();
            SAXParser sp = spf.newSAXParser();
            ParserAdapter pa = new ParserAdapter(sp.getParser());
           
            GraphMLParser parser = new GraphMLParser(vertexLabels);
           
            pa.setContentHandler(parser);
            pa.setErrorHandler(parser);
            pa.parse(new InputSource(new BufferedInputStream(new FileInputStream(f))));
            return parser.g;
        } catch (SAXException saxe) {
            throw new IOException(saxe);
        } catch (ParserConfigurationException saxe) {
            throw new IOException(saxe);
        }
    }

    public WeightedDirectedMultigraph<String> readWeightedDirectedMultigraphFromDOM(
            File f, Indexer<String> vertexLabels) throws IOException {       

        try {
            DocumentBuilderFactory dbfac = DocumentBuilderFactory.newInstance();
            DocumentBuilder db = dbfac.newDocumentBuilder();
            Document graphDoc = db.parse(f);
            verbose(LOGGER, "Finished parsing %s", f);
           
            // <key id="nd0" for="node" attr.name="type" attr.type="int"><default>0</default></key>
            // <key id="nd1" for="node" attr.name="group" attr.type="int"><default>0</default></key>
            // <key id="sd0" for="edge" attr.name="weight" attr.type="double"><default>0.0</default></key>
            // <key id="sd1" for="edge" attr.name="type" attr.type="int"><default>0</default></key>
           
            NodeList graphElemList = graphDoc.getElementsByTagName("graph");
            if (graphElemList.getLength() == 0)
                throw new IOException("Missing <graph> element");
            if (graphElemList.getLength() > 1)
                LOGGER.warning(f + " has more than one <graph> element"
                               + "; returning only the first");
            Element graphElem = (Element)(graphElemList.item(0));
           
            String weightKeyId = null;
            String typeKeyId = null;
           
            WeightedDirectedMultigraph<String> g =
                new WeightedDirectedMultigraph<String>();

            NodeList keyElemList = graphElem.getElementsByTagName("key");
            for (int i = 0; i < keyElemList.getLength(); ++i) {
                Element key = (Element)(keyElemList.item(i));
                if (key.getAttribute("for").equals("edge")) {
                    if (key.getAttribute("attr.name").equals("weight"))
                        weightKeyId = key.getAttribute("id");
                    else if (key.getAttribute("attr.name").equals("type"))
                        typeKeyId = key.getAttribute("id");
                }
            }         
           
            NodeList nodeElemList = graphElem.getElementsByTagName("node");
            for (int i = 0; i < nodeElemList.getLength(); ++i) {
                Element node = (Element)(nodeElemList.item(i));
                String id = node.getAttribute("id");
                g.add(vertexLabels.index(id));
                if ((i+1) % 1000 == 0)
                    verbose(LOGGER, "Added %d vertices", i);
            }
            verbose(LOGGER, "Found %d total vertices", g.order());

            NodeList edgeElemList = graphElem.getElementsByTagName("edge");
            for (int i = 0; i < edgeElemList.getLength(); ++i) {
                Element edge = (Element)(edgeElemList.item(i));
                String fromId = edge.getAttribute("source");
                String toId = edge.getAttribute("target");
                // Get its children which have the weight and type attributes
               
                String weightStr = null;
                String type = null;

                NodeList dataElemList = edge.getElementsByTagName("data");
                for (int j = 0; j < dataElemList.getLength(); ++j) {
                    Element data = (Element)(dataElemList.item(j));
                    if (data.getAttribute("key").equals(weightKeyId))
                        weightStr = data.getTextContent();
                    else if (data.getAttribute("key").equals(typeKeyId))
                        type = data.getTextContent();
                }

                if (weightStr == null)
                    throw new IOException("No weight specified for edge " +
                                          edge.getAttribute("id"));
                if (type == null)
                    throw new IOException("No type specified for edge " +
                                          edge.getAttribute("id"));

                int from = vertexLabels.find(fromId);
                if (from < 0)
                    throw new IOException("Unknown source node for edge " +
                                      edge.getAttribute("id") + ": " + fromId);
                int to = vertexLabels.find(toId);
                if (to < 0)
                    throw new IOException("Unknown target node for edge " +
                                      edge.getAttribute("id") + ": " + toId);
              
                double weight = 0;
                try {
                    weight = Double.parseDouble(weightStr);
                } catch (NumberFormatException nfe) {
                    throw new IOException("Invalid weight for edge " +
                                    edge.getAttribute("id") + ": " + weightStr);
                }

                WeightedDirectedTypedEdge<String> e =
                    new SimpleWeightedDirectedTypedEdge<String>(
                        type, from, to, weight);
                g.add(e);
                if ((i+1) % 1000 == 0)
                    verbose(LOGGER, "Added %d edges", i);

            }

            verbose(LOGGER, "Loaded a directed, weighted multigraph with %d " +
                    "vertices and %d edges", g.order(), g.size());
            return g;
        }
        catch (IOException ioe) {
            throw ioe; // rethrow
        }
        // Generic catch all for all the XML exception
        catch (Exception e) {
            throw new IOException(e);
        }
    }

   
    public class GraphMLParser extends DefaultHandler {
       
        // State variables when parsing edges
        private int from;
        private int to;
        private String type;
        private double weight;
 
  private static final String NODE = "node";
  private static final String EDGE = "edge";
        private static final String DATA = "data";

        private final WeightedDirectedMultigraph<String> g;
        private final Indexer<String> vertexLabels;

        private String weightKeyId;
        private String typeKeyId;

        private String curDataKey = null;
        private String curData = null;

  GraphMLParser(Indexer<String> vertexLabels) {
            this.vertexLabels = vertexLabels;
            this.g = new WeightedDirectedMultigraph<String>();
  }

  public void startDocument() { }

  public void endDocument() throws SAXException {
            verbose(LOGGER, "Loaded a directed, weighted multigraph with %d " +
                    "vertices and %d edges", g.order(), g.size());
        }

  public void startElement(String namespace, String localName, String qName,
                                 Attributes atts) throws SAXException {
            if (qName.equals("key")) {
                if (atts.getValue("for").equals(EDGE)) {
                    if (atts.getValue("attr.name").equals("weight"))
                        weightKeyId = atts.getValue("id");
                    else if (atts.getValue("attr.name").equals("type"))
                        typeKeyId = atts.getValue("id");
                }
            }
            else if (qName.equals(NODE)) {
                String id = atts.getValue("id");
                g.add(vertexLabels.index(id));
                if (g.order() % 1000 == 0)
                    verbose(LOGGER, "Added %d vertices", g.order());
            }
            else if (qName.equals(EDGE)) {
                String fromId = atts.getValue("source");
                String toId = atts.getValue("target");
               
                from = vertexLabels.find(fromId);
                if (from < 0)
                    throw new SAXException("Unknown source node for edge "
                                          + fromId);
                to = vertexLabels.find(toId);
                if (to < 0)
                    throw new SAXException("Unknown target node for edge "
                                          + toId);
            }
            else if (qName.equals(DATA)) {
                curDataKey = atts.getValue("key");
            }
  }

  public void characters(char[] ch, int start, int length) {
            curData = new String(ch, start, length);
        }

  public void endElement(String uri, String localName, String qName)
      throws SAXException {
            if (qName.equals(EDGE)) {
                if (from == to)
                    return;
                WeightedDirectedTypedEdge<String> e =
                    new SimpleWeightedDirectedTypedEdge<String>(
                        type, from, to, weight);
                g.add(e);
                if (g.size() % 1000 == 0)
                    verbose(LOGGER, "Added %d edges", g.size());
            }

            else if (qName.equals(DATA)) {
                if (curDataKey.equals(weightKeyId)) {                  
                    try {
                        weight = Double.parseDouble(curData);
                    } catch (NumberFormatException nfe) {
                        throw new SAXException("Invalid weight: " + curData);
                    }
                }
                else if (curDataKey.equals(typeKeyId)) {
                    type = curData;
                }                             
            }
        }
    }   
}
// TOP
//
// Related Classes of edu.ucla.sspace.graph.io.GraphMLReader$GraphMLParser
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.