Source code for BPSpatial.FileIO.Graph

"""
@author: bspark
"""

import networkx as nx
import geopandas as gpd
import math
import pandas as pd
import json

import sys
import os
sys.path.insert(0, os.path.abspath('..'))
import BPSpatial.Analysis.Graph as AG


# pandas dtype names that are recognised when column types are inferred from a
# DataFrame (see Point.fromExcel / Point.fromCsv); any dtype whose name is not
# listed here is recorded as 'str'.
TYPES_EXCEL = ['float64', 'int64', 'float32', 'int8']

class Point():
    """
    Class to handle point-type graph that only has nodes without edges.

    Every node key is a tuple (x, y, ind) where ind is the enumeration index
    of the record, and every node carries an ``Ind`` attribute with that index.
    """

    @staticmethod
    def fromShapefile_nx(shpPath, attrs=[], decimals = -1):
        """
        Method for importing a shapefile and converting it to a networkx graph
        using networkx. This may have a problem when the data has duplicate points

        Parameters
        ----------
        shpPath:        string
                        path of shapefile to handle

        attrs:          list of string
                        attributes to add to a graph from the input shp file

        decimals:       int
                        number of decimals to round the coordinates to,
                        -1 keeps the coordinates as they are read

        Return
        ------
        graph:          networkx Graph
                        a graph only with nodes, not edges
                        graph.nodes - key:(x, y, ind), value:{column:value}
        """
        shp = nx.read_shp(shpPath)
        graph = nx.DiGraph()
        attrDict = {attr: {} for attr in attrs}

        for ind, (coord, data) in enumerate(shp.nodes(data=True)):
            key = Point._nodeKey(coord[0], coord[1], ind, decimals)
            graph.add_node(key, Ind=ind)
            for attr in attrs:
                attrDict[attr][key] = data[attr]

        # keyword arguments: the positional order of (name, values) differs
        # between networkx 1.x and 2.x
        for attr in attrs:
            nx.set_node_attributes(graph, name=attr, values=attrDict[attr])

        return graph

    @staticmethod
    def fromShapefile(shpPath, attrs=[], decimals = -1):
        """
        Method for importing a shapefile and converting it to a networkx graph
        using geopandas to avoid removal of points with the same coordinates

        Parameters
        ----------
        shpPath:        string
                        path of shapefile to handle

        attrs:          list of string
                        attributes to add to a graph from the input shp file

        decimals:       int
                        number of decimals to round the coordinates to,
                        -1 keeps the coordinates as they are read

        Return
        ------
        graph:          networkx Graph
                        a graph only with nodes, not edges
                        graph.nodes - key:(x, y, ind), value:{column:value}
        """
        graph = nx.DiGraph()
        shp = gpd.read_file(shpPath)

        for attr in attrs:
            assert(attr in shp.keys())

        # one key per record; ind keeps records with equal coordinates apart
        keys = [Point._nodeKey(geo.x, geo.y, ind, decimals)
                for ind, geo in enumerate(shp.geometry)]

        for ind, key in enumerate(keys):
            graph.add_node(key, Ind=ind)

        for attr in attrs:
            nx.set_node_attributes(graph, name=attr,
                                   values=dict(zip(keys, shp[attr])))

        return graph

    @staticmethod
    def fromExcel(excelPath, sheetName, geometryKey, header, columns, decimals=6):
        """
        Method for creating a networkx graph from excel file

        Parameters
        ----------
        excelPath:      string
                        path of excel file to access

        sheetName:      string
                        name of sheet name to access

        geometryKey:    tuple
                        positions of geometry (x, y) or (lng, lat) in the
                        columns or header list

        header:         0 or None
                        0, if data contains column names,
                        None, otherwise

        columns:        list of tuple
                        column name and type [(col1,dtype), (col2, dtype), ...]
                        only used when header is None; with header 0 the
                        list is rebuilt from the data

        decimals:       int
                        number of decimals to round the coordinates to

        Return
        ------
        graph:          networkx graph
                        a point-type graph, only having nodes
        """
        assert(header == 0 or header is None)
        exc = pd.read_excel(excelPath, sheetName, header=header)

        columns = Point._resolveColumns(exc, header, columns)
        exc = Point._nan(exc)

        return Point._fromFrame(exc, geometryKey, columns, decimals)

    @staticmethod
    def fromCsv(csvPath, geometryKey, header=0, columns=[], decimals=6):
        """
        Method for creating a networkx graph from csv file

        Parameters
        ----------
        csvPath:        string
                        path of csv file to access

        geometryKey:    tuple
                        positions of geometry (x, y) or (lng, lat) in the
                        columns or header list

        header:         0 or None
                        0, if data contains column names,
                        None, otherwise

        columns:        list of tuple
                        column name and type [(col1,dtype), (col2, dtype), ...]
                        only used when header is None; with header 0 the
                        list is rebuilt from the data

        decimals:       int
                        number of decimals to round the coordinates to

        Return
        ------
        graph:          networkx graph
                        a point-type graph, only having nodes
        """
        assert(header == 0 or header is None)
        csv = pd.read_csv(csvPath, header=header)

        columns = Point._resolveColumns(csv, header, columns)
        assert(len(columns) == len(csv.keys()))

        csv = Point._nan(csv)

        return Point._fromFrame(csv, geometryKey, columns, decimals)

    @staticmethod
    def _nodeKey(x, y, ind, decimals):
        # node key (x, y, ind); decimals == -1 means "do not round"
        if decimals == -1:
            return (x, y, ind)
        return (round(x, decimals), round(y, decimals), ind)

    @staticmethod
    def _resolveColumns(frame, header, columns):
        # return the [(name, dtype), ...] list describing the frame columns
        if header is None:
            # no header row: the caller has to describe every column
            assert(columns != [] and len(columns) == len(frame.columns))
            return columns

        resolved = []
        for name, dtype in frame.dtypes.items():
            dtypeName = str(dtype)
            resolved.append((name, dtypeName if dtypeName in TYPES_EXCEL else 'str'))
        return resolved

    @staticmethod
    def _fromFrame(frame, geometryKey, columns, decimals):
        # build the node-only graph from a DataFrame, reading columns by
        # position so that frames without a header row work as well
        xs = frame.iloc[:, geometryKey[0]]
        ys = frame.iloc[:, geometryKey[1]]
        keys = [(round(x, decimals), round(y, decimals), ind)
                for ind, (x, y) in enumerate(zip(xs, ys))]

        graph = nx.DiGraph()
        for ind, key in enumerate(keys):
            graph.add_node(key, Ind=ind)

        # attribute dataset per column - key:(x, y, ind), value:cell value
        for pos, column in enumerate(columns):
            values = dict(zip(keys, frame.iloc[:, pos]))
            nx.set_node_attributes(graph, name=column[0], values=values)

        return graph

    @staticmethod
    def _nan(pandasDF):
        # replace every float NaN cell with -1, in place, and return the frame
        for rowPos, row in enumerate(pandasDF.values):
            for colPos, val in enumerate(row):
                # isinstance also matches numpy.float64, which type(val) == float
                # misses when the frame holds only numeric columns
                if isinstance(val, float) and math.isnan(val):
                    # positional write: independent of index and column labels
                    pandasDF.iat[rowPos, colPos] = -1
        return pandasDF
    

        
class Polyline():
    """
    Class to handle polyline-type graph that has both nodes and edges.
    Nodes only carry their enumeration index (``Ind``); the attributes read
    from the input are stored on the edges.
    """

    @staticmethod
    def fromShapefile(shpPath, attrs=[], decimals=6):
        """
        Method for importing a shapefile and converting it to a networkx graph
        using networkx. This may have a problem when the data has duplicate points

        Parameters
        ----------
        shpPath:        string
                        path of shapefile to handle

        attrs:          list of string
                        attributes to add to the edges of a graph from the
                        input shp file

        decimals:       int
                        number of decimals to round

        Return
        ------
        graph:          networkx Graph
                        a graph with nodes and edges
                        graph.nodes - key:(x, y), value:{Ind}
                        graph.edges - key:((x1, y1), (x2, y2)),
                                      value:{Ind, coordinates, column:value}
        """
        shp = nx.read_shp(shpPath)
        graph = nx.DiGraph()

        for ind, node in enumerate(shp.nodes(data=True)):
            coord = node[0]
            graph.add_node((round(coord[0], decimals), round(coord[1], decimals)), Ind=ind)

        attrDict = {attr: {} for attr in attrs}

        for ind, (start, end, data) in enumerate(shp.edges(data=True)):
            # vertices of the polyline, read from the 'Json' edge attribute
            coordRound = [[round(ele, decimals) for ele in coord]
                          for coord in json.loads(data['Json'])['coordinates']]

            startKey = (round(start[0], decimals), round(start[1], decimals))
            endKey = (round(end[0], decimals), round(end[1], decimals))

            graph.add_edge(startKey, endKey, Ind=ind, coordinates=coordRound)
            for attr in attrs:
                attrDict[attr][(startKey, endKey)] = data[attr]

        # keyword arguments: the positional order of (name, values) differs
        # between networkx 1.x and 2.x
        for attr in attrs:
            nx.set_edge_attributes(graph, name=attr, values=attrDict[attr])

        # NOTE(review): presumably stores a distance on the graph's edges -
        # confirm against BPSpatial.Analysis.Graph
        AG.graphCalculate.addDistance(graph)

        return graph