I decided to start reviewing data structures and algorithms again, and the first thing I chose is my favorite graph.
There are quite a few ways to represent a graph, the most common two are adjacency matrix and adjacency list. In this post, I use adjacency matrix representation.
Graph and Adjacency Matrix Representation
A graph (see
here for formal definition) contains vertices and edges. Edges are used to connect vertices. Directed graphs have directions (of course!), e.g., edge(a, b) != edge (b, a). Undirected graphs, on the contrary, have no directions, e.g., edge(a, b) = edge(b, a).
So the question is, how to represent graph in program?
Adjacency matrix representation refers to using a matrix of size (number of vertices * number of vertices) to represent the connectivity, i.e., edges.
Consider the following graph:
The adjacency matrix representation is as follows:
Each row and column represents a vertex, the cell represents the weight of the edge between them. If there is no edge between two vertices, the weight is 0. In this example, all weights are set to 1, it is possible to personalize the weight, e.g., weight(1, 3) = 2, weight(2, 4) = 0.5.
Since the graph is undirected, if there is an edge between v1 and v2, then there is an edge between v2 and v1, so the matrix is symmetric. In case of a directed graph, an edge direct from 1 to 3 leads to a value in matrix[1][3], but that does not indicates there is a value in matrix[3][1], unless there is an edge that directs 3 to 1.
Traverse the graph
There are two ways to traverse a graph, depth-first search (DFS) and breadth-first search (BFS). DFS goes as deep (yeah...) as it can until all the vertices rooted from one vertex are visited then visit a neighbor vertex. BFS goes as broad as it can until all the neighbors of a given vertex are visited then go to the next level. The following two graphs illustrate the path of DFS and BFS if we start by traversing vertex 3.
Java code
public class Graph {
protected int adjacencyMatrix[][];
protected int size; //number of vertices
public Graph(int size) {
this.size = size;
adjacencyMatrix = new int[size][size];
}
public void addEdge(int v1, int v2) {
if (validity(v1) == false || validity(v2) == false)
return;
if (v1 == v2){
System.out.printf("Same vertex %d and %d\n", v1, v2);
return;
}
adjacencyMatrix[v1][v2] += 1;
adjacencyMatrix[v2][v1] += 1;
}
public void removeEdge(int v1, int v2){
if (validity(v1) == false || validity(v2) == false)
return;
if (adjacencyMatrix[v1][v2] == 0){
System.out.printf("No edge between %d and %d\n", v1, v2);
}
adjacencyMatrix[v1][v2] -= 1;
adjacencyMatrix[v2][v1] -= 1;
}
public boolean containsEdge(int v1, int v2){
if (validity(v1) == false || validity(v2) == false)
return false;
return adjacencyMatrix[v1][v2] > 0 ? true : false;
}
public int getSize(){
return size;
}
protected boolean validity(int v){
if (v >= size || v < 0) {
System.out.printf("Invalid vertex index %d\n", v);
return false;
}
else
return true;
}
public void dfs(int startVertex){
boolean[] visited= new boolean[this.size];
List<integer> path = new ArrayList<integer> ();
dfs(path, startVertex, visited);
System.out.println(getPath(path));
}
private void dfs(List<integer> pathList, int v, boolean[] visited){
if (pathList.size() == this.size)
return;
if (!validity(v)|| visited[v])
return;
visited[v] = true;
pathList.add(v);
for (int i = 0; i < this.size; i++){
if (containsEdge(v, i)){
dfs(pathList, i, visited);
}
}
}
public void bfs(int v){
if (!validity(v))
return;
boolean[] visited = new boolean[this.size];
List<integer> path = new ArrayList<integer> (this.size);
Queue<integer> neighbors = new LinkedList<integer> ();
neighbors.offer(v);
while(!neighbors.isEmpty() && (path.size() < this.size)){
int vertex = neighbors.poll();
path.add(vertex);
visited[vertex] = true;
for (int i = 0; i < this.size; i++){
if(containsEdge(vertex, i) && !visited[i]) {
neighbors.offer(i);
}
}
}
System.out.println(getPath(path));
}
public String getPath(List<integer> path){
String p = "";
for (int v : path)
p += String.format("%d -> ", v);
return p.substring(0, p.length() - 3);
}
}
Python code:
The easiest way to write a graph using Python is:
To use the SparseGraph class provided by
APGL library.
from apgl.graph import SparseGraph
import time
def main():
graph = SparseGraph(5)
graph.addEdge(0, 3)
graph.addEdge(1, 4)
graph.addEdge(2, 3)
graph.addEdge(2, 4)
graph.addEdge(1, 3)
print(graph.depthFirstSearch(3))
print(graph.breadthFirstSearch(3))
if __name__ == '__main__':
start_time = time.time()
main()
print("Running time: %f microseconds" % ((time.time() - start_time)*1000))
Where is the fun of writing code?!
Using the same structure as I do in Java:
from collections import deque
class Graph(object):
def __init__(self, size):
self.adjacencyMatrix = []
for i in range(size):
self.adjacencyMatrix.append([0 for i in range(size)])
self.size = size
def addEdge(self, v1, v2):
if not self.validity(v1) and not self.validity(v2):
return
if v1 == v2:
print("Same vertex %d and %d" % (v1, v2))
self.adjacencyMatrix[v1][v2] += 1
self.adjacencyMatrix[v2][v1] += 1
def removeEdge(self, v1, v2):
if not self.validity(v1) and not self.validity(v2):
return
if self.adjacencyMatrix[v1][v2] == 0:
print("No edge between %d and %d" % (v1, v2))
return
self.adjacencyMatrix[v1][v2] -= 1
self.adjacencyMatrix[v2][v1] -= 1
def containsEdge(self, v1, v2):
if not self.validity(v1) and not self.validity(v2):
return False
return True if self.adjacencyMatrix[v1][v2] > 0 else False
def __len__(self):
return self.size
def validity(self, v):
if v >= self.size or v < 0:
print("Invalid vertex index %d" % v)
return False
else:
return True
def dfs(self, startVertex):
if not self.validity(startVertex):
return;
visited = []
for i in range(self.size):
visited.append(False)
path = []
self._dfsR(path, startVertex, visited)
print(self.getPath(path))
def _dfsR(self, path, v, visited):
if(len(path) == self.size):
return;
if visited[v]:
return;
visited[v] = True;
path.append(v);
for i in range(self.size):
if self.containsEdge(v, i):
self._dfsR(path, i, visited)
def bfs(self, v):
if v >= self.size or v < 0:
return
visited = []
for i in range(self.size):
visited.append(False)
path = []
neighbors = deque()
neighbors.append(v)
while neighbors and len(path) < self.size:
vertex = neighbors.popleft()
path.append(vertex)
visited[vertex] = True
for i in range(self.size):
if self.containsEdge(vertex, i) and not visited[i]:
neighbors.append(i)
print(self.getPath(path))
def getPath(self, path):
p = ""
for v in path:
p += "%d -> " % v
return p[:len(p) - 3]
Performance
This is always my favorite part. Using the following pieces of test code for Java and Python:
public class GraphTester {
public static void main(String[] args) {
final long startTime = System.nanoTime();
Graph g = new Graph(5);
g.addEdge(0, 3);
g.addEdge(1, 4);
g.addEdge(2, 3);
g.addEdge(2, 4);
g.addEdge(1, 3);
//g.addEdge(1, 5);
g.dfs(3);
g.bfs(3);
System.out.println("Running time: " + ((System.nanoTime() - startTime)/1000) + "ms");
}
}
from graph.Graph import Graph
import time
def main():
g = Graph(5)
g.addEdge(0, 3)
g.addEdge(1, 4)
g.addEdge(2, 3)
g.addEdge(2, 4)
g.addEdge(1, 3)
g.dfs(3)
g.bfs(3)
if __name__ == '__main__':
start_time = time.time()
main()
print("Running time: %f microseconds" % ((time.time() - start_time)*1000))
And the result:
Python 0.157 ms vs. Java 4495 ms.
And guess what, using SparseGraph class takes 0.118 ms (feeling good
).