Graphs

Package diagram is used for making simple diagrams (check reference). Some egs:

library(diagram)

names <- c("0", "0", "1", "1")
M <- matrix(nrow = 4, ncol = 4, byrow = TRUE, data = 0)
M[2, 1] <- M[4, 3] <- ""

par(mar = c(0, 0, 0, 0))
plotmat(M, pos = c(2, 2), # pos: the first 2 nodes in the same line, next 2 in the line below
           name = names, lwd = 1, curve = 0,
           box.lwd = 1, cex.txt = 0.8, box.size = .1,
           box.type = "circle", box.prop = 0.5)
text(0.25,.5,"X"); text(.75,.5,"Y");

plot of chunk unnamed-chunk-2

names <- c("0", "1", "2", "1", "3", "4")
M <- matrix(nrow = 6, ncol = 6, byrow = TRUE, data = 0)
M[2, 1] <- "1/2" 
M[3, 1] <- "1/2"
M[5, 4] <- "1/3" 
M[6, 4] <- "2/3"

par(mar = c(0, 0, 0, 0))
plotmat(M, pos = matrix(c(.25,.875, .75,.875, .75,.675, .25,.5, .75,.5, .75,.25), ncol=2, byrow=TRUE),
        # in this case, pos is given the coordenates for each node (inside the [(0,0),(1,1)] box)
           name = names, lwd = 1, curve = 0,
           box.lwd = 1, cex.txt = 0.8, box.size = .1,
           box.type = "circle", box.prop = 0.5)
text(0.1,.5,"X"); text(.9,.5,"Y");

plot of chunk unnamed-chunk-3

names <- c("0", "0", "1", "1")
M <- matrix(nrow = 4, ncol = 4, byrow = TRUE, data = 0)
M[2, 1] <- M[4, 3] <- "1-e"
M[4, 1] <- M[2, 3] <- "e"

par(mar = c(0, 0, 0, 0))
plotmat(M, pos = c(2, 2), name = names, lwd = 1, curve = 0,
           box.lwd = 1, cex.txt = 0.8, box.size = .1,
           box.type = "circle", box.prop = 0.5)
text(0.25,.5,"X"); text(.75,.5,"Y");

plot of chunk unnamed-chunk-4

There is also the igraph package. Check their website for documentation, also online. Check this tutorial.

Another source was http://horicky.blogspot.pt/2012/04/basic-graph-analytics-using-igraph.html

library(igraph)
par(mfrow=c(1,2))
plot(graph.ring(5,circular=TRUE))
plot(graph.ring(5,directed=TRUE,mutual=TRUE))

plot of chunk unnamed-chunk-5

par(mfrow=c(1,3))
plot(graph.star(7,mode="in"))
plot(graph.star(7,mode="out"))
plot(graph.star(7,mode="undirected"))

plot of chunk unnamed-chunk-5

par(mfrow=c(1,1))
plot(graph.lattice( c(3,3) ))

plot of chunk unnamed-chunk-6

plot(graph.lattice( c(3,3), directed=TRUE ))

plot of chunk unnamed-chunk-6

# In a circular lattice the difference of the coordinates of the vertices is calculated modulo the size of the lattice along the given dimension so for example in the circular 5x3 two dimensional lattice vertices (1,1) and (1,3) are also connected just like (1,1) and (5,1).
plot(graph.lattice( c(3,3), circular=TRUE ))

plot of chunk unnamed-chunk-6

plot(graph.tree(20))

plot of chunk unnamed-chunk-6

plot(graph.tree(20, children=3))

plot of chunk unnamed-chunk-6

plot(graph.tree(20, mode="out"))

plot of chunk unnamed-chunk-6

plot(graph.tree(20, mode="in"))

plot of chunk unnamed-chunk-6

plot(graph.tree(20, mode="undirected"))

plot of chunk unnamed-chunk-6

Some more messy graphs:

g <- graph( c(1,2, 1,3, 1,1, 3,4, 4,5, 5,6), directed=TRUE )
plot(g)

plot of chunk unnamed-chunk-7

are.connected(g,1,3)

[1] TRUE

are.connected(g,3,1)

[1] FALSE

g <- graph.full(4, directed=TRUE)
plot(g)

plot of chunk unnamed-chunk-7

is.directed(g)

[1] TRUE

g <- graph( c(1,2, 1,3, 1,1, 3,4, 4,5, 5,6), directed=TRUE, n=8 )
plot(g)

plot of chunk unnamed-chunk-7

get.edgelist(g)

     [,1] [,2]
[1,]    1    2
[2,]    1    3
[3,]    1    1
[4,]    3    4
[5,]    4    5
[6,]    5    6

# a matrix can be used to create a graph
edgelist <- matrix(c(1:5,2:5,1),ncol=2)
edgelist

     [,1] [,2]
[1,]    1    2
[2,]    2    3
[3,]    3    4
[4,]    4    5
[5,]    5    1

g <- graph(t(edgelist))
plot(g)

plot of chunk unnamed-chunk-7

adjacency.matrix <- matrix( (runif(64)>.5)+0, nrow=8 )
g <- graph.adjacency(adjacency.matrix)
plot(g)
get.adjacency(g)

plot of chunk unnamed-chunk-7

8 x 8 sparse Matrix of class "dgCMatrix"

[1,] . . 1 1 . 1 . .
[2,] . 1 1 . 1 . . .
[3,] . . 1 . . . . 1
[4,] 1 1 . 1 . 1 1 1
[5,] . 1 . . . 1 1 .
[6,] 1 1 1 . 1 1 . .
[7,] . 1 . 1 . . . .
[8,] . . 1 . . 1 1 .

# data frame can also be used
set.seed(151)
size <- 10
df <- data.frame(name = letters[1:10],
                 age = rpois(size,20),
                 gender = sample(c("F","M"),size,replace=TRUE))
df

   name age gender
1     a  19      M
2     b  23      M
3     c  19      M
4     d  19      M
5     e  18      F
6     f  23      F
7     g  13      F
8     h  21      F
9     i  18      F
10    j  17      M

df.edges <- data.frame(origin = sample(letters[1:size],size,replace=TRUE),
                       destiny = sample(letters[1:size],size,replace=TRUE),
                       friend = sample(c("Y","N"),size,replace=TRUE))
df.edges

   origin destiny friend
1       a       a      Y
2       h       f      N
3       j       d      N
4       j       i      N
5       h       i      Y
6       f       g      N
7       g       c      N
8       a       a      Y
9       a       d      Y
10      g       b      Y

g <- graph.empty()
g <- add.vertices(g, nrow(df), 
                  name=as.character(df[,1]), 
                  age=df[,2],
                  gender=as.character(df[,3]))
V(g)$name

 [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j"

V(g)$age

 [1] 19 23 19 19 18 23 13 21 18 17

V(g)$gender

 [1] "M" "M" "M" "M" "F" "F" "F" "F" "F" "M"

vcount(g) # number of vertices

[1] 10

ids <- 1:length(V(g)$name)
names(ids) <- V(g)$name
ids

 a  b  c  d  e  f  g  h  i  j 
 1  2  3  4  5  6  7  8  9 10

from <- as.character(df.edges[,1])
to   <- as.character(df.edges[,2])
edges <- matrix(c(ids[from], ids[to]), ncol=2)
edges

      [,1] [,2]
 [1,]    1    1
 [2,]    8    6
 [3,]   10    4
 [4,]   10    9
 [5,]    8    9
 [6,]    6    7
 [7,]    7    3
 [8,]    1    1
 [9,]    1    4
[10,]    7    2

g <- add.edges(g, t(edges), 
                  friend=as.character(df.edges[,3]))
E(g)

Edge sequence:

[1]  a -> a
[2]  h -> f
[3]  j -> d
[4]  j -> i
[5]  h -> i
[6]  f -> g
[7]  g -> c
[8]  a -> a
[9]  a -> d
[10] g -> b

ecount(g) # number of edges

[1] 10


# customize graph for ploting
V(g)[gender=="F"]$color <- "green"
V(g)[gender=="M"]$color <- "red"
E(g)$color <- "black"
E(g)[friend=="Y"]$color <- "red"
E(g)$labels <- 1:ecount(g) #E(g)$friend
E(g)$weight <- 1:2

E(g)$names <- letters[1:10] # just an an eg

igraph.options(arrow.width=13)
plot(g)

plot of chunk unnamed-chunk-7


# for all options check plot.igraph at reference manual

plot.igraph(g, layout=layout.fruchterman.reingold,
            vertex.label.dist=0,
            vertex.label.cex=1:2,  
            vertex.label.degree=pi/2,  
            vertex.shape=c("square","circle"),
            vertex.label.color=c(0,1),
            edge.color=E(g)$color,
            edge.width=E(g)$weight,
            edge.label=E(g)$names, 
            edge.label.cex=2, 
            edge.lty=2, 
            edge.curved=TRUE,
            edge.loop.angle=pi/4,
            edge.arrow.size=1,
            frame=TRUE)

plot of chunk unnamed-chunk-7

g <- barabasi.game(100, directed=FALSE)
d <- get.diameter(g)
E(g)$color <- "SkyBlue2"
E(g)$width <- 1
E(g, path=d)$color <- "red"
E(g, path=d)$width <- 2
V(g)$labelcolor <- V(g)$color  <- "blue"
V(g)[ d ]$labelcolor <- V(g)[ d ]$color <- "red"
igraph.options(label.dist=0.4)

plot(g, layout=layout.kamada.kawai, 
     edge.color=E(g)$color,edge.width=E(g)$width,
     vertex.color=V(g)$color, 
     vertex.size=3)

plot of chunk unnamed-chunk-8

Some extra manipulations and a mention to the Nexus repository

library(igraph)
nexus.info(2)  # Nexus repository is an online collection of network data sets.

NEXUS D-W- 81 817 #2 UKfaculty -- UK faculty social network
+ tags: directed; social network; weighted
+ nets: 1/UKfaculty
+ attr: weight (e/n), Group (v/n) 
+ date: 2011-01-05
+ licence: Creative Commons by-sa 2.0 UK
+ licence url: http://creativecommons.org/licenses/by-sa/2.0/uk/
+ summary: Social network among faculty members at a UK university.
+ description: The personal friendship network of a faculty of a UK university, consisting of 81
  vertices (individuals) and 817 directed and weighted connections. The school affiliation of each
  individual is stored as a vertex attribute. This dataset can serve as a testbed for community
  detection algorithms.
+ formats: GraphML(5306);R-igraph(7951);Python-igraph(5705)
+ citation: Nepusz T., PetrÃ³czi A., NÃ©gyessy L., BazsÃ³ F.: Fuzzy communities and the concept of
  bridgeness in complex networks. Physical Review E 77:016107, 2008.

gg <- as.undirected( nexus.get(2) )
plot(gg)

plot of chunk unnamed-chunk-9

head(get.adjacency(gg)) # show adjacency matrix

6 x 81 sparse Matrix of class "dgCMatrix"

[1,] . . . 1 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 1 . 1 . . . . . 1 1 . . . . . . 1 . . .
[2,] . . . . . . . . . . . . . . 1 . . 1 . 1 1 . . . 1 1 . . 1 . 1 1 . . 1 . 1 1 1 . 1 . 1 . . 1 . . . . 1 1 . 1 .
[3,] . . . 1 . . . . 1 . . . . . . . 1 . . . . . . . . . . . . . . . . . . 1 . . . . . . . . . . . . . . . . 1 . .
[4,] 1 . 1 . . . . . 1 . . . . . . . 1 . . . . . . . . . . . 1 . . . . . . 1 . . . . . . . . 1 . . . . . . . 1 . .
[5,] . . . . . 1 1 . 1 1 . 1 1 . . 1 . . . . . 1 1 . . . 1 1 . . . . . . . . 1 1 . 1 . 1 . . . . . 1 1 1 . 1 . . .
[6,] . . . . 1 . 1 . . . . . . . . . . . . 1 . . . . . . . . 1 1 . . . . . . . . . . . . . . . . 1 . . 1 . . . . .

[1,] . . . . . 1 1 . . . . . . . . . . . . . . . . . . 1
[2,] . 1 . . . . 1 . . . . . . . 1 . . . . . . . . 1 1 .
[3,] . . . 1 . 1 . . . . . . . . . . . . . . . . . . . .
[4,] . . . . 1 1 . . . . . . . . . . . . 1 1 . . 1 . . .
[5,] . . . . . 1 . . 1 1 . 1 1 1 . . 1 . . . 1 1 . . . .
[6,] . . 1 . . . . . . . 1 . 1 1 . . 1 . . . . . . . . .

head(get.data.frame(gg),n=12) # convert to data.frame

   from to weight
1     1  4      4
2     3  4      1
3     5  6      1
4     5  7      2
5     6  7     28
6     3  9      1
7     4  9      1
8     5  9      1
9     5 10     12
10    7 10      8
11    5 12      1
12    7 12      1


gg.com <- fastgreedy.community(gg) # tries to find dense subgraphs (aka communities)
V(gg)$color <- gg.com$membership + 1 # color each community differently
plot(gg)

plot of chunk unnamed-chunk-9


# select one community
gg.one <- delete.vertices(gg, V(gg)[V(gg)$color != 2 ])
plot(gg.one)

plot of chunk unnamed-chunk-9


gg.one <- delete.edges(gg.one, E(gg.one)[ E(gg.one)$weight < 10]) # remove edges less than 10
plot(gg.one)

plot of chunk unnamed-chunk-9


# get the number of neighbors for each vertice
n.neighbors <- sapply(V(gg.one), function(v)length(neighbors(gg.one, v)))
# change the shape of all the vertices with less than 3 neighbors
V(gg.one)[V(gg.one)[n.neighbors<3]]$shape <- "square"
V(gg.one)[V(gg.one)[n.neighbors>=3]]$shape <- "circle"
plot(gg.one)

plot of chunk unnamed-chunk-9


# add the strenght of the edge
plot.igraph(gg.one, layout=layout.reingold.tilford,
            edge.width=E(gg.one)$weight/4, edge.color="black")

plot of chunk unnamed-chunk-9

Next ref: http://statistics.ats.ucla.edu/stat/r/faq/snplot.htm

library(igraph)

# this is the context of mat25.txt

# 0 0 1 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0
# 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0
# 0 1 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
# 0 0 0 0 1 0 0 0 0 0 0 1 1 0 0 0 0 1 0 0 0 0 0 1 0
# 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 1 1 0 0 0 1
# 0 0 0 0 0 0 0 1 0 0 0 1 0 1 0 1 0 0 0 0 1 0 0 0 0
# 1 0 0 0 1 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0
# 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 1 0
# 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 1 0 0 0 1
# 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 1
# 1 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0
# 0 0 0 0 1 0 1 0 0 0 0 0 0 1 0 0 0 0 1 0 0 1 0 0 0
# 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1
# 1 0 0 1 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0
# 1 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 1 0 0 0
# 0 0 1 0 0 0 0 1 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 1
# 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 1
# 0 0 1 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 1
# 0 0 0 0 0 0 0 1 0 0 0 1 1 0 0 0 0 0 0 0 0 1 1 0 0
# 1 0 0 1 0 0 0 0 0 0 1 0 0 0 0 1 0 0 1 0 0 0 0 0 0
# 0 1 1 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0
# 0 0 0 1 0 1 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0
# 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0
# 0 0 0 0 0 1 0 0 0 1 1 0 0 0 0 0 0 0 1 0 0 0 1 0 0
# 1 1 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

x <- read.table("http://www.ats.ucla.edu/stat/data/mat25.txt", header = FALSE)
head(x)

  V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20 V21 V22 V23 V24 V25
1  0  0  1  0  0  1  0  0  0   1   0   0   0   0   0   0   0   0   0   0   0   1   0   1   0
2  0  0  0  0  1  1  0  0  0   1   0   0   0   0   0   0   0   0   0   1   1   0   0   0   0
3  0  1  0  0  1  0  1  1  0   0   0   0   0   0   0   0   0   1   0   0   0   0   0   0   0
4  0  0  0  0  1  0  0  0  0   0   0   1   1   0   0   0   0   1   0   0   0   0   0   1   0
5  0  0  0  0  0  0  0  1  0   0   0   1   0   0   0   0   0   0   0   1   1   0   0   0   1
6  0  0  0  0  0  0  0  1  0   0   0   1   0   1   0   1   0   0   0   0   1   0   0   0   0


# In order for the igraph package to recognize this table as a network, we can first convert it to a matrix. Then, if we wish to calculate graph-related statistics on it (betweenness, closeness, degree), we can use the matrix to create a graph object.

network <- as.matrix(x)
g1 <- graph.adjacency(network)  

# Betweenness is a centrality measure of a vertex within a graph (there is also edge betweenness, which is not discussed here). Betweenness centrality quantifies the number of times a node acts as a bridge along the shortest path between two other nodes.
(b1 <- betweenness(g1, directed = FALSE))

    V1     V2     V3     V4     V5     V6     V7     V8     V9    V10    V11    V12    V13    V14    V15    V16 
12.510  4.109 10.409  4.920 11.346 12.489  1.835 14.577  6.052  6.901  4.176 10.283  7.496  9.331  2.147  4.066 
   V17    V18    V19    V20    V21    V22    V23    V24    V25 
 1.069  4.217  4.420  9.077 10.155  9.407  4.019 12.067  9.920


# In connected graphs there is a natural distance metric between all pairs of nodes, defined by the length of their shortest paths. The farness of a node s is defined as the sum of its distances to all other nodes, and its closeness is defined as the inverse of the farness.
(c1 <- closeness(g1, mode = "out"))

     V1      V2      V3      V4      V5      V6      V7      V8      V9     V10     V11     V12     V13     V14 
0.01471 0.01408 0.01351 0.01408 0.01429 0.01408 0.01389 0.01408 0.01389 0.01389 0.01408 0.01389 0.01429 0.01389 
    V15     V16     V17     V18     V19     V20     V21     V22     V23     V24     V25 
0.01408 0.01429 0.02041 0.01449 0.01389 0.01449 0.01429 0.01429 0.01449 0.01449 0.01370


# the degree (or valency) of a vertex of a graph is the number of edges incident to the vertex, with loops counted twice.
(d1 <- degree(g1, mode = "out"))

 V1  V2  V3  V4  V5  V6  V7  V8  V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20 V21 V22 V23 V24 V25 
  5   5   5   5   5   5   5   5   5   5   5   5   5   5   5   5   5   5   5   5   5   5   5   5   5


###########

# Let file elist1.txt be a edge list like this:
# 
# 1 2
# 1 3
# 1 4
# 3 5
# 4 6
# 6 4
#
# We can read in this file as a graph, indicating that the format is an "edgelist".

xlist <- read.graph("http://www.ats.ucla.edu/stat/data/elist1.txt", format = "edgelist")
str(xlist)

IGRAPH D--- 7 6 -- 
+ edges:
[1] 2->3 2->4 2->5 4->6 5->7 7->5

plot.igraph(xlist)

plot of chunk unnamed-chunk-10


# Looking at the summary of our graph object, R believes our graph has 7 vertices although we only listed edges ranging from vertices 1 through 6. R makes a few assumptions unless otherwise specified:

# Vertices are indexed from zero and go through the highest numbered vertex in the edged list. You can specify that your graph contains more vertices than this, but not less.
# Edges are directed, going from the first vertex listed to the second.

# So let's amend considering that we have 8 vertices and the graph is indirected:

xlist.8un <- read.graph("http://www.ats.ucla.edu/stat/data/elist1.txt", format = "edgelist", n = 8, directed = FALSE)
str(xlist)

IGRAPH D--- 7 6 -- 
+ edges:
[1] 2->3 2->4 2->5 4->6 5->7 7->5

plot.igraph(xlist.8un)

plot of chunk unnamed-chunk-10


# Our first graph has an unconnected 0 vertex and arrows on the edges. Our second has unconnected 0 and 7 vertices and no arrows on the edges. We could also enter our data in a single vector of vertex indices where an edge connects the first and second, third and fourth, fifth and sixth entries and so on.

g2 <- graph(c(1, 2, 2, 3, 2, 4, 2, 5, 4, 6, 5, 7, 7, 5))
str(g2)

IGRAPH D--- 7 7 -- 
+ edges:
[1] 1->2 2->3 2->4 2->5 4->6 5->7 7->5

plot.igraph(g2)

plot of chunk unnamed-chunk-10

Package igraph also includes graph algorithms. Some egs:

Shortest Path(s)

g <- graph.ring(10)
shortest.paths(g)

      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
 [1,]    0    1    2    3    4    5    4    3    2     1
 [2,]    1    0    1    2    3    4    5    4    3     2
 [3,]    2    1    0    1    2    3    4    5    4     3
 [4,]    3    2    1    0    1    2    3    4    5     4
 [5,]    4    3    2    1    0    1    2    3    4     5
 [6,]    5    4    3    2    1    0    1    2    3     4
 [7,]    4    5    4    3    2    1    0    1    2     3
 [8,]    3    4    5    4    3    2    1    0    1     2
 [9,]    2    3    4    5    4    3    2    1    0     1
[10,]    1    2    3    4    5    4    3    2    1     0

get.shortest.paths(g, 5)

[[1]]
[1] 5 4 3 2 1

[[2]]
[1] 5 4 3 2

[[3]]
[1] 5 4 3

[[4]]
[1] 5 4

[[5]]
[1] 5

[[6]]
[1] 5 6

[[7]]
[1] 5 6 7

[[8]]
[1] 5 6 7 8

[[9]]
[1] 5 6 7 8 9

[[10]]
[1]  5  4  3  2  1 10

get.all.shortest.paths(g, 1, 6:8)

$res
$res[[1]]
[1]  1 10  9  8  7  6

$res[[2]]
[1] 1 2 3 4 5 6

$res[[3]]
[1]  1 10  9  8  7

$res[[4]]
[1]  1 10  9  8


$nrgeo
 [1] 1 1 1 1 1 2 1 1 1 1

average.path.length(g)

[1] 2.778

## Weighted shortest paths
el <- matrix(nc=3, byrow=TRUE,
             c(1,2,0, 
               1,3,2, 
               1,4,1, 
               2,3,0, 
               2,5,5, 
               2,6,2,
               3,2,1, 
               3,4,1,
               3,7,1,
               4,3,0, 
               4,7,2, 
               5,6,2, 
               5,8,8, 
               6,3,2, 
               6,7,1, 
               6,9,1,
               6,10,3,
               8,6,1, 
               8,9,1, 
               9,10,4) )
g2 <- add.edges(graph.empty(10), t(el[,1:2]), weight=el[,3])
plot.igraph(g2,edge.label=el[,3])

plot of chunk unnamed-chunk-11

shortest.paths(g2, mode="out")

      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
 [1,]    0    0    0    1    5    2    1   13    3     5
 [2,]  Inf    0    0    1    5    2    1   13    3     5
 [3,]  Inf    1    0    1    6    3    1   14    4     6
 [4,]  Inf    1    0    0    6    3    1   14    4     6
 [5,]  Inf    5    4    5    0    2    3    8    3     5
 [6,]  Inf    3    2    3    8    0    1   16    1     3
 [7,]  Inf  Inf  Inf  Inf  Inf  Inf    0  Inf  Inf   Inf
 [8,]  Inf    4    3    4    9    1    2    0    1     4
 [9,]  Inf  Inf  Inf  Inf  Inf  Inf  Inf  Inf    0     4
[10,]  Inf  Inf  Inf  Inf  Inf  Inf  Inf  Inf  Inf     0

# Another example
g <- erdos.renyi.game(12, 0.25)
plot(g, layout=layout.fruchterman.reingold)

plot of chunk unnamed-chunk-11

pa <- get.shortest.paths(g, 5, 9)[[1]]
pa

[1] 5 2 6 9

V(g)[pa]$color <- 'green'
E(g)$color <- 'grey'
E(g, path=pa)$color <- 'red'
E(g, path=pa)$width <- 3
plot(g, layout=layout.fruchterman.reingold)

plot of chunk unnamed-chunk-11

Minimum Spanning Tree

Minimum Spanning Tree algorithm is to find a Tree that connect all the nodes within a connected graph while the sum of edges weight is minimum.

# Create the graph and assign random edge weights
g <- erdos.renyi.game(12, 0.35)
E(g)$weight <- round(runif(length(E(g))),2) * 50
plot(g, layout=layout.fruchterman.reingold,  edge.label=E(g)$weight)

plot of chunk unnamed-chunk-12

# Compute the minimum spanning tree
mst <- minimum.spanning.tree(g)
plot(mst, layout=layout.reingold.tilford, edge.label=E(mst)$weight)

plot of chunk unnamed-chunk-12

Clustering

g <- erdos.renyi.game(20, 1/20)
plot(g)

plot of chunk unnamed-chunk-13

clusters(g)

$membership
 [1]  1  2  3  4  4  4  4  5  4  4  4  6  4  4  7  8  9 10  4  4

$csize
 [1]  1  1  1 11  1  1  1  1  1  1

$no
[1] 10

# membership: numeric vector giving the cluster id to which each vertex belongs.
# csize: numeric vector giving the sizes of the clusters.
# no: numeric constant, the number of clusters

Subcomponents

# subcomponent() finds all vertices reachable from a given vertex, or the opposite: all vertices from which a given vertex is reachable via a directed path.

set.seed(100)
g <- erdos.renyi.game(20, 1/50, directed=TRUE)
plot(g)

plot of chunk unnamed-chunk-14

subcomponent(g, 3, "in")

[1] 3 1

subcomponent(g, 3, "out")

[1]  3 15 18

subcomponent(g, 3, "all")

[1]  3  1 15 18  7  6

Graph Statistics

There are many statistics that we can look to get a general ideas of the shape of the graph. At the highest level, we can look at summarized statistics of the graph. This includes

Size of the graph (number of nodes and edges)
Density of the graph measure weither the graph dense (|E| proportional to |V|²⁾ or sparse (|E| proportional to |V|) ?
Is the graph very connected (large portion of nodes can reach each other), or is it disconnected (many islands) ?
Diameter of the graph measure the longest distance between any two nodes
Reciprocity measures in a directed graph, how symmetric the relationships are
Distribution of in/out “degrees”

set.seed(121)
# Create a random graph
g <- erdos.renyi.game(200, 0.02)
plot(g, layout=layout.fruchterman.reingold, 
       vertex.label=NA, vertex.size=3)

plot of chunk unnamed-chunk-15

# No of nodes
length(V(g))

[1] 200

# No of edges
length(E(g))

[1] 401

# Density (No of edges / possible edges)
graph.density(g)

[1] 0.02015

# Number of islands
clusters(g)$no

[1] 3

# Global cluster coefficient:
#(close triplets/all triplets)
transitivity(g, type="global")

[1] 0.02846

# Edge connectivity, 0 since graph is disconnected
edge.connectivity(g)

[1] 0

# Same as graph adhesion
graph.adhesion(g)

[1] 0

# Diameter of the graph
diameter(g)

[1] 10

# Reciprocity of the graph
reciprocity(g)

[1] 1

# Diameter of the graph
diameter(g)

[1] 10

# Reciprocity of the graph
reciprocity(g)

[1] 1

Drill down a level, we can also look at statistics of each pair of nodes, such as …

Connectivity between two nodes measure the distinct paths with no shared edges between two nodes. (ie: how much edges need to be removed to disconnect them)
Shortest path between two nodes
Trust between two nodes (a function of number of distinct path and distance of each path)

# Create a random graph
g <- erdos.renyi.game(9, 0.5)
plot(g, layout=layout.fruchterman.reingold)

plot of chunk unnamed-chunk-16

# Compute the shortest path matrix
shortest.paths(g)

      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
 [1,]    0    2    1    2    1    1    2    2    2
 [2,]    2    0    3    2    1    2    2    1    1
 [3,]    1    3    0    3    2    1    1    2    2
 [4,]    2    2    3    0    1    2    2    1    2
 [5,]    1    1    2    1    0    2    1    1    2
 [6,]    1    2    1    2    2    0    1    1    1
 [7,]    2    2    1    2    1    1    0    2    1
 [8,]    2    1    2    1    1    1    2    0    1
 [9,]    2    1    2    2    2    1    1    1    0

M <- matrix(rep(0, 81), nrow=9)
for (i in 1:9) {
  for (j in 1:9) {
    if (i == j) {
      M[i, j] <- -1
    } else {
      M[i, j] <- edge.connectivity(g, i, j)
    }
  }
}
M

      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
 [1,]   -1    3    3    2    3    3    3    3    3
 [2,]    3   -1    3    2    3    3    3    3    3
 [3,]    3    3   -1    2    3    3    3    3    3
 [4,]    2    2    2   -1    2    2    2    2    2
 [5,]    3    3    3    2   -1    5    4    5    4
 [6,]    3    3    3    2    5   -1    4    5    4
 [7,]    3    3    3    2    4    4   -1    4    4
 [8,]    3    3    3    2    5    5    4   -1    4
 [9,]    3    3    3    2    4    4    4    4   -1

Centrality Measures

At the fine grain level, we can look at statistics of individual nodes. Centrality score measure the social importance of a node in terms of how “central” it is based on a number of measures

Degree centrality gives a higher score to a node that has a high in/out-degree
Closeness centrality gives a higher score to a node that has short path distance to every other nodes
Betweenness centrality gives a higher score to a node that sits on many shortest path of other node pairs
Eigenvector centrality gives a higher score to a node if it connects to many high score nodes
Local cluster coefficient measures how my neighbors are inter-connected with each other, which means the node becomes less important.

# Degree
degree(g)

[1] 3 3 3 2 5 5 4 5 4

# Closeness (inverse of average dist)
closeness(g)

[1] 0.07692 0.07143 0.06667 0.06667 0.09091 0.09091 0.08333 0.09091 0.08333

# Betweenness
betweenness(g)

[1] 1.3667 0.3333 0.3333 0.0000 6.2333 4.4000 2.4000 4.2000 1.7333

# Local cluster coefficient
transitivity(g, type="local")

[1] 0.3333 0.6667 0.6667 1.0000 0.2000 0.4000 0.3333 0.4000 0.5000

# Eigenvector centrality
evcent(g)$vector

[1] 0.6350 0.7004 0.6231 0.4782 0.9168 1.0000 0.8571 0.9960 0.8884

# Now rank them
order(degree(g))

[1] 4 1 2 3 7 9 5 6 8

order(closeness(g))

[1] 3 4 2 1 7 9 5 6 8

order(betweenness(g))

[1] 4 2 3 1 9 7 8 6 5

order(evcent(g)$vector)

[1] 4 3 1 2 7 9 5 8 6

From his studies, Drew Conway has found that people with low Eigenvector centrality but high Betweenness centrality are important gate keepers, while people with high Eigenvector centrality but low Betweenness centrality has direct contact to important persons. So lets plot Eigenvector centrality against Betweenness centrality.

# Create a graph
g1 <- barabasi.game(100, directed=F)
g2 <- barabasi.game(100, directed=F)
g <- g1 %u% g2 # union of the two graphs, only edges which are included in at least one graph will be part of the new graph
lay <- layout.fruchterman.reingold(g)
# Plot the eigevector and betweenness centrality
plot(evcent(g)$vector, betweenness(g))
text(evcent(g)$vector, betweenness(g), 0:100, 
       cex=0.6, pos=4)

plot of chunk unnamed-chunk-18

V(g)[12]$color <- 'red'
V(g)[8]$color <- 'green'
plot(g, layout=lay, vertex.size=8,vertex.label.cex=0.6)

plot of chunk unnamed-chunk-18

More info

https://sites.google.com/site/daishizuka/toolkits/sna/igraph-vs-statnet