For my personal code archive and everybody who finds interest in it I publish the R code which I used to create the frames for the animations showing the carpoolings formed through the booking system until a certain date. The graphs are created using igraph and plotted into frames which are later glued into an MPEG clip using ffmpeg.
Guidance for the code
The graph and its growth is all contained in one CSV file keeping three columns. A driver’s ID, a passenger’s ID and the the date the passenger took a ride with the driver – according to our booking system – this list defines the possible edges.
From this data set I extract two data frames – one keeping all ID’s for the drivers and the date of his earlieast offered ride (where a passenger was found for) and the other one keeping all ID’ for the passengers and the date of his earliest participation – this list defines the possible nodes.
Why do I need the date of earliest participation as driver or passenger? Because the animation goes through the dates sequentially, starting April 1 2011. So to create the graph for a given date D I only keep the nodes (drivers/passengers) who participated on date D or earlier. For those nodes I then select the necessary edges and I have all I need to define the graph. Also I use the first date of participation to determine whether or not to emphasize the node as new.
Another key aspect about my chosen approach is to calculate the positions for all nodes together – using a force-directed algorithm called Fruchterman-Reingold. All the nodes not yet to be displayed will be set to transparent color. So the positions are determined for all nodes and edges (the final state) but only the ones with activity until D are displayed. That was the best solution I found using igraph.
The code!
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 |
library(sqldf) library(igraph) # what passenger took a ride with what driver on what day?! links <- read.table("C:\\Users\\Raffael\\Desktop\\cp\\all_bs_lifts.csv", header=T, sep=",") # turn the string into a date links <- data.frame(drv = links$drv, pas = links$pas, d = as.Date(links$d)) # extract the drvs/pass and the first date of participation nodes_pas <- sqldf("select pas as user_id, 'p' as type, min(d) as date from links group by user_id") nodes_drv <- sqldf("select drv as user_id, 'd' as type, min(d) as date from links group by user_id") # cast the date into a date relative to Jan 1 1970 nodes_pas$date <- as.Date(nodes_pas$date, origin="1970-01-01") nodes_pas$date <- as.Date(nodes_pas$date, origin="1970-01-01") # union of passenger- and driver-nodes nodes <- sqldf("select user_id, type, date from nodes_pas union select user_id, type, date from nodes_drv") # group by ID because a person can be pas and a drv! # then the type is "pd" or "dp" nodes <- sqldf("select user_id, group_concat(type,'') as type, min(date) as date from nodes group by user_id") # adding a gaplessly incrementing ID and again cast to date # (either I was drunk or it makes sense and I forgot why ...) nodes <- data.frame(i = 1:length(nodes$user_id), user_id = nodes$user_id, type=nodes$type, date=as.Date(nodes$date, origin="1970-01-01")) # compute the edges using the new neater IDs link_list <- sqldf("select n_drv.i as drv_i, n_pas.i as pas_i, l.d as date from links l join nodes n_drv on l.drv = n_drv.user_id join nodes n_pas on l.pas = n_pas.user_id") # and now we create the (i)graph - undirected and using Fruchterman- # Reingold for positioning. Looks like no big deal but took about 8 hours # on my notebook for the big animation. g <- graph.data.frame(link_list, directed=F, vertices=nodes) layout_g <- layout.fruchterman.reingold(g) # IDs for the edges link_list$i <- 1:length(link_list$drv_i) # this function creates a PNG with the graph for every day create_graph_png <- function(max_date,i) { # gives the for until 'max_date' relevant edges gx <- subgraph.edges(g, link_list[which(link_list$date <= max_date),'i'], delete.vertices=F) V(gx)$color="black" V(gx)$frame.color="black" col_driver <- hsv(0,1,1,alpha=.5) V(gx)[which(nodes$type == "d")]$color=col_driver V(gx)[which(nodes$type == "d")]$frame.color=col_driver col_passenger <- hsv(0.66,1,1,alpha=.5) V(gx)[which(nodes$type == "p")]$color=col_passenger V(gx)[which(nodes$type == "p")]$frame.color=col_passenger col_hybrid <- hsv(.7,1,1,alpha=.5) V(gx)[which(nodes$type == "pd" | nodes$type == "dp")]$color=col_hybrid V(gx)[which(nodes$type == "pd" | nodes$type == "dp")] $frame.color=col_hybrid # the size of a node grows with the number of attached edges V(gx)$size <- degree(gx)^(1/3) # takes care of the new ones being emphasized by bright color # and a changing size newones <- hsv(.36,1,1,alpha=.5) V(gx)[which(abs(as.numeric(nodes$date - max_date)) < 5)]$color=newones V(gx)[which(abs(as.numeric(nodes$date - max_date)) < 5)] $frame.color=newones V(gx)[which(abs(as.numeric(nodes$date - max_date)) < 3)]$size=1.6 V(gx)[which(abs(as.numeric(nodes$date - max_date)) < 1)]$size=2.5 #V(gx)[which(abs(as.numeric(nodes$date - max_date)) < 3)]$size=3 #V(gx)[which(abs(as.numeric(nodes$date - max_date)) < 2)]$size=5 #V(gx)[which(abs(as.numeric(nodes$date - max_date)) < 1)]$size=3 # get's rid of the not yet to be displayed nodes notyet <- hsv(1,1,1,alpha=0) V(gx)[which(degree(gx) < 1)]$frame.color=notyet V(gx)[which(degree(gx) < 1)]$color=notyet V(gx)[which(degree(gx) < 1)]$size=0 bgcolor <- hsv(0.66,0.05,1) # the textual annotations plot_title <- paste("Inner-German route / ",max_date) if(max_date >= as.Date("2012-03-06")) { plot_title <- paste(plot_title," - L 1") } if(max_date >= as.Date("2012-08-09")) { plot_title <- paste(plot_title," - L 2") } if(max_date >= as.Date("2012-11-05")) { plot_title <- paste(plot_title," - L 600km") } if(max_date >= as.Date("2013-01-24")) { plot_title <- paste(plot_title," - Cash") } # advertisement sub_title <- "Raffael Vogler - www.joyofdata.de" par(bg=bgcolor) # let's create a PNG and plot the graph onto it png(sprintf("C:\\Users\\Raffael\\Desktop\\cp\\graph_M_HH3\\%03d.png",i) ,width=5000, height=5000, bg=bgcolor, res=372) plot(gx, margin=0, frame=F, main=plot_title, sub=sub_title, vertex.label=NA, edge.color=rgb(.2,.2,.2), edge.arrow.mode=0, layout=layout_g, edge.width=.8) text(10,10, label=max_date) dev.off() } # let's get it started! start_date <- as.Date("2011-03-31") for(i in 1:720) { create_graph_png(start_date + i, i) } |