## Force-Directed Graph Visualization of Cloud Data

So, I've written about the process of loading the CSV cloud data and sanitizing it in [this post](http://praxis.scholarslab.org/blog/2015/11/16/cloud-data-cleaning/), but now I'm going to take those skills and try to visualize the data in some way using d3.js. Since I don't quite know how d3.js works, I'm going to try to reuse [their example](http://bl.ocks.org/mbostock/4062045).

First, we load and clean up the data in the same way as in the earlier post.

In [1]:
var csv = require('csv');
var fs = require('fs');
// Parsed CSV rows. Stays an empty array until the asynchronous parse below
// finishes, and also if loading fails, so later cells always see an array.
var cloud_data = [];

// `columns: true` makes the parser hand back one object per row, keyed by the
// header line, which is why later cells can read fields such as row.Scheduled.
var parser = csv.parse({delimiter: ',', columns: true}, function (err, data) {
  if (err) {
    // Report the failure instead of silently overwriting cloud_data with undefined.
    console.error('Could not parse 2015-11-16-cloud_data.csv:', err);
    return;
  }
  cloud_data = data;
});

fs.createReadStream('2015-11-16-cloud_data.csv')
  .on('error', function (err) {
    // A missing or unreadable file would otherwise surface as an unhandled 'error' event.
    console.error('Could not read 2015-11-16-cloud_data.csv:', err);
  })
  .pipe(parser);

So, we've loaded the CSV; now let's clean it up. First, we fill in blank scheduled times with the actual (observed) times, and second, we delete the entirely blank records.

In [2]:
// Pass 1: where no scheduled time was recorded, fall back to the observed time.
cloud_data.forEach(function (row) {
  if (row.Scheduled === "") {
    row.Scheduled = row.Observed;
  }
});

// Pass 2: a row whose Scheduled is still blank had no observed time either, so
// treat it as an empty record and drop it. Filtering (rather than `delete`)
// keeps the array dense, so `.length` and index-based loops stay accurate.
cloud_data = cloud_data.filter(function (row) {
  return row.Scheduled !== "";
});


There are a lot of ways to do something with this data, but let's do something simple as a first experiment. Let's make a force-directed graph of the words in any description and draw an edge between words which both occur on the same day. Let's put "Present", "Location", "Observation", and "Conditions" into separately colored groups though.

In [3]:
// Graph container: `nodes` collects one entry per distinct word and `links`
// collects the edges, each referring to nodes by their index in `nodes`.
var cloudJson = { nodes: [], links: [] };

/**
 * Finds the position of the node whose `name` equals the given word.
 *
 * @param {{nodes: Array<{name: string}>}} cloudJsonIn - graph object whose `nodes` array is searched.
 * @param {string} searchWordIn - word to look for.
 * @returns {number|false} index of the first matching node, or `false` when no
 *   node has that name. A valid index can be 0, so callers must compare the
 *   result with `=== false` instead of relying on truthiness.
 */
function getNodeIndex (cloudJsonIn, searchWordIn) {
  for (var nodeIndex = 0; nodeIndex < cloudJsonIn.nodes.length; nodeIndex++) {
    // Strict comparison: a non-string argument must never match through coercion.
    if (cloudJsonIn.nodes[nodeIndex].name === searchWordIn) {
      return nodeIndex;
    }
  }
  return false;
}


// First, build the nodes and links for the "Present" column.
for (var eachRowIndex = 0; eachRowIndex < cloud_data.length; eachRowIndex++) {
  var row = cloud_data[eachRowIndex];
  if (!row) {
    continue; // rows removed with `delete` during cleaning leave holes behind
  }

  // match() returns null when the text contains no words, so fall back to an
  // empty list instead of crashing on `words.length` below.
  var words = (row.Present || "").match(/\w+/g) || [];

  // Register every word not seen before as a node in group 1.
  for (var eachWordIndex = 0; eachWordIndex < words.length; eachWordIndex++) {
    if (getNodeIndex(cloudJson, words[eachWordIndex]) === false) {
      cloudJson.nodes.push({"name": words[eachWordIndex], "group": 1});
    }
  }

  // Link each word to every word that comes after it in the same row. The
  // last word has nothing after it, so the outer loop stops one short.
  for (var fromEachWordIndex = 0; fromEachWordIndex < words.length - 1; fromEachWordIndex++) {
    // The source node is the same for the whole inner loop, so look it up once.
    var sourceNodeIndex = getNodeIndex(cloudJson, words[fromEachWordIndex]);
    for (var toEachWordIndex = fromEachWordIndex + 1; toEachWordIndex < words.length; toEachWordIndex++) {
      cloudJson.links.push({
        "source": sourceNodeIndex,
        "target": getNodeIndex(cloudJson, words[toEachWordIndex]),
        "value": 1
      });
    }
  }
}


Now we have an object with nodes and edges that matches the example code. Let's stringify the JSON so that it can be exported to a file.

In [4]:
// Serialize the graph to a JSON string. This is left as a bare final expression
// so that its value appears as the cell output shown below.
JSON.stringify(cloudJson);

'{"nodes":[{"name":"JA","group":1},{"name":"RC","group":1},{"name":"neighbors","group":1},{"name":"passing","group":1},{"name":"car","group":1},{"name":"AW","group":1},{"name":"birds","group":1},{"name":"traffic","group":1},{"name":"upstairs","group":1},{"name":"JD","group":1},{"name":"M","group":1},{"name":"BS","group":1},{"name":"and","group":1},{"name":"Lundy","group":1},{"name":"near","group":1},{"name":"crickets","group":1},{"name":"radio","group":1},{"name":"two","group":1},{"name":"trash","group":1},{"name":"dudes","group":1},{"name":"inside","group":1},{"name":"cars","group":1},{"name":"Tia","group":1},{"name":"MER","group":1},{"name":"8","group":1},{"name":"students","group":1},{"name":"6","group":1},{"name":"bikes","group":1},{"name":"plane","group":1},{"name":"ER","group":1},{"name":"JB","group":1},{"name":"PL","group":1},{"name":"LW","group":1},{"name":"Rosie","group":1},{"name":"music","group":1},{"name":"outside","group":1},{"name":"construction","group":1},{"name":"neigh