Published: What We Know About Who is Donating to the D9 City Council Race

Well, I just had my work published in a news outlet for the first time! The publication is the Voice of San Diego, a local nonprofit news agency that’s well known for its investigative reporting on local affairs. Looking forward to doing more of this kind of stuff in the future.

Click here to check it out!

Here’s the code. Note that I jiggered with the dimensions of all the graphs to make them a little nicer:

library(plyr)
library(dplyr)
library(stringr)
library(ggplot2)
library(ggmap)
library(reshape2)
## Load the raw contribution records. The relative CSV path is resolved
## against the project directory selected by setwd().
setwd("~/Dropbox/Work/VOSD/D9")
contributions1 <- read.csv("contributions.csv")
contributions1$recipient_name <- as.character(contributions1$recipient_name)
contributions1$report_period_to <- as.Date(contributions1$report_period_to,
                                           format = "%m/%d/%y")

## Keep only records whose reporting period ended on or before 2016-06-30
## (the primary period). which() also drops rows whose date failed to parse.
contributionsP <- contributions1[which(contributions1$report_period_to <= as.Date("2016-06-30")), ]

## Rows whose recipient mentions any of the four candidates.
## The pattern must be ONE regular expression with alternation. Passing
## c("Saez", "Martinez", "Flores", "Gomez") makes str_detect() pair the
## patterns with the rows element by element (row 1 vs "Saez", row 2 vs
## "Martinez", ...), so a Martinez row is only found when it happens to line
## up with the "Martinez" slot -- which is why Araceli Martinez was missed.
## Newer stringr releases may raise a recycling error for that call instead.
rows <- which(str_detect(contributionsP$recipient_name, "Saez|Martinez|Flores|Gomez"))

## Manually pick the relevant recipient names out by position. Ugh.
## NOTE(review): column 5 is presumably recipient_name, and the positions are
## only valid for this exact CSV -- confirm before reusing with new data.
names <- unique(contributionsP[, 5])[c(3, 11, 18, 49, 75, 87, 122)]

newCont <- contributionsP[which(contributionsP$recipient_name %in% names), ]
## Split the filtered records into one data frame per candidate. Recipient
## names are matched exactly, so each spelling to be included is listed.
flores <- newCont[newCont$recipient_name %in% c(
    "Flores Ricardo",
    "Ricardo Flores",
    "Flores for City Council 2016",
    "Urban Neighbors United supporting Ricardo Flores for City Council 2016 sponsrd by and w/ Major Funding from the SD Regional Chamber of Commerce PAC "
), ]
saez <- newCont[newCont$recipient_name %in% c(
    "Sarah Saez for District 9 City Council 2016",
    "Sarah Saez"
), ]
gomez <- newCont[newCont$recipient_name %in% "Gomez for City Council 2016", ]
martinez <- newCont[newCont$recipient_name %in%
                        "Araceli Martinez for San Diego City Council District 9 2016", ]

## One-row table of total dollars per candidate, then reshaped to long form
## (one row per candidate) for plotting.
sums <- data.frame(
    Flores = sum(flores$contribution_amount),
    Gomez = sum(gomez$contribution_amount),
    Saez = sum(saez$contribution_amount),
    Martinez = sum(martinez$contribution_amount)
)
campaignCont <- melt(sums)
## No id variables; using all as measure variables
## Bar chart of total contributions per candidate; each bar carries a
## dollar-formatted label of its total.
ggplot(data = campaignCont, aes(x = variable, y = value, fill = variable)) +
    geom_bar(stat = "identity") +
    labs(x = "Candidate", y = "Total Contributions") +
    theme(
        legend.title = element_blank(),
        panel.background = element_rect(fill = "white"),
        panel.grid.major = element_line(color = "black", size = .1),
        axis.title.x = element_text(vjust = -0.5),
        axis.title.y = element_text(vjust = 0.5)
    ) +
    geom_text(
        aes(
            label = paste0(
                "$",
                prettyNum(as.numeric(value), big.mark = ",", scientific = FALSE)
            ),
            y = value,
            vjust = -.5,
            hjust = -.015
        )
    ) +
    ggtitle("Total Candidate Contributions")

##Vote Counts Pulled from

## Hand-entered contribution totals and vote counts, one row per candidate.
## The table is built directly from typed columns: mixing names and numbers in
## a single c() turns every number into text, which then has to be parsed
## back with as.numeric(as.character(...)).
votes <- data.frame(
    Candidate = c("Flores", "Gomez", "Saez", "Martinez"),
    Contributions = c(92365.76, 65412.02, 16273.75, 230.64),
    Votes = c(7348, 6567, 5023, 2589)
)
## Scatter plot: one point per candidate, votes received on the x axis and
## total contributions on the y axis.
ggplot(data = votes, aes(x = Votes, y = Contributions, color = Candidate)) +
    theme(
        legend.title = element_blank(),
        panel.background = element_rect(fill = "white"),
        panel.grid.major = element_line(color = "black", size = .1),
        axis.title.x = element_text(vjust = -0.5),
        axis.title.y = element_text(vjust = 0.5)
    ) +
    geom_point(size = 5) +
    ggtitle("Total Contribution Amount vs. Votes")

## Combine contributor_first and contributor_last to create something a little more useful

## Rebuild the candidate subsets from the full data set (contributions1), i.e.
## without the report-period cutoff applied to contributionsP.
## NOTE(review): these are the same hand-picked positions that were used on
## the date-filtered data; unique() may order this larger vector differently,
## so confirm they still select the intended recipients.
names <- unique(contributions1[, 5])[c(3, 11, 18, 49, 75, 87, 122)]

newCont <- contributions1[contributions1$recipient_name %in% names, ]

## Separate the two candidates analysed below.

flores <- newCont[newCont$recipient_name %in% c(
    "Flores Ricardo",
    "Ricardo Flores",
    "Flores for City Council 2016",
    "Urban Neighbors United supporting Ricardo Flores for City Council 2016 sponsrd by and w/ Major Funding from the SD Regional Chamber of Commerce PAC "
), ]

gomez <- newCont[newCont$recipient_name %in% "Gomez for City Council 2016", ]

## Build a single "First Last" donor name for every record.
flores$contributor_name <- paste(flores$contributor_first, flores$contributor_last, sep = " ")

## The assignment arrow was missing on this line, which is a parse error and
## prevents the whole script from running.
gomez$contributor_name <- paste(gomez$contributor_first, gomez$contributor_last, sep = " ")

## Count unique donors: paste() returns character vectors, and summary() of a
## character vector reports its Length, i.e. the number of distinct names.

f <- summary(unique(flores$contributor_name))

g <- summary(unique(gomez$contributor_name))

## Create new dataframe for visualization with all contributions: one column
## of individual donation amounts per candidate.
## (This statement had been swallowed by the comment line above it, so
## `contributions` was never actually created.)
flores_amounts <- flores$contribution_amount
gomez_amounts <- gomez$contribution_amount

## The candidates have different numbers of donations. Pad the shorter column
## with NA so nothing is recycled. Assigning length() extends a vector with
## NA, and the padding is computed instead of hard-coding the row difference.
n_rows <- max(length(flores_amounts), length(gomez_amounts))
length(flores_amounts) <- n_rows
length(gomez_amounts) <- n_rows

contributions <- data.frame(Flores = flores_amounts, Gomez = gomez_amounts)

## Long form (variable = candidate, value = amount) for the frequency plot.
newCont <- melt(contributions)

## No id variables; using all as measure variables
knitr::kable(summary(contributions), caption = "Candidate Donor Pool Summary Statistics")
Candidate Donor Pool Summary Statistics
Flores Gomez
Min. : -550.0 Min. : 0.0
1st Qu.: 100.0 1st Qu.: 99.0
Median : 200.0 Median : 100.0
Mean : 512.5 Mean : 209.7
3rd Qu.: 550.0 3rd Qu.: 200.0
Max. :25000.0 Max. :8000.0
NA NA’s :45

This next chart may require some explanation. Here we see the number of times each candidate has received a donation at a given dollar amount.

## Frequency polygon of donation amounts, one line per candidate, with a
## grey baseline drawn at zero.
ggplot(newCont, aes(x = value, color = variable, fill = variable)) +
    geom_freqpoly(bins = 200, size = .5) +
    theme(
        legend.title = element_blank(),
        panel.background = element_rect(fill = "white"),
        panel.grid.major = element_line(color = "black", size = .1),
        axis.title.x = element_text(vjust = -0.5),
        axis.title.y = element_text(vjust = 0.5)
    ) +
    labs(x = "Donation Amount", y = "Frequency") +
    geom_hline(yintercept = 0, colour = "grey75", size = .4) +
    ggtitle("Donation Amount Frequency")
## Warning: Removed 45 rows containing non-finite values (stat_bin).

## Rank the occupations of one candidate's donors.
##
## donations: data frame with `contributor_name` and `contributor_occ` columns.
## limit:     maximum number of occupations to keep (most common first).
##
## Returns a data frame with columns `Occupation` and `Count`. Each distinct
## (name, occupation) pair is counted once, so repeat donations by the same
## person do not inflate an occupation. `Occupation` is a factor whose levels
## are ordered by ascending Count so that ggplot draws the bars in that order.
##
## The dplyr verbs are namespace-qualified because plyr is attached as well
## and exports functions with the same names (count, arrange, desc).
## Nothing here filters empty values, so a blank occupation field is counted
## as a category of its own.
top_occupations <- function(donations, limit = 10) {
    donors <- dplyr::distinct(data.frame(
        Name = donations$contributor_name,
        Occupation = donations$contributor_occ
    ))
    jobs <- as.data.frame(dplyr::count(donors, Occupation))
    jobs <- dplyr::arrange(jobs, dplyr::desc(n))
    # head() rather than jobs[1:10, ]: with fewer than `limit` occupations the
    # index form would append all-NA rows.
    top <- head(jobs, limit)
    colnames(top) <- c("Occupation", "Count")
    ## Adjust factor levels
    top$Occupation <- factor(top$Occupation, levels = top$Occupation[order(top$Count)])
    top
}

floresTen <- top_occupations(flores)

gomezTen <- top_occupations(gomez)


## Bar chart of the most common donor occupations for Flores. The x labels
## are rotated 90 degrees so long occupation names stay readable.
floresContributorsPlot <- ggplot(floresTen, aes(x = Occupation, y = Count, fill = Occupation)) +
    geom_bar(stat = "identity") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    ggtitle("Flores") +
    theme(
        legend.title = element_blank(),
        panel.background = element_rect(fill = "white"),
        panel.grid.major = element_line(color = "black", size = .1),
        axis.title.x = element_text(vjust = -0.5),
        axis.title.y = element_text(vjust = 0.5)
    ) +
    labs(x = "Donor Occupation", y = "Frequency") +
    geom_hline(yintercept = 0, colour = "grey75", size = .4)

## Same chart for Gomez.
gomezContributorsPlot <- ggplot(gomezTen, aes(x = Occupation, y = Count, fill = Occupation)) +
    geom_bar(stat = "identity") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    ggtitle("Gomez") +
    theme(
        legend.title = element_blank(),
        panel.background = element_rect(fill = "white"),
        panel.grid.major = element_line(color = "black", size = .1),
        axis.title.x = element_text(vjust = -0.5),
        axis.title.y = element_text(vjust = 0.5)
    ) +
    labs(x = "Donor Occupation", y = "Frequency") +
    geom_hline(yintercept = 0, colour = "grey75", size = .4)

## Multiplot function, creds to: http://www.cookbook-r.com/Graphs/Multiple_graphs_on_one_page_(ggplot2)/

## Draw several plots on one page, arranged on a grid.
##
## ...:      plot objects to draw (printed with print(plot, vp = ...)).
## plotlist: optional list of further plots, appended after those in `...`.
## file:     accepted for compatibility; not used by this function.
## cols:     number of columns in the grid when `layout` is NULL.
## layout:   optional matrix of plot indices giving each plot's cell(s);
##           e.g. matrix(c(1, 2, 3, 3), nrow = 2, byrow = TRUE) puts plot 1
##           top-left, plot 2 top-right and plot 3 across the bottom row.
##           When supplied, `cols` is ignored.
##
## With no plots at all the function returns invisible NULL without touching
## the graphics device. grid functions are called as grid::fun() so that
## calling multiplot() does not attach a package to the search path.
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
    # Make a list from the ... arguments and plotlist
    plots <- c(list(...), plotlist)

    numPlots <- length(plots)

    # Nothing to draw: 1:numPlots would count down (1, 0) and index a plot
    # that does not exist, so bail out early.
    if (numPlots == 0) {
        return(invisible(NULL))
    }

    # If layout is NULL, then use 'cols' to determine layout
    if (is.null(layout)) {
        # ncol: number of columns of plots
        # nrow: number of rows needed, calculated from the number of columns
        numRows <- ceiling(numPlots / cols)
        layout <- matrix(seq_len(cols * numRows), ncol = cols, nrow = numRows)
    }

    if (numPlots == 1) {
        print(plots[[1]])

    } else {
        # Set up the page
        grid::grid.newpage()
        grid::pushViewport(
            grid::viewport(layout = grid::grid.layout(nrow(layout), ncol(layout)))
        )

        # Make each plot, in the correct location
        for (i in seq_len(numPlots)) {
            # Get the row/col positions of the layout cells holding plot i
            matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))

            print(plots[[i]], vp = grid::viewport(layout.pos.row = matchidx$row,
                                                  layout.pos.col = matchidx$col))
        }
    }
}

## Draw the two occupation charts on one page in a two-column grid
## (Gomez is plot 1, Flores is plot 2).
multiplot(gomezContributorsPlot, floresContributorsPlot, cols = 2)

## Total amount given by each donor, largest first.
gomezTop <- aggregate(gomez$contribution_amount, list(gomez$contributor_name), sum)
gomezTop <- setNames(arrange(gomezTop, desc(x)), c("Donor", "Amount"))

floresTop <- aggregate(flores$contribution_amount, list(flores$contributor_name), sum)
floresTop <- setNames(arrange(floresTop, desc(x)), c("Donor", "Amount"))

## Replace the name of the second-ranked Flores donor with a shorter label.
floresTop[2, 1] <- "Urban Neighbors United spnsred by SD Chamber"

## Drop the second-ranked Gomez row.
## NOTE(review): the reason is not recorded here -- confirm which entry this is.
gomezTop <- gomezTop[-2, ]

knitr::kable(gomezTop[1:10, ], caption = "Top Ten Donors: Gomez")
Top Ten Donors: Gomez
Donor Amount
1 San Diego County Democratic Party 10300
3 Christine Kehoe 1100
4 Frances Zimmerman 1100
5 Jane Reldan 1100
6 Muayad Kassab 800
7 Rena Marrocco 750
8 Roberta Alexander 750
9 Octavio Aguilar 700
10 Harold Georgiou 650
11 Georgette Gomez 561
## Render the first ten rows of the Flores donor ranking as a table.
knitr::kable(
    floresTop[seq_len(10), ],
    caption = "Top Ten Donors: Flores"
)
Top Ten Donors: Flores
Donor Amount
San Diego Regional Chamber of Commerce Political Action Committee (SD Chamber PAC) 47000.0
Urban Neighbors United spnsred by SD Chamber 43611.9
San Diego County Democratic Party 10300.0
Alan Viterbi 1100.0
David Scott Grimes 1100.0
William Ayyad 1100.0
William Jones 1100.0
Mathew Kostrinksy 1000.0
Nashwan Habib 1000.0
Robert P Ottilie 850.0

Then we just lay the shapefile over the map using ggmap,

library(ggmap)
library(rgdal)

## Shapefiles downloaded from SANDAG GIS Warehouse http://rdw.sandag.org/Account/DisplayREmail
## Special Thanks to R-Bloggers for this tutorial (and for literally hundreds of insights over the years)
## https://www.r-bloggers.com/shapefile-polygons-plotted-on-google-maps-using-ggmap-in-r-throw-some-throw-some-stats-on-that-mappart-2/
districts 

Which yields something like this (also cleaned up a smidge in Photoshop)

Heatmap Final.png

Advertisements

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s