# Rank political committees by total individual contributions for one
# election cycle, print the top `len_head` committees, and plot the top
# `len_plot` of them (total contributions in millions of dollars vs. rank).
#
# Files used, relative to the working directory:
#   20<yr>/cacm<yr>.csv          cached per-committee totals (read if present,
#                                otherwise rebuilt and written)
#   20<yr>/cm<yr>/cm.txt         pipe-delimited committee file
#   20<yr>/cm<yr>/cmte<yr>.csv   lookup merged on CMTE_NM
#                                (presumably supplies PRTY and CANDIDATE; verify)
#   20<yr>/indiv<yr>/itcont.txt  pipe-delimited individual contributions
#   headers/cm_header_file.csv, headers/indiv_header_file.csv
#                                column names for the two pipe-delimited files
#
# If an object named `oo` already exists in the session it is reused as the
# ordered per-committee table and nothing is read or rebuilt.

library(plyr)

# ---- Settings ----
yr <- 16                  # two-digit cycle year, used in paths and the title
readcsv <- TRUE           # use the cached CSV when it exists
len_name <- 40            # not referenced by the active code below
len_cmte_nm <- 40         # committee names are truncated to this width
len_occupation <- 32      # not referenced by the active code below
len_head <- 80            # number of committees printed
len_plot <- 50            # number of committees plotted
pos_left <- 1             # first `pos_left` labels go left of their point
end_date <- "December 2015"

csvfile <- paste0("20", yr, "/cacm", yr, ".csv")

# ---- Load or build the per-committee totals ----
if (!exists("oo")) {
  if (readcsv && file.exists(csvfile)) {
    print(paste("READ", csvfile))
    oo <- read.csv(csvfile)
  } else {
    cmfilename <- paste0("20", yr, "/cm", yr, "/cm.txt")
    cnfilename <- paste0("20", yr, "/cm", yr, "/cmte", yr, ".csv")
    ddfilename <- paste0("20", yr, "/indiv", yr, "/itcont.txt")

    print(paste("READ", cmfilename))
    cm <- read.table(cmfilename, sep = "|", header = FALSE, quote = "",
                     comment.char = "")
    print(paste("READ", cnfilename))
    cn <- read.csv(cnfilename)
    print(paste("READ", ddfilename))
    dd <- read.table(ddfilename, sep = "|", header = FALSE, quote = "\"",
                     comment.char = "")

    # The pipe-delimited files carry no header row; the column names come
    # from separate header files.
    names_df <- read.csv("headers/cm_header_file.csv")
    names(cm) <- names(names_df)
    names_df <- read.csv("headers/indiv_header_file.csv")
    names(dd) <- names(names_df)

    print("DO MERGE")
    m1 <- merge(dd, cm, by.x = "CMTE_ID", by.y = "CMTE_ID", all.x = TRUE)
    mm <- merge(m1, cn, by.x = "CMTE_NM", by.y = "CMTE_NM", all.x = TRUE)

    # Keep the raw contributor name in NAME0, then strip honorifics/suffixes
    # and a trailing single character (optionally followed by a period).
    mm$NAME0 <- mm$NAME
    mm$NAME <- sub(" +MR\\.", "", mm$NAME0)
    mm$NAME <- sub(" +MS\\.", "", mm$NAME)
    mm$NAME <- sub(" +MRS\\.", "", mm$NAME)
    mm$NAME <- sub(" +SR\\.", "", mm$NAME)
    mm$NAME <- sub(" +.\\.? *$", "", mm$NAME)

    print("DO DDPLY")
    # ND / NR are counted with sum() rather than table(PRTY)["D"]: indexing a
    # table by a value that is absent yields NA when PRTY is a character
    # column, which would make every later ND/NR comparison NA.
    dp <- ddply(
      mm, .(CMTE_NM, PRTY, CANDIDATE), summarize,
      TOTAL_CONTRIB = sum(as.numeric(TRANSACTION_AMT)),
      NCONTRIB = length(TRANSACTION_AMT),
      ND = sum(PRTY == "D", na.rm = TRUE),
      NR = sum(PRTY == "R", na.rm = TRUE),
      N_ = NCONTRIB - (ND + NR),
      .progress = "text"
    )

    print("ORDER BY TOTAL CONTRIBUTIONS")
    oo <- dp[order(-dp$TOTAL_CONTRIB), ]

    print(paste("WRITE", csvfile))
    write.csv(oo, csvfile)
  }
}

if (nrow(oo) == 0) {
  stop("no committee totals available to report", call. = FALSE)
}

# ---- Printed table ----
xx <- head(oo, n = len_head)
# A CSV written by write.csv() comes back with its row names in column "X".
if ("X" %in% colnames(xx)) {
  xx <- subset(xx, select = -c(X))
}
rownames(xx) <- seq_len(nrow(xx))

# Never plot more rows than are actually available.
n_plot <- min(len_plot, nrow(xx))

# Capture the numeric totals (in millions) before they become formatted text.
TOTAL_CONTRIB_NUM <- head(xx$TOTAL_CONTRIB / 1000000, n = n_plot)

xx$TOTAL_CONTRIB <- format(xx$TOTAL_CONTRIB, big.mark = ",",
                           scientific = FALSE)
xx$NCONTRIB <- format(xx$NCONTRIB, big.mark = ",", scientific = FALSE)
xx$CMTE_NM <- strtrim(xx$CMTE_NM, width = len_cmte_nm)
print(xx)
readline("Press enter to continue, escape to exit")

# ---- Plot ----
yy <- xx[seq_len(n_plot), ]
x11()

# Point/label colour by party; missing counts (e.g. from an older cached
# CSV) are treated as zero so the comparison never yields NA.
nd <- ifelse(is.na(yy$ND), 0, yy$ND)
nr <- ifelse(is.na(yy$NR), 0, yy$NR)
yy$col <- "green3"
yy$col[nd > nr] <- "blue3"
yy$col[nd < nr] <- "red3"

# Label position: 4 = right of the point, 2 = left of the point.
yy$pos <- 4
yy$pos[seq_len(max(0, min(pos_left, n_plot)))] <- 2

# Plot symbol per candidate: 16 for any candidate not listed, 3 for a blank
# candidate, 4 for the candidates the legend labels "(withdrawn)".
candidate_pch <- c(
  CLINTON = 8, SANDERS = 1, BUSH = 8, CHRISTIE = 1, CRUZ = 2, HUCKABEE = 9,
  JINDAL = 4, PAUL = 0, PERRY = 4, RUBIO = 6, WALKER = 4
)
cand <- as.character(yy$CANDIDATE)
known <- cand %in% names(candidate_pch)
yy$pch <- 16
yy$pch[cand %in% ""] <- 3
yy$pch[known] <- unname(candidate_pch[cand[known]])

# Candidate values seen in the data that have no dedicated symbol:
#"CARSON, BENJAMIN S SR MD"
#"COCHRAN, THAD"
#"FIORINA"
#"KASICH, JOHN R"
#"LOPEZ-CANTERA, CARLOS"
#"MOULTON, SETH"
#"MURPHY, ???"
#"NUNN, MARY MICHELLE"
#"PATAKI, GEORGE E"
#"PORTMAN, ROB"
#"ROMNEY, MITT / PAUL D. RYAN"
#"SCHWEITZER, ???"
#"TRUMP, DONALD J"
#"WARD, ???"

legtxt <- c("Clinton", "Sanders", "Bush", "Christie", "Cruz", "Huckabee",
            "Paul", "Rubio", "(withdrawn)", "(blank)", "(other)")
legcol <- c("blue3", "blue3", "red3", "red3", "red3", "red3",
            "red3", "red3", "red3", "red3", "red3")
legpch <- c(8, 1, 8, 1, 2, 9, 0, 6, 4, 3, 16)

RANK <- seq_len(n_plot)
# The count and cycle years are derived from the settings instead of being
# hard-coded; with len_plot = 50 and yr = 16 the text is unchanged.
plot_title <- paste0(
  "TOP ", n_plot, " TOTAL CONTRIBUTIONS BY COMMITTEE\n(20", yr - 1, "-20", yr,
  " cycle through ", end_date, ")"
)
xlabel <- "TOTAL CONTRIBUTIONS (millions of dollars)"
#sub <- "Source: http://econdataus.com/cfin16.htm"

plot(TOTAL_CONTRIB_NUM, RANK, cex = 0.6, col = yy$col, pch = yy$pch,
     main = plot_title, xlab = xlabel, ylab = "RANK")
grid()
text(TOTAL_CONTRIB_NUM, RANK, labels = yy$CMTE_NM, cex = 0.6, col = yy$col,
     pos = yy$pos)
legend("bottomleft", c("Democrat", "Republican", "Non-partisan"), cex = 0.7,
       col = c("blue3", "red3", "green3"), pch = 3)
legend("topright", legtxt, cex = 0.7, col = legcol, pch = legpch)
#legend("bottom", "Source: http://econdataus.com/cfin16.htm", cex=0.6)
#identify(TOTAL_CONTRIB_NUM, RANK, labels=yy$CMTE_NM, n=100, pos=4, cex=0.6, col="red")