The purpose of this code repository is to create a one-stop-shop for code for most common static and interactive data visualizations used for projects with examples from previous RDA projects.
Handy Reference Links:
Code includes:
Applying Weighted Average functions and Race Counts functions to Proximity to Hazards indicator.
Weighted Average functions calculate the percentage of each group’s target geo (county/state) population within each sub geo. The function works from census tract to County or State aggregation. See script: “W:\RDA Team\R\Functions\Cnty_St_Wt_Avg_Functions.R”
Race Counts Functions calculate the number of _rate values, index of disparity, ranks, population or sample variance, county and state Z-scores, quadrant colors. See script: “W:\Project\RACE COUNTS\2022_v4\RaceCounts\RC_Functions.R”
# Proximity to Hazards Weighted Avg for RC 2022
library(dplyr)
library(data.table)
library(tidycensus)
library(sf)
library(RPostgreSQL)
library(stringr)
library(tidyr)
options(scipen=999)
###### SET UP WORKSPACE #######
# load census api key
#census_api_key("25fb5e48345b42318ae435e4dcd28ad3f196f2c4", install = TRUE)
#readRenviron("~/.Renviron")
# load the PostgreSQL driver
drv <- dbDriver("PostgreSQL")
# create connection for rda database
con <- dbConnect(drv, dbname = "rda_shared_data",
host = "aws-postgres-db.c5udgz7ro8hq.us-west-2.rds.amazonaws.com", port = 5432,
user = "postgres", password = "password")
#set source for RC Functions script
source("W:/RDA Team/R/Functions/Cnty_St_Wt_Avg_Functions.R")
##### GET INDICATOR DATA ######
# load indicator data
ind_df <- st_read(con, query = "select ct_geoid AS geoid, cleanup from built_environment.oehha_ces4_tract_2021")
ind_df <- dplyr::rename(ind_df, sub_id = geoid, indicator = cleanup) # rename columns for functions
############# COUNTY ##################
###### DEFINE VALUES FOR FUNCTIONS ######
# set values for weighted average functions - You may need to update these
year <- c(2020) # define your data vintage
subgeo <- c('tract') # define your sub geolevel: tract (unless the WA functions are adapted for a different subgeo)
targetgeolevel <- c('county') # define your target geolevel: county (state is handled separately)
survey <- "acs5" # define which Census survey you want
pop_threshold = 250 # define population threshold for screening
##### CREATE COUNTY GEOID & NAMES TABLE ###### You will NOT need this chunk if your indicator data table has target geolevel names already
targetgeo_names <- county_names(vars = vars_list, yr = year, srvy = survey)
targetgeo_names <- select(as.data.frame(targetgeo_names), target_id = GEOID, target_name = NAME) %>% # get targetgeolevel names
mutate(target_name = sub(" County, California", "", target_name)) # rename columns
targetgeo_names <- distinct(targetgeo_names, .keep_all = FALSE) # keep only unique rows, 1 per target geo
#####
##### GET SUB GEOLEVEL POP DATA ######
pop <- update_detailed_table(vars = vars_list, yr = year, srvy = survey) # subgeolevel pop
# transform pop data to wide format
pop_wide <- lapply(pop, to_wide)
#### add target_id field, you may need to update this bit depending on the sub and target_id's in the data you're using
pop_wide <- as.data.frame(pop_wide) %>% mutate(target_id = substr(GEOID, 1, 5)) # use left 5 characters as target_id
pop_wide <- dplyr::rename(pop_wide, sub_id = GEOID) # rename to generic column name for WA functions
# calc target geolevel pop and number of sub geolevels per target geolevel
pop_df <- targetgeo_pop(pop_wide)
##### COUNTY WEIGHTED AVG CALCS ######
pct_df <- pop_pct(pop_df) # calc pct of target geolevel pop in each sub geolevel
wa <- wt_avg(pct_df) # calc weighted average and apply reliability screens
wa <- wa %>% left_join(targetgeo_names, by = "target_id") # add in target geolevel names
############# STATE CALCS ##################
# get and prep state pop
ca_pop_wide <- state_pop(vars = vars_list, yr = year, srvy = survey)
# calc state wa
ca_pct_df <- ca_pop_pct(ca_pop_wide)
ca_wa <- ca_wt_avg(ca_pct_df)
############ JOIN COUNTY & STATE WA TABLES ##################
wa <- union(wa, ca_wa)
wa <- rename(wa, geoid = target_id, geoname = target_name) # rename columns for RC functions
wa <- wa %>% dplyr::relocate(geoname, .after = geoid) # move geoname column
d <- wa
#View(d)
############## CALC RACE COUNTS STATS ##############
############ To use the following RC Functions, 'd' will need the following columns at minimum:
############ county_id and total and raced _rate (following RC naming conventions) columns. If you use a rate calc function, you will need _pop and _raw columns as well.
#set source for RC Functions script
source("W:/Project/RACE COUNTS/2022_v4/RaceCounts/RC_Functions.R")
d$asbest = 'min' #YOU MUST UPDATE THIS FIELD AS NECESSARY: assign 'min' or 'max'
d <- count_values(d) #calculate number of "_rate" values
d <- calc_best(d) #calculate best rates -- be sure to define 'asbest' accordingly before running this function.
d <- calc_diff(d) #calculate difference from best
d <- calc_avg_diff(d) #calculate (row wise) mean difference from best
d <- calc_s_var(d) #calculate (row wise) population or sample variance. be sure to use calc_s_var for sample data or calc_p_var for population data.
d <- calc_id(d) #calculate index of disparity
#split STATE into separate table and format id, name columns
state_table <- d[d$geoname == 'California', ]
#calculate STATE z-scores
state_table <- calc_state_z(state_table)
state_table <- rename(state_table, state_id = geoid, state_name = geoname)
#View(state_table)
#remove state from county table
county_table <- d[d$geoname != 'California', ]
#calculate COUNTY z-scores
county_table <- calc_z(county_table)
county_table <- calc_ranks(county_table)
county_table <- rename(county_table, county_id = geoid, county_name = geoname)
#View(county_table)
###update info for postgres tables###
county_table_name <- "arei_hben_haz_weighted_avg_county_2022"
state_table_name <- "arei_hben_haz_weighted_avg_state_2022"
indicator <- "Proximity to Hazards Score (sum of weighted EnviroStor cleanup sites within buffered distances to populated blocks of census tracts)"
source <- "CalEnviroScreen 4.0 https://oehha.ca.gov/calenviroscreen/report/calenviroscreen-40"
#send tables to postgres
#to_postgres()
Code includes:
# Run automated reports
#### Set up workspace ####
#library(here)
#library(sf)
#library(RPostgreSQL)
#library(dplyr)
# loads the PostgreSQL driver
#drv <- dbDriver("PostgreSQL")
# create connection for rda database
#drv <- dbDriver("PostgreSQL")
#con <- dbConnect(drv, dbname = "lafed22",
# host = "aws-postgres-db.c5udgz7ro8hq.us-west-2.rds.amazonaws.com", port = 5432,
# user = "postgres", password = "password")
# source functions
#source("W:/Project/RDA Team/LAFed/R/LAFed/lafed_data_functions.R")
#### Prep Data: SD ####
# prep data of statistics by CD/SD
# read SD in data from postgres
#sd<-dbGetQuery(con, "SELECT * FROM supervisor_rates_long")
# clean up and prep sd table
#sd <- sd %>%
# mutate(group = recode(group,
# sup_dist_1 = "1",
# sup_dist_2 = "2",
# sup_dist_3 = "3",
# sup_dist_4 = "4",
# sup_dist_5 = "5"))%>%
# filter(!grepl('total', group))%>%
# mutate_if(is.numeric, round, 1)%>%
# rename("district"="group")
#sd <- cv_pop_prep(sd)
#sd <- add_labels(sd)
#sd <- remove_unknown_demo(sd)
#sd <- clean_surveyresponse(sd)
#sd<-sd%>%
# mutate(geo="Supervisorial District")
#
#make survey responses lower case
#sd$survey_response_label<-tolower(sd$survey_response_label)
# revise order of columns
#sd<-sd[,c(1,11,2,3,4,5,6,7,8,9,10)]
# manipulate data to get top 4 responses/values to each question
#sd<-sd%>%
# group_by(district, geo, survey_question)%>%
# arrange(-response_rate, .by_group = TRUE)%>%
# slice_head(n=4)%>% ##choose threshold for top "n" responses to question
# mutate(rank = row_number()) #add rank # for subsetting in template
# split sd table into list
#sd_list <- split(sd, sd$district, drop=FALSE)
#### Prep Data: CD ####
# read in CD data:
#cd <- dbGetQuery(con, "SELECT * FROM city_council_rates_long")
# clean up and prep cd table
#cd <- cd %>%
# mutate(group = recode(group,
# council_dist_1 = "1",
# council_dist_2 = "2",
# council_dist_3 = "3",
# council_dist_4 = "4",
# council_dist_5 = "5",
# council_dist_6 = "6",
# council_dist_7 = "7",
# council_dist_8 = "8",
# council_dist_9 = "9",
# council_dist_10 = "10",
# council_dist_11 = "11",
# council_dist_12 = "12",
# council_dist_13 = "13",
# council_dist_14 = "14",
# council_dist_15 = "15"))%>%
# filter(!grepl('total', group))%>%
# rename(district=group)%>%
# mutate_if(is.numeric, round, 1)
#cd <- cv_pop_prep(cd)
#cd <- add_labels(cd)
#cd <- clean_surveyresponse(cd)
#cd<-cd%>%
# mutate(geo="City Council District")
#make survey responses lower case
#cd$survey_response_label<-tolower(cd$survey_response_label)
# revise order of columns
#cd<-cd[,c(1,11,2,3,4,5,6,7,8,9,10)]
# manipulate data to get top 4 responses/values to each question
#cd<-cd%>%
# group_by(district, geo, survey_question)%>%
# arrange(-response_rate, .by_group = TRUE)%>%
# slice_head(n=4)%>% ##choose threshold for top "n" responses to question
# mutate(rank = row_number()) #add rank # for subsetting in template
# custom order factor levels to be numerical order
#cd$district <- factor(cd$district ,levels = c("1","2","3","4","5","6","7","8","9","10","11","12",
# "13","14","15"))
# split cd table into list
#cd_list <- split(cd, cd$district, drop=FALSE)
#### Loop through CD / SD data lists ####
# loop through for CD one pagers:
# for(i in seq_along(cd_list)) {
# dat <- cd_list[[i]]
# rmarkdown::render(input = "W:/Project/RDA Team/LAFed/R/LAFed/One_Pager_Template.Rmd",
# output_format = "word_document",
# output_file = paste0("CD_",i, ".doc"),
# output_dir = here("W:/Project/RDA Team/LAFed/Deliverables/One Pagers"))
# }
# loop through for SD one pagers:
# for(i in seq_along(sd_list)) {
# dat <- sd_list[[i]]
# rmarkdown::render(input = "W:/Project/RDA Team/LAFed/R/LAFed/One_Pager_Template.Rmd",
# output_format = "word_document",
# output_file = paste0("SD_", i, ".doc"),
# output_dir = here("W:/Project/RDA Team/LAFed/Deliverables/One Pagers"))
#}
#dbDisconnect(con)
Code includes:
# load update functions
## Be sure to check the ZCTA data year matches the data year you are calling here ##
source("W:/RDA Team/R/ACS Updates/ACS Update Functions - template.R")
# loads the PostgreSQL driver
drv <- dbDriver("PostgreSQL")
# create connection for rda database
con <- dbConnect(drv, dbname = "rda_shared_data",
host = "aws-postgres-db.c5udgz7ro8hq.us-west-2.rds.amazonaws.com", port = 5432,
user = "postgres", password = "password")
# load ACS vars. Manually define year and ACS data set, 5-year, 1-year etc.
acs19 <- load_variables(2019, "acs5", cache = TRUE)
##### Update Table #####
## UPDATE POSTGRES SCHEMA, TABLE NAME, AND TABLE COMMENT ##
# define table destination schema and final table name here, to be used in column comments in the rest of the script.
# define the table source, to be used in the table comments.
table_schema <- "economic."
table_name <- "acs_5yr_b19301_multigeo_2019"
table_comment_source <- "ACS 2015-2019 5-Year Estimate Table B19301."
# MANUAL UPDATE REQUIRED--QUERY YOUR VARIABLES FROM THE ACS Variable List (B01001A-I or B01001 are the examples here)
# define variables - Select either raced tables or Total table option from below.
# For all raced tables use the following:
#vars_list <- subset(acs19, name %like% "B19301" & !(name %like% "B19301_"))
# this query example excludes the Total detailed table (e.e B01001) because the column format is different (more fields, more age groups). It should be handled separately.
# For total table use the following:
vars_list <- subset(acs19, name %like% "B19301_")
# Character vector of variable names
vars_list <- vars_list$name
# get data for all geographies with ACS Update functions
df <- update_detailed_table(vars = vars_list, yr = 2019, srvy = "acs5")
# get metadata
df_metadata <- update_detailed_table_metadata(vars = vars_list, yr = 2019, srvy = "acs5")
# transform to wide data format
df_wide <-
lapply(df, function(x) x %>%
# rename estimate and moe to be better for wide format
rename("e" = estimate,
"m" = moe) %>%
# make wide
pivot_wider(id_cols = c(GEOID, NAME, geolevel),
names_from = variable,
values_from = c(e, m),
names_glue = "{variable}{.value}") # specify the order of col names
)
# collapse list to make a multigeo table
df_wide_multigeo <- do.call("rbind", df_wide)
# make colnames lower case
colnames(df_wide_multigeo) <- tolower(colnames(df_wide_multigeo))
# make character vector for field types in postgresql db
charvect = rep('numeric', dim(df_wide_multigeo)[2])
# change data type for first three columns
charvect[1:3] <- "varchar" # first three are characters for the geoid and names
# add names to the character vector
names(charvect) <- colnames(df_wide_multigeo)
##### Export Table to postgres db #####
# write schema and table name here
#dbWriteTable(con, c("economic", "acs_5yr_b19301_multigeo_2019"), df_wide_multigeo,
# overwrite = TRUE, row.names = FALSE,
# field.types = charvect)
# TABLE COMMENTS
# write comment to table, and the first three fields that won't change.
#table_comment_df_wide <- paste0("COMMENT ON TABLE ", table_schema, table_name, " IS '", table_comment_source,
#" Wide data format, multigeo table with state, county, place, PUMA, ZCTA (CA) and #tract.';
#COMMENT ON COLUMN ", table_schema, table_name, ".geoid IS 'Census geographic ID';
#COMMENT ON COLUMN ", table_schema, table_name, ".name IS 'Census name of #geography.';
#COMMENT ON COLUMN ", table_schema, table_name, ".geolevel IS 'Geographic level #(state, county, place, PUMA, ZCTA (CA) or tract). Downloaded using template scripts here: W > RDA Team > R > ACS Updates > Economic > Update Detailed Tables_B19301.R';")
# send table comment + first three (static) col comments to database
#dbSendQuery(conn = con, table_comment_df_wide)
# COLUMN COMMENTS
# format metadata
#colcomments <-
# change the metadata name depending on the table -- should improve this to not be hard coded
# df_metadata %>%
# mutate(variable = tolower(variable),
# label = gsub("'", "", label),
# label = paste0(gsub("!!", " ", label), ": ", concept)) # remove apostrophes from the metadata
# make character vectors for column names and metadata.
#colcomments_charvar <- colcomments$label
#colname_charvar <- colcomments$variable
# loop through the columns that will change depending on the table. This loop writes a comment for the estimate and MOE for a particular column, then sends to the db.
#for (i in seq_along(colname_charvar)){
# sqlcolcomment <-
# change the table name here depending -- could improve this to not be hardcoded.
# paste0("COMMENT ON COLUMN ", table_schema, table_name, ".",
# colname_charvar[[i]], "e IS '", colcomments_charvar[[i]], "'; COMMENT ON #COLUMN ", table_schema, table_name, ".",
# colname_charvar[[i]], "m IS 'MOE for ", colcomments_charvar[[i]], "';" )
# send sql comment to database
# dbSendQuery(conn = con, sqlcolcomment)
#}
# index the table
#dbSendQuery(conn = con,
# paste0("create index ", table_name, "_geoid on ", table_schema, #table_name, " (geo
This single bar graph shows the annual percentage of of mothers who did not receive prenatal care until after their first trimester in pregnancy by supervisorial district, along with the Los Angeles County percentage for comparison. The graph clearly shows that Districts 1, 2 and 4 both exceed the county percentage for women who receive late prenatal care. This graph was used for the First 5 LA project.
Code includes:
library(RPostgreSQL)
library(highcharter)
library(dplyr)
library(tidyr)
# loads the PostgreSQL driver
drv <- dbDriver("PostgreSQL")
# create connection for rda database
con_f5la <- dbConnect(drv, dbname = "f5la_v2",
host = "aws-postgres-db.c5udgz7ro8hq.us-west-2.rds.amazonaws.com", port = 5432,
user = "postgres", password = "password")
####### ESTABLISH HIGHCHART THEME ############
thm <- hc_theme(
colors = c("#033E8C", "#EC098D", "#04B2D9","#26BFB0", "#64BF4B"),
chart = list(
backgroundColor = ""
),
title = list(
style = list(
color = "#333333",
fontFamily = "Franklin Gothic Condensed"
)
),
subtitle = list(
style = list(
color = "#666666",
fontFamily = "Franklin Gothic"
)
),
legend = list(
itemStyle = list(
fontFamily = "Franklin Gothic",
color = "black"
),
itemHoverStyle = list(
color = "gray"
)
)
)
#grab data for visualizing
prenatal<-dbGetQuery(con_f5la, "SELECT * FROM lamb_prenatal_care_supdist_2016")
#remove the 'total' row because we will plot it as a y intercept line to match other graphs
prenatal<-prenatal%>%
filter(!grepl('Los Angeles County', bos_name))%>%
mutate(num_total = as.character(formattable::comma(num_total, 0)))
#graph
hchart(prenatal, "column",
hcaes(x=bos_name, y=pct_total),
tooltip = list(pointFormat = "Percent: {point.pct_total:.1f}%<br>
Number: {point.num_total}"))%>%
hc_tooltip(crosshairs = TRUE)%>%
hc_xAxis(title = list(text = "Supervisorial Districts"))%>%
hc_yAxis(title = list(text = ""),
labels = list(
formatter = JS("function () {
return Math.abs(this.value) + '%';
}")), min=0,max=15,
plotLines = list(
list(
label = list(text = "LA County = 8.2%", align = "left", y=-10, x=600),
color = "#FF0000",
width = 2,
value = 8.2,
zIndex=5
)
))%>%
hc_title(
text = "Annual Percentage of Mothers Who Entered Prenatal Care After 1st Trimester of Pregnancy",
margin = 20,
align = "left",
style = list(useHTML = TRUE, fontWeight="bold")
)%>%
hc_subtitle(
text = '<a href="http://publichealth.lacounty.gov/mch/lamb/LAMB.html">Los Angeles County Department of Public Health Los Angeles Mommy and Baby Project</a>',
style = list( fontWeight = "bold"),
align = "left"
)%>%
hc_legend(enabled = TRUE)%>%
hc_add_theme(thm)
This horizontal single bar graph shows the race and ethnicity percentages by PUMA in the Wilmington Best Start Geography. The hover pop up shows both the percentage and estimate for each racial group. There is also an export option on the graph that allows the graph to be downloaded as an image, with data labels appearing on the static download image.
Code includes:
### Set up workspace ####
#install.packages("extrafont")
library(extrafont)
library(here)
library(dplyr)
library(RPostgreSQL)
library(formattable)
library(svglite)
library(stringr)
library(tidyr)
library(showtext)
library(scales)
library(highcharter)
options(scipen=999)
# set global options to ensure that comma separator is a comma for highchart graphs
lang <- getOption("highcharter.lang")
lang$thousandsSep <- ","
options(highcharter.lang = lang)
# loads the PostgreSQL driver
drv <- dbDriver("PostgreSQL")
con <- dbConnect(drv, dbname = "tnp",
host = "aws-postgres-db.c5udgz7ro8hq.us-west-2.rds.amazonaws.com", port = 5432,
user = "postgres", password = "password")
####################
#### Style Guide ###
####################
## COLORS: There is only blue ##
black <- "#000000"
mainblue <- "#22BCB8"
textgrey <- "#919191"
linecolor<-"#FF0000"
# define fonts in chart
font_title <- "Arial"
font_subtitle <- "Arial"
font_caption <- "Arial"
font_bar_label <- "Arial"
font_axis_label <- "Arial"
font_table_text<-"Arial"
####################################
########Highchart theme#############
####################################
#### Define Highcharts theme ####
tnp_theme <- hc_theme(
colors = c("#22BCB8" # main blue
),
chart = list(
backgroundColor = "white",
style = list(
fontFamily = font_subtitle
)
),
plotOptions =
list(
line =
list(
marker =
list(
enabled = FALSE
)
)
),
title = list(
style = list(
color = black,
fontFamily = font_title,
textAlign="left",
fontsize='200px'
)
),
subtitle = list(
style = list(
color = black, # medium grey
fontFamily = font_subtitle,
textAlign="left",
fontsize='100px'
)
),
caption = list(
style = list(
color = black,
fontFamily = font_caption,
textAlign = "left",
fontsize="12px"
),
useHTML = TRUE
),
axis = list(
style = list(
color = black,
fontFamily = font_axis_label,
fontSize='50px'
)
),
legend = list(
itemStyle = list(
fontFamily = font_axis_label,
color = black
),
itemHoverStyle = list(
fontFamily = font_table_text,
color = black
)
)
)
###################################
###Set Up Data Captions and Info###
###################################
### Info: Race
lang <- getOption("highcharter.lang")
lang$thousandsSep <- ","
options(highcharter.lang = lang)
# name your issue area:
# name your issue area:
issue <- "Demographics"
# write the name of your indicator and geography (CLB, WIL, or Reg4) here: [indicator name_geography]
var <- "Race_WIL"
# Title of chart here:
title_text <- "Population by Race and Ethnicity"
#Subtitle text if necessary:
subtitle_text<-""
#Geography level
geolevel<-"Geography: Wilmington"
# Write your data source here:
datasource <- "Data source: American Community Survey Public Use Microdata, 2015-2019 5-Year Estimate."
# write a brief version of your methodology here if necessary, which INCLUDES language about race coding:
methodbrief<- "Data Note: Race groups are Latinx-exclusive, except AIAN and NHPI, which include all people who identify as AIAN or NHPI\nincluding in combination with other races and ethnicities."
# grab view or table from postgres that will be visualized
# see indicator matrix for list of views and tables for each indicator:
# https://advancementproject.sharepoint.com/:x:/r/sites/Portal/_layouts/15/Doc.aspx?sourcedoc=%7Be5a1f3e6-15ca-480e-b33e-07bbfd55ae26%7D&action=editnew&cid=d736e070-c6f6-4063-8a1d-efbf3b48beb0
##########################
###########Graph##########
##########################
df<-dbGetQuery(con, "SELECT * FROM puma_race_2019")
#### Graph: Race ####
#Data frame prep: convert data frame to long if necessary ; Recode 'Race Values' as necessary for nicer race labeling on the graph
df<-df%>%
mutate(race_label=ifelse(race %in% 'AIAN Alone or in Combination', 'American Indian/Alaska Native',
ifelse(race %in% 'Asian NL', 'Asian',
ifelse(race %in% "Two or More NL", 'Two or More',
ifelse(race %in% 'Latinx', 'Latinx',
ifelse(race %in% 'NHPI Alone or in Combination', 'Native Hawaiian/Pacific Islander',
ifelse(race %in% 'Black NL', 'Black',
ifelse(race %in% 'White NL', 'White',
ifelse(race %in% 'Other NL', 'Other',
ifelse(race %in% 'Total', 'Total', 'Blank'))))))))))%>%
mutate(race_pct=race_pct*100)
#Note: We DO NOT need to divide percents by 100 for highcharts. If percents in df are in decimal form, then MULTIPLY by 100
plot_pums_race<-df %>%
filter(!is.na(race_pct) & best_start =='Wilmington' & race_label !='Blank') %>%
arrange(-race_pct) %>%
hchart(
type = "bar",
hcaes(y = round(race_pct, 2), x = race_label),
color = mainblue,
tooltip = list(
pointFormat = "Total: {point.race_tot:,.0f}<br>
Rate: {point.race_pct:.1f}%"),
) %>%
hc_tooltip(crosshairs = TRUE) %>%
hc_xAxis( title = list(text = ""),
labels=list(style=list(fontSize='15px'))) %>%
hc_yAxis( title = list(text = ""),
labels = list(format = "{value}%")) %>% ##this is for rate indicators only adding % signs
# title elements
hc_title(
text = paste0(title_text),
#margin = 20,
align = "left",
style = list(useHTML = TRUE, fontWeight = "bold", fontSize='30px')
) %>%
hc_subtitle(
text = paste0(subtitle_text)
) %>%
hc_caption(
text = paste0("<br>",geolevel,"<br>",
'<a href="https://jzhang514.github.io/TNP/Additional-Resources">Data Source: American Community Survey, 2015-2019 5-Year Estimate.</a>',
"<br>",methodbrief),
style=list(fontSize='15px')
#align = "left"
) %>%
hc_add_theme(tnp_theme)%>%
hc_exporting(
enabled = TRUE, sourceWidth=900, sourceHeight=600,
chartOptions=list(plotOptions=list(series=list(dataLabels=list(enabled=TRUE, format='{y} %')))),
filename = "plot"
)
# review the plot
plot_pums_race
This grouped graph shows households that earn above and below the Real Cost Measure in the Region 4 Best Start Geographies (Central Long Beach and Wilmington). Data is from the 2019 United Way Real Cost Measure report and was used for the First 5 LA project.
Code includes:
##################
######Set up######
#################
library(ggplot2)
library(dplyr)
library(stringr)
library(RPostgreSQL)
library(tidyr)
library(scales)
library(knitr)
library(data.table)
library(sp)
library(sf)
library(rgdal)
library(rpostgis)
library(highcharter)
library(kableExtra)
## set up options for comma separators
lang <- getOption("highcharter.lang")
lang$thousandsSep <- ","
options(highcharter.lang = lang)
# Data Setup
pw <- {
"password"
}
# loads the PostgreSQL driver
drv <- dbDriver("PostgreSQL")
con <- dbConnect(drv, dbname = "f5la_v2",
host = "aws-postgres-db.c5udgz7ro8hq.us-west-2.rds.amazonaws.com", port = 5432,
user = "postgres", password = pw)
#############################################
####### ESTABLISH HIGHCHART THEME ############
##############################################
thm <- hc_theme(
colors = c("#22BCB8", "#009CDB", "#332985","#EC098C", "#54B847"),
chart = list(
backgroundColor = ""
),
title = list(
style = list(
color = "#333333",
fontFamily = "Franklin Gothic Condensed"
)
),
subtitle = list(
style = list(
color = "#666666",
fontFamily = "Franklin Gothic"
)
),
legend = list(
itemStyle = list(
fontFamily = "Franklin Gothic",
color = "black"
),
itemHoverStyle = list(
color = "gray"
)
)
)
#####Grab data from postgres
rcm_bsc<-dbGetQuery(con, "SELECT * FROM real_cost_measure_bsc_new")%>%
select(1,5,7)%>%
filter(grepl('Central Long Beach|Wilmington', best_start))
#####Create stacked bar graph for rcm by bsc
#Convert data wide to long for variables of interest
rcm_long<-rcm_bsc%>%
gather( hh_type, number, num_hh_below_rcm:num_hh_above_rcm, factor_key=TRUE)%>%
mutate(variable=ifelse(hh_type %in% 'num_hh_above_rcm', 'Number of Households Above the Real Cost Measure', 'Number of Households Below the Real Cost Measure'))%>%
mutate(num_label=as.character(formattable::comma(number, 0)))
#highcharts
hchart(rcm_long, "column",
hcaes(x=best_start, y=number, group=variable),
dataLabels = list(enabled = TRUE,
format = '{point.num_label:.1f}'),
tooltip = list(pointFormat = "{point.num_label:,.0f}"))%>%
hc_tooltip(crosshairs = TRUE)%>%
hc_xAxis(title = list(text = ""))%>%
hc_yAxis(title = list(text = ""))%>%
hc_title(
text = "Number of Households Earning less than the Real Cost Measure",
margin = 20,
align = "left",
style = list(useHTML = TRUE, fontWeight="bold")
)%>%
hc_caption(text="Source Struggling to Stay Afloat: The Real Cost Measure in California 2019. United Ways of California. http://www.unitedwaysca.org/realcost")%>%
hc_legend(enabled = TRUE, layout='vertical')%>%
hc_add_theme(thm)%>%
hc_exporting(
enabled = TRUE, sourceWidth=800, sourceHeight=500,
chartOptions=list(subtitle=NULL),
filename = "plot"
)
Code includes:
####User-Defined R Functions to create Visuals for the LAFed Project####
#Last Updated: 06/10/22
#Author: Maria Khan
# The following file aims to serve as a visuals template for the
# 2022 APCA project with the LA County Federation of Labor and
# Unemployed Workers United for the People's Project initiative.
# Overall Research Question: What do LA County/LA City residents need
# to feel economically safe and stable?
# Goal: Analysis to aid the advocacy efforts of LA Fed and UWU for greater
# protection of laborers and an increase in mutual aid from the supervisorial
# and city council legislators.
#### Set up workspace ####
library(extrafont)
library(tidyverse)
library(here)
library(dplyr)
library(data.table)
# library(tidycensus)
library(sf)
library(ggplot2)
library(RPostgreSQL)
library(formattable)
library(svglite)
library(stringr)
library(tidyr)
library(showtext)
library(scales)
library(kableExtra)
library(flextable)
library(highcharter)
####Load in data from the LAFed Survey and Prep ####
drv <- dbDriver("PostgreSQL")
con_lf <- dbConnect(drv, dbname = "lafed22",
host = "aws-postgres-db.c5udgz7ro8hq.us-west-2.rds.amazonaws.com", port = 5432,
user = "postgres", password = "password")
db_lafed_survey <- dbGetQuery(con_lf, "SELECT * FROM lafed_survey_data")
db_age_rates <- dbGetQuery(con_lf, "SELECT * FROM age_rates")
db_gender_rates <- dbGetQuery(con_lf, "SELECT * FROM gender_rates")
db_income_rates <- dbGetQuery(con_lf, "SELECT * FROM income_rates")
db_race_rates <- dbGetQuery(con_lf, "SELECT * FROM race_rates")
db_tenure_rates <- dbGetQuery(con_lf, "SELECT * FROM tenure_rates")
# db_union_rates <- dbGetQuery(con_lf, "SELECT * FROM union_rates")
db_sd_rates <- as.data.frame(dbGetQuery(con_lf, "SELECT * FROM supervisor_rates"))
db_cd_rates <- as.data.frame(dbGetQuery(con_lf, "SELECT * FROM city_council_rates"))
#Cleaning up Race Rates to take away unknown because the sample size is small
db_race_rates <- db_race_rates[-c(9),]
#Re-code Categorical Values to Better Names
db_race_rates <- db_race_rates %>%
mutate(race = recode(group, race_latinx = "Latinx",
race_black = "Black",
race_asian = "Asian",
# race_nhpi = "Native Hawaiian/ \nPacific Islander",
# race_aian = "American Indian/ \nAlaskan Native",
# race_mena = "Middle Eastern/ \n North African ",
# updating to short names to avoid axis labeling issue
race_nhpi = "NHPI",
race_aian = "AIAN",
race_mena = "MENA",
race_white = "White",
race_other = "Other"))
####People's Project STYLE GUIDE####
## COLORS ##
pink <- "#FC8EB5"
lite_pink <- "#FCC7DA"
yellow <- "#FCEC59"
lite_yellow <- "#FDF3B5"
blue <- "#7ACBFB"
lite_blue <- "#BFE5FB"
green <- "#6AB268"
lite_green <- "#B5D8B5"
black <- "#020202"
white<-"#FFFFFF"
dark_grey <- "#2B2B2B"
mid_grey <- "#8C8C8C"
lite_grey <- "#BFBFBF"
## FONTS ##
## NOTE the Owners font was downloaded on Adobe and installed into Windows by right-clicking the font files and hitting "install"
showtext_auto()
loadfonts()
# define fonts in chart
font_title <- "Owners Text Black"
font_subtitle <- "Owners Text Medium"
font_caption <- "Owners Text Medium"
font_bar_label <- "Owners Text"
font_axis_label <- "Owners Text Medium"
font_table_text<-"Owners Text"
# a highchart theme
lafed_theme <- hc_theme(
colors = c("#FC8EB5", "#FCEC59", "#7ACBFB", "#6AB268", "#FCC7DA" # LA Fed colors
),
chart = list(
backgroundColor = white,
style = list(
fontFamily = font_subtitle,
color=white
)
),
plotOptions =
list(
line =
list(
marker =
list(
enabled = FALSE
)
)
),
title = list(
style = list(
color = black,
fontFamily = font_title,
textAlign="left",
fontsize='200px'
)
),
subtitle = list(
style = list(
color = black,
fontFamily = font_subtitle,
textAlign="left",
fontsize='100px'
)
),
caption = list(
style = list(
color = black,
fontFamily = font_caption,
textAlign = "left",
fontsize="10px"
),
useHTML = TRUE
),
axis = list(
style = list(
color = black,
fontFamily = font_axis_label,
fontSize='50px'
)
),
xAxis=list(
labels=list(
style=list(
color=black,
fontFamily=font_axis_label,
width=120, #argument to modify the width of the labels,
spacingLeft = "150px",
fontSize="12px"
))
),
yAxis=list(
labels=list(
style=list(
color=black,
fontFamily=font_axis_label,
fontSize="12px",
margin = 50
))
),
legend = list(
itemStyle = list(
fontFamily = font_axis_label,
color = black
),
itemHoverStyle = list(
fontFamily = font_table_text,
color = black
)
)
)
lafed_theme_blk <- hc_theme(
colors = c("#FC8EB5", "#FCEC59", "#7ACBFB", "#6AB268", "#FCC7DA" # LA Fed colors
),
chart = list(
backgroundColor = black,
style = list(
fontFamily = font_subtitle,
color=white
)
),
plotOptions =
list(
line =
list(
marker =
list(
enabled = FALSE
)
)
),
title = list(
style = list(
color = white,
fontFamily = font_title,
textAlign="left",
fontsize='200px'
)
),
subtitle = list(
style = list(
color = white,
fontFamily = font_subtitle,
textAlign="left",
fontsize='100px'
)
),
caption = list(
style = list(
color = white,
fontFamily = font_caption,
textAlign = "left",
fontsize="10px"
),
useHTML = TRUE
),
axis = list(
style = list(
color = white,
fontFamily = font_axis_label,
fontSize='50px',
textOutline= '0px'
)
),
xAxis=list(
labels=list(
style=list(
color=white,
fontFamily=font_axis_label,
width=120, #argument to modify the width of the labels
spacingLeft = "150px",
fontSize="12px",
textOutline = "0px"
))
),
yAxis=list(
labels=list(
style=list(
color=white,
fontFamily=font_axis_label,
fontSize="12px",
textOutline = "0px",
margin = 50
))
),
legend = list(
itemStyle = list(
fontFamily = font_axis_label,
color = white
),
itemHoverStyle = list(
fontFamily = font_table_text,
color = white
)
)
)
####INTERACTIVE Visual Functions####
#Function 1: Horizontal Bar Chart
fx_interactive_barchart <- function(db,
order_var,
x,
y,
chart_title="",
chart_subtitle= "",
chart_caption = "",
yaxis_label = "",
xaxis_label = "", #remember that a bar hchart flips the x and y axes (just for fun!)
theme) {
db <- db %>% arrange(desc(order_var))
hchart(db,
type = "bar",
hcaes(x = !!rlang::ensym(x), y =round(!!rlang::ensym(y)),2),
#hcaes(x = x, y = round(y, 2)),
color = pink) %>%
hc_tooltip(pointFormat = "{point.y:.1f}%") %>%
hc_tooltip(crosshairs = TRUE) %>%
hc_yAxis(title = list(text = paste0(yaxis_label))) %>%
hc_xAxis(title = list(text = paste0(xaxis_label))) %>%
# title elements
hc_title(
text = paste0(chart_title),
align = "left",
style = list(useHTML = TRUE, fontSize='22px')
) %>%
hc_subtitle(
text = paste0(chart_subtitle),
align = "left",
style=list(fontSize='15px')
) %>%
hc_caption(
text = paste0(chart_caption),
align = "left",
style=list(fontSize='12px')
) %>%
hc_add_theme(theme)%>%
hc_exporting(
enabled = TRUE, sourceWidth=900, sourceHeight=600,
chartOptions=list(plotOptions=list(series=list(dataLabels=list(enabled=TRUE, format='{y} %')))),
filename = "{chart_title} 2022. Advancement Project CA."
)
}
#Function 1 Example
example1_int <- fx_interactive_barchart(db = db_race_rates,
order_var = db_race_rates$pei_wages,
x = 'race',
y = 'pei_wages',
chart_title = "Experiencing Lower/Lost Wages during the<br> Covid-19 Pandemic by Race",
chart_subtitle = "",
chart_caption = "NHPI: Native Hawaiian/Pacific Islander <br>AIAN: American Indian/Alaskan Native <br>MENA: Middle Eastern/North African ",
yaxis_label = "Percentage of Respondents Experiencing Lower/Lost Wages",
xaxis_label = "",
theme = lafed_theme_blk
)
example1_int
Code includes:
#### Set up data and environment ####
library(RPostgreSQL)
library(here)
library(htmltools)
library(dplyr)
library(tidyverse)
library(highcharter)
library(sf)
# library(devtools)
library(broom)
library(kableExtra)
options(scipen=999)
options(highcharter.theme = hc_theme_smpl(tooltip = list(valueDecimals = 2)))
# loads the PostgreSQL driver
drv <- dbDriver("PostgreSQL")
# create connection for rda database
con_pushla <- dbConnect(drv, dbname = "pushla",
host = "aws-postgres-db.c5udgz7ro8hq.us-west-2.rds.amazonaws.com", port = 5432,
user = "postgres", password = "password")
# # just a note of reference to inserting text #https://monashbioinformaticsplatform.github.io/2017-11-16-open-science-training/topics/rmarkdown.html
# #https://rstudio.com/wp-content/uploads/2015/02/rmarkdown-cheatsheet.pdf
#
# load race disparity data
dat <- st_read(con_pushla, query = "select race, raceeth, type, interaction, count_3yravg as count, total_3yravg as total, name,
pop_est, rate_per1k, label_rate, disp_ratio, var from lapd_stops_arrests_race_disparity_la_city")
# load poverty disparity data
pov <- st_read(con_pushla, query = "select basiccar, bureau, raceeth, race, type, interaction,
count_3yravg as count, total_3yravg as total, est, tot, prc, prc_cv, rate_per1k, division, name, district, district_n
from lapd_stops_arrests_race_poverty_basiccar where type <>'all'")
## Add logos
#### Highchart theme ####
point_cols <- c( "#FE6A00", # red orange
"#3112B0", # royal blue
"#FFE000", # RC yellow
"#C3DE12", # lime green
"#8E8C8F", # medium grey
"#FFA000", # orange
"#070024", # RC dark blue
"#9CFF00",
"#D8D8D8", # light grey (labeled dark grey on rc style guide but is light)
"#F1F0F2" # v light grey
)
rc_theme <- hc_theme(
# race counts colors
colors = c("#FE6A00", # red orange
"#3112B0", # royal blue
"#FFE000", # RC yellow
"#CEF000", # lime green
"#8E8C8F", # medium grey
"#FFA000", # orange
"#070024", # RC dark blue
"#9CFF00",
"#D8D8D8", # dark grey (labeled so on rc style guide but is light)
"#F1F0F2" # shocking neon green
),
chart = list(
backgroundColor = "#F7F7F7",# rc white
style = list(
fontFamily = "Rubik"
)
),
plotOptions =
list(
line =
list(
marker =
list(
enabled = FALSE
)
)
),
title = list(
style = list(
color = "#070024",
fontFamily = "Rubik"
)
),
subtitle = list(
style = list(
color = "#8E8C8F", # medium grey
fontFamily = "Rubik"
)
),
caption = list(
style = list(
color = "#8E8C8F",
fontFamily = "Rubik"
)
),
axis = list(
style = list(
color = "#8E8C8F",
fontFamily = "Rubik"
)
),
legend = list(
itemStyle = list(
fontFamily = "Rubik",
color = "#070024"
),
itemHoverStyle = list(
fontFamily = "Rubik",
color = "#070024"
)
)
)
##### Stop Plot Data Prep #####
color_palette_stops <- c("#C3DE12", "#FE6A00", "#3112B0", "#A900E3", "#BFBFBF", "#FFE000" ) # total "#070024"
# prep data for plot
pov_stop_data <-
pov %>%
filter(type =="stop" & !raceeth %in% c("total", "nh_aian")) %>% # not enough data for nh aian
mutate(prc = round(prc*100, 1))
# split into list by race
pov_stop_data_list <- split(pov_stop_data, pov_stop_data$race, drop = FALSE)
# get regression line for each race group
lm.model_list = list()
for ( i in names(pov_stop_data_list)){
lm.model <- augment(lm(rate_per1k ~ prc, data = pov_stop_data_list[[i]]))
lm.model_list[[i]] <- lm.model
}
# append race group id to the lm list
listofnames <- names(lm.model_list)
lm.model_list <- mapply(cbind, lm.model_list, "race" = listofnames, SIMPLIFY = F)
# collapse list
lm.model_stop <- do.call("rbind", lm.model_list)
##### Stop Plot #####
# correlation line chart
hchart(lm.model_stop, "line",
hcaes(x = prc, y = .fitted, group = race),
color = color_palette_stops,
tooltip = list(headerFormat = "",
pointFormat = "{point.race} Trend Line: Stop Rate Correlated with Poverty Rate")) %>%
# labels
hc_xAxis(title = list(text = "Population Living Below Poverty Line"),
labels = list(format = "{value}%")) %>%
hc_yAxis(min = 0,
title = list(text = "Predicted Rate of Stops per 1,000 Residents")) %>%
# title elements
hc_title(
text = "Correlation of Poverty Rate and Traffic Stop Rate in the City of Los Angeles",
margin = 20, align = "center",
style = list(useHTML = TRUE, fontWeight = "bold")
) %>%
hc_caption(
text = "Data source: LAPD Vehicle Stop Data and Arrest Incident Data from the City of Los Angeles, 2018-2020; Population and Poverty estimates from the
American Community Survey 2015-2019 5-Year Estimates, Tables DP05 and S1701.<br>
Data notes: Traffic stop rates are based on a three-year average of all vehicle stops. Stop rates were excluded when the total population for a race or ethnic group was below 100 people in a Basic Car.",
align = "center"
) %>%
# display plot with theme
hc_add_theme(rc_theme)
Code includes:
#################################################
##PROJECT: Fresno Measure C/P Project with ECI###
#################################################
#set up work station
library(highcharter)
# library(devtools)
library(corrplot)
library(Hmisc)
library(here)
library(data.table)
library(dplyr)
library(RPostgreSQL)
# library(sf)
library(tidyr)
# library(nngeo)
library(readr)
# library(rpostgis)
# library(maptools)
# library(rmapshaper)
# library(sp)
# library(rgeos)
# library(janitor)
library(tidyverse)
# library(odbc)
# set global options to round decimals to two places for highcharts
options(highcharter.theme = hc_theme_smpl(tooltip = list(valueDecimals = 2)))
# load data
projects <- read_csv("W:/Project/ECI/Fresno Measure C&P/Data/Final Data/Measure C and P Projects with Districts and ZCTA Data.csv")
projects <- mutate(projects,
district_final = paste("District ", district_final))
#corr test
corr_table <-
projects%>%filter(measure=="Measure C") %>%
select(below_200_pct, unemp_rate, total_poc, total_5yr) %>%
filter(!is.na(below_200_pct)&!is.na(unemp_rate) & !is.na(total_poc) & !is.na(total_5yr))
# run correlations
corr_sig<-rcorr(as.matrix(corr_table), type="spearman")
flattenCorrMatrix <- function(cormat, pmat) {
ut <- upper.tri(cormat)
data.frame(
row = rownames(cormat)[row(cormat)[ut]],
column = rownames(cormat)[col(cormat)[ut]],
cor =(cormat)[ut],
p = pmat[ut]
)
}
# put into a table for easy review
corr_sig_table<-flattenCorrMatrix(corr_sig$r, round(corr_sig$P,3))
# no significant correlations
projects_c <- projects %>% filter(
total_5yr<15000000, measure=="Measure C", !district_final %in% c('District M','District M')
) # filter out project with high total and just select Measure C projects
hchart(projects_c,
type = "scatter",
hcaes(x = unemp_rate,
y = total_5yr, group=district_final),
tooltip = list(pointFormat = "<strong>{point.project_number}, {point.project_name}<br>Fund Types: <strong>{point.fund_types}</strong><br>5-Year Project Total: <strong>${point.total_5yr}<br>ZIP Code: <strong>{point.zipcode}</strong><br>Unemployment Rate: <strong>{point.unemp_rate}"))%>%
hc_yAxis(title = list(text = "5-Year Project Total")) %>%
hc_xAxis(title = list(text = "Unemployment Rate of Surrounding ZIP Code")) %>%
hc_tooltip(crosshairs = TRUE) %>%
hc_title(text="Measure C Project Totals Compared to Unemployment Rate by Council District") %>%
hc_caption(text = "Projects are matched to the ZIP Code they are located in and the corresponding unemployment rate for that ZIP Code. If a project crosses multiple ZIP Codes, it is shown multiple times--once for each ZIP Code where at least 15% of the project falls within. Projects are assigned to the Council District where the majority of the project is located. Excludes two projects above $15M -- PW00937 and PW00679",
align = "center",useHTML = TRUE
)
Code includes:
### Set up workspace ####
#install.packages("extrafont")
library(extrafont)
library(here)
library(dplyr)
library(data.table)
library(tidycensus)
library(sf)
library(RPostgreSQL)
library(rpostgis)
library(leaflet)
library(RColorBrewer)
library(tigris)
library(stringr)
library(tidyr)
library(showtext)
library(scales)
library(htmltools)
options(scipen=999)
library(tidyverse)
library(waffle)
library(magrittr)
library(plyr)
library(readxl)
# loads the PostgreSQL driver
drv <- dbDriver("PostgreSQL")
con <- dbConnect(drv, dbname = "tnp",
host = "aws-postgres-db.c5udgz7ro8hq.us-west-2.rds.amazonaws.com", port = 5432,
user = "postgres", password = "password")
####################
#### Style Guide ###
####################
## COLORS: There is only blue ##
black <- "#000000"
mainblue <- "#22BCB8"
textgrey <- "#919191"
linecolor<-"#FF0000"
##MAP COLORS:
low_blue <- '#90EBE9'
med_blue <- '#22BCB8' #same as mainblue
high_blue <- '#1A918E'
# define fonts in chart
font_title <- "Arial"
font_subtitle <- "Arial"
font_caption <- "Arial"
font_bar_label <- "Arial"
font_axis_label <- "Arial"
font_table_text<-"Arial"
################
###Map Prep#####
################
bsc<-pgGetGeom(con, "reg4_best_start_geographies")
#convert to sf
bsc<-st_as_sf(bsc)
####transform bsc shapefile for leaflet
bsc <- st_transform(bsc, crs = 4326)
####Add XY coordinates to bsc spatial df
## find centroid coordinates
bsc_cnt = st_centroid(bsc)
bsc_crd = data.frame(st_coordinates(bsc_cnt))
#add ID column to each df and spatial df for joining
bsc$ID <- seq.int(nrow(bsc))
bsc_crd$ID <- seq.int(nrow(bsc_crd))
##join spatial frame with data frame to get the XY columns into the spatial frame
bsc<-geo_join(bsc, bsc_crd, 'ID', 'ID',
how = "left")
#create label
bsc_label<-paste(bsc$best_start)
###########################
#####LAUSD Dist 7 Layer####
###########################
lausd<-pgGetGeom(con, "lausddist7_school_educ_indicators_nonrace", geom="school_geom_4326")
###################
####LBUSD Layer####
###################
lb<-pgGetGeom(con, "lbusd_school_educ_indicators_nonrace")
###############
####Pop-Ups####
###############
url=c("https://jzhang514.github.io/TNP/Additional-Resources")
lb_popup<-paste0((label = paste0("<span style='font-size: 15px; font-weight: bold'>", htmlEscape(lb$schoolname), "</span></br></br>",
"<span style='font-weight: bold;'>High School Graduation Rate:</span> ",ifelse(is.na(lb$prc_hs_grad), 'Data not Available', scales::percent(lb$prc_hs_grad/100,.1)),"</br>",
"<span style='font-weight: bold;'>UC/CSU Eligibility Rate:</span> ",ifelse(is.na(lb$prc_uccsu_elig), 'Data not Available', scales::percent(lb$prc_uccsu_elig/100,.1)),"</br>",
"<span style='font-weight: bold;'>Chronic Absenteeism Rate:</span> ",ifelse(is.na(lb$prc_chronicabsent), 'Data not Available', scales::percent(lb$prc_chronicabsent/100,.1)),"</br>",
"<span style='font-weight: bold;'>Suspension Rate:</span> ",ifelse(is.na(lb$prc_suspensions), 'Data not Available', scales::percent(lb$prc_suspensions/100,.1)),"</br>",
"<span style='font-weight: bold;'>Student Homelessness:</span> ",ifelse(is.na(lb$prc_studenthomeless), 'Data not Available', scales::percent(lb$prc_studenthomeless/100,.1)),"</br></br>",
'<a href="',
url,
'"target="_blank">Click here for data by race</a>.')))
lausd_popup<-paste0((label = paste0("<span style='font-size: 15px; font-weight: bold'>", htmlEscape(lausd$cde_school), "</span></br></br>",
"<span style='font-weight: bold;'>High School Graduation Rate:</span> ",ifelse(is.na(lausd$prc_hs_grad), 'Data not Available', scales::percent(subset(lausd,race=='Total')$prc_hs_grad/100,.1)),"</br>",
"<span style='font-weight: bold;'>UC/CSU Eligibility Rate:</span> ", ifelse(is.na(lausd$prc_uccsu_elig), 'Data not Available', scales::percent(subset(lausd, race =="Total")$prc_uccsu_elig/100,.1)),"</br>",
"<span style='font-weight: bold;'>Chronic Absenteeism Rate:</span> ",ifelse(is.na(lausd$prc_chronicabsent), 'Data not Available', scales::percent(lausd$prc_chronicabsent/100,.1)),"</br>",
"<span style='font-weight: bold;'>Suspension Rate:</span> ",ifelse(is.na(lausd$prc_suspensions), 'Data not Available', scales::percent(lausd$prc_suspensions/100,.1)),"</br>",
"<span style='font-weight: bold;'>Student Homelessness:</span> ",ifelse(is.na(lausd$prc_studenthomeless), 'Data not Available', scales::percent(lausd$prc_studenthomeless/100,.1)),"</br></br>",
'<a href="',
url,
'"target="_blank">Click here for data by race</a>.')))
######################
########Map###########
######################
map_schools<-leaflet(width = "100%", height = "495px")%>%
#LBUSD layer
addCircleMarkers(data = lb, lat = ~y_4326, lng = ~x_4326,
group="Long Beach Unified",
popup=~paste0(lb_popup),
label=~htmlEscape(schoolname),
weight = 2, fillOpacity = .8, radius=4,
color="white", fillColor= "#22BCB8", stroke = TRUE
)%>%
#LAUSD layer
addCircleMarkers(data = lausd, lat = ~y, lng = ~x,
group="LAUSD District 7",
popup=~paste0(lausd_popup),
label=~htmlEscape(cde_school),
weight = 2, fillOpacity = .8, radius=4,
color="white", fillColor= "#332985", stroke = TRUE
)%>%
#bsc layer
addPolygons(data=bsc, color="black", weight=1.5, opacity = 1, fill=F,
label=as.character(bsc$best_start)
) %>%
addLabelOnlyMarkers(data=bsc, ~X, ~Y, label = ~as.character(bsc_label),
labelOptions = labelOptions(noHide= T, direction = 'top', textOnly = T,
style=list("font-size"="12px", "color"="black",
"font-family"="Arial",
"font-weight" = "bold",
"text-shadow"="1px 1px white")))%>%
#layer control
addLayersControl(overlayGroups =
c("Long Beach Unified", "LAUSD District 7"),
options = layersControlOptions(collapsed = FALSE))%>%
#legend
addLegend("bottomleft", colors = c("#22BCB8", "#332985"),
labels = c( "Long Beach Unified", "LAUSD District 7"
))%>%
#base and view
addProviderTiles("CartoDB.Positron")%>%
setView( -118.263412, 33.815654 , zoom = 11)
map_schools
Data Source: California Department of Education (CDE) data from 2018-2019 or 2019-2020 School Year
Code includes:
# load packages
library(here)
library(data.table)
library(dplyr)
library(RPostgreSQL)
library(sf)
library(tidyr)
library(nngeo)
library(readr)
library(rpostgis)
library(maptools)
library(rmapshaper)
library(sp)
library(rgeos)
library(janitor)
library(tidyverse)
library(extrafont)
library(data.table)
library(tidycensus)
library(RPostgreSQL)
library(rpostgis)
library(leaflet)
library(RColorBrewer)
library(tigris)
library(stringr)
library(showtext)
library(scales)
library(htmltools)
options(scipen=999)
library(waffle)
library(magrittr)
library(plyr)
library(readxl)
library(highcharter)
# library(devtools)
library(corrplot)
library(Hmisc)
# load data
measure <- read_csv("W:/Project/ECI/Fresno Measure C&P/Data/Final Data/Fresno Measure C and P Project Spending and Geo Data Final TidyGeocoder.csv") %>% filter(`Project Number` != "PW00829")
# check for Project #
# insert column for measure type
measure$measure <- "Measure C"
measure$measure[measure$'Fund Type' %like% "^Measure P"]<-"Measure P"
names(measure) <- gsub(" ", "_", names(measure))
# rename id column
measure <- measure %>%
dplyr::rename(
id =...1
)
# remove duplicates and clean up names
names(measure) <- gsub(" ", "_", names(measure))
measure <- clean_names(measure)
## remove duplicates
measure <-measure %>%
group_by(project_number,x,y, x1,y1,x2,y2,measure) %>%
dplyr::summarise(fund_types=paste(fund_type, collapse=", "),
funds=paste(fund, collapse=", "),
project_name=min(project_name),
geography_type=min(geography_type),
links=min(link),
department=min(department),
district=min(district),
fy_2022=sum(capital_projects_fy22_estimate),
fy_2023=sum(x2023_capital_projects),
fy_2024=sum(x2024_capital_projects),
fy_2025=sum(x2025_capital_projects),
fy_2026=sum(x2026_capital_projects),
fy_2027=sum(x2027_capital_projects),
total_5yr=sum(x5_year_project_total),
project_count= n())
# add id field again
measure$id <- 1:nrow(measure)
# loads the PostgreSQL driver
drv <- dbDriver("PostgreSQL")
con <- dbConnect(drv, dbname = "covid19_race_class",
host = "aws-postgres-db.c5udgz7ro8hq.us-west-2.rds.amazonaws.com", port = 5432,
user = "postgres", password = "password")
### Load indicator data for poverty and unemployment ###
indicators <- st_read(con, query = "SELECT * FROM state_latest.index_2019_indicators")
indicators <- indicators %>% select(zipcode, below_200_pct, below_200_pct_cv, below_200_pct_moe, below_200_universe, unemp_rate, unemp_rate_moe, unemp_rate_cv, unemp_universe)
#1764 observations
### Load indicator data for people of Color
con2 <- dbConnect(drv, dbname = "rda_shared_data",
host = "aws-postgres-db.c5udgz7ro8hq.us-west-2.rds.amazonaws.com", port = 5432,
user = "postgres", password = "password")
POC <- st_read(con2, query = "SELECT * FROM demographics.acs_5yr_dp05_multigeo_2020")
# But there are 1799 zip code
POC <- POC %>% filter(
geolevel == "zcta"
) # filter to only zip codes
### make name into 2 columns: name and zip-code
POC[c('name', 'zipcode')] <- str_split_fixed(POC$name, ' ', 2)
# select only POC groups we are interested in based on the metadata dictionary: W:\Data\Demographics\ACS\acs_2020_dp05_race_tract_california\ACSDP5Y2020.DP05_2022-04-11T200746
### POC <- POC %>% select(geoid, name, zipcode, geolevel, dp05_0071pe, dp05_0078pe, dp05_0080pe, dp05_0079pe, dp05_0081pe)
POC <- POC %>% select(geoid, name, zipcode, geolevel, dp05_0001e, dp05_0071e, dp05_0078e, dp05_0080e, dp05_0079e, dp05_0081e )
## data dictionary
# total
# DP05_0001e: estimate sex and age total population
#Latinx
# dp05_0071e = Estimate HISPANIC OR LATINO AND RACE!!Total population!!Hispanic or Latino (of any race)
# non-hispanic black
#DP05_0078e = Estimate HISPANIC OR LATINO AND RACE!!Total population!!Not Hispanic or Latino!!Black or African American alone
# nh asian
# DP05_0080e = Estimate HISPANIC OR LATINO AND RACE!!Total population!!Not Hispanic or Latino!!Asian alone
# nh aian
# DP05_0079e = Estimate HISPANIC OR LATINO AND RACE!!Total population!!Not Hispanic or Latino!!American Indian and Alaska Native alone
# nh nhpi
# DP05_0081e = Estimate HISPANIC OR LATINO AND RACE!!Total population!!Not Hispanic or Latino!!Native Hawaiian and Other Pacific Islander alone
# add total POC estimates
POC$total = (POC$dp05_0071e + POC$dp05_0078e + POC$dp05_0080e + POC$dp05_0079e + POC$dp05_0081e)
# divide by the total population for %
POC$total_poc = (POC$total/POC$dp05_0001e)*100
# select only POC, zipcode, geoid
POC = POC %>% select(geoid, zipcode, total_poc)
# round
POC$total_poc <- round(POC$total_poc, digits = 1)
# replace nan with NA
POC$total_poc[is.nan(POC$total_poc)] <- NA
# merge with indicators from state_latest.index_2019_indicators
final_indicators = left_join(indicators, POC, by = "zipcode")
#round poverty
final_indicators$below_200_pct <- round(final_indicators$below_200_pct, digits = 1)
# convert to df
#final_indicators_df <- as.data.frame(final_indicators)
# find Fresno City
## FRESNO zip code
fresno_city <- as.data.frame(fresno_city)
fresno_city <- fresno_city %>% select(ZIP5) # only select zip codes that we want while removing the geom. We are only using geom from the zip code indicators
fresno_indicators <- left_join(fresno_city, final_indicators, by = c("ZIP5" = "zipcode") )
# convert to sf
fresno_indicators <- st_as_sf(fresno_indicators)
# transform for leaflet projection
fresno_indicators<-st_transform(fresno_indicators, CRS("+proj=longlat +datum=WGS84 +no_defs"))
##### DROPS 93737 polygon which is empty polygon #########
fresno_indicators<- fresno_indicators %>% filter(!st_is_empty(.))
##### points to line function found in RPubs: https://rpubs.com/walkerke/points_to_line
points_to_line <- function(data, long, lat, id_field = NULL, sort_field = NULL) {
# Convert to SpatialPointsDataFrame
coordinates(data) <- c(long, lat)
# If there is a sort field...
if (!is.null(sort_field)) {
if (!is.null(id_field)) {
data <- data[order(data[[id_field]], data[[sort_field]]), ]
} else {
data <- data[order(data[[sort_field]]), ]
}
}
# If there is only one path...
if (is.null(id_field)) {
lines <- SpatialLines(list(Lines(list(Line(data)), "id")))
return(lines)
#if we have multiple lines...
} else if (!is.null(id_field)) {
# Split into a list by ID field
paths <- sp::split(data, data[[id_field]])
sp_lines <- SpatialLines(list(Lines(list(Line(paths[[1]])), "line1")))
# for loops
for (p in 2:length(paths)) {
id <- paste0("line", as.character(p))
l <- SpatialLines(list(Lines(list(Line(paths[[p]])), id)))
sp_lines <- spRbind(sp_lines, l)
}
return(sp_lines)
}
}
#####
## data prep
#convert to sf
fresno<-st_as_sf(fresno)
####transform bsc shapefile for leaflet
fresno <- st_transform(fresno, crs = 4326)
####Add XY coordinates to bsc spatial df
## find centroid coordinates
# fresno_cnt = st_point_on_surface(fresno)
# fresno_crd = data.frame(st_coordinates(fresno_cnt))
#add ID column to each df and spatial df for joining
#fresno$ID <- seq.int(nrow(fresno))
#fresno_crd$ID <- seq.int(nrow(fresno_crd))
##join spatial frame with data frame to get the XY columns into the spatial frame
#fresno<-geo_join(fresno, fresno_crd, 'ID', 'ID',
# how = "left")
#create label
#fresno_label<-paste(fresno$best_start)
###################
####Measure P Layer####
###################
# create df for measure p
measure_p <- measure %>% filter(measure == "Measure P")
########points
# filter out points
measure_p_point <- measure_p %>% filter(geography_type == "Point", x != "NA")
# create spatial point df: measure p
# make the SpatialPointsDataFrame object
p_spdf <- SpatialPointsDataFrame(coords = measure_p_point [ , c("x", "y")],
data = measure_p_point,
proj4string = CRS("+init=epsg:28992"))
########## lines
# filter out lines
measure_p_line <- measure_p %>% filter(geography_type == "Line", x1 != "NA", x2 != "NA")
# data manipulation
p_line <- gather(measure_p_line, measure, val, -id) %>% group_by(id) %>%
do(data.frame( lat=c(.[["val"]][.[["measure"]]=="y1"],
.[["val"]][.[["measure"]]=="y2"]),
long = c(.[["val"]][.[["measure"]]=="x1"],
.[["val"]][.[["measure"]]=="x2"])))
#convert to df
p_line <- as.data.frame(p_line)
#make numeric
p_line$lat <- as.numeric(p_line$lat)
p_line$long <- as.numeric(p_line$long)
p_line <- points_to_line(p_line, "long", "lat", "id")
# convert to spatial data frame
p_line_sf <- SpatialLinesDataFrame(sl = p_line, data = measure_p_line, match.ID = FALSE)
p_line_sf <- st_as_sf(p_line_sf)
#### Measure C Layer ######
measure_c <- measure %>% filter(measure == "Measure C")
# filter out points
measure_c_point <- measure_c %>% filter(geography_type == "Point", x != "NA")
# create spatial point df: measure c
# make the SpatialPointsDataFrame object
c_spdf <- SpatialPointsDataFrame(coords = measure_c_point [ , c("x", "y")],
data = measure_c_point,
proj4string = CRS("+init=epsg:28992"))
# filter out lines
measure_c_line <- measure_c %>% filter(geography_type == "Line", x1 != "NA", x2 != "NA")
# data manipulation
# c_line <- gather(measure_c_line, measure, val, -`Project Number`, -`Corridor Start`, -`Corridor End`) %>% group_by(`Project Number`, `Corridor Start`, `Corridor End`) %>%
# do(data.frame( lat=c(.[["val"]][.[["measure"]]=="Y1"],
# .[["val"]][.[["measure"]]=="Y2"]),
# long = c(.[["val"]][.[["measure"]]=="X1"],
# .[["val"]][.[["measure"]]=="X2"])))
c_line <- gather(measure_c_line, measure, val, -id) %>% group_by(id) %>%
do(data.frame( lat=c(.[["val"]][.[["measure"]]=="y1"],
.[["val"]][.[["measure"]]=="y2"]),
long = c(.[["val"]][.[["measure"]]=="x1"],
.[["val"]][.[["measure"]]=="x2"])))
#convert to df
c_line <- as.data.frame(c_line)
#make numeric
c_line$lat <- as.numeric(c_line$lat)
c_line$long <- as.numeric(c_line$long)
c_line <- points_to_line(c_line, "long", "lat", "id")
# convert to spatial data frame
c_line_sf <- SpatialLinesDataFrame(sl = c_line, data = measure_c_line, match.ID = FALSE)
c_line_sf <- st_as_sf(c_line_sf)
####################
#### Style Guide ###
####################
## COLORS: ##
black <- "#000000"
mainblue <- "#22BCB8"
textgrey <- "#919191"
linecolor<-"#FF0000"
green <- "#347F4D"
orange <- "FF8000"
# set up custom legend
###############
####Pop-Ups####
###############
p_popup<-paste0((label = paste0("<span style='font-size: 15px; font-weight: bold'>", htmlEscape(p_spdf$project_name), "</span></br>",
"<span style='font-weight: bold;'>Project Number:</span> ", p_spdf$project_number, "</br>",
"<span style='font-weight: bold;'>Fund Type:</span> ", p_spdf$fund_types, "</br></br>",
"<span style='font-weight: bold;'>FY 22 Estimate:</span> ", "$", ifelse(is.na(p_spdf$fy_2022), 'Data not Available', scales::comma(p_spdf$fy_2022)),"</br>",
"<span style='font-weight: bold;'>5 Year Project Total:</span> ", "$",
ifelse(is.na(p_spdf$total_5yr), 'Data not Available',
scales::comma(p_spdf$total_5yr)),"</br></br>",
### comment out this code: just let it be character
"<span style='font-weight: bold;'>Link:</span> ",
ifelse(is.na(p_spdf$links), 'Link not Available',
p_spdf$links)
## provide hyperlink later: '<a href=', p_spdf$Link, '>', p_spdf$Link, '</a>'
)
)
)
pline_popup<-paste0((label = paste0("<span style='font-size: 15px; font-weight: bold'>", htmlEscape(p_line_sf$project_name), "</span></br>",
"<span style='font-weight: bold;'>Project Number:</span> ", p_line_sf$project_number, "</br>",
"<span style='font-weight: bold;'>Fund Type:</span> ", p_line_sf$fund_types, "</br></br>",
"<span style='font-weight: bold;'>FY 22 Estimate:</span> ","$", ifelse(is.na(p_line_sf$fy_2022), 'Data not Available', scales::comma(p_line_sf$fy_2022)),"</br>",
"<span style='font-weight: bold;'>5 Year Project Total:</span> ","$", ifelse(is.na(p_line_sf$total_5yr), 'Data not Available',
scales::comma(p_line_sf$total_5yr)),"</br></br>",
"<span style='font-weight: bold;'>Link:</span> ",
ifelse(is.na(p_line_sf$links), 'Link not Available',
p_line_sf$links)
## provide hyperlink later: '<a href=', p_line_sf$Link, '>', p_line_sf$Link, '</a>'
)
)
)
##
c_popup<-paste0((label = paste0("<span style='font-size: 15px; font-weight: bold'>", htmlEscape(c_spdf$project_name), "</span></br>",
"<span style='font-weight: bold;'>Project Number:</span> ", c_spdf$project_number, "</br>",
"<span style='font-weight: bold;'>Fund Type:</span> ", c_spdf$fund_types, "</br></br>",
"<span style='font-weight: bold;'>FY 22 Estimate:</span> ","$", ifelse(is.na(c_spdf$fy_2022), 'Data not Available', scales::comma(c_spdf$fy_2022)),"</br>",
"<span style='font-weight: bold;'>5 Year Project Total:</span> ","$", ifelse(is.na(c_spdf$total_5yr), 'Data not Available',
scales::comma(c_spdf$total_5yr)),"</br></br>",
"<span style='font-weight: bold;'>Link:</span> ",
ifelse(is.na(c_spdf$links), 'Link not Available',
c_spdf$links)
## provide hyperlink later: '<a href=', c_spdf$Link, '>', c_spdf$Link, '</a>'
)
)
)
##
cline_popup<-paste0((label = paste0("<span style='font-size: 15px; font-weight: bold'>", htmlEscape(c_line_sf$project_name), "</span></br>",
"<span style='font-weight: bold;'>Project Number:</span> ", c_line_sf$project_number, "</br>",
"<span style='font-weight: bold;'>Fund Type:</span> ", c_line_sf$fund_types, "</br></br>",
"<span style='font-weight: bold;'>FY 22 Estimate:</span> ","$", ifelse(is.na(c_line_sf$fy_2022), 'Data not Available', scales::comma(c_line_sf$fy_2022)),"</br>",
"<span style='font-weight: bold;'>5 Year Project Total:</span> ","$", ifelse(is.na(c_line_sf$total_5yr), 'Data not Available',
scales::comma(c_line_sf$total_5yr)),"</br></br>",
"<span style='font-weight: bold;'>Link:</span> ",
ifelse(is.na(c_line_sf$links), 'Link not Available',
c_line_sf$links)
## provide hyperlink later: '<a href=', c_line_sf$Link, '>', c_line_sf$Link, '</a>'
)
)
)
###
#palette purples
pal_unemp <- colorBin(palette = c("#54278f","#605DA2", "#9391BF", "#cbc9e2","#f2f0f7" ), domain = fresno_indicators$unemp_rate, bins = 5, reverse = TRUE, pretty = FALSE)
unemployment_popup <- paste0((
label = paste0(
"<span style='font-weight: bold;'>Zipcode:</span> ", fresno_indicators$ZIP5, "</br>",
"<span style='font-weight: bold;'>Unemployment Rate:</span> ",ifelse(is.na(fresno_indicators$unemp_rate), 'Data not Available',fresno_indicators$unemp_rate), "%"
)
)
)
### map
leaflet(width = "100%", height = "495px")%>%
### add indicator: unemployment
addPolygons(data = fresno_indicators, color = "#444444", weight = 1, smoothFactor = 0.5, fillOpacity = .5, popup = ~paste0(unemployment_popup), fillColor = ~pal_unemp(unemp_rate), highlight = highlightOptions(color = "white", weight = 3, bringToFront = FALSE, sendToBack = TRUE)) %>%
#fresno district layer
addPolygons(data=fresno, color="black", weight=1.5, group = "Council Districts", opacity = 2, fill=F,
) %>%
#Measure P Point layer
# start line
addPolylines(data = p_line_sf, group = "Measure P", label=~htmlEscape(project_name), popup =~paste0(pline_popup), color = "orange", weight = 4, opacity = 2) %>%
addCircleMarkers(data = p_spdf, lat = ~y, lng = ~x,
group="Measure P",
popup=~paste0(p_popup),
label=~htmlEscape(project_name),
weight = 2, fillOpacity = 1, radius=3,
color="black", fillColor= "orange", stroke = TRUE
) %>%
# end p line
#Measure C Point layer
addPolylines(data = c_line_sf, group = "Measure C", label=~htmlEscape(project_name), popup =~paste0(cline_popup), color = "red", weight = 3, opacity = 2) %>%
addCircleMarkers(data = c_spdf, lat = ~y, lng = ~x,
group="Measure C",
popup=~paste0(c_popup),
label=~htmlEscape(project_name),
weight = 2, fillOpacity = 1, radius=3,
color="black", fillColor= "red", stroke = TRUE
) %>%
# end c line
#layer control
addLayersControl(overlayGroups =
c("Measure P", "Measure C", "Council Districts"),
options = layersControlOptions(collapsed = FALSE)) %>%
# add legend for unemployment
addLegend(position = "bottomleft", pal = pal_unemp, values = fresno_indicators$unemp_rate, opacity = .5, title = "Fresno Unemployment Rate (%)", na.label = "Unemployment data not available", labFormat = labelFormat(suffix = "%")
)%>%
addLegend(position = "bottomleft", title = "Fund Type", labels = c("Measure P", "Measure C"), colors = c("orange", "red")) %>%
#base and view
addProviderTiles("CartoDB.Positron")%>%
setView( -119.8026 , 36.75467 , zoom = 11)
Data Sources: * City Council Districts and Existing Fresno City Parks: City of Fresno GIS Download
Socioeconomic Indicators: American Community Survey 5 year estimates 2015-2019, 2016-2020.
Measure C & P Projects: Approximate Address Points for Measure P and C Projects compiled by Advancement Project California based on public documents from the City of Fresno.
Code includes:
# set up workspace
library(DT)
library(formattable)
library(knitr)
library(dplyr)
library(htmltools)
library(tidyr)
library(stringr)
library(stringr)
library(scales)
library(colorspace)
# # data setup
pw <- {"password"}
# load PostgreSQL driver
drv <- dbDriver("PostgreSQL")
con <- dbConnect(drv,
dbname = "prop47",
host = "aws-postgres-db.c5udgz7ro8hq.us-west-2.rds.amazonaws.com",
port = 5432,
user = "postgres",
password = pw)
#### JENI Prep Steps ####
### Load index data ###
jeni<- st_read(con, query = "SELECT * FROM jeni_jesi_2021.jeni")
# round index data
jeni <- mutate(jeni,
jeni_pctl = round(jeni_pctl,3)*100,
jeni_score = round(jeni_score,3),
system_pctl = round(system_pctl,3)*100,
drivers_pctl = round(drivers_pctl,3)*100,
risk_pctl = round(risk_pctl,3)*100,
population_count = round(population_count,0),
black_rate = round(black_rate,1),
latinx_rate = round(latinx_rate,1),
aian_rate = round(aian_rate,1),
nhpi_rate = round(nhpi_rate,1),
nohighschool_rate = round(nohighschool_rate,1),
below200fpl_rate = round(below200fpl_rate,1),
unemployed_rate = round(unemployed_rate,1),
mentalhlth_hosp_rate = round(mentalhlth_hosp_rate,1),
alcdrug_hosp_rate = round(alcdrug_hosp_rate,1),
vc_rate = round(vc_rate,1),
homeless_rate = round(homeless_rate,1)
)
# add sd and spa names
jeni<-jeni%>%mutate(sd_name=recode(sd_number,
"1"="Solis",
"2"="Mitchell",
"3"="Kuehl",
"4"="Hahn",
"5"="Barger"))%>%
mutate(spa_name=recode(spa_number,
"1"="Antelope Valley",
"2"="San Fernando Valley",
"3"="San Gabriel Valley",
"4"="Metro",
"5"="West",
"6"="South",
"7"="East",
"8"="South Bay"
))
#remove geoms and reorganize the data for the data table download
jeni_data <- jeni[c(1:8,10:18, 23:44)]%>% st_drop_geometry()
jeni_data<-jeni_data[, c(1,6,7,8,5,2,3,4,9,10,12,13,15,16,18,20,22,24,26,28,30,32,34,36,38)]
jeni_data <- arrange(jeni_data, -desc(jeni_rank))
## table ##
datatable(jeni_data,
class = 'cell-border stripe',
colnames=c(
'JENI Rank'='jeni_rank',
'JENI Percentile'='jeni_pctl',
'JENI Category'='jeni_category',
'ZIP Code'='zipcode',
'Population'='population_count',
'Supervisorial District'='sd_number',
'Service Planning Area'='spa_number',
'Neighborhood'='lat_neighborhood',
'System Involvement Percentile' = 'system_pctl',
'System Involvement Category' = 'system_category',
'Inequity Drivers Percentile' = 'drivers_pctl',
'Inequity Drivers Category' = 'drivers_category',
'Criminalization Risk Percentile' = 'risk_pctl',
'Criminalization Risk Category' = 'risk_category',
'% Black Population' = 'black_rate',
'% Latinx Population' = 'latinx_rate',
'% American Indian / Alaskan Native Population' = 'aian_rate',
'% Native Hawaiian / Pacific Islander Population' = 'nhpi_rate',
'% of Population without a High School Diploma' = 'nohighschool_rate',
'Below 200% Poverty Level Rate' = 'below200fpl_rate',
'Unemployment Rate' = 'unemployed_rate',
'Mental Health Hospitalizations (per 1K)' = 'mentalhlth_hosp_rate',
'Alcohol and Drug-Related Hospitalizations (per 1K)' = 'alcdrug_hosp_rate',
'Violent Crime Rate (per 1K)' = 'vc_rate',
'Homelessness Rate (per 1K)' = 'homeless_rate'
),
style ='default',
rownames = FALSE,
filter ='top',
extensions = c(
# 'Scroller',
# 'FixedHeader',
'Buttons',
'KeyTable', 'FixedColumns'),
options = list(
dom = 'Bfrtip',
buttons = c('csv','excel'),
pageLenth =15,
autoWidth = TRUE,
scrollX = T,
scroller = TRUE
# columnDefs = list(list(width = '200px', targets = c(1,23))),
#, searchHighlight = TRUE
)
,
caption = htmltools::tags$caption(
style = 'caption-side: bottom; text-align: left;',
htmltools::strong('Percentiles range from 0-100. A higher percentile means a higher level of need.'))
)
Code includes:
library(RPostgreSQL)
library(highcharter)
library(dplyr)
library(tidyr)
# loads the PostgreSQL driver
drv <- dbDriver("PostgreSQL")
# create connection for rda database
con_tnp <- dbConnect(drv, dbname = "tnp",
host = "aws-postgres-db.c5udgz7ro8hq.us-west-2.rds.amazonaws.com", port = 5432,
user = "postgres", password = "password")
# pull view in for visualizing
uninsured<-dbGetQuery(con_tnp, "SELECT * FROM puma_uninsured_2019")
# convert view to long format and filter for values of interest
uninsured_clb<-uninsured%>%
gather(measure, value, tot_uninsured:pct_uninsured)%>%
filter(grepl("pct_uninsured", measure))%>%
filter(!grepl('Total', race))%>%
filter(grepl("Central Long Beach", best_start))%>%
mutate(variable=ifelse(measure %in% 'tot_hh', 'Total Number of Households',
ifelse(measure %in% 'tot_uninsured', 'Total Number of Uninsured Households', 'Blank')))
uninsured_clb %>%
hchart(
"pie", hcaes(x = race, y = value),
name = "Uninsured Population in Central Long Beach",
tooltip = list(pointFormat = "Rate: {point.value:.2f}%")
)%>%
#hc_colorAxis(minColor="#FFD432", maxColor = "#110066")%>% #note I manually specified my colors
hc_legend(enabled = FALSE)%>%
hc_title(
text = "Relative to other groups, Latinx households are most likely to be uninsured",
margin = 20,
align = "left",
style = list(useHTML = TRUE, fontWeight="bold")
)%>%
hc_subtitle(
text = '<a href="https://data.census.gov/cedsci/">ACS 2018 5-Year Estimates</a>',
style = list( fontWeight = "bold"),
align = "left"
)
Data Source: American Community Survey, 2015-2019 5-Year
Estimate.
Data Note: Race groups are Latinx-exclusive, except AIAN
and NHPI, which include all people who identify as AIAN or NHPI
including in combination with other races and ethnicities.
library(RPostgreSQL)
library(highcharter)
library(dplyr)
library(tidyr)
# loads the PostgreSQL driver
drv <- dbDriver("PostgreSQL")
# create connection for rda database
con_tnp <- dbConnect(drv, dbname = "tnp",
host = "aws-postgres-db.c5udgz7ro8hq.us-west-2.rds.amazonaws.com", port = 5432,
user = "postgres", password = "password")
#grab view for visualizing
race<-dbGetQuery(con_tnp, "SELECT * FROM ct_raceethnicity_2019")
# convert view to long and recode race for labeling
race_clb <- gather(race, measure, value, num_white:pct_twoormore, factor_key=TRUE)%>%
filter(grepl("pct", measure))%>%
filter(grepl("Central Long Beach", best_start))%>%
mutate(variable=ifelse(measure %in% 'pct_white', 'White',
ifelse(measure %in% 'pct_black_afam', "Black",
ifelse(measure %in% 'pct_aian', "AIAN",
ifelse(measure %in% 'pct_asian', "Asian",
ifelse(measure %in% 'pct_nativepi', "Native Pacific Islander",
ifelse(measure %in% 'pct_hispaniclatino', "Hispanic/Latinx",
ifelse(measure %in% 'pct_other', "Other Race",
ifelse(measure %in% 'pct_twoormore', "Two or more Races", "Blank")))))))))
#tree map
race_clb%>%
hchart(type = "treemap", hcaes(x = variable, value = value, color=value),
tooltip = list(pointFormat = "{point.variable} Percentage: {point.value:,.1f}%"))%>%
hc_tooltip(crosshairs = TRUE)%>%
hc_colorAxis(stops = color_stops(colors = c("#F6B3B9", "#FCDCC5", "#B0D9CD","#63AAB5")))%>% #note I added colors manually
hc_title(
text = "60.9% of families in Central Long Beach are Hispanic/Latinx. The second largest group of families are Asian at 13.5%",
margin = 20,
align = "left",
style = list(useHTML = TRUE, fontWeight="bold")
)%>%
hc_subtitle(
text = '<a href="https://data.census.gov/cedsci/">ACS 2019 5-Year Estimates</a>',
style = list( fontWeight = "bold"),
align = "left"
)%>%
hc_legend(enabled = FALSE)
Code includes:
###################################################
###PROJECT: ACLU (Criminal Justice Dept + RDA)#####
###################################################
##############################
####set up libraries styling###
##############################
library(here)
# install.packages("waffle", repos = "https://cinc.rud.is")
library(waffle)
library(dplyr)
library(data.table)
library(tidycensus)
library(sf)
library(ggplot2)
library(RPostgreSQL)
library(tidyr)
library(writexl)
library(classInt)
library(RColorBrewer)
library(stringr)
library(ggrepel)
library(scales)
library(extrafont)
library(kableExtra)
library(showtext)
library(forcats)
options(scipen=999)
setwd("W:/Project/OSI/ACLU CA")
## Set up Driver
drv <- dbDriver("PostgreSQL")
# create connection for acluca database
con <- dbConnect(drv, dbname = "acluca",
host = "aws-postgres-db.c5udgz7ro8hq.us-west-2.rds.amazonaws.com", port = 5432,
user = "postgres", password = "password")
#### Define Styling ####
#load fonts
windowsFonts("Oswald SB" = windowsFont("Oswald SemiBold"))
windowsFonts("Oswald Light" = windowsFont("Oswald Light"))
windowsFonts("Atkinson Reg" = windowsFont("Atkinson Hyperlegible Regular"))
### Colors/Font Size ####
#naming convention for different styles: graphtype+style
#mainfont = the more bold font (for titles, subtitles, whatever we want more bold)
#secondfont = the less bold font (captions, axis labels, whatever should not be as bold)
## Grouped Bar Chart (3 bars) ##
groupbar3color<-c("#CD3247", "#F7D56A", "#357082") ## substitute with Ron's deeper red color: #FF6B6B
groupbar3bartextsize<-2.2
## Single Bar Chart ###
bar1color<-"#CD3247"
bar1bartextsize<-3.5
## Colors that have to be manually specified -mainly when a "total" bar is a different color from other bars of same graph
manual1color<-"#F7D56A"
manual2color<-"#357082"
## Two category pie chart ##
pie2color<-c("#CD3247", "#F7D56A")
## Boxplot ##
boxplot1color<-"#CD3247"
## Waffle Chart ##
waffle4color<-c("#CD3247", "#F7D56A", "#357082", "#908780")
## General for all graphs ###
# font
mainfont<-"Oswald SB" ##mainfont is for bolded text (i.e. Title, Subtitle, anything we want more bold)
secondfont<-"Oswald Light" ##secondfont is for lighter text (i.e. Caption, Axis labels, anything we want lighter)
#color
black <- "#000000"
###################
####graph code#####
###################
df<-dbGetQuery(con, "SELECT * FROM report_agency_timespent_percent_stopreason")
df<-df%>%
mutate(reason_label=ifelse(reason_label=="Outstanding arrest warrant/wanted person","Outstanding arrest warrant",
ifelse(reason_label=="Combined Traffic violation, Consensual encounter and search, and Reasonable suspicion","Traffic, Consensual encounter/search, and Reasonable suspicion",
ifelse(reason_label=="On parole/probation/PRCS/mandatory supervision","Parole/Probation",
ifelse(reason_label=="Investigation to determine if person is truant", "Truancy investigation", reason_label)))))%>%
mutate(total_time_hrs=total_time/60)
# list of values to loop over
agency = unique(df$agencyname)
# set up graph information
indicatorname<-"Percent of Total Stop Time by Stop Reason"
indicatorsource <- "\nData Source: AB 953 data requested by ACLU of Southern California (2019)."
datanote<-"\nData Note: Analysis by stop incident. Reasons for stop that make up less than one percent of total stop time are aggregated into 'Other Reason'. Stop incidents that include two or more reasons for the stop are aggregated into the 'Two or More Reasons' category. Excludes request for service calls. Stops with stop times that represented outlier values in the data are capped to an upper threshold by stop reason and race."
# bar plot instead
for (i in agency) {
temp_plot =print(ggplot(data=subset(df, agencyname==i),
aes(x= reorder(reason_label, percent_time), y= percent_time)) +
# bar style
geom_bar( stat = "identity", width=0.6, position = position_dodge(width=1),
fill = bar1color) +
expand_limits(y = max(df$percent_time) + .25) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1),limits=c(0,1))+
scale_x_discrete(labels=function(x) str_wrap(x, width=35))+
# bar labels
geom_text(
data=subset(df, agencyname==i),
aes(label = paste0(scales::percent(percent_time, .1)," ", "(",scales::comma(total_time_hrs , big.mark = ",")," hours)")), size = bar1bartextsize,
stat="identity", colour =black, family = mainfont,
position = position_dodge(width = 1), vjust = 0.2 , hjust= -0.03) +
labs(title = str_wrap(paste0(indicatorname),60),
subtitle=paste0(i),
caption = paste0(indicatorsource,"\n",str_wrap(datanote,90))) +
xlab("") +
ylab("Percent of Total Time Spent on Stops") +
coord_flip() +
theme_minimal()+
theme(legend.title = element_blank(), # no legend--modify if necessary
# define style for axis text
axis.text.y = element_text(size = 10, margin = margin(0, -10, 0, 0),
family= secondfont, face="bold"),
axis.text.x = element_text(size = 10,
family= secondfont),
axis.title.x = element_text(size = 10, margin = margin(10, 0, 0, 0),
family = secondfont, face="bold"),
axis.title.y = element_text(size = 9, margin = margin(10, 0, 0, 0),
family = secondfont),
# define style for title and caption
plot.caption =
element_text(hjust = 0.0, size = 9, family = secondfont),
plot.title =
element_text(hjust = 0.0, size = 13, family = mainfont),
plot.subtitle =
element_text(hjust = 0.0, size = 9, family = mainfont),
axis.ticks = element_blank(),
# grid line style
panel.grid.minor = element_blank(),
panel.grid.major = element_line(size = 0.25),
panel.grid.major.y = element_blank()))
# ggsave(temp_plot, file=paste0(indicatorname,"_", i,".png"), path="W:/Project/OSI/ACLU CA/R/Images/Time Spent", units = c("in"), width = 8, height = 5.5) ########COMMENTING OUT EXPORT CODE for RDA Code Repository so that files are't being overwritten
}
Code includes:
###Grab census median household income data for la county
library(tidycensus)
library(scales)
census_api_key("9c04405586a3811f542be8917ce8da4d75c9a8bf", overwrite=TRUE)
median_hh_income<-get_acs(geography="county",
state="CA", county="037", variables = c("B19013_001"),
year=2019,
survey="acs5")
#####Manually create data frame for RCM budget for LA County family of 4 (two adults, one preschooler, one school child)
rcm_la <-data.frame("measure"=c("Real Cost Measure \nAnnual Household Budget", "Real Cost Measure \nAnnual Household Budget", "Real Cost Measure \nAnnual Household Budget", "Real Cost Measure \nAnnual Household Budget", "Real Cost Measure \nAnnual Household Budget", "Real Cost Measure \nAnnual Household Budget", "Real Cost Measure \nAnnual Household Budget", "Los Angeles County \nMedian Household Income"), expense=c("Housing", "Child Care", "Food", "Health Care", "Transportation", "Miscellaneous", "Taxes", "Median Household Income"), "amount"=c(18540, 16848, 12468, 9828, 11496, 6912, 15469, 68044))
#customize the order for the stacked bar graph
rcm_la$expense <- factor(rcm_la$expense,levels = c("Child Care", "Food", "Health Care", "Housing", "Transportation", "Taxes", "Miscellaneous", "Median Household Income"))
#####Graph
library(ggplot2)
library(downloadthis)
plot<-ggplot(rcm_la, aes(fill=expense, y=amount, x=measure, label=amount))+
geom_bar(position="stack", stat="identity")+
geom_text(aes(label=paste0("$", format(amount, big.mark=","))), size = 3, position = position_stack(vjust = 0.5))+
scale_y_continuous(name="", labels=dollar_format(), breaks=seq(0,95000,10000))+
scale_x_discrete(name="")+
labs(fill="")+
scale_fill_brewer(breaks=c("Child Care", "Food", "Health Care", "Housing","Transportation", "Taxes","Miscellaneous"), type="qual", palette="Spectral")+
ggtitle("Real Cost Budget for Family in Los Angeles County")+
theme(panel.background = element_blank())
plot
download_this(plot) #this adds an export option for static graphs
Source: Annual budget looks like for a family of two adults, one
preschooler, and one school-age child living in Los Angeles County from
Struggling to Stay Afloat: The Real Cost Measure in California 2019.
United Ways of California. http://www.unitedwaysca.org/realcost
Code includes:
library(dplyr)
library(data.table)
library(tidycensus)
library(sf)
library(RPostgreSQL)
library(stringr)
library(tidyr)
library(ggplot2)
# loads the PostgreSQL driver
drv <- dbDriver("PostgreSQL")
# create connection for rda database
con_tnp <- dbConnect(drv, dbname = "tnp",
host = "aws-postgres-db.c5udgz7ro8hq.us-west-2.rds.amazonaws.com", port = 5432,
user = "postgres", password = "password")
# pull view for visualizing
fam<-dbGetQuery(con_tnp, "SELECT * FROM ct_familycomposition_2019")
# manipulate view from wide to long format for graphing
fam_long<-fam%>%
select(1,5,9,13)%>%
gather(measure,value, pct_married:pct_femalehh, factor_key = TRUE)%>%
arrange(-value) %>%
mutate(Per_cumsum=cumsum(value))
#graph
fam_long%>%
ggplot(aes(x="", y=value, fill=measure)) +
geom_bar(stat="identity", width=1, color="white",
position = position_stack(vjust = .5)) +
labs(title="Family Composition in the State, County and Region 4\n"
)+
geom_text(aes(label = value), position = position_stack(vjust = 0.5),
color = "white", size=3)+
theme_void()+
coord_polar(theta = "y") +
scale_fill_manual(name = "",
values=c("#D8A0D9", "#63268C", "#127CA6"),
labels = c("Percent Married", "Percent Single\nMale Head of Household", "Percent Single\nFemale Head of Household"))+
facet_wrap(~best_start)
Code includes:
library(kableExtra)
#connect to postgres
con_embracela<- dbConnect(drv, dbname = "embrace_la",
host = "aws-postgres-db.c5udgz7ro8hq.us-west-2.rds.amazonaws.com", port = 5432,
user = "postgres", password = "password")
# pull view for visualizint
barriers_to_equity<-dbGetQuery(con_embracela, "SELECT response, freq, percent FROM commsurvey_barriers_to_equity WHERE percent > 26 OR percent < 6")
#table
kable(barriers_to_equity, digits=0, col.names=c("","Count","Percent"))%>%
kable_styling()%>%
row_spec(1:3, bold = T, color ="white", background="#62bce5", extra_css = "border-bottom: 1px solid")%>%
row_spec(4:8, bold=T, color= "white", background="#6c90ca", extra_css = "border-bottom: 1px solid")%>%
add_header_above(c("What are the top 3 barriers to creating a more equitable and inclusive city? (Most and least common barriers)"=1," "=2), align="l")
Count | Percent | |
---|---|---|
Housing affordability | 310 | 57 |
Gentrification and displacement | 201 | 37 |
Racism and discrimination | 184 | 34 |
Public participation | 29 | 5 |
Environmental justice | 29 | 5 |
Public safety | 28 | 5 |
Food security | 22 | 4 |
City services | 14 | 3 |
Code includes: * Creating factors to re-order the data * Dumbbell charts created through ggplot * See the barbell code template here: “W:\Project\RDA Team\Covid statewide\R\Covid_Statewide_Report_Reopenings.Rmd”
library(htmltools)
library(RPostgreSQL)
library(dplyr)
library(tidyverse)
library(highcharter)
options(highcharter.theme = hc_theme_smpl(tooltip = list(valueDecimals = 2)))
library(tidyr)
library(ggplot2)
library(ggalt)
library(cowplot)
library(lubridate)
setwd("W:/Project/RDA Team/COVID Statewide")
# Data Setup
pw <- {
"password"
}
# loads the PostgreSQL driver
drv <- dbDriver("PostgreSQL")
con <- dbConnect(drv, dbname = "covid19_race_class",
host = "aws-postgres-db.c5udgz7ro8hq.us-west-2.rds.amazonaws.com", port = 5432,
user = "postgres", password = pw)
# just a note of reference to inserting text #https://monashbioinformaticsplatform.github.io/2017-11-16-open-science-training/topics/rmarkdown.html
#https://rstudio.com/wp-content/uploads/2015/02/rmarkdown-cheatsheet.pdf
#Step 1 load data in wide and long format
reopenings_long<- dbGetQuery (con, "select * from state_latest.state_reopenings_analysis_long")
reopenings_wide<- dbGetQuery (con, "select * from state_latest.state_reopenings_analysis_wide")
#Step 2 Run some additional calcs to explore potential graphs and trends
reopenings_wide %>%
mutate(diff_1week = avg_cases_rate_1weekafter - avg_cases_rate_start) ->reopenings_wide
reopenings_wide %>%
mutate(perdiff_1week = (avg_cases_rate_1weekafter - avg_cases_rate_start)/avg_cases_rate_start,
perdiff_2week= (avg_cases_rate_2weekafter - avg_cases_rate_start)/avg_cases_rate_start,
perdiff_3week= (avg_cases_rate_3weekafter - avg_cases_rate_start)/avg_cases_rate_start) ->reopenings_wide
#Step 3 add in factors so that graphs are ordered by these fields versus the default in R
factor_levels<-c("3 weeks before", "2 weeks before", "1 week before", "Start", "1 week after", "2 weeks after", "3 weeks after")
factor_levels_2<-c("Baseline Period - May 8, 2020", "Reopening Period - May 25, 2020", "Reopening Period - June 12, 2020", "Reopening Period - June 21, 2020", "Closure Period - July 13, 2020",
"Closure Period - August 17, 2020", "Closure Period - September 7, 2020")
reopenings_wide %>%
mutate(
label_full = recode(label_,
"Baseline Period - May 8, 2020" = 'May 8, 2020, BASELINE - Limited reopening of low-risk businesses and retail (pickup only)',
"Reopening Period - May 25, 2020" = 'May 25, 2020, REOPENING - In-person retail shopping, places of worship, hair salons, LA County dining reopen',
"Reopening Period - June 12, 2020" = 'June 12, 2020, REOPENING - Restaurants, wineries, bars, cardrooms, gyms, personal care services reopen',
"Reopening Period - June 21, 2020" = 'June 21, 2020, REOPENING - LA County reopens bar, wineries, cardrooms, personal care services',
"Closure Period - July 13, 2020" = 'July 13, 2020, CLOSURE - Bars, restaurants, wineries, cardrooms, gyms, places of worship, hair salons, personal care services close indoor operations',
"Closure Period - August 17, 2020" = 'August 17, 2020, CLOSURE - Closures remain and no major sectors reopen',
"Closure Period - September 7, 2020" = 'September 7, 2020, CLOSURE - Labor day occurs + most counties must maintain closures')) %>%
mutate(
race_label=recode(race_label_short,
"latinx"="Latinx",
"nh_black"="Black",
"nh_asian"="Asian",
"nh_nhpi"="NHPI",
"nh_aian"="AIAN",
"nh_white"="White",
"total"="Total"))->reopenings_wide
factor_levels_3<-c("May 8, 2020, BASELINE - Limited reopening of low-risk businesses and retail (pickup only)",
"May 25, 2020, REOPENING - In-person retail shopping, places of worship, hair salons, LA County dining reopen",
"June 12, 2020, REOPENING - Restaurants, wineries, bars, cardrooms, gyms, personal care services reopen",
"June 21, 2020, REOPENING - LA County reopens bar, wineries, cardrooms, personal care services",
"July 13, 2020, CLOSURE - Bars, restaurants, wineries, cardrooms, gyms, places of worship, hair salons, personal care services close indoor operations",
"August 17, 2020, CLOSURE - Closures remain and no major sectors reopen",
"September 7, 2020, CLOSURE - Labor day occurs + most counties must maintain closures")
factor_race<-c("White", "Asian", "NHPI", "Latinx", "Black",
"AIAN", "Total")
#Add factors to dataset
reopenings_wide$label_2 <- factor(reopenings_wide$label_, levels = factor_levels_2)
reopenings_wide$label_full <- factor(reopenings_wide$label_full, levels = factor_levels_3)
reopenings_wide$race_label<-factor(reopenings_wide$race_label, levels = rev(factor_race))
#Create selection just for rates by race/ethnicity
reopenings_wide_nototal<-reopenings_wide[!reopenings_wide$race_label_short %in% c("total"), ]
reopenings_wide_reopenings_only<-reopenings_wide_nototal[!reopenings_wide_nototal$label_ %in% c("Closure Period - July 13, 2020","Closure Period - August 17, 2020","Closure Period - September 7, 2020"), ]
ggplot(reopenings_wide_nototal, aes(x=avg_cases_rate_start, xend=avg_cases_rate_1weekafter, y=race_label)) +
geom_dumbbell(size=1.5, color="gray",size_x=3, size_xend=3,colour_x="slategray2", colour_xend="darkgoldenrod1", dot_guide=FALSE)+
facet_wrap(~label_full, ncol=1)+
geom_text(data=filter(reopenings_wide_nototal,label_2=="Baseline Period - May 8, 2020" & race_label=="White"),
aes(x=.9, y=race_label, label="Beginning Rate"),
color="slategray2", size=2, vjust=-1,
fontface="bold") +
geom_text(data=filter(reopenings_wide_nototal,label_2=="Baseline Period - May 8, 2020" & race_label=="White"),
aes(x=2.5, y=race_label, label="1 Week After"),
color="darkgoldenrod1", size=2, vjust=-1,
fontface="bold") +
geom_rect(data=reopenings_wide_nototal, aes(xmin=20, xmax=25, ymin=-Inf, ymax=Inf), fill="grey") +
geom_text(data=reopenings_wide_nototal, aes(label=round(diff_1week, 1), y=race_label, x=22.5), fontface="bold", size=3) +
geom_text(data=filter(reopenings_wide_nototal,label_2=="Baseline Period - May 8, 2020" & race_label=="White"),
aes(x=22.5, y=race_label, label="Difference in 7-day rate"),
color="black", size=3, vjust=-.5, fontface="bold") +
scale_x_continuous(expand=c(0,0), limits=c(0, 25)) +
scale_y_discrete(expand=c(.2,0))+
theme_bw()+
labs(x="7-day average case rate", y="",title="Case Rates 1 Week After Reopenings and Closures")