The purpose of this code repository is to create a one-stop-shop for code for most common static and interactive data visualizations used for projects with examples from previous RDA projects.

Handy Reference Links:

Functions and Automation

Race Counts Indicator prep: Weighted Average and RC Functions

Code includes:

  • Applying Weighted Average functions and Race Counts functions to Proximity to Hazards indicator.

  • Weighted Average functions calculate the percentage of each group’s target geo (county/state) population within each sub geo. The function works from census tract to County or State aggregation. See script: “W:\RDA Team\R\Functions\Cnty_St_Wt_Avg_Functions.R”

  • Race Counts Functions calculate the number of _rate values, index of disparity, ranks, population or sample variance, county and state Z-scores, quadrant colors. See script: “W:\Project\RACE COUNTS\2022_v4\RaceCounts\RC_Functions.R”

# Proximity to Hazards Weighted Avg for RC 2022
library(dplyr)
library(data.table)
library(tidycensus)
library(sf)
library(RPostgreSQL)
library(stringr)
library(tidyr)
options(scipen=999)

###### SET UP WORKSPACE #######

# load census api key
#census_api_key("25fb5e48345b42318ae435e4dcd28ad3f196f2c4", install = TRUE)
#readRenviron("~/.Renviron")

# load the PostgreSQL driver
drv <- dbDriver("PostgreSQL")

# create connection for rda database
con <- dbConnect(drv, dbname = "rda_shared_data",
                 host = "aws-postgres-db.c5udgz7ro8hq.us-west-2.rds.amazonaws.com", port = 5432,
                 user = "postgres", password = "password")


#set source for RC Functions script
source("W:/RDA Team/R/Functions/Cnty_St_Wt_Avg_Functions.R")


##### GET INDICATOR DATA ######
# load indicator data
ind_df <- st_read(con, query = "select ct_geoid AS geoid, cleanup from built_environment.oehha_ces4_tract_2021")
ind_df <- dplyr::rename(ind_df, sub_id = geoid, indicator = cleanup)       # rename columns for functions


############# COUNTY ##################

###### DEFINE VALUES FOR FUNCTIONS ######

# set values for weighted average functions - You may need to update these
year <- c(2020)                   # define your data vintage
subgeo <- c('tract')              # define your sub geolevel: tract (unless the WA functions are adapted for a different subgeo)
targetgeolevel <- c('county')     # define your target geolevel: county (state is handled separately)
survey <- "acs5"                  # define which Census survey you want
pop_threshold = 250               # define population threshold for screening

##### CREATE COUNTY GEOID & NAMES TABLE ######  You will NOT need this chunk if your indicator data table has target geolevel names already
targetgeo_names <- county_names(vars = vars_list, yr = year, srvy = survey)
targetgeo_names <- select(as.data.frame(targetgeo_names), target_id = GEOID, target_name = NAME) %>%   # get targetgeolevel names
  mutate(target_name = sub(" County, California", "", target_name))           # rename columns
targetgeo_names <- distinct(targetgeo_names, .keep_all = FALSE)                                        # keep only unique rows, 1 per target geo
#####


##### GET SUB GEOLEVEL POP DATA ######
pop <- update_detailed_table(vars = vars_list, yr = year, srvy = survey)  # subgeolevel pop

# transform pop data to wide format 
pop_wide <- lapply(pop, to_wide)
#### add target_id field, you may need to update this bit depending on the sub and target_id's in the data you're using
pop_wide <- as.data.frame(pop_wide) %>% mutate(target_id = substr(GEOID, 1, 5))  # use left 5 characters as target_id
pop_wide <- dplyr::rename(pop_wide, sub_id = GEOID)                              # rename to generic column name for WA functions

# calc target geolevel pop and number of sub geolevels per target geolevel
pop_df <- targetgeo_pop(pop_wide)


##### COUNTY WEIGHTED AVG CALCS ######
pct_df <- pop_pct(pop_df)   # calc pct of target geolevel pop in each sub geolevel
wa <- wt_avg(pct_df)        # calc weighted average and apply reliability screens
wa <- wa %>% left_join(targetgeo_names, by = "target_id")     # add in target geolevel names


############# STATE CALCS ##################
# get and prep state pop
ca_pop_wide <- state_pop(vars = vars_list, yr = year, srvy = survey)

# calc state wa
ca_pct_df <- ca_pop_pct(ca_pop_wide)
ca_wa <- ca_wt_avg(ca_pct_df)


############ JOIN COUNTY & STATE WA TABLES  ##################
wa <- union(wa, ca_wa) 
wa <- rename(wa, geoid = target_id, geoname = target_name)   # rename columns for RC functions
wa <- wa %>% dplyr::relocate(geoname, .after = geoid)        # move geoname column

d <- wa
#View(d)

############## CALC RACE COUNTS STATS ##############
############ To use the following RC Functions, 'd' will need the following columns at minimum: 
############ county_id and total and raced _rate (following RC naming conventions) columns. If you use a rate calc function, you will need _pop and _raw columns as well.

#set source for RC Functions script
source("W:/Project/RACE COUNTS/2022_v4/RaceCounts/RC_Functions.R")

d$asbest = 'min'    #YOU MUST UPDATE THIS FIELD AS NECESSARY: assign 'min' or 'max'

d <- count_values(d) #calculate number of "_rate" values
d <- calc_best(d) #calculate best rates -- be sure to define 'asbest' accordingly before running this function.
d <- calc_diff(d) #calculate difference from best
d <- calc_avg_diff(d) #calculate (row wise) mean difference from best
d <- calc_s_var(d) #calculate (row wise) population or sample variance. be sure to use calc_s_var for sample data or calc_p_var for population data.
d <- calc_id(d) #calculate index of disparity


#split STATE into separate table and format id, name columns
state_table <- d[d$geoname == 'California', ]

#calculate STATE z-scores
state_table <- calc_state_z(state_table)
state_table <- rename(state_table, state_id = geoid, state_name = geoname)
#View(state_table)

#remove state from county table
county_table <- d[d$geoname != 'California', ]

#calculate COUNTY z-scores
county_table <- calc_z(county_table)
county_table <- calc_ranks(county_table)
county_table <- rename(county_table, county_id = geoid, county_name = geoname)
#View(county_table)

###update info for postgres tables###
county_table_name <- "arei_hben_haz_weighted_avg_county_2022"
state_table_name <- "arei_hben_haz_weighted_avg_state_2022"
indicator <- "Proximity to Hazards Score (sum of weighted EnviroStor cleanup sites within buffered distances to populated blocks of census tracts)"
source <- "CalEnviroScreen 4.0 https://oehha.ca.gov/calenviroscreen/report/calenviroscreen-40"


#send tables to postgres
#to_postgres()

One Pager Automation

Code includes:

  • Code for prepping data and then running it in a loop
  • Applies loop to a separate RMD template that exports into one-pager word documents by City Council District and Supervisorial District
  • See the RMD one-pager template here: “W:\Project\RDA Team\LAFed\R\LAFed\One_Pager_Template.Rmd”
# Run automated reports

#### Set up workspace ####
#library(here)
#library(sf)
#library(RPostgreSQL)
#library(dplyr)

# loads the PostgreSQL driver
#drv <- dbDriver("PostgreSQL")

# create connection for rda database
#drv <- dbDriver("PostgreSQL")
#con <- dbConnect(drv, dbname = "lafed22",
#                    host = "aws-postgres-db.c5udgz7ro8hq.us-west-2.rds.amazonaws.com", port = 5432,
#                   user = "postgres", password = "password")

# source functions

#source("W:/Project/RDA Team/LAFed/R/LAFed/lafed_data_functions.R")

#### Prep Data: SD ####

# prep data of statistics by CD/SD

# read SD in data from postgres

#sd<-dbGetQuery(con, "SELECT * FROM supervisor_rates_long")

# clean up and prep sd table

#sd <- sd %>%
 # mutate(group = recode(group,
 #                       sup_dist_1 = "1",
 #                       sup_dist_2 = "2",
 #                       sup_dist_3 = "3",
 #                       sup_dist_4 = "4",
 #                       sup_dist_5 = "5"))%>%
 # filter(!grepl('total', group))%>%
 # mutate_if(is.numeric, round, 1)%>%
 # rename("district"="group")

#sd <- cv_pop_prep(sd)
#sd <- add_labels(sd)
#sd <- remove_unknown_demo(sd)
#sd <- clean_surveyresponse(sd)

#sd<-sd%>%
#  mutate(geo="Supervisorial District")
#
#make survey responses lower case

#sd$survey_response_label<-tolower(sd$survey_response_label)

# revise order of columns
#sd<-sd[,c(1,11,2,3,4,5,6,7,8,9,10)]


# manipulate data to get top 4 responses/values to each question

#sd<-sd%>%
#  group_by(district, geo, survey_question)%>%
#  arrange(-response_rate, .by_group = TRUE)%>%
#  slice_head(n=4)%>% ##choose threshold for top "n" responses to question
#  mutate(rank = row_number()) #add rank # for subsetting in template

# split sd table into list
#sd_list <- split(sd, sd$district, drop=FALSE)

#### Prep Data: CD ####

# read in CD data:

#cd <- dbGetQuery(con, "SELECT * FROM city_council_rates_long") 

# clean up and prep cd table

#cd <- cd %>%
#  mutate(group = recode(group,
#                        council_dist_1 = "1",
#                        council_dist_2 = "2",
#                        council_dist_3 = "3",
#                        council_dist_4 = "4",
#                        council_dist_5 = "5",
#                        council_dist_6 = "6",
#                        council_dist_7 = "7",
#                        council_dist_8 = "8",
#                        council_dist_9 = "9",
#                        council_dist_10 = "10",
#                        council_dist_11 = "11",
#                        council_dist_12 = "12",
#                        council_dist_13 = "13",
#                        council_dist_14 = "14",
#                        council_dist_15 = "15"))%>%
#  filter(!grepl('total', group))%>%
#  rename(district=group)%>%
#  mutate_if(is.numeric, round, 1)
 


#cd <- cv_pop_prep(cd)
#cd <- add_labels(cd)
#cd <- clean_surveyresponse(cd)

#cd<-cd%>%
#  mutate(geo="City Council District")

#make survey responses lower case

#cd$survey_response_label<-tolower(cd$survey_response_label)

# revise order of columns
#cd<-cd[,c(1,11,2,3,4,5,6,7,8,9,10)]

# manipulate data to get top 4 responses/values to each question

#cd<-cd%>%
#  group_by(district, geo, survey_question)%>%
#  arrange(-response_rate, .by_group = TRUE)%>%
#  slice_head(n=4)%>% ##choose threshold for top "n" responses to question
#  mutate(rank = row_number()) #add rank # for subsetting in template

# custom order factor levels to be numerical order

#cd$district <- factor(cd$district ,levels = c("1","2","3","4","5","6","7","8","9","10","11","12",
#                                              "13","14","15"))

# split cd table into list
#cd_list <- split(cd, cd$district, drop=FALSE)


#### Loop through CD / SD data lists #### 

# loop through for CD one pagers:

# for(i in seq_along(cd_list)) {

#  dat <- cd_list[[i]]

#  rmarkdown::render(input = "W:/Project/RDA Team/LAFed/R/LAFed/One_Pager_Template.Rmd",
#                    output_format = "word_document",
#                   output_file = paste0("CD_",i, ".doc"),
#                    output_dir = here("W:/Project/RDA Team/LAFed/Deliverables/One Pagers"))
# }


# loop through for SD one pagers:

# for(i in seq_along(sd_list)) {

 # dat <- sd_list[[i]]

#  rmarkdown::render(input = "W:/Project/RDA Team/LAFed/R/LAFed/One_Pager_Template.Rmd",
#                    output_format = "word_document",
#                    output_file = paste0("SD_", i, ".doc"),
 #                   output_dir = here("W:/Project/RDA Team/LAFed/Deliverables/One Pagers"))
#}

#dbDisconnect(con)

ACS UPDATE

Code includes:

  • Automated downloads of ACS data from the Census API and uploading to our PgAdmin database in ‘wide’ format.
  • See the ACS Update template here: “W:\RDA Team\R\ACS Updates\Economic\Update Detailed Tables_B19301.R”
# load update functions
## Be sure to check the ZCTA data year matches the data year you are calling here ##
source("W:/RDA Team/R/ACS Updates/ACS Update Functions - template.R")


# loads the PostgreSQL driver
drv <- dbDriver("PostgreSQL")

# create connection for rda database
con <- dbConnect(drv, dbname = "rda_shared_data",
                 host = "aws-postgres-db.c5udgz7ro8hq.us-west-2.rds.amazonaws.com", port = 5432,
                 user = "postgres", password = "password")

# load ACS vars. Manually define year and ACS data set, 5-year, 1-year etc.
acs19 <-  load_variables(2019, "acs5", cache = TRUE)


##### Update Table #####
## UPDATE POSTGRES SCHEMA, TABLE NAME, AND TABLE COMMENT ##

# define table destination schema and final table name here, to be used in column comments in the rest of the script.
# define the table source, to be used in the table comments.
table_schema <- "economic."
table_name <- "acs_5yr_b19301_multigeo_2019"
table_comment_source <- "ACS 2015-2019 5-Year Estimate Table B19301."


# MANUAL UPDATE REQUIRED--QUERY YOUR VARIABLES FROM THE ACS Variable List (B01001A-I or B01001 are the examples here)

# define variables - Select either raced tables or Total table option from below.
    # For all raced tables use the following:
    #vars_list <- subset(acs19, name %like% "B19301" & !(name %like% "B19301_"))
    # this query example excludes the Total detailed table (e.e B01001) because the column format is different (more fields, more age groups). It should be handled separately. 
  
    # For total table use the following:
    vars_list <- subset(acs19, name %like% "B19301_")


# Character vector of variable names
vars_list <- vars_list$name


# get data for all geographies with ACS Update functions
df <- update_detailed_table(vars = vars_list, yr = 2019, srvy = "acs5")

# get metadata
df_metadata <- update_detailed_table_metadata(vars = vars_list, yr = 2019, srvy = "acs5")


# transform to wide data format 
df_wide <- 
  lapply(df, function(x) x %>% 
           
           # rename estimate and moe to be better for wide format
           rename("e" = estimate,
                  "m" = moe) %>% 
           
           # make wide
           pivot_wider(id_cols = c(GEOID, NAME, geolevel),
                       names_from = variable, 
                       values_from = c(e, m),
                       names_glue = "{variable}{.value}") # specify the order of col names
  )

# collapse list to make a multigeo table
df_wide_multigeo <- do.call("rbind", df_wide)

# make colnames lower case
colnames(df_wide_multigeo) <- tolower(colnames(df_wide_multigeo))


# make character vector for field types in postgresql db
charvect = rep('numeric', dim(df_wide_multigeo)[2])

# change data type for first three columns
charvect[1:3] <- "varchar" # first three are characters for the geoid and names

# add names to the character vector
names(charvect) <- colnames(df_wide_multigeo)


##### Export Table to postgres db #####
# write schema and table name here
#dbWriteTable(con, c("economic", "acs_5yr_b19301_multigeo_2019"), df_wide_multigeo, 
 #            overwrite = TRUE, row.names = FALSE,
 #            field.types = charvect)


# TABLE COMMENTS

# write comment to table, and the first three fields that won't change.
#table_comment_df_wide <- paste0("COMMENT ON TABLE ", table_schema, table_name,  " IS '", table_comment_source, 
#" Wide data format, multigeo table with state, county, place, PUMA, ZCTA (CA) and #tract.';

#COMMENT ON COLUMN ", table_schema, table_name, ".geoid IS 'Census geographic ID';
#COMMENT ON COLUMN ", table_schema, table_name, ".name IS 'Census name of #geography.';
#COMMENT ON COLUMN ", table_schema, table_name, ".geolevel IS 'Geographic level #(state, county, place, PUMA, ZCTA (CA) or tract). Downloaded using template scripts here: W > RDA Team > R > ACS Updates > Economic > Update Detailed Tables_B19301.R';")

# send table comment + first three (static) col comments to database
#dbSendQuery(conn = con, table_comment_df_wide)


# COLUMN COMMENTS

# format metadata
#colcomments <-
  
  # change the metadata name depending on the table -- should improve this to not be hard coded
#  df_metadata %>% 
#  mutate(variable = tolower(variable),
#         label = gsub("'", "", label),
#         label = paste0(gsub("!!", " ", label), ": ", concept)) # remove apostrophes from the metadata

# make character vectors for column names and metadata. 
#colcomments_charvar <- colcomments$label
#colname_charvar <- colcomments$variable


# loop through the columns that will change depending on the table. This loop writes a comment for the estimate and MOE for a particular column, then sends to the db.
#for (i in seq_along(colname_charvar)){
  
 # sqlcolcomment <-  
    
    # change the table name here depending -- could improve this to not be hardcoded.
 #   paste0("COMMENT ON COLUMN ", table_schema, table_name, ".", 
   #        colname_charvar[[i]], "e IS '", colcomments_charvar[[i]], "'; COMMENT ON #COLUMN ", table_schema, table_name, ".",
     #      colname_charvar[[i]], "m IS 'MOE for ", colcomments_charvar[[i]], "';" )
  
  # send sql comment to database
 # dbSendQuery(conn = con, sqlcolcomment)
#}

# index the table
#dbSendQuery(conn = con, 
 #           paste0("create index ", table_name, "_geoid on ", table_schema, #table_name, " (geo

Interactive Visualizations

Bar Graphs

Single Bar Graph with Total Line

This single bar graph shows the annual percentage of of mothers who did not receive prenatal care until after their first trimester in pregnancy by supervisorial district, along with the Los Angeles County percentage for comparison. The graph clearly shows that Districts 1, 2 and 4 both exceed the county percentage for women who receive late prenatal care. This graph was used for the First 5 LA project.

Code includes:

  • Horizontal y intercept line with a label
  • Pop ups with estimates and percentages
library(RPostgreSQL)
library(highcharter)
library(dplyr)
library(tidyr)


# loads the PostgreSQL driver
drv <- dbDriver("PostgreSQL")

# create connection for rda database
con_f5la <- dbConnect(drv, dbname = "f5la_v2",
                 host = "aws-postgres-db.c5udgz7ro8hq.us-west-2.rds.amazonaws.com", port = 5432,
                 user = "postgres", password = "password")

####### ESTABLISH HIGHCHART THEME ############

  thm <- hc_theme(
  colors = c("#033E8C", "#EC098D", "#04B2D9","#26BFB0", "#64BF4B"),
  chart = list(
    backgroundColor = ""
  ),
  title = list(
    style = list(
      color = "#333333",
      fontFamily = "Franklin Gothic Condensed"
    )
  ),
  subtitle = list(
    style = list(
      color = "#666666",
      fontFamily = "Franklin Gothic"
    )
  ),
  legend = list(
    itemStyle = list(
      fontFamily = "Franklin Gothic",
      color = "black"
    ),
    itemHoverStyle = list(
      color = "gray"
    )
  )
)



#grab data for visualizing

prenatal<-dbGetQuery(con_f5la, "SELECT * FROM lamb_prenatal_care_supdist_2016")

#remove the 'total' row because we will plot it as a y intercept line to match other graphs

prenatal<-prenatal%>%
  filter(!grepl('Los Angeles County', bos_name))%>%
   mutate(num_total = as.character(formattable::comma(num_total, 0)))

#graph

hchart(prenatal, "column",
         hcaes(x=bos_name, y=pct_total),
          tooltip = list(pointFormat = "Percent: {point.pct_total:.1f}%<br>
                         Number: {point.num_total}"))%>%
      

      hc_tooltip(crosshairs = TRUE)%>%
     
     hc_xAxis(title = list(text = "Supervisorial Districts"))%>%
    hc_yAxis(title = list(text = ""), 
              labels = list(
      formatter = JS("function () {
                      return Math.abs(this.value) + '%';
                                    }")), min=0,max=15,
       plotLines = list(
      list(
        label = list(text = "LA County = 8.2%", align = "left", y=-10, x=600),
        color = "#FF0000",
        width = 2,
        value = 8.2,
        zIndex=5
      )
    ))%>%
    hc_title(
    text = "Annual Percentage of Mothers Who Entered Prenatal Care After 1st Trimester of Pregnancy",
    margin = 20,
    align = "left",
    style = list(useHTML = TRUE, fontWeight="bold")
    )%>%
      hc_subtitle(
    text =  '<a href="http://publichealth.lacounty.gov/mch/lamb/LAMB.html">Los Angeles County Department of Public Health Los Angeles Mommy and Baby Project</a>',
     style = list( fontWeight = "bold"),
    align = "left"
  )%>%
     hc_legend(enabled = TRUE)%>%
hc_add_theme(thm)

Single Horizontal Bar Graph with Export Option

This horizontal single bar graph shows the race and ethnicity percentages by PUMA in the Wilmington Best Start Geography. The hover pop up shows both the percentage and estimate for each racial group. There is also an export option on the graph that allows the graph to be downloaded as an image, with data labels appearing on the static download image.

Code includes:

  • Setting up a standard highchart theme and implementing it to graph
  • Setting up customized fonts and captions
  • Creating highchart export option that includes data labels
  • Data caption with a hyperlink
### Set up workspace ####
#install.packages("extrafont")
library(extrafont)
library(here)
library(dplyr)
library(RPostgreSQL)
library(formattable)
library(svglite)
library(stringr)
library(tidyr)
library(showtext)
library(scales)
library(highcharter)


options(scipen=999)

# set global options to ensure that comma separator is a comma for highchart graphs

lang <- getOption("highcharter.lang")
lang$thousandsSep <- ","
options(highcharter.lang = lang)

# loads the PostgreSQL driver
drv <- dbDriver("PostgreSQL")

con <- dbConnect(drv, dbname = "tnp",
                 host = "aws-postgres-db.c5udgz7ro8hq.us-west-2.rds.amazonaws.com", port = 5432,
                 user = "postgres", password = "password")

####################
#### Style Guide ###
####################

## COLORS: There is only blue ##
black <- "#000000"
mainblue <- "#22BCB8"
textgrey <- "#919191"
linecolor<-"#FF0000"

# define fonts in chart
font_title <- "Arial"
font_subtitle <- "Arial"
font_caption <- "Arial"
font_bar_label <- "Arial"
font_axis_label <- "Arial"
font_table_text<-"Arial"

####################################
########Highchart theme#############
####################################

#### Define Highcharts theme #### 
tnp_theme <- hc_theme(
  colors = c("#22BCB8"  # main blue
            
  ),
  chart = list(
    backgroundColor = "white",
    style = list(
      fontFamily = font_subtitle
    ) 
  ),
  plotOptions = 
    list(
      line = 
        list(
          marker = 
            list(
              enabled = FALSE
            )
        )
    ),
  title = list(
    style = list(
      color = black,
      fontFamily = font_title,
      textAlign="left",
      fontsize='200px'
    )
  ),
  subtitle = list(
    style = list(
      color = black, # medium grey
      fontFamily = font_subtitle,
      textAlign="left",
      fontsize='100px'
    )
  ),
  caption = list(
    style = list(
      color = black,
      fontFamily = font_caption,
      textAlign = "left",
      fontsize="12px"
    ),
    useHTML = TRUE
  ),
  axis = list(
    style = list(
      color = black,
      fontFamily = font_axis_label,
      fontSize='50px'
    )
  ),
  legend = list(
    itemStyle = list(
      fontFamily = font_axis_label,
      color = black
    ),
    itemHoverStyle = list(
      fontFamily = font_table_text,
      color = black
    )
  )
)

###################################
###Set Up Data Captions and Info###
###################################

### Info: Race 
lang <- getOption("highcharter.lang")
lang$thousandsSep <- ","
options(highcharter.lang = lang)
# name your issue area:

# name your issue area:
issue <-  "Demographics"

# write the name of your indicator and geography (CLB, WIL, or Reg4) here: [indicator name_geography]
var <- "Race_WIL"

# Title of chart here:
title_text <- "Population by Race and Ethnicity"

#Subtitle text if necessary:
subtitle_text<-""

#Geography level
geolevel<-"Geography: Wilmington"

# Write your data source here:
datasource <- "Data source: American Community Survey Public Use Microdata, 2015-2019 5-Year Estimate."

# write a brief version of your methodology here if necessary, which INCLUDES language about race coding:
methodbrief<- "Data Note: Race groups are Latinx-exclusive, except AIAN and NHPI, which include all people who identify as AIAN or NHPI\nincluding in combination with other races and ethnicities."

# grab view or table from postgres that will be visualized
# see indicator matrix for list of views and tables for each indicator: 
# https://advancementproject.sharepoint.com/:x:/r/sites/Portal/_layouts/15/Doc.aspx?sourcedoc=%7Be5a1f3e6-15ca-480e-b33e-07bbfd55ae26%7D&action=editnew&cid=d736e070-c6f6-4063-8a1d-efbf3b48beb0

##########################
###########Graph##########
##########################

df<-dbGetQuery(con, "SELECT * FROM puma_race_2019")

#### Graph: Race ####


#Data frame prep: convert data frame to long if necessary ; Recode 'Race Values' as necessary for nicer race labeling on the graph
df<-df%>%
  mutate(race_label=ifelse(race %in% 'AIAN Alone or in Combination', 'American Indian/Alaska Native', 
                           ifelse(race %in% 'Asian NL', 'Asian',
                                  ifelse(race %in% "Two or More NL", 'Two or More',
                                         ifelse(race %in% 'Latinx', 'Latinx',
                                                ifelse(race %in% 'NHPI Alone or in Combination', 'Native Hawaiian/Pacific Islander',
                                                       ifelse(race %in% 'Black NL', 'Black',
                                                              ifelse(race %in% 'White NL', 'White',
                                                                     ifelse(race %in% 'Other NL', 'Other',
                                                                            ifelse(race %in% 'Total', 'Total', 'Blank'))))))))))%>%
  mutate(race_pct=race_pct*100)
  
#Note: We DO NOT need to divide percents by 100 for highcharts. If percents in df are in decimal form, then MULTIPLY by 100 

plot_pums_race<-df %>% 
  filter(!is.na(race_pct) & best_start =='Wilmington' & race_label !='Blank') %>% 
  arrange(-race_pct) %>% 
  hchart(
    type = "bar", 
    hcaes(y = round(race_pct, 2), x = race_label), 
    color = mainblue,
    tooltip = list(
      pointFormat = "Total: {point.race_tot:,.0f}<br>
      Rate: {point.race_pct:.1f}%"),
  ) %>% 
  hc_tooltip(crosshairs = TRUE) %>% 
  
  hc_xAxis( title = list(text = ""),
            labels=list(style=list(fontSize='15px'))) %>% 
  
  hc_yAxis( title = list(text = ""),
            labels = list(format = "{value}%")) %>%  ##this is for rate indicators only adding % signs 
  
  # title elements
  hc_title(
    text = paste0(title_text),
    #margin = 20,
    align = "left", 
    style = list(useHTML = TRUE, fontWeight = "bold", fontSize='30px')
  ) %>% 
  
  hc_subtitle(
    text = paste0(subtitle_text)
    ) %>% 
  hc_caption(
    text = paste0("<br>",geolevel,"<br>",
                  '<a href="https://jzhang514.github.io/TNP/Additional-Resources">Data Source: American Community Survey, 2015-2019 5-Year Estimate.</a>',
                  "<br>",methodbrief),
    style=list(fontSize='15px')
    #align = "left"
  ) %>% 
  
  hc_add_theme(tnp_theme)%>%
  hc_exporting(
    enabled = TRUE, sourceWidth=900, sourceHeight=600, 
    chartOptions=list(plotOptions=list(series=list(dataLabels=list(enabled=TRUE, format='{y} %')))),
    filename = "plot"
  )


# review the plot

plot_pums_race

Grouped Bar Graph with Data Labels

This grouped graph shows households that earn above and below the Real Cost Measure in the Region 4 Best Start Geographies (Central Long Beach and Wilmington). Data is from the 2019 United Way Real Cost Measure report and was used for the First 5 LA project.

Code includes:

  • Creating a highchart with static data labels
  • Creating highchart export option that includes data labels
  • Data caption with a hyperlink
##################
######Set up######
#################

library(ggplot2)
library(dplyr)
library(stringr)
library(RPostgreSQL)
library(tidyr)
library(scales)
library(knitr)
library(data.table)
library(sp)
library(sf)
library(rgdal)
library(rpostgis)
library(highcharter)
library(kableExtra)

## set up options for comma separators

lang <- getOption("highcharter.lang")
lang$thousandsSep <- ","
options(highcharter.lang = lang)

# Data Setup
pw <- {
  "password"
}
# loads the PostgreSQL driver
drv <- dbDriver("PostgreSQL")

con <- dbConnect(drv, dbname = "f5la_v2",
                 host = "aws-postgres-db.c5udgz7ro8hq.us-west-2.rds.amazonaws.com", port = 5432,
                 user = "postgres", password = pw)


#############################################
####### ESTABLISH HIGHCHART THEME ############
##############################################

thm <- hc_theme(
  colors = c("#22BCB8", "#009CDB", "#332985","#EC098C", "#54B847"),
  chart = list(
    backgroundColor = ""
  ),
  title = list(
    style = list(
      color = "#333333",
      fontFamily = "Franklin Gothic Condensed"
    )
  ),
  subtitle = list(
    style = list(
      color = "#666666",
      fontFamily = "Franklin Gothic"
    )
  ),
  legend = list(
    itemStyle = list(
      fontFamily = "Franklin Gothic",
      color = "black"
    ),
    itemHoverStyle = list(
      color = "gray"
    )
  )
)


#####Grab data from postgres
rcm_bsc<-dbGetQuery(con, "SELECT * FROM real_cost_measure_bsc_new")%>%
  select(1,5,7)%>%
  filter(grepl('Central Long Beach|Wilmington', best_start))



#####Create stacked bar graph for rcm by bsc

#Convert data wide to long for variables of interest
rcm_long<-rcm_bsc%>%
  gather( hh_type, number, num_hh_below_rcm:num_hh_above_rcm, factor_key=TRUE)%>%
  mutate(variable=ifelse(hh_type %in% 'num_hh_above_rcm', 'Number of Households Above the Real Cost Measure', 'Number of Households Below the Real Cost Measure'))%>%
  mutate(num_label=as.character(formattable::comma(number, 0)))



#highcharts 


hchart(rcm_long, "column",
       hcaes(x=best_start, y=number, group=variable),
       dataLabels = list(enabled = TRUE,
                         format = '{point.num_label:.1f}'),
       tooltip = list(pointFormat = "{point.num_label:,.0f}"))%>%
  hc_tooltip(crosshairs = TRUE)%>%
  hc_xAxis(title = list(text = ""))%>%
  hc_yAxis(title = list(text = ""))%>%
  hc_title(
    text = "Number of Households Earning less than the Real Cost Measure",
    margin = 20,
    align = "left",
    style = list(useHTML = TRUE, fontWeight="bold")
  )%>%
  hc_caption(text="Source Struggling to Stay Afloat: The Real Cost Measure in California 2019. United Ways of California. http://www.unitedwaysca.org/realcost")%>%
  hc_legend(enabled = TRUE, layout='vertical')%>%
  hc_add_theme(thm)%>% 
  hc_exporting(
    enabled = TRUE, sourceWidth=800, sourceHeight=500, 
    
    chartOptions=list(subtitle=NULL),
    filename = "plot"
  )

Single Bar Graph made with Function and Custom Theme

Code includes:

  • Creating a highchart graph using custom-built function
  • Creating a highchart with a custom theme that includes a black background
####User-Defined R Functions to create Visuals for the LAFed Project####
#Last Updated: 06/10/22
#Author: Maria Khan

# The following file aims to serve as a visuals template for the 
# 2022 APCA project with the LA County Federation of Labor and 
# Unemployed Workers United for the People's Project initiative.

# Overall Research Question: What do LA County/LA City residents need 
# to feel economically safe and stable?

# Goal: Analysis to aid the advocacy efforts of LA Fed and UWU for greater 
# protection of laborers and an increase in mutual aid from the supervisorial 
# and city council legislators.

#### Set up workspace ####
library(extrafont)
library(tidyverse)
library(here)
library(dplyr)
library(data.table)
# library(tidycensus)
library(sf)
library(ggplot2)
library(RPostgreSQL)
library(formattable)
library(svglite)
library(stringr)
library(tidyr)
library(showtext)
library(scales)
library(kableExtra)
library(flextable)
library(highcharter)

####Load in data from the LAFed Survey and Prep ####
drv <- dbDriver("PostgreSQL")
con_lf <- dbConnect(drv, dbname = "lafed22",
                    host = "aws-postgres-db.c5udgz7ro8hq.us-west-2.rds.amazonaws.com", port = 5432,
                    user = "postgres", password = "password")

db_lafed_survey <- dbGetQuery(con_lf, "SELECT * FROM lafed_survey_data")
db_age_rates <- dbGetQuery(con_lf, "SELECT * FROM age_rates")
db_gender_rates <- dbGetQuery(con_lf, "SELECT * FROM gender_rates")
db_income_rates <- dbGetQuery(con_lf, "SELECT * FROM income_rates")
db_race_rates <- dbGetQuery(con_lf, "SELECT * FROM race_rates")
db_tenure_rates <- dbGetQuery(con_lf, "SELECT * FROM tenure_rates")
# db_union_rates <- dbGetQuery(con_lf, "SELECT * FROM union_rates")
db_sd_rates <- as.data.frame(dbGetQuery(con_lf, "SELECT * FROM supervisor_rates"))
db_cd_rates <- as.data.frame(dbGetQuery(con_lf, "SELECT * FROM city_council_rates"))

#Cleaning up Race Rates to take away unknown because the sample size is small
db_race_rates <- db_race_rates[-c(9),]

#Re-code Categorical Values to Better Names 
db_race_rates <- db_race_rates %>%
  mutate(race = recode(group, race_latinx = "Latinx", 
                       race_black = "Black",
                       race_asian = "Asian", 
                       # race_nhpi = "Native Hawaiian/ \nPacific Islander",
                       # race_aian = "American Indian/ \nAlaskan Native",
                       # race_mena = "Middle Eastern/ \n North African   ",
                       # updating to short names to avoid axis labeling issue
                       race_nhpi = "NHPI",
                       race_aian = "AIAN",
                       race_mena = "MENA",
                       race_white = "White", 
                       race_other = "Other"))


####People's Project STYLE GUIDE####

## COLORS ##
pink <- "#FC8EB5"
lite_pink <- "#FCC7DA"
yellow <- "#FCEC59"
lite_yellow <- "#FDF3B5"
blue <- "#7ACBFB"
lite_blue <- "#BFE5FB"
green <- "#6AB268"
lite_green <- "#B5D8B5"

black <- "#020202"
white<-"#FFFFFF"
dark_grey <- "#2B2B2B"
mid_grey <- "#8C8C8C"
lite_grey <- "#BFBFBF"

## FONTS ## 

## NOTE the Owners font was downloaded on Adobe and installed into Windows by right-clicking the font  files and hitting "install"


showtext_auto()
loadfonts()


# define fonts in chart
font_title <- "Owners Text Black"
font_subtitle <- "Owners Text Medium"
font_caption <- "Owners Text Medium"
font_bar_label <- "Owners Text"
font_axis_label <- "Owners Text Medium"
font_table_text<-"Owners Text"

# a highchart theme 
lafed_theme <- hc_theme(
  colors = c("#FC8EB5", "#FCEC59", "#7ACBFB", "#6AB268", "#FCC7DA"  # LA Fed colors
  ),
  chart = list(
    backgroundColor = white,
    style = list(
      fontFamily = font_subtitle,
      color=white
    ) 
  ),
  plotOptions = 
    list(
      line = 
        list(
          marker = 
            list(
              enabled = FALSE
            )
        )
    ),
  title = list(
    style = list(
      color = black,
      fontFamily = font_title,
      textAlign="left",
      fontsize='200px'
    )
  ),
  subtitle = list(
    style = list(
      color = black, 
      fontFamily = font_subtitle,
      textAlign="left",
      fontsize='100px'
    )
  ),
  caption = list(
    style = list(
      color = black,
      fontFamily = font_caption,
      textAlign = "left",
      fontsize="10px"
    ),
    useHTML = TRUE
  ),
  axis = list(
    style = list(
      color = black,
      fontFamily = font_axis_label,
      fontSize='50px'
    )
  ),
  
  xAxis=list(
    labels=list(
      style=list(
        color=black,
        fontFamily=font_axis_label,
        width=120,  #argument to modify the width of the labels,
        spacingLeft = "150px",
        fontSize="12px"
      ))
  ),
  
  yAxis=list(
    labels=list(
      style=list(
        color=black,
        fontFamily=font_axis_label,
        fontSize="12px",
        margin = 50
      ))
  ),
  
  
  legend = list(
    itemStyle = list(
      fontFamily = font_axis_label,
      color = black
    ),
    itemHoverStyle = list(
      fontFamily = font_table_text,
      color = black
    )
  )
)


lafed_theme_blk <- hc_theme(
  colors = c("#FC8EB5", "#FCEC59", "#7ACBFB", "#6AB268", "#FCC7DA"  # LA Fed colors
  ),
  chart = list(
    backgroundColor = black,
    style = list(
      fontFamily = font_subtitle,
      color=white
    ) 
  ),
  plotOptions = 
    list(
      line = 
        list(
          marker = 
            list(
              enabled = FALSE
            )
        )
    ),
  title = list(
    style = list(
      color = white,
      fontFamily = font_title,
      textAlign="left",
      fontsize='200px'
    )
  ),
  subtitle = list(
    style = list(
      color = white, 
      fontFamily = font_subtitle,
      textAlign="left",
      fontsize='100px'
    )
  ),
  caption = list(
    style = list(
      color = white,
      fontFamily = font_caption,
      textAlign = "left",
      fontsize="10px"
    ),
    useHTML = TRUE
  ),
  axis = list(
    style = list(
      color = white,
      fontFamily = font_axis_label,
      fontSize='50px',
      textOutline= '0px'
    )
  ),
  
  xAxis=list(
    labels=list(
      style=list(
        color=white,
        fontFamily=font_axis_label,
        width=120,  #argument to modify the width of the labels
        spacingLeft = "150px",
        fontSize="12px",
        textOutline = "0px"
      ))
  ),
  
  yAxis=list(
    labels=list(
      style=list(
        color=white,
        fontFamily=font_axis_label,
        fontSize="12px",
        textOutline = "0px",
        margin = 50
      ))
  ),
  
  
  legend = list(
    itemStyle = list(
      fontFamily = font_axis_label,
      color = white
    ),
    itemHoverStyle = list(
      fontFamily = font_table_text,
      color = white
    )
  )
)




####INTERACTIVE Visual Functions####
#Function 1: Horizontal Bar Chart
fx_interactive_barchart <- function(db,
                                  order_var,
                                  x,
                                  y,
                                  chart_title="",
                                  chart_subtitle= "",
                                  chart_caption = "",
                                  yaxis_label = "",
                                  xaxis_label = "", #remember that a bar hchart flips the x and y axes (just for fun!)
                                  theme) {
  db <- db %>% arrange(desc(order_var))
  hchart(db,
    type = "bar",
    hcaes(x = !!rlang::ensym(x), y =round(!!rlang::ensym(y)),2),
    #hcaes(x = x, y = round(y, 2)), 
    color = pink) %>%
    hc_tooltip(pointFormat = "{point.y:.1f}%") %>%
      hc_tooltip(crosshairs = TRUE) %>% 
      hc_yAxis(title = list(text = paste0(yaxis_label))) %>%
      hc_xAxis(title = list(text = paste0(xaxis_label))) %>%
    # title elements
    hc_title(
      text = paste0(chart_title),
      align = "left", 
      style = list(useHTML = TRUE, fontSize='22px')
    ) %>% 
    hc_subtitle(
      text = paste0(chart_subtitle),
      align = "left",
      style=list(fontSize='15px')
    ) %>% 
    hc_caption(
      text = paste0(chart_caption),
      align = "left",
      style=list(fontSize='12px')
    ) %>% 
    hc_add_theme(theme)%>%
    hc_exporting(
      enabled = TRUE, sourceWidth=900, sourceHeight=600, 
      chartOptions=list(plotOptions=list(series=list(dataLabels=list(enabled=TRUE, format='{y} %')))),
      filename = "{chart_title} 2022. Advancement Project CA."
    )
}

#Function 1 Example
example1_int <- fx_interactive_barchart(db = db_race_rates,
                                        order_var = db_race_rates$pei_wages,
                                        x = 'race',
                                        y = 'pei_wages',
                                        chart_title = "Experiencing Lower/Lost Wages during the<br> Covid-19 Pandemic by Race",
                                        chart_subtitle = "",
                                        chart_caption = "NHPI: Native Hawaiian/Pacific Islander <br>AIAN: American Indian/Alaskan Native <br>MENA: Middle Eastern/North African ",
                                        yaxis_label = "Percentage of Respondents Experiencing Lower/Lost Wages",
                                        xaxis_label = "",
                                        theme = lafed_theme_blk
) 

example1_int

Bargraph with selection menu

Code includes:

  • Creating a plotly bar-graph using cross-talk
  • JS function to automatically select the first county
  • NOTE this graph code also produces a title for the yaxis and hovers for all the bars. These features are not being displayed correctly on the rda repository but you can see how it would work in the original script: “W:\Project\RACE COUNTS\2022_v4\Visualizations\RC_2022_Economic.Rmd”
### install version 1.1.1 of Crosstalk -- LATER VERSIONS OF CROSSTALK WILL NOT WORK ####
#remotes::install_version("crosstalk", version = "1.1.1", repos = "http://cran.us.r-project.org") 
library(crosstalk)
#Set up workspace
library(DT)
library(RPostgreSQL)
library(formattable)
library(knitr)
library(dplyr)
library(sf)
library(leaflet)
library(htmltools)
library(tidyr)
library(stringr)
library(rgdal)
library(rpostgis)
library(RColorBrewer)
library(ggplot2)
library(stringr)
library(scales)
library(colorspace)
library(highcharter)
library(here)
library(htmltools)
# library(devtools)
library(broom)
library(kableExtra)
library(reshape2)
library(plotly)
options(scipen=999)
options(highcharter.theme = hc_theme_smpl(tooltip = list(valueDecimals = 2)))

# data setup
pw <- {"password"}

# load PostgreSQL driver
drv <- dbDriver("PostgreSQL")

con <- dbConnect(drv,
dbname = "racecounts",
host = "aws-postgres-db.c5udgz7ro8hq.us-west-2.rds.amazonaws.com", 
port = 5432,
user = "postgres",
password = pw)


c_1 <- st_read(con, query = "SELECT * FROM v4.arei_econ_living_wage_county_2022")  

c_1_long <- c_1 %>%
   select(c(2, ends_with('_rate')))

c_1_long <- melt(c_1_long, id.vars=c("county_name"))
# Be sure to order bars in ascending or descending depending on whether MIN or MAX is best rate. Add a "-" in front of c_1_long$value when MAX is best.
c_1_long <- c_1_long[order(-c_1_long$value),]
# Round 'value' field to 1 decimal
c_1_long <- c_1_long %>% mutate(value = round(value, 1))


# drop down selection menu for counties: check out this helpful resource https://rstudio.github.io/crosstalk/using.html

# create a shared data object from the df and assign a key for the unique observation. County is fine since each race/group has one unique county. from now on, we will be using the shareddata object and not the df_long
sd1 <- SharedData$new(c_1_long, key = ~ county_name)
title = 'People Ages 18-64 Earning a living Wage (%)'

#
bscols(
  widths = 7, 
  
   filter_select("county_name", ## this is the name of column we want to select. 
                "County:",  # this is what we want to see in the selection menu
                sd1,
                ~ county_name, multiple = FALSE),
   plot_ly(sd1) %>%
      add_trace(x = ~ variable, y = ~ value, type = "bar", color = I("yellow"), 
                marker = list(line = list(color = "black", width = 1)) ## add order t
                ) %>%
     config(displayModeBar = FALSE)%>%
     layout(barmode = "stack",
             xaxis = list(title = 'Race/Ethnicity',
                          tickangle=-45,
                          categoryorder = "total descending"),
            # yaxis = list(title = title),
             width = 600,
            paper_bgcolor = "#FFFFFF",
            plot_bgcolor = "#FFFFFF",
            
            font = list(
              family = "Rubik",
              size = 12,
              color = '#8E8C8F'
             ), 
            hoverlabel = list(bgcolor = "FFFFFF")
   ) # end layout
) # end bscols

function filter_default() {
    document.getElementsByClassName("selectized") 
[0].selectize.setValue("Alameda", false);

 }
window.onload = filter_default;

Correlation Line Chart

Code includes:

  • Creating a correlation line chart in highcharts
  • Implementing a “Race Counts” highchart theme to chart
  • Calculating regression lines for groups in data
#### Set up data and environment ####
library(RPostgreSQL)
library(here)
library(htmltools)
library(dplyr)
library(tidyverse)
library(highcharter) 
library(sf)
# library(devtools)
library(broom)
library(kableExtra)
options(scipen=999)

options(highcharter.theme = hc_theme_smpl(tooltip = list(valueDecimals = 2)))



# loads the PostgreSQL driver
drv <- dbDriver("PostgreSQL")

# create connection for rda database
con_pushla <- dbConnect(drv, dbname = "pushla",
                 host = "aws-postgres-db.c5udgz7ro8hq.us-west-2.rds.amazonaws.com", port = 5432,
                 user = "postgres", password = "password")
 
# # just a note of reference to inserting text #https://monashbioinformaticsplatform.github.io/2017-11-16-open-science-training/topics/rmarkdown.html
# #https://rstudio.com/wp-content/uploads/2015/02/rmarkdown-cheatsheet.pdf
# 


# load race disparity data
dat <- st_read(con_pushla, query = "select race, raceeth, type, interaction, count_3yravg as count, total_3yravg as total, name,
               pop_est, rate_per1k, label_rate, disp_ratio, var from lapd_stops_arrests_race_disparity_la_city")

# load poverty disparity data
pov <- st_read(con_pushla, query = "select basiccar, bureau, raceeth, race, type, interaction,
                count_3yravg as count, total_3yravg as total, est, tot, prc, prc_cv, rate_per1k, division, name, district, district_n
               from lapd_stops_arrests_race_poverty_basiccar where type <>'all'")

## Add logos

#### Highchart theme ####
point_cols <-  c( "#FE6A00",  # red orange
                  "#3112B0", # royal blue
             "#FFE000",  # RC yellow
             "#C3DE12",  # lime green
             "#8E8C8F",  # medium grey
             "#FFA000",  # orange
              "#070024",  # RC dark blue
             "#9CFF00",
             "#D8D8D8",  # light grey (labeled dark grey on rc style guide but is light)
             "#F1F0F2"  # v light grey
             )

rc_theme <- hc_theme(
    # race counts colors
  colors = c("#FE6A00",  # red orange
            "#3112B0", # royal blue
             "#FFE000",  # RC yellow
             "#CEF000",  # lime green
             "#8E8C8F",  # medium grey
             "#FFA000",  # orange
              "#070024",  # RC dark blue
             "#9CFF00",
             "#D8D8D8",  # dark grey (labeled so on rc style guide but is light)
             "#F1F0F2" # shocking neon green
  ),
  chart = list(
    backgroundColor = "#F7F7F7",# rc white
    style = list(
          fontFamily = "Rubik"
        ) 
  ),
   plotOptions = 
    list(
      line = 
        list(
          marker = 
            list(
              enabled = FALSE
              )
          )
      ),
  title = list(
    style = list(
      color = "#070024",
      fontFamily = "Rubik"
    )
  ),
  subtitle = list(
    style = list(
      color = "#8E8C8F", # medium grey
      fontFamily = "Rubik"
    )
  ),
  caption = list(
    style = list(
      color = "#8E8C8F",
      fontFamily = "Rubik"
    )
  ),
    axis = list(
    style = list(
      color = "#8E8C8F",
      fontFamily = "Rubik"
    )
  ),
  legend = list(
    itemStyle = list(
      fontFamily = "Rubik",
      color = "#070024"
    ),
    itemHoverStyle = list(
       fontFamily = "Rubik",
      color = "#070024"
    )
  )
)

##### Stop Plot Data Prep #####

color_palette_stops <- c("#C3DE12", "#FE6A00", "#3112B0", "#A900E3",    "#BFBFBF", "#FFE000" ) # total  "#070024"


# prep data for plot
pov_stop_data <- 
pov %>% 
  filter(type =="stop" & !raceeth %in% c("total", "nh_aian")) %>% # not enough data for nh aian
  mutate(prc = round(prc*100, 1))

# split into list by race
pov_stop_data_list <- split(pov_stop_data, pov_stop_data$race, drop = FALSE)

# get regression line for each race group
lm.model_list = list()

for ( i in names(pov_stop_data_list)){

    lm.model <- augment(lm(rate_per1k ~ prc, data = pov_stop_data_list[[i]]))

    lm.model_list[[i]] <- lm.model
}

# append race group id to the lm list
listofnames <- names(lm.model_list)
lm.model_list <- mapply(cbind, lm.model_list, "race" = listofnames, SIMPLIFY = F)

# collapse list
lm.model_stop <- do.call("rbind", lm.model_list)

##### Stop Plot #####

  # correlation line chart
  hchart(lm.model_stop, "line",
       hcaes(x = prc, y = .fitted, group = race),
       color = color_palette_stops, 
        tooltip = list(headerFormat = "", 
                                pointFormat = "{point.race} Trend Line: Stop Rate Correlated with Poverty Rate")) %>% 
  
  # labels
  hc_xAxis(title = list(text = "Population Living Below Poverty Line"), 
           labels = list(format = "{value}%")) %>% 
  hc_yAxis(min = 0,
           title = list(text = "Predicted Rate of Stops per 1,000 Residents")) %>% 
  
  # title elements
  hc_title(
    text = "Correlation of Poverty Rate and Traffic Stop Rate in the City of Los Angeles",
    margin = 20, align = "center", 
    style = list(useHTML = TRUE, fontWeight = "bold")
  ) %>% 
  
  hc_caption(
    text = "Data source: LAPD Vehicle Stop Data and Arrest Incident Data from the City of Los Angeles, 2018-2020; Population and Poverty estimates from the
American Community Survey 2015-2019 5-Year Estimates, Tables DP05 and S1701.<br>
Data notes: Traffic stop rates are based on a three-year average of all vehicle stops. Stop rates were excluded when the total population for a race or ethnic group was below 100 people in a Basic Car.",
    align = "center"
  ) %>% 

# display plot with theme 
hc_add_theme(rc_theme)

Scatterplot

Code includes:

  • Running a correlation test
  • Creating a scatterplot in highcharts
  • Setting global options in highcharts to set rounding of values to two decimal places
#################################################
##PROJECT: Fresno Measure C/P Project with ECI###
#################################################

#set up work station

library(highcharter) 
# library(devtools)
library(corrplot)
library(Hmisc)
library(here)
library(data.table)
library(dplyr)
library(RPostgreSQL)
# library(sf)
library(tidyr)
# library(nngeo)
library(readr)
# library(rpostgis)
# library(maptools)
# library(rmapshaper)
# library(sp)
# library(rgeos)
# library(janitor)
library(tidyverse)
# library(odbc)

# set global options to round decimals to two places for highcharts

options(highcharter.theme = hc_theme_smpl(tooltip = list(valueDecimals = 2)))
 
# load data
projects <- read_csv("W:/Project/ECI/Fresno Measure C&P/Data/Final Data/Measure C and P Projects with Districts and ZCTA Data.csv")

projects <- mutate(projects,
   district_final = paste("District ", district_final))

#corr test
corr_table <-
  projects%>%filter(measure=="Measure C") %>%
   select(below_200_pct, unemp_rate, total_poc, total_5yr) %>%
  filter(!is.na(below_200_pct)&!is.na(unemp_rate) & !is.na(total_poc) & !is.na(total_5yr))

# run correlations 
corr_sig<-rcorr(as.matrix(corr_table), type="spearman")
flattenCorrMatrix <- function(cormat, pmat) {
  ut <- upper.tri(cormat)
  data.frame(
    row = rownames(cormat)[row(cormat)[ut]],
    column = rownames(cormat)[col(cormat)[ut]],
    cor  =(cormat)[ut],
    p = pmat[ut]
    )
}

# put into a table for easy review
corr_sig_table<-flattenCorrMatrix(corr_sig$r, round(corr_sig$P,3))
# no significant correlations

projects_c <- projects %>% filter(
  total_5yr<15000000, measure=="Measure C", !district_final %in% c('District  M','District M')
)  # filter out project with high total and just select Measure C projects

hchart(projects_c, 
         type = "scatter", 
         hcaes(x = unemp_rate, 
               y = total_5yr, group=district_final), 
         tooltip = list(pointFormat = "<strong>{point.project_number}, {point.project_name}<br>Fund Types: <strong>{point.fund_types}</strong><br>5-Year Project Total: <strong>${point.total_5yr}<br>ZIP Code: <strong>{point.zipcode}</strong><br>Unemployment Rate: <strong>{point.unemp_rate}"))%>%
    hc_yAxis(title = list(text = "5-Year Project Total")) %>%
    hc_xAxis(title = list(text = "Unemployment Rate of Surrounding ZIP Code")) %>%
    hc_tooltip(crosshairs = TRUE) %>%
    hc_title(text="Measure C Project Totals Compared to Unemployment Rate by Council District") %>%
    hc_caption(text = "Projects are matched to the ZIP Code they are located in and the corresponding unemployment rate for that ZIP Code. If a project crosses multiple ZIP Codes, it is shown multiple times--once for each ZIP Code where at least 15% of the project falls within. Projects are assigned to the Council District where the majority of the project is located. Excludes two projects above $15M -- PW00937 and PW00679",
      align = "center",useHTML = TRUE
    )

Map

Chloropleth Map with Sidebar


Code includes:

  • Interactive leaflet chloropleth map with multiple layers that toggle
  • Extra customization on leaflet pop ups
  • Customized legend and leaflet layer control boxes

Related links:

###################################
#####Project: JENI 20201 update####
###################################

# set up workspace

library(DT)
library(RPostgreSQL)
library(formattable)
library(knitr)
library(dplyr)
library(sf)
library(leaflet)
# devtools::install_github('bhaskarvk/leaflet.extras')
library("leaflet.extras")
library(htmltools)
library(tidyr)
library(stringr)
library(rgdal)
library(rpostgis)
library(stringr)
library(scales)
library(colorspace)
# install.packages("geojsonsf")
library(geojsonsf)
# install.packages("rmapshaper")
library(rmapshaper)

# # data setup
pw <- {"password"}

# load PostgreSQL driver
drv <- dbDriver("PostgreSQL")

con <- dbConnect(drv,
dbname = "prop47",
host = "aws-postgres-db.c5udgz7ro8hq.us-west-2.rds.amazonaws.com",
port = 5432,
user = "postgres",
password = pw)


#### JENI Prep Steps ####

### Load index data ###
    jeni<- st_read(con, query = "SELECT * FROM jeni_jesi_2021.jeni") 

# round index data
    jeni <- mutate(jeni,
   jeni_pctl = round(jeni_pctl,3)*100,
   jeni_score = round(jeni_score,3),
   system_pctl = round(system_pctl,3)*100,
   drivers_pctl = round(drivers_pctl,3)*100,
   risk_pctl = round(risk_pctl,3)*100,
   population_count = round(population_count,0),
   black_rate = round(black_rate,1),
   latinx_rate = round(latinx_rate,1),
   aian_rate = round(aian_rate,1),
   nhpi_rate = round(nhpi_rate,1),
   nohighschool_rate = round(nohighschool_rate,1),
   below200fpl_rate = round(below200fpl_rate,1),
   unemployed_rate = round(unemployed_rate,1),
   mentalhlth_hosp_rate = round(mentalhlth_hosp_rate,1),
   alcdrug_hosp_rate = round(alcdrug_hosp_rate,1),
   vc_rate = round(vc_rate,1),
   homeless_rate = round(homeless_rate,1)   
    )
    
# add sd and spa names
jeni<-jeni%>%mutate(sd_name=recode(sd_number,
                                                               "1"="Solis",
                                                               "2"="Mitchell",
                                                               "3"="Kuehl",
                                                               "4"="Hahn",
                                                               "5"="Barger"))%>%
  mutate(spa_name=recode(spa_number,
                        "1"="Antelope Valley",
                        "2"="San Fernando Valley",
                        "3"="San Gabriel Valley",
                        "4"="Metro",
                        "5"="West",
                        "6"="South",
                        "7"="East",
                        "8"="South Bay"
                        ))

# transform for leaflet projection
jeni<-st_transform(jeni, CRS("+proj=longlat +datum=WGS84 +no_defs"))


## Create dataset for zips with no index score--no or low population###
# polygons
zips <- st_read(con, query = "SELECT * from jeni_jesi_2021.jeni_jesi_zip_polys")

# transform for leaflet projection
zips<-st_transform(zips, CRS("+proj=longlat +datum=WGS84 +no_defs"))

# bring in population for pop-up
pop <- st_read(con, query = "SELECT * from jeni_jesi_2021.jeni_jesi_zip_population")

# Add population for missing JENI scores to popup
pop <-rename(pop, zipcode = lac_zip)

# join geographies to population and clean up
nojeni <- left_join(zips,pop,by="zipcode")
nojeni <- filter(nojeni, population_count<500 | is.na(population_count))
nojeni$population_count <- prettyNum(round(nojeni$population_count,0), big.mark = ",", preserve.width = "none")

  
### Overlay Prep ###
# load spas and sds
sds <- st_read(con, query = "SELECT * from jeni_jesi_2021.lacounty_supdis_2021")
spas <- st_read(con, query = "SELECT * FROM jeni_jesi_2021.la_county_service_planning_areas_2012")

# re-project spas and sds to 4326 for leaflet
sds<-st_transform(sds, CRS("+proj=longlat +datum=WGS84 +no_defs"))
spas<-st_transform(spas, CRS("+proj=longlat +datum=WGS84 +no_defs"))


## Load jesi 2021 ##
jesi_2021 <- st_read(con, query = "SELECT * from jeni_jesi_2021.jesi")
jesi_2021 <-st_transform(jesi_2021, CRS("+proj=longlat +datum=WGS84 +no_defs"))
# filter for low and lowest only to include
jesi_2021_lowest <- filter(jesi_2021, jesi_category=="Lowest" | jesi_category=="Low")


### Simplify Shapes ###
jeni <- ms_simplify(jeni)
nojeni <-ms_simplify(nojeni)
sds <- ms_simplify(sds)
spas <- ms_simplify(spas)
jesi_2021_lowest<-ms_simplify(jesi_2021_lowest)


### map ###

#orange for JENI 2021
pal_impact <- colorBin(palette = c("#ffff00","#f2bf00", "#e68000", "#d94000","#cc0000"), domain = jeni$jeni_pctl, bins = 5, reverse = FALSE, pretty = FALSE)

#create popup
popup <- paste("<div class='leaflet-popup-scrolled' style='max-width:800px;max-height:200px'> ZIP Code: <span style='font-size: 16px; font-weight: bold;'>", jeni$zipcode, "</span></br>Neighborhood: ", jeni$lat_neighborhood, 
 "</br> JENI Percentile*: <span style='font-size: 15px; font-weight: bold;'>", jeni$jeni_pctl," | </span><span style='font-size: 14px; font-weight: bold;'>",
 jeni$jeni_category, " Need Level</span></br>JENI Rank: ", "<b>", jeni$jeni_rank, "</b> out of 277 ZIP Codes","</br>",
"Population: ", prettyNum(jeni$population_count, big.mark = ",", preserve.width = "none"), "</br>",
"Supervisorial District: ", jeni$sd_number, " | ", jeni$sd_name, "</br>",
"Service Planning Area: ", jeni$spa_number, " | ",jeni$spa_name, "</br>",
"</br><span style='font-size: 14px; font-weight: bold;'>",jeni$zipcode, "</span> has a <span style='font-size: 14px; font-weight: bold;'>", jeni$jeni_category,"</span> overall need level. It also scores <span style='font-size: 14px; font-weight: bold;'>",jeni$system_category,"</span> on System Involvement, <span style='font-size: 14px; font-weight: bold;'>",jeni$drivers_category,"</span> on Inequity Drivers, and <span style='font-size: 14px; font-weight: bold;'>",jeni$risk_category,"</span> on Criminalization Risk need.", "</br>",
"</br>",
"<span style='font-size: 14px; font-weight: bold;'>Component Percentiles*</span>","</br>",
"System Involvement: ", "<b>", jeni$system_pctl,"</b></br>",
"Inequity Drivers: ", "<b>", jeni$drivers_pctl,"</b></br>",
"Criminalization Risk: ", "<b>", jeni$risk_pctl,"</b></br>",
"<br>",
"<span style='font-size: 14px; font-weight: bold;'>Indicators</span>","</br>",
"% Black Population: <b>",jeni$black_rate,"</b></br>",
"% Latinx Population: <b>",jeni$latinx_rate,"</b></br>",
"% American Indian / Alaskan Native Population: <b>",jeni$aian_rate,"</b></br>",
"% Native Hawaiian / Pacific Islander Population: <b>",jeni$nhpi_rate,"</b></br>",
"Unemployment Rate: <b>",jeni$unemployed_rate,"</b></br>",
"% of Population without High School Diploma: <b>",jeni$nohighschool_rate,"</b></br>",
"Below 200% Poverty Level Rate: <b>",jeni$below200fpl_rate,"</b></br>",
"Violent Crime Rate (per 1K): <b>",jeni$vc_rate,"</b></br>",
"Homelessness Rate (per 1K): <b>",jeni$homeless_rate,"</b></br>",
"Alcohol and Drug-Related Hospitalizations (per 1K): <b>",jeni$alcdrug_hosp_rate,"</b></br>",
"Mental Health Hospitalizations (per 1K): <b>",jeni$mentalhlth_hosp_rate,"</b></br>",
"</br>",
"<span style='font-size: 10px; font-style: italic;'>*Percentiles range from 0-100. A higher percentile means a higher level of need in that area.</span>","</br>","</div>")

# create custom legend labels
labels <- c(
"Lowest (0-19th Percentile)",
"Low (20-39th Percentile)",
"Moderate (40-59th Percentile)",
"High (60-79th Percentile)",
"Highest (80-100th Percentile)")

# build map
leaflet(jeni, width = "100%", height = "600px") %>%

# add base map
addProviderTiles("CartoDB.PositronNoLabels")%>%
addProviderTiles("CartoDB.PositronOnlyLabels", options = providerTileOptions(pane = "markerPane")) %>%

# add map panes
addMapPane("index_pane", zIndex = 400) %>%
addMapPane("missing_pane", zIndex = 410) %>%
addMapPane("jesi_2021", zIndex = 415) %>%
addMapPane("sds_pane", zIndex = 420) %>%
addMapPane("spas_pane", zIndex = 420) %>%


# set view and layer control
setView(-118.6368454, 34.2324274, zoom = 8.5) %>%
addLayersControl(overlayGroups = c("Supervisorial Districts","Service Planning Areas", 
                                   "JESI - Low/Lowest Service ZIP Codes"), options = layersControlOptions(collapsed = FALSE, autoZIndex = TRUE)) %>%

# add polygons for JENI ZIP Codes
addPolygons(color = "#444444", weight = 1, smoothFactor = 0.5, fillOpacity = .8, fillColor = ~pal_impact(jeni_pctl), highlight = highlightOptions(color = "white", weight = 3, bringToFront = TRUE, sendToBack = TRUE),
popup = ~popup,
group = "JENI", options = pathOptions(pane = "index_pane"))%>%

# add polygons for ZIP Codes with no JENI
  addPolygons(data=nojeni,color="#444444", weight = 1, smoothFactor = 0.5, fillOpacity = .8, fillColor ="#808080", highlight = highlightOptions(color = "white", weight = 3, bringToFront = TRUE, sendToBack = TRUE),
popup = ~paste0(
"ZIP Code:<b> ", nojeni$zipcode, "</b></br>",
"No JENI Available</br>Population:<b> ", nojeni$population_count, "</b></br>"),
options = pathOptions(pane = "missing_pane")) %>%

# add boundaries for lowest service JESI 2021 Zips
addPolygons(data = jesi_2021_lowest, fillOpacity=0, color = "#54278f", weight = 2, label=~zipcode, group = "JESI - Low/Lowest Service ZIP Codes", options = pathOptions(pane = "jesi_2021", interactive = FALSE), highlight = highlightOptions(color = "white", weight = 3, bringToFront = TRUE)) %>%

# add boundaries for SPAs and SDs
addPolygons(data = sds, fillOpacity=0, color = "black", weight = 2.2, label=~distname, group = "Supervisorial Districts", options = pathOptions(pane = "sds_pane", interactive = FALSE), highlight = highlightOptions(color = "white", weight = 3, bringToFront = TRUE)) %>%
  addPolygons(data = spas, fillOpacity=0, color = "black", weight = 2.2, label=~spa_2012, group = "Service Planning Areas", options = pathOptions(pane = "spas_pane", interactive = FALSE), highlight = highlightOptions(color = "white", weight = 3, bringToFront = TRUE))%>%

    #add combined legend
   addLegend(position = "bottomleft",
            title="JENI Need Level",
            labels = c("Lowest (0-19th Percentile)", "Low (20-39th Percentile)",
                       "Moderate (40-59th Percentile)", "High (60-79th Percentile)","Highest (80-100th Percentile)", "JENI Not Available"),
            colors= c( "#ffff00","#f2bf00", "#e68000", "#d94000","#cc0000", "#808080"), opacity=.9)%>%

  # layers control
  addLayersControl(
    overlayGroups =  c(
      # "JENI 2019 - High/Highest Need ZIP Codes",
                       "JESI - Low/Lowest Service ZIP Codes",
      "Supervisorial Districts",
      "Service Planning Areas"),
    options = layersControlOptions(collapsed = FALSE),position = "bottomleft")  %>%
 hideGroup("Supervisorial Districts") %>%
  hideGroup("Service Planning Areas")%>%

    htmlwidgets::onRender("function(el, x) {
        L.control.zoom({ position: 'topright' }).addTo(this)
    }")

Map with Point Data


Code includes:

  • Added customization to pop up
  • Hyperlink added to pop up
  • Hover over point data for labels
### Set up workspace ####
#install.packages("extrafont")
library(extrafont)
library(here)
library(dplyr)
library(data.table)
library(tidycensus)
library(sf)
library(RPostgreSQL)
library(rpostgis)
library(leaflet)
library(RColorBrewer)
library(tigris)
library(stringr)
library(tidyr)
library(showtext)
library(scales)
library(htmltools)
options(scipen=999)

library(tidyverse)
library(waffle)
library(magrittr)
library(plyr)
library(readxl)
# loads the PostgreSQL driver
drv <- dbDriver("PostgreSQL")

con <- dbConnect(drv, dbname = "tnp",
                 host = "aws-postgres-db.c5udgz7ro8hq.us-west-2.rds.amazonaws.com", port = 5432,
                 user = "postgres", password = "password")
####################
#### Style Guide ###
####################

## COLORS: There is only blue ##
black <- "#000000"
mainblue <- "#22BCB8"
textgrey <- "#919191"
linecolor<-"#FF0000"

##MAP COLORS: 
low_blue <- '#90EBE9'
med_blue <- '#22BCB8' #same as mainblue 
high_blue <- '#1A918E'

# define fonts in chart
font_title <- "Arial"
font_subtitle <- "Arial"
font_caption <- "Arial"
font_bar_label <- "Arial"
font_axis_label <- "Arial"
font_table_text<-"Arial"

################
###Map Prep#####
################

bsc<-pgGetGeom(con, "reg4_best_start_geographies")

#convert to sf 
bsc<-st_as_sf(bsc)

####transform bsc shapefile for leaflet

bsc <- st_transform(bsc, crs = 4326)


####Add XY coordinates to bsc spatial df

## find centroid coordinates
bsc_cnt = st_centroid(bsc)
bsc_crd = data.frame(st_coordinates(bsc_cnt))

#add ID column to each df and spatial df for joining
bsc$ID <- seq.int(nrow(bsc))
bsc_crd$ID <- seq.int(nrow(bsc_crd))

##join spatial frame with data frame to get the XY columns into the spatial frame

bsc<-geo_join(bsc, bsc_crd, 'ID', 'ID',
              how = "left")

#create label 
bsc_label<-paste(bsc$best_start)

###########################
#####LAUSD Dist 7 Layer####
###########################

lausd<-pgGetGeom(con, "lausddist7_school_educ_indicators_nonrace", geom="school_geom_4326")


###################
####LBUSD Layer####
###################

lb<-pgGetGeom(con, "lbusd_school_educ_indicators_nonrace")


###############
####Pop-Ups####
###############

url=c("https://jzhang514.github.io/TNP/Additional-Resources")

lb_popup<-paste0((label = paste0("<span style='font-size: 15px; font-weight: bold'>", htmlEscape(lb$schoolname), "</span></br></br>",
                                 "<span style='font-weight: bold;'>High School Graduation Rate:</span> ",ifelse(is.na(lb$prc_hs_grad), 'Data not Available', scales::percent(lb$prc_hs_grad/100,.1)),"</br>",
                                 "<span style='font-weight: bold;'>UC/CSU Eligibility Rate:</span> ",ifelse(is.na(lb$prc_uccsu_elig), 'Data not Available', scales::percent(lb$prc_uccsu_elig/100,.1)),"</br>",
                                 "<span style='font-weight: bold;'>Chronic Absenteeism Rate:</span> ",ifelse(is.na(lb$prc_chronicabsent), 'Data not Available', scales::percent(lb$prc_chronicabsent/100,.1)),"</br>",
                                 "<span style='font-weight: bold;'>Suspension Rate:</span> ",ifelse(is.na(lb$prc_suspensions), 'Data not Available', scales::percent(lb$prc_suspensions/100,.1)),"</br>",
                                 "<span style='font-weight: bold;'>Student Homelessness:</span> ",ifelse(is.na(lb$prc_studenthomeless), 'Data not Available', scales::percent(lb$prc_studenthomeless/100,.1)),"</br></br>",
                                 
                                 '<a href="', 
                                 url,
                                 '"target="_blank">Click here for data by race</a>.')))


lausd_popup<-paste0((label = paste0("<span style='font-size: 15px; font-weight: bold'>", htmlEscape(lausd$cde_school), "</span></br></br>",
                                    "<span style='font-weight: bold;'>High School Graduation Rate:</span> ",ifelse(is.na(lausd$prc_hs_grad), 'Data not Available', scales::percent(subset(lausd,race=='Total')$prc_hs_grad/100,.1)),"</br>",
                                    "<span style='font-weight: bold;'>UC/CSU Eligibility Rate:</span> ", ifelse(is.na(lausd$prc_uccsu_elig), 'Data not Available', scales::percent(subset(lausd, race =="Total")$prc_uccsu_elig/100,.1)),"</br>",
                                    "<span style='font-weight: bold;'>Chronic Absenteeism Rate:</span> ",ifelse(is.na(lausd$prc_chronicabsent), 'Data not Available', scales::percent(lausd$prc_chronicabsent/100,.1)),"</br>",
                                    "<span style='font-weight: bold;'>Suspension Rate:</span> ",ifelse(is.na(lausd$prc_suspensions), 'Data not Available', scales::percent(lausd$prc_suspensions/100,.1)),"</br>",
                                    "<span style='font-weight: bold;'>Student Homelessness:</span> ",ifelse(is.na(lausd$prc_studenthomeless), 'Data not Available', scales::percent(lausd$prc_studenthomeless/100,.1)),"</br></br>",
                                    
                                    '<a href="', 
                                    url,
                                    '"target="_blank">Click here for data by race</a>.')))


######################
########Map###########
######################

  map_schools<-leaflet(width = "100%", height = "495px")%>%

  #LBUSD layer

  addCircleMarkers(data = lb, lat = ~y_4326, lng = ~x_4326,
                   group="Long Beach Unified",
                   popup=~paste0(lb_popup),
                  label=~htmlEscape(schoolname),
                   weight = 2, fillOpacity = .8, radius=4,
                   color="white",  fillColor= "#22BCB8", stroke = TRUE
                   )%>%

  #LAUSD layer
  
  addCircleMarkers(data = lausd, lat = ~y, lng = ~x,
                   group="LAUSD District 7",
                   popup=~paste0(lausd_popup),
                   label=~htmlEscape(cde_school),
                   weight = 2, fillOpacity = .8, radius=4,
                   color="white",  fillColor= "#332985", stroke = TRUE
  )%>%
  
  #bsc layer
   addPolygons(data=bsc, color="black", weight=1.5, opacity = 1, fill=F,
 
               label=as.character(bsc$best_start)
  ) %>%
   addLabelOnlyMarkers(data=bsc,  ~X, ~Y, label =  ~as.character(bsc_label),
                       labelOptions = labelOptions(noHide= T, direction = 'top', textOnly = T,
                                                  style=list("font-size"="12px", "color"="black",
                                                             "font-family"="Arial",
                                                            "font-weight" = "bold",
                                                            "text-shadow"="1px 1px white")))%>%

  #layer control
  
  addLayersControl(overlayGroups =
                     c("Long Beach Unified", "LAUSD District 7"), 
                   options = layersControlOptions(collapsed = FALSE))%>%  
    
    #legend
    addLegend("bottomleft", colors = c("#22BCB8", "#332985"), 
              labels = c( "Long Beach Unified", "LAUSD District 7"
              ))%>%
    

  #base and view
    addProviderTiles("CartoDB.Positron")%>%
    setView(    -118.263412,    33.815654   , zoom = 11)

map_schools

Data Source: California Department of Education (CDE) data from 2018-2019 or 2019-2020 School Year

Map with Point/Line Data


Code includes:

  • Added customization to pop up
  • Hyperlink added to pop up
  • Hover over point data for labels
# load packages
library(here)
library(data.table)
library(dplyr)
library(RPostgreSQL)
library(sf)
library(tidyr)
library(nngeo)
library(readr)
library(rpostgis)
library(maptools)
library(rmapshaper)
library(sp)
library(rgeos)
library(janitor)
library(tidyverse)

library(extrafont)
library(data.table)
library(tidycensus)
library(RPostgreSQL)
library(rpostgis)
library(leaflet)
library(RColorBrewer)
library(tigris)
library(stringr)
library(showtext)
library(scales)
library(htmltools)
options(scipen=999)
library(waffle)
library(magrittr)
library(plyr)
library(readxl)


library(highcharter) 
# library(devtools)
library(corrplot)
library(Hmisc)

# load data

measure <- read_csv("W:/Project/ECI/Fresno Measure C&P/Data/Final Data/Fresno Measure C and P Project Spending and Geo Data Final TidyGeocoder.csv") %>% filter(`Project Number` != "PW00829")
# check for Project #

# insert column for measure type
measure$measure <- "Measure C"
measure$measure[measure$'Fund Type' %like% "^Measure P"]<-"Measure P"

names(measure) <- gsub(" ", "_", names(measure))

# rename id column
measure <- measure %>% 
  dplyr::rename(
   id =...1
  )

# remove duplicates and clean up names
 
names(measure) <- gsub(" ", "_", names(measure))

measure <- clean_names(measure)


## remove duplicates

measure <-measure %>% 
  group_by(project_number,x,y, x1,y1,x2,y2,measure) %>% 
   dplyr::summarise(fund_types=paste(fund_type, collapse=", "),
            funds=paste(fund, collapse=", "),
            project_name=min(project_name),
            geography_type=min(geography_type),
            links=min(link),
            department=min(department),
            district=min(district),
            fy_2022=sum(capital_projects_fy22_estimate),
            fy_2023=sum(x2023_capital_projects),
            fy_2024=sum(x2024_capital_projects),
                        fy_2025=sum(x2025_capital_projects),
                                    fy_2026=sum(x2026_capital_projects),
                                                fy_2027=sum(x2027_capital_projects),
                                                total_5yr=sum(x5_year_project_total),
         project_count= n())


# add id field again                                              
measure$id <- 1:nrow(measure)


# loads the PostgreSQL driver
drv <- dbDriver("PostgreSQL")

con <- dbConnect(drv, dbname = "covid19_race_class",
                 host = "aws-postgres-db.c5udgz7ro8hq.us-west-2.rds.amazonaws.com", port = 5432,
                 user = "postgres", password = "password")




### Load indicator data for poverty and unemployment ###

indicators <- st_read(con, query = "SELECT * FROM state_latest.index_2019_indicators") 


indicators <- indicators %>% select(zipcode, below_200_pct, below_200_pct_cv, below_200_pct_moe, below_200_universe, unemp_rate, unemp_rate_moe, unemp_rate_cv, unemp_universe)
#1764 observations

### Load indicator data for people of Color
con2 <- dbConnect(drv, dbname = "rda_shared_data",
                 host = "aws-postgres-db.c5udgz7ro8hq.us-west-2.rds.amazonaws.com", port = 5432,
                 user = "postgres", password = "password")

POC <- st_read(con2, query = "SELECT * FROM demographics.acs_5yr_dp05_multigeo_2020") 
# But there are 1799 zip code


POC <- POC %>% filter(
 geolevel  == "zcta"
)  # filter to only zip codes

### make name into 2 columns: name and zip-code

POC[c('name', 'zipcode')] <- str_split_fixed(POC$name, ' ', 2)


# select only POC groups we are interested in based on the metadata dictionary: W:\Data\Demographics\ACS\acs_2020_dp05_race_tract_california\ACSDP5Y2020.DP05_2022-04-11T200746 
### POC <- POC %>% select(geoid, name, zipcode, geolevel, dp05_0071pe, dp05_0078pe, dp05_0080pe, dp05_0079pe, dp05_0081pe)

POC <- POC %>% select(geoid, name, zipcode, geolevel, dp05_0001e, dp05_0071e, dp05_0078e, dp05_0080e, dp05_0079e, dp05_0081e )




## data dictionary


# total
# DP05_0001e: estimate sex and age total population


  #Latinx
   # dp05_0071e =  Estimate HISPANIC OR LATINO AND RACE!!Total population!!Hispanic or Latino (of any race)

  # non-hispanic black
 #DP05_0078e = Estimate HISPANIC OR LATINO AND RACE!!Total population!!Not Hispanic or Latino!!Black or African American alone


  
  # nh asian
  # DP05_0080e = Estimate HISPANIC OR LATINO AND RACE!!Total population!!Not Hispanic or Latino!!Asian alone

  
  # nh aian
# DP05_0079e = Estimate HISPANIC OR LATINO AND RACE!!Total population!!Not Hispanic or Latino!!American Indian and Alaska Native alone


  # nh nhpi
 # DP05_0081e = Estimate HISPANIC OR LATINO AND RACE!!Total population!!Not Hispanic or Latino!!Native Hawaiian and Other Pacific Islander alone



# add total POC estimates
 POC$total = (POC$dp05_0071e  + POC$dp05_0078e + POC$dp05_0080e + POC$dp05_0079e + POC$dp05_0081e)
 # divide by the total population for %
 POC$total_poc = (POC$total/POC$dp05_0001e)*100
 
 
# select only POC, zipcode, geoid

POC = POC %>% select(geoid, zipcode, total_poc)

# round
POC$total_poc <- round(POC$total_poc, digits = 1)

# replace nan with NA
POC$total_poc[is.nan(POC$total_poc)] <- NA


# merge with indicators from state_latest.index_2019_indicators
final_indicators = left_join(indicators, POC, by = "zipcode")

#round poverty
final_indicators$below_200_pct <- round(final_indicators$below_200_pct, digits = 1)



# convert to df
#final_indicators_df <- as.data.frame(final_indicators)


# find Fresno City
## FRESNO zip code


fresno_city  <- as.data.frame(fresno_city)

fresno_city <- fresno_city %>% select(ZIP5) # only select zip codes that we want while removing the geom. We are only using geom from the zip code indicators


fresno_indicators <- left_join(fresno_city, final_indicators, by = c("ZIP5" = "zipcode") )

# convert to sf
fresno_indicators <- st_as_sf(fresno_indicators)

# transform for leaflet projection


fresno_indicators<-st_transform(fresno_indicators, CRS("+proj=longlat +datum=WGS84 +no_defs"))


##### DROPS 93737 polygon which is empty polygon #########

fresno_indicators<- fresno_indicators %>% filter(!st_is_empty(.))

##### points to line function found in RPubs: https://rpubs.com/walkerke/points_to_line

points_to_line <- function(data, long, lat, id_field = NULL, sort_field = NULL) {

  # Convert to SpatialPointsDataFrame
  coordinates(data) <- c(long, lat)

  # If there is a sort field...
  if (!is.null(sort_field)) {
    if (!is.null(id_field)) {
      data <- data[order(data[[id_field]], data[[sort_field]]), ]
    } else {
      data <- data[order(data[[sort_field]]), ]
    }
  }

  # If there is only one path...
  if (is.null(id_field)) {

    lines <- SpatialLines(list(Lines(list(Line(data)), "id")))

    return(lines)

    #if we have multiple lines...
  } else if (!is.null(id_field)) {  

    # Split into a list by ID field
    paths <- sp::split(data, data[[id_field]])

    sp_lines <- SpatialLines(list(Lines(list(Line(paths[[1]])), "line1")))

    # for loops
    for (p in 2:length(paths)) {
      id <- paste0("line", as.character(p))
      l <- SpatialLines(list(Lines(list(Line(paths[[p]])), id)))
      sp_lines <- spRbind(sp_lines, l)
    }

    return(sp_lines)
  }
}
#####

## data prep
#convert to sf 
fresno<-st_as_sf(fresno)

####transform bsc shapefile for leaflet
fresno <- st_transform(fresno, crs = 4326)


####Add XY coordinates to bsc spatial df

## find centroid coordinates
# fresno_cnt = st_point_on_surface(fresno)

# fresno_crd = data.frame(st_coordinates(fresno_cnt))

#add ID column to each df and spatial df for joining
#fresno$ID <- seq.int(nrow(fresno))
#fresno_crd$ID <- seq.int(nrow(fresno_crd))

##join spatial frame with data frame to get the XY columns into the spatial frame

#fresno<-geo_join(fresno, fresno_crd, 'ID', 'ID',
   #           how = "left")

#create label 
#fresno_label<-paste(fresno$best_start)



###################
####Measure P Layer####
###################

# create df for measure p 
measure_p <- measure %>% filter(measure == "Measure P")

########points
 
# filter out points
measure_p_point <- measure_p %>% filter(geography_type == "Point", x != "NA")

# create spatial point df: measure p

# make the SpatialPointsDataFrame object
p_spdf <- SpatialPointsDataFrame(coords      = measure_p_point [ , c("x", "y")],
                               data        = measure_p_point, 
                               proj4string = CRS("+init=epsg:28992"))

########## lines
# filter out lines
measure_p_line <- measure_p %>% filter(geography_type  == "Line", x1 != "NA", x2 != "NA")

 # data manipulation
p_line <- gather(measure_p_line, measure, val, -id) %>% group_by(id) %>%
            do(data.frame(   lat=c(.[["val"]][.[["measure"]]=="y1"],
                                   .[["val"]][.[["measure"]]=="y2"]),
                          long = c(.[["val"]][.[["measure"]]=="x1"],
                                   .[["val"]][.[["measure"]]=="x2"])))

#convert to df
p_line <- as.data.frame(p_line)

#make numeric
p_line$lat <- as.numeric(p_line$lat)
p_line$long <- as.numeric(p_line$long)

p_line <- points_to_line(p_line, "long", "lat", "id")

# convert to spatial data frame

p_line_sf <- SpatialLinesDataFrame(sl = p_line, data = measure_p_line, match.ID = FALSE)


p_line_sf <- st_as_sf(p_line_sf)

#### Measure C Layer ######
measure_c <- measure %>% filter(measure == "Measure C")

# filter out points
measure_c_point <- measure_c %>% filter(geography_type == "Point", x != "NA")

# create spatial point df: measure c

# make the SpatialPointsDataFrame object
c_spdf <- SpatialPointsDataFrame(coords      = measure_c_point [ , c("x", "y")],
                               data        = measure_c_point, 
                               proj4string = CRS("+init=epsg:28992"))



# filter out lines
measure_c_line <- measure_c %>% filter(geography_type == "Line", x1 != "NA", x2 != "NA")

 # data manipulation
# c_line <- gather(measure_c_line, measure, val, -`Project Number`, -`Corridor Start`, -`Corridor End`) %>% group_by(`Project Number`, `Corridor Start`, `Corridor End`) %>%
      #     do(data.frame(   lat=c(.[["val"]][.[["measure"]]=="Y1"],
        #                          .[["val"]][.[["measure"]]=="Y2"]),
        #                 long = c(.[["val"]][.[["measure"]]=="X1"],
        #                          .[["val"]][.[["measure"]]=="X2"])))



  c_line <- gather(measure_c_line, measure, val, -id) %>% group_by(id) %>%
          do(data.frame(   lat=c(.[["val"]][.[["measure"]]=="y1"],
                                   .[["val"]][.[["measure"]]=="y2"]),
                          long = c(.[["val"]][.[["measure"]]=="x1"],
                                   .[["val"]][.[["measure"]]=="x2"])))

#convert to df
c_line <- as.data.frame(c_line)

#make numeric
c_line$lat <- as.numeric(c_line$lat)
c_line$long <- as.numeric(c_line$long)


c_line <- points_to_line(c_line, "long", "lat", "id")

# convert to spatial data frame

c_line_sf <- SpatialLinesDataFrame(sl = c_line, data = measure_c_line, match.ID = FALSE)


c_line_sf <- st_as_sf(c_line_sf)

####################
#### Style Guide ###
####################

## COLORS:  ##
black <- "#000000"
mainblue <- "#22BCB8"
textgrey <- "#919191"
linecolor<-"#FF0000"
green <- "#347F4D"
orange <- "FF8000"


# set up custom legend


###############
####Pop-Ups####
###############


p_popup<-paste0((label = paste0("<span style='font-size: 15px; font-weight: bold'>", htmlEscape(p_spdf$project_name), "</span></br>",
                                
                                "<span style='font-weight: bold;'>Project Number:</span> ", p_spdf$project_number, "</br>",
                                
                                "<span style='font-weight: bold;'>Fund Type:</span> ", p_spdf$fund_types, "</br></br>",
                                
                                  
                                 "<span style='font-weight: bold;'>FY 22 Estimate:</span> ", "$", ifelse(is.na(p_spdf$fy_2022), 'Data not Available', scales::comma(p_spdf$fy_2022)),"</br>",
                                
                                
"<span style='font-weight: bold;'>5 Year Project Total:</span> ", "$",

ifelse(is.na(p_spdf$total_5yr), 'Data not Available', 
scales::comma(p_spdf$total_5yr)),"</br></br>",
                                 

### comment out this code: just let it be character

                            "<span style='font-weight: bold;'>Link:</span> ",

ifelse(is.na(p_spdf$links), 'Link not Available', 
p_spdf$links) 


  ## provide hyperlink later: '<a href=',  p_spdf$Link, '>', p_spdf$Link, '</a>'
 
                         
)
)
)


pline_popup<-paste0((label = paste0("<span style='font-size: 15px; font-weight: bold'>", htmlEscape(p_line_sf$project_name), "</span></br>",
                                
                                "<span style='font-weight: bold;'>Project Number:</span> ", p_line_sf$project_number, "</br>",
                                
                                "<span style='font-weight: bold;'>Fund Type:</span> ", p_line_sf$fund_types, "</br></br>",
                                               
                                    
                                                                       
                                
                                 "<span style='font-weight: bold;'>FY 22 Estimate:</span> ","$", ifelse(is.na(p_line_sf$fy_2022), 'Data not Available', scales::comma(p_line_sf$fy_2022)),"</br>",
                                 
"<span style='font-weight: bold;'>5 Year Project Total:</span> ","$", ifelse(is.na(p_line_sf$total_5yr), 'Data not Available', 
scales::comma(p_line_sf$total_5yr)),"</br></br>",
                                 
                            "<span style='font-weight: bold;'>Link:</span> ",


ifelse(is.na(p_line_sf$links), 'Link not Available', 
p_line_sf$links) 



  ## provide hyperlink later:  '<a href=',  p_line_sf$Link, '>',  p_line_sf$Link, '</a>'
                                 
                              
                         
)
)
)



##


c_popup<-paste0((label = paste0("<span style='font-size: 15px; font-weight: bold'>", htmlEscape(c_spdf$project_name), "</span></br>",
                                
                                "<span style='font-weight: bold;'>Project Number:</span> ", c_spdf$project_number, "</br>",
                                
                                "<span style='font-weight: bold;'>Fund Type:</span> ", c_spdf$fund_types, "</br></br>",           
                                
                 
                                 "<span style='font-weight: bold;'>FY 22 Estimate:</span> ","$", ifelse(is.na(c_spdf$fy_2022), 'Data not Available', scales::comma(c_spdf$fy_2022)),"</br>",
                                 
            
"<span style='font-weight: bold;'>5 Year Project Total:</span> ","$", ifelse(is.na(c_spdf$total_5yr), 'Data not Available', 
scales::comma(c_spdf$total_5yr)),"</br></br>",
                                 

                            "<span style='font-weight: bold;'>Link:</span> ",
ifelse(is.na(c_spdf$links), 'Link not Available', 
c_spdf$links)




   ## provide hyperlink later: '<a href=',  c_spdf$Link, '>', c_spdf$Link, '</a>'
                                 
                              
                         
)
)
)

## 



cline_popup<-paste0((label = paste0("<span style='font-size: 15px; font-weight: bold'>", htmlEscape(c_line_sf$project_name), "</span></br>",
                                
                                "<span style='font-weight: bold;'>Project Number:</span> ", c_line_sf$project_number, "</br>",
                                
                                "<span style='font-weight: bold;'>Fund Type:</span> ", c_line_sf$fund_types, "</br></br>",
                                
                                 "<span style='font-weight: bold;'>FY 22 Estimate:</span> ","$", ifelse(is.na(c_line_sf$fy_2022), 'Data not Available', scales::comma(c_line_sf$fy_2022)),"</br>",
                                 
             

"<span style='font-weight: bold;'>5 Year Project Total:</span> ","$", ifelse(is.na(c_line_sf$total_5yr), 'Data not Available', 
scales::comma(c_line_sf$total_5yr)),"</br></br>",
                                 

                            "<span style='font-weight: bold;'>Link:</span> ",
ifelse(is.na(c_line_sf$links), 'Link not Available', 
c_line_sf$links) 



  ## provide hyperlink later: '<a href=',  c_line_sf$Link, '>',  c_line_sf$Link, '</a>'
                                 
                    
)
)
)
###

#palette purples
pal_unemp <- colorBin(palette = c("#54278f","#605DA2", "#9391BF", "#cbc9e2","#f2f0f7" ), domain = fresno_indicators$unemp_rate, bins = 5, reverse = TRUE, pretty = FALSE)


unemployment_popup <- paste0((
  label = paste0(
    "<span style='font-weight: bold;'>Zipcode:</span> ", fresno_indicators$ZIP5, "</br>",
     "<span style='font-weight: bold;'>Unemployment Rate:</span> ",ifelse(is.na(fresno_indicators$unemp_rate), 'Data not Available',fresno_indicators$unemp_rate), "%"
    
    
    
  )
  )
  )

### map 

leaflet(width = "100%", height = "495px")%>%
  
   ### add indicator: unemployment

addPolygons(data = fresno_indicators, color = "#444444", weight = 1, smoothFactor = 0.5, fillOpacity = .5, popup = ~paste0(unemployment_popup), fillColor = ~pal_unemp(unemp_rate), highlight = highlightOptions(color = "white", weight = 3, bringToFront = FALSE, sendToBack = TRUE)) %>% 

      
  
#fresno district layer
   addPolygons(data=fresno, color="black", weight=1.5, group = "Council Districts", opacity = 2, fill=F,
 
               
   ) %>%
  
  #Measure P Point layer
  
  
  # start line
         addPolylines(data = p_line_sf, group = "Measure P",  label=~htmlEscape(project_name), popup =~paste0(pline_popup), color = "orange", weight = 4, opacity = 2) %>%



  addCircleMarkers(data = p_spdf, lat = ~y, lng = ~x,
                   group="Measure P",
                  popup=~paste0(p_popup),
                  label=~htmlEscape(project_name),
                   weight = 2, fillOpacity = 1, radius=3,
                   color="black",  fillColor= "orange", stroke = TRUE
                   ) %>%

  # end p line
  
   #Measure C Point layer
  
     addPolylines(data = c_line_sf, group = "Measure C", label=~htmlEscape(project_name), popup =~paste0(cline_popup), color = "red", weight = 3, opacity = 2) %>%

  
   addCircleMarkers(data = c_spdf, lat = ~y, lng = ~x,
                   group="Measure C",
                  popup=~paste0(c_popup),
                  label=~htmlEscape(project_name),
                   weight = 2, fillOpacity = 1, radius=3,
                   color="black",  fillColor= "red", stroke = TRUE
                   ) %>% 
 
  # end c line
  

  #layer control
  
  addLayersControl(overlayGroups =
                     c("Measure P", "Measure C", "Council Districts"), 
                   options = layersControlOptions(collapsed = FALSE)) %>%  
    
   
    # add legend for unemployment
addLegend(position = "bottomleft", pal = pal_unemp, values = fresno_indicators$unemp_rate, opacity = .5, title = "Fresno Unemployment Rate (%)", na.label = "Unemployment data not available", labFormat = labelFormat(suffix = "%")
          )%>%
 
  addLegend(position = "bottomleft",   title = "Fund Type", labels = c("Measure P", "Measure C"), colors = c("orange", "red")) %>%
  
  #base and view
  
    addProviderTiles("CartoDB.Positron")%>%
    setView(    -119.8026   ,   36.75467    , zoom = 11)

Data Sources: * City Council Districts and Existing Fresno City Parks: City of Fresno GIS Download

  • Socioeconomic Indicators: American Community Survey 5 year estimates 2015-2019, 2016-2020.

  • Measure C & P Projects: Approximate Address Points for Measure P and C Projects compiled by Advancement Project California based on public documents from the City of Fresno.

Interactive Tables

Justice Equity Need Index (JENI) Data Table

Code includes:

  • Interactive data table built with the DT package
  • Excel and CSV download options
  • Table search function
  • Columns that can be sorted or filtered
# set up workspace

library(DT)
library(formattable)
library(knitr)
library(dplyr)
library(htmltools)
library(tidyr)
library(stringr)
library(stringr)
library(scales)
library(colorspace)


# # data setup
pw <- {"password"}

# load PostgreSQL driver
drv <- dbDriver("PostgreSQL")

con <- dbConnect(drv,
dbname = "prop47",
host = "aws-postgres-db.c5udgz7ro8hq.us-west-2.rds.amazonaws.com",
port = 5432,
user = "postgres",
password = pw)


#### JENI Prep Steps ####

### Load index data ###
jeni<- st_read(con, query = "SELECT * FROM jeni_jesi_2021.jeni") 

# round index data
    jeni <- mutate(jeni,
   jeni_pctl = round(jeni_pctl,3)*100,
   jeni_score = round(jeni_score,3),
   system_pctl = round(system_pctl,3)*100,
   drivers_pctl = round(drivers_pctl,3)*100,
   risk_pctl = round(risk_pctl,3)*100,
   population_count = round(population_count,0),
   black_rate = round(black_rate,1),
   latinx_rate = round(latinx_rate,1),
   aian_rate = round(aian_rate,1),
   nhpi_rate = round(nhpi_rate,1),
   nohighschool_rate = round(nohighschool_rate,1),
   below200fpl_rate = round(below200fpl_rate,1),
   unemployed_rate = round(unemployed_rate,1),
   mentalhlth_hosp_rate = round(mentalhlth_hosp_rate,1),
   alcdrug_hosp_rate = round(alcdrug_hosp_rate,1),
   vc_rate = round(vc_rate,1),
   homeless_rate = round(homeless_rate,1)   
    )
    
# add sd and spa names
jeni<-jeni%>%mutate(sd_name=recode(sd_number,
                                                               "1"="Solis",
                                                               "2"="Mitchell",
                                                               "3"="Kuehl",
                                                               "4"="Hahn",
                                                               "5"="Barger"))%>%
  mutate(spa_name=recode(spa_number,
                        "1"="Antelope Valley",
                        "2"="San Fernando Valley",
                        "3"="San Gabriel Valley",
                        "4"="Metro",
                        "5"="West",
                        "6"="South",
                        "7"="East",
                        "8"="South Bay"
                        ))


#remove geoms and reorganize the data for the data table download
jeni_data <- jeni[c(1:8,10:18, 23:44)]%>% st_drop_geometry()
jeni_data<-jeni_data[, c(1,6,7,8,5,2,3,4,9,10,12,13,15,16,18,20,22,24,26,28,30,32,34,36,38)]
jeni_data <- arrange(jeni_data, -desc(jeni_rank))

## table ##

datatable(jeni_data,
class = 'cell-border stripe',
colnames=c(
'JENI Rank'='jeni_rank',
'JENI Percentile'='jeni_pctl',
'JENI Category'='jeni_category',
'ZIP Code'='zipcode',
'Population'='population_count',
'Supervisorial District'='sd_number',
'Service Planning Area'='spa_number',
'Neighborhood'='lat_neighborhood',
'System Involvement Percentile' = 'system_pctl',
'System Involvement Category' = 'system_category',
'Inequity Drivers Percentile' = 'drivers_pctl',
'Inequity Drivers Category' = 'drivers_category',
'Criminalization Risk Percentile' = 'risk_pctl',
'Criminalization Risk Category' = 'risk_category',
'% Black Population' = 'black_rate',
'% Latinx Population' = 'latinx_rate',
'% American Indian / Alaskan Native Population' = 'aian_rate',
'% Native Hawaiian / Pacific Islander Population' = 'nhpi_rate',
'% of Population without a High School Diploma' = 'nohighschool_rate',
'Below 200% Poverty Level Rate' = 'below200fpl_rate',
'Unemployment Rate' = 'unemployed_rate',
'Mental Health Hospitalizations (per 1K)' = 'mentalhlth_hosp_rate',
'Alcohol and Drug-Related Hospitalizations (per 1K)' = 'alcdrug_hosp_rate',
'Violent Crime Rate (per 1K)' = 'vc_rate',
'Homelessness Rate (per 1K)' = 'homeless_rate'
),
style ='default',
rownames = FALSE,
filter ='top',
extensions = c(
  # 'Scroller',
  # 'FixedHeader',
               'Buttons',
               'KeyTable', 'FixedColumns'),

options = list(
  dom = 'Bfrtip',
  buttons = c('csv','excel'),
pageLenth =15,
autoWidth = TRUE,
scrollX = T,
scroller = TRUE
  # columnDefs = list(list(width = '200px', targets = c(1,23))),


#, searchHighlight = TRUE
)
,
caption = htmltools::tags$caption(
style = 'caption-side: bottom; text-align: left;',
htmltools::strong('Percentiles range from 0-100. A higher percentile means a higher level of need.'))
)

Pie Chart


Code includes:

  • Tooltip formatted with percent sign and rounded to two decimal places
  • Subtitle as a hyperlink
library(RPostgreSQL)
library(highcharter)
library(dplyr)
library(tidyr)


# loads the PostgreSQL driver
drv <- dbDriver("PostgreSQL")

# create connection for rda database
con_tnp <- dbConnect(drv, dbname = "tnp",
                 host = "aws-postgres-db.c5udgz7ro8hq.us-west-2.rds.amazonaws.com", port = 5432,
                 user = "postgres", password = "password")

# pull view in for visualizing

uninsured<-dbGetQuery(con_tnp, "SELECT * FROM puma_uninsured_2019")

# convert view to long format and filter for values of interest

uninsured_clb<-uninsured%>%
  gather(measure, value, tot_uninsured:pct_uninsured)%>%
 filter(grepl("pct_uninsured", measure))%>%
  filter(!grepl('Total', race))%>%
  filter(grepl("Central Long Beach", best_start))%>%
  mutate(variable=ifelse(measure %in% 'tot_hh', 'Total Number of Households',
                         ifelse(measure %in% 'tot_uninsured', 'Total Number of Uninsured Households', 'Blank')))


uninsured_clb %>%
  hchart(
    "pie", hcaes(x = race, y = value),
    name = "Uninsured Population in Central Long Beach",
    tooltip = list(pointFormat = "Rate: {point.value:.2f}%")
    )%>%
   #hc_colorAxis(minColor="#FFD432", maxColor = "#110066")%>%  #note I manually specified my colors
      hc_legend(enabled = FALSE)%>%
  hc_title(
    text = "Relative to other groups, Latinx households are most likely to be uninsured",
    margin = 20,
    align = "left",
    style = list(useHTML = TRUE, fontWeight="bold")
    )%>%
   hc_subtitle(
    text =  '<a href="https://data.census.gov/cedsci/">ACS 2018 5-Year Estimates</a>',
     style = list( fontWeight = "bold"),
    align = "left"
  )


Data Source: American Community Survey, 2015-2019 5-Year Estimate.
Data Note: Race groups are Latinx-exclusive, except AIAN and NHPI, which include all people who identify as AIAN or NHPI including in combination with other races and ethnicities.

Tree Graph

library(RPostgreSQL)
library(highcharter)
library(dplyr)
library(tidyr)


# loads the PostgreSQL driver
drv <- dbDriver("PostgreSQL")

# create connection for rda database
con_tnp <- dbConnect(drv, dbname = "tnp",
                 host = "aws-postgres-db.c5udgz7ro8hq.us-west-2.rds.amazonaws.com", port = 5432,
                 user = "postgres", password = "password")


#grab view for visualizing

race<-dbGetQuery(con_tnp, "SELECT * FROM ct_raceethnicity_2019")

# convert view to long and recode race for labeling 

race_clb <- gather(race, measure, value, num_white:pct_twoormore, factor_key=TRUE)%>%
  filter(grepl("pct", measure))%>%
  filter(grepl("Central Long Beach", best_start))%>%
  mutate(variable=ifelse(measure %in% 'pct_white', 'White',
                         ifelse(measure %in% 'pct_black_afam', "Black",
                                ifelse(measure %in% 'pct_aian', "AIAN",
                                       ifelse(measure %in% 'pct_asian', "Asian",
                                              ifelse(measure %in% 'pct_nativepi', "Native Pacific Islander",
                                                     ifelse(measure %in% 'pct_hispaniclatino', "Hispanic/Latinx",
                                                            ifelse(measure %in% 'pct_other', "Other Race",
                                                                   ifelse(measure %in% 'pct_twoormore', "Two or more Races", "Blank")))))))))


#tree map

race_clb%>%
  hchart(type = "treemap", hcaes(x = variable, value = value, color=value),
   tooltip = list(pointFormat = "{point.variable} Percentage: {point.value:,.1f}%"))%>%
      hc_tooltip(crosshairs = TRUE)%>%
   hc_colorAxis(stops = color_stops(colors = c("#F6B3B9", "#FCDCC5", "#B0D9CD","#63AAB5")))%>%  #note I added colors manually
     hc_title(
    text = "60.9% of families in Central Long Beach are Hispanic/Latinx. The second largest group of families are Asian at 13.5%",
    margin = 20,
    align = "left",
    style = list(useHTML = TRUE, fontWeight="bold")
    )%>%
   hc_subtitle(
    text =  '<a href="https://data.census.gov/cedsci/">ACS 2019 5-Year Estimates</a>',
     style = list( fontWeight = "bold"),
    align = "left"
  )%>%
  hc_legend(enabled = FALSE)

Static Visualizations

Bar Graph

Horizontal Single Bar Graph with Styling and Loop and Export Code

Code includes:

  • Four graphs showing same data for different geographies produced with one graph code chunk via loop
  • Export code to save and export graphs as png (this code is commented out to prevent overwriting of files)
  • Styling of graphs based off project styleguide, including custom fonts
###################################################
###PROJECT: ACLU (Criminal Justice Dept + RDA)#####
###################################################


##############################
####set up libraries styling###
##############################

library(here)
# install.packages("waffle", repos = "https://cinc.rud.is")
library(waffle)
library(dplyr)
library(data.table)
library(tidycensus)
library(sf)
library(ggplot2)
library(RPostgreSQL)
library(tidyr)
library(writexl)
library(classInt)
library(RColorBrewer)
library(stringr)
library(ggrepel)
library(scales)
library(extrafont)
library(kableExtra)
library(showtext)
library(forcats)
options(scipen=999)

setwd("W:/Project/OSI/ACLU CA")

## Set up Driver
drv <- dbDriver("PostgreSQL")

# create connection for acluca database
con <- dbConnect(drv, dbname = "acluca",
                 host = "aws-postgres-db.c5udgz7ro8hq.us-west-2.rds.amazonaws.com", port = 5432,
                 user = "postgres", password = "password")
    
#### Define Styling ####

#load fonts 

windowsFonts("Oswald SB" = windowsFont("Oswald SemiBold"))
windowsFonts("Oswald Light" = windowsFont("Oswald Light"))
windowsFonts("Atkinson Reg" = windowsFont("Atkinson Hyperlegible Regular"))

### Colors/Font Size ####

#naming convention for different styles: graphtype+style 
#mainfont = the more bold font (for titles, subtitles, whatever we want more bold)
#secondfont = the less bold font (captions, axis labels, whatever should not be as bold)

## Grouped Bar Chart (3 bars) ##

groupbar3color<-c("#CD3247", "#F7D56A", "#357082") ## substitute with Ron's deeper red color: #FF6B6B 
groupbar3bartextsize<-2.2

## Single Bar Chart ###

bar1color<-"#CD3247"
bar1bartextsize<-3.5

## Colors that have to be manually specified -mainly when a "total" bar is a different color from other bars of same graph

 manual1color<-"#F7D56A"
 manual2color<-"#357082"

## Two category pie chart ## 

pie2color<-c("#CD3247", "#F7D56A")

## Boxplot ##

boxplot1color<-"#CD3247"

## Waffle Chart ##

 waffle4color<-c("#CD3247", "#F7D56A", "#357082", "#908780")

## General for all graphs ###

# font

mainfont<-"Oswald SB" ##mainfont is for bolded text (i.e. Title, Subtitle, anything we want more bold)

secondfont<-"Oswald Light" ##secondfont is for lighter text (i.e. Caption, Axis labels, anything we want lighter)

#color

black <- "#000000"


###################
####graph code#####
###################

df<-dbGetQuery(con, "SELECT * FROM report_agency_timespent_percent_stopreason")

df<-df%>%
mutate(reason_label=ifelse(reason_label=="Outstanding arrest warrant/wanted person","Outstanding arrest warrant", 
       ifelse(reason_label=="Combined Traffic violation, Consensual encounter and search, and Reasonable suspicion","Traffic, Consensual encounter/search, and Reasonable suspicion",
ifelse(reason_label=="On parole/probation/PRCS/mandatory supervision","Parole/Probation",
ifelse(reason_label=="Investigation to determine if person is truant", "Truancy investigation", reason_label)))))%>%
  mutate(total_time_hrs=total_time/60)


# list of values to loop over
agency = unique(df$agencyname)

# set up graph information

indicatorname<-"Percent of Total Stop Time by Stop Reason"
indicatorsource <- "\nData Source: AB 953 data requested by ACLU of Southern California (2019)."
datanote<-"\nData Note: Analysis by stop incident. Reasons for stop that make up less than one percent of total stop time are aggregated into 'Other Reason'. Stop incidents that include two or more reasons for the stop are aggregated into the 'Two or More Reasons' category. Excludes request for service calls. Stops with stop times that represented outlier values in the data are capped to an upper threshold by stop reason and race."
 
# bar plot instead

for (i in agency) {

 temp_plot =print(ggplot(data=subset(df, agencyname==i),
         aes(x= reorder(reason_label, percent_time), y= percent_time)) +

  # bar style
  geom_bar( stat = "identity", width=0.6, position = position_dodge(width=1),
 fill = bar1color) +
    
    
  expand_limits(y = max(df$percent_time) + .25) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1),limits=c(0,1))+
scale_x_discrete(labels=function(x) str_wrap(x, width=35))+
 
   # bar labels
  geom_text(
            data=subset(df, agencyname==i),
            aes(label = paste0(scales::percent(percent_time, .1)," ", "(",scales::comma(total_time_hrs , big.mark = ",")," hours)")), size = bar1bartextsize,
    stat="identity", colour =black, family = mainfont,
     position = position_dodge(width = 1), vjust = 0.2 , hjust= -0.03) +


  labs(title = str_wrap(paste0(indicatorname),60),
       subtitle=paste0(i),
       caption = paste0(indicatorsource,"\n",str_wrap(datanote,90)))  +
  xlab("") +
  ylab("Percent of Total Time Spent on Stops") +
  coord_flip() +
  theme_minimal()+
  theme(legend.title = element_blank(), # no legend--modify if necessary

        # define style for axis text
        axis.text.y = element_text(size = 10, margin = margin(0, -10, 0, 0), 
                                   
                                   family= secondfont, face="bold"),
        axis.text.x = element_text(size = 10,
                                 family= secondfont),
        axis.title.x = element_text(size = 10, margin = margin(10, 0, 0, 0),
                                 family = secondfont, face="bold"),
        axis.title.y = element_text(size = 9, margin = margin(10, 0, 0, 0),
                                    family = secondfont),

        # define style for title and caption
        plot.caption =
          element_text(hjust = 0.0, size = 9, family = secondfont),

        plot.title =
          element_text(hjust = 0.0, size = 13, family = mainfont),

      plot.subtitle =
          element_text(hjust = 0.0, size = 9, family = mainfont),

        axis.ticks = element_blank(),

        # grid line style
        panel.grid.minor = element_blank(),
        panel.grid.major = element_line(size = 0.25),
        panel.grid.major.y = element_blank()))

  # ggsave(temp_plot, file=paste0(indicatorname,"_", i,".png"), path="W:/Project/OSI/ACLU CA/R/Images/Time Spent", units = c("in"),  width = 8, height = 5.5)  ########COMMENTING OUT EXPORT CODE for RDA Code Repository so that files are't being overwritten 
}

Stacked Bar Graph


Code includes:

  • Shows how to manually create a df
  • Includes code to export a static graph
###Grab census median household income data for la county
library(tidycensus)
library(scales)

census_api_key("9c04405586a3811f542be8917ce8da4d75c9a8bf", overwrite=TRUE)

median_hh_income<-get_acs(geography="county",
        state="CA", county="037", variables = c("B19013_001"),
        year=2019,
        survey="acs5")

#####Manually create data frame for RCM budget for LA County family of 4 (two adults, one preschooler, one school child)

rcm_la <-data.frame("measure"=c("Real Cost Measure \nAnnual Household Budget", "Real Cost Measure \nAnnual Household Budget", "Real Cost Measure \nAnnual Household Budget", "Real Cost Measure \nAnnual Household Budget", "Real Cost Measure \nAnnual Household Budget", "Real Cost Measure \nAnnual Household Budget", "Real Cost Measure \nAnnual Household Budget", "Los Angeles County \nMedian Household Income"), expense=c("Housing", "Child Care", "Food", "Health Care", "Transportation", "Miscellaneous", "Taxes", "Median Household Income"), "amount"=c(18540, 16848, 12468, 9828, 11496, 6912, 15469, 68044))

#customize the order for the stacked bar graph

rcm_la$expense <- factor(rcm_la$expense,levels = c("Child Care", "Food", "Health Care", "Housing", "Transportation", "Taxes", "Miscellaneous", "Median Household Income"))

#####Graph

library(ggplot2)
library(downloadthis)


plot<-ggplot(rcm_la, aes(fill=expense, y=amount, x=measure, label=amount))+
  geom_bar(position="stack", stat="identity")+
  geom_text(aes(label=paste0("$", format(amount, big.mark=","))), size = 3, position = position_stack(vjust = 0.5))+
   scale_y_continuous(name="", labels=dollar_format(), breaks=seq(0,95000,10000))+
  scale_x_discrete(name="")+
  labs(fill="")+
  scale_fill_brewer(breaks=c("Child Care", "Food", "Health Care", "Housing","Transportation", "Taxes","Miscellaneous"), type="qual", palette="Spectral")+
  ggtitle("Real Cost Budget for Family in Los Angeles County")+
 theme(panel.background = element_blank())

plot

download_this(plot) #this adds an export option for static graphs


Source: Annual budget looks like for a family of two adults, one preschooler, and one school-age child living in Los Angeles County from Struggling to Stay Afloat: The Real Cost Measure in California 2019. United Ways of California. http://www.unitedwaysca.org/realcost

Map

Pie Chart

Family Composition in Region 4 Best Start Geographies


Code includes:

  • Multiple pie charts in a facet-wrap
  • Percentage labels positioned on the pie charts
  • Custom colors on the pie charts
library(dplyr)
library(data.table)
library(tidycensus)
library(sf)
library(RPostgreSQL)
library(stringr)
library(tidyr)
library(ggplot2)


# loads the PostgreSQL driver
drv <- dbDriver("PostgreSQL")

# create connection for rda database
con_tnp <- dbConnect(drv, dbname = "tnp",
                 host = "aws-postgres-db.c5udgz7ro8hq.us-west-2.rds.amazonaws.com", port = 5432,
                 user = "postgres", password = "password")

# pull view for visualizing

fam<-dbGetQuery(con_tnp, "SELECT * FROM ct_familycomposition_2019")

# manipulate view from wide to long format for graphing

fam_long<-fam%>%
  select(1,5,9,13)%>%
  gather(measure,value, pct_married:pct_femalehh, factor_key = TRUE)%>%
  arrange(-value) %>% 
  mutate(Per_cumsum=cumsum(value))

#graph

fam_long%>%
  ggplot(aes(x="", y=value, fill=measure)) +
  geom_bar(stat="identity", width=1, color="white",
           position = position_stack(vjust = .5)) +
  
  labs(title="Family Composition in the State, County and Region 4\n"
  )+
  geom_text(aes(label = value), position = position_stack(vjust = 0.5),
            color = "white", size=3)+
    theme_void()+
  coord_polar(theta = "y") + 
  scale_fill_manual(name = "", 
                    values=c("#D8A0D9", "#63268C", "#127CA6"),
                    labels = c("Percent Married", "Percent Single\nMale Head of Household", "Percent Single\nFemale Head of Household"))+
  facet_wrap(~best_start)

Tables

EmbRACELA Table


Code includes:

  • Custom row colors
  • Custom column names
  • Custom table title
library(kableExtra)

#connect to postgres

con_embracela<- dbConnect(drv, dbname = "embrace_la",
                 host = "aws-postgres-db.c5udgz7ro8hq.us-west-2.rds.amazonaws.com", port = 5432,
                 user = "postgres", password = "password")

# pull view for visualizint

barriers_to_equity<-dbGetQuery(con_embracela, "SELECT response, freq, percent FROM commsurvey_barriers_to_equity WHERE percent > 26 OR percent < 6") 

#table

 kable(barriers_to_equity, digits=0, col.names=c("","Count","Percent"))%>%
  kable_styling()%>%
    row_spec(1:3, bold = T, color ="white", background="#62bce5", extra_css = "border-bottom: 1px solid")%>%
    row_spec(4:8, bold=T, color= "white", background="#6c90ca", extra_css = "border-bottom: 1px solid")%>%
    add_header_above(c("What are the top 3 barriers to creating a more equitable and inclusive city? (Most and least common barriers)"=1," "=2), align="l")
What are the top 3 barriers to creating a more equitable and inclusive city? (Most and least common barriers)
Count Percent
Housing affordability 310 57
Gentrification and displacement 201 37
Racism and discrimination 184 34
Public participation 29 5
Environmental justice 29 5
Public safety 28 5
Food security 22 4
City services 14 3

Barbell Chart

Code includes: * Creating factors to re-order the data * Dumbbell charts created through ggplot * See the barbell code template here: “W:\Project\RDA Team\Covid statewide\R\Covid_Statewide_Report_Reopenings.Rmd”

library(htmltools)
library(RPostgreSQL)
library(dplyr)
library(tidyverse)
library(highcharter) 
options(highcharter.theme = hc_theme_smpl(tooltip = list(valueDecimals = 2)))
library(tidyr)
library(ggplot2)
library(ggalt)
library(cowplot)
library(lubridate)

setwd("W:/Project/RDA Team/COVID Statewide")

# Data Setup
pw <- {
  "password"
}
# loads the PostgreSQL driver
drv <- dbDriver("PostgreSQL")

con <- dbConnect(drv, dbname = "covid19_race_class",
                 host = "aws-postgres-db.c5udgz7ro8hq.us-west-2.rds.amazonaws.com", port = 5432,
                 user = "postgres", password = pw)

# just a note of reference to inserting text #https://monashbioinformaticsplatform.github.io/2017-11-16-open-science-training/topics/rmarkdown.html
#https://rstudio.com/wp-content/uploads/2015/02/rmarkdown-cheatsheet.pdf


#Step 1 load data in wide and long format
reopenings_long<- dbGetQuery (con, "select * from state_latest.state_reopenings_analysis_long")


reopenings_wide<- dbGetQuery (con, "select * from state_latest.state_reopenings_analysis_wide")


#Step 2 Run some additional calcs to explore potential graphs and trends

reopenings_wide %>%
  mutate(diff_1week = avg_cases_rate_1weekafter - avg_cases_rate_start) ->reopenings_wide

reopenings_wide %>%
  mutate(perdiff_1week = (avg_cases_rate_1weekafter - avg_cases_rate_start)/avg_cases_rate_start,
         perdiff_2week= (avg_cases_rate_2weekafter - avg_cases_rate_start)/avg_cases_rate_start,
         perdiff_3week= (avg_cases_rate_3weekafter - avg_cases_rate_start)/avg_cases_rate_start) ->reopenings_wide


#Step 3 add in factors so that graphs are ordered by these fields versus the default in R

factor_levels<-c("3 weeks before", "2 weeks before", "1 week before", "Start", "1 week after", "2 weeks after", "3 weeks after")

factor_levels_2<-c("Baseline Period - May 8, 2020", "Reopening Period - May 25, 2020", "Reopening Period - June 12, 2020", "Reopening Period - June 21, 2020", "Closure Period - July 13, 2020", 
                   "Closure Period - August 17, 2020", "Closure Period - September 7, 2020")
reopenings_wide %>% 
  mutate(
    label_full = recode(label_,
      "Baseline Period - May 8, 2020" = 'May 8, 2020, BASELINE - Limited reopening of low-risk businesses and retail (pickup only)',
      "Reopening Period - May 25, 2020" = 'May 25, 2020, REOPENING - In-person retail shopping, places of worship, hair salons, LA County dining reopen',
      "Reopening Period - June 12, 2020" = 'June 12, 2020, REOPENING - Restaurants, wineries, bars, cardrooms, gyms, personal care services reopen',
      "Reopening Period - June 21, 2020" = 'June 21, 2020, REOPENING - LA County reopens bar, wineries, cardrooms, personal care services',
      "Closure Period - July 13, 2020" = 'July 13, 2020, CLOSURE - Bars, restaurants, wineries, cardrooms, gyms, places of worship, hair salons, personal care services close indoor operations',
      "Closure Period - August 17, 2020" = 'August 17, 2020, CLOSURE - Closures remain and no major sectors reopen',
      "Closure Period - September 7, 2020" = 'September 7, 2020, CLOSURE - Labor day occurs + most counties must maintain closures')) %>% 
  mutate(
    race_label=recode(race_label_short,
                      "latinx"="Latinx",
                      "nh_black"="Black",
                      "nh_asian"="Asian",
                      "nh_nhpi"="NHPI",
                      "nh_aian"="AIAN",
                      "nh_white"="White",
                      "total"="Total"))->reopenings_wide

factor_levels_3<-c("May 8, 2020, BASELINE - Limited reopening of low-risk businesses and retail (pickup only)", 
                   "May 25, 2020, REOPENING - In-person retail shopping, places of worship, hair salons, LA County dining reopen", 
                   "June 12, 2020, REOPENING - Restaurants, wineries, bars, cardrooms, gyms, personal care services reopen", 
                   "June 21, 2020, REOPENING - LA County reopens bar, wineries, cardrooms, personal care services", 
                   "July 13, 2020, CLOSURE - Bars, restaurants, wineries, cardrooms, gyms, places of worship, hair salons, personal care services close indoor operations", 
                   "August 17, 2020, CLOSURE - Closures remain and no major sectors reopen", 
                   "September 7, 2020, CLOSURE - Labor day occurs + most counties must maintain closures")

factor_race<-c("White", "Asian", "NHPI", "Latinx", "Black", 
                   "AIAN", "Total")

#Add factors to dataset 
reopenings_wide$label_2 <- factor(reopenings_wide$label_, levels = factor_levels_2)
reopenings_wide$label_full <- factor(reopenings_wide$label_full, levels = factor_levels_3)
reopenings_wide$race_label<-factor(reopenings_wide$race_label, levels = rev(factor_race))

#Create selection just for rates by race/ethnicity
reopenings_wide_nototal<-reopenings_wide[!reopenings_wide$race_label_short %in% c("total"), ]
reopenings_wide_reopenings_only<-reopenings_wide_nototal[!reopenings_wide_nototal$label_ %in% c("Closure Period - July 13, 2020","Closure Period - August 17, 2020","Closure Period - September 7, 2020"), ]

ggplot(reopenings_wide_nototal, aes(x=avg_cases_rate_start, xend=avg_cases_rate_1weekafter, y=race_label)) + 
        geom_dumbbell(size=1.5, color="gray",size_x=3, size_xend=3,colour_x="slategray2", colour_xend="darkgoldenrod1", dot_guide=FALSE)+
    facet_wrap(~label_full, ncol=1)+
  geom_text(data=filter(reopenings_wide_nototal,label_2=="Baseline Period - May 8, 2020" & race_label=="White"),
            aes(x=.9, y=race_label, label="Beginning Rate"),
            color="slategray2", size=2, vjust=-1,
            fontface="bold") +
    geom_text(data=filter(reopenings_wide_nototal,label_2=="Baseline Period - May 8, 2020" & race_label=="White"),
            aes(x=2.5, y=race_label, label="1 Week After"),
            color="darkgoldenrod1", size=2, vjust=-1,
            fontface="bold") +
  geom_rect(data=reopenings_wide_nototal, aes(xmin=20, xmax=25, ymin=-Inf, ymax=Inf), fill="grey") +
geom_text(data=reopenings_wide_nototal, aes(label=round(diff_1week, 1), y=race_label, x=22.5), fontface="bold", size=3) +
geom_text(data=filter(reopenings_wide_nototal,label_2=="Baseline Period - May 8, 2020" & race_label=="White"),
            aes(x=22.5, y=race_label, label="Difference in 7-day rate"),
            color="black", size=3, vjust=-.5, fontface="bold") +
scale_x_continuous(expand=c(0,0), limits=c(0, 25)) +
scale_y_discrete(expand=c(.2,0))+
  theme_bw()+
  labs(x="7-day average case rate", y="",title="Case Rates 1 Week After Reopenings and Closures")