Coding workshop: Week 6

new cleaning functions, skimr, whales
tidyverse
lterdatasampler
skimr
plotly
magick
NatParksPalettes
naniar
Author
Affiliation
Published

May 10, 2023

Set up

Loading packages:

Code
library(tidyverse) # general usage
library(here) # organization
library(naniar) # missing data visualization

library(skimr) # quick glimpse at data
library(plotly) # interactive plots
library(magick) # insert images into plots
library(NatParksPalettes) # one example of color palette package
library(wesanderson) # another example of color palette package
library(patchwork) # put plots together

Load in the data:

Code
# build the path to the gray whale watching csv with here(), read it,
# and store the result in an object called whales
whales <- here("workshop", "data", "Graywhale_watching_2007_2019.csv") %>%
  read_csv()

Look at missing data:

Code
# visualize the missing values in each column of whales
naniar::gg_miss_var(x = whales)

Use the skimr package to quickly glimpse the data:

Code
# print a summary of every column in whales, grouped by variable type
skimr::skim(whales)
Data summary
Name whales
Number of rows 5005
Number of columns 7
_______________________
Column type frequency:
character 3
Date 1
difftime 1
numeric 2
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
condition 75 0.99 4 13 0 37 0
direction 86 0.98 3 14 0 2589 0
species 0 1.00 21 21 0 1 0

Variable type: Date

skim_variable n_missing complete_rate min max median n_unique
date 0 1 2007-02-01 2019-05-19 2014-04-07 1021

Variable type: difftime

skim_variable n_missing complete_rate min max median n_unique
local_time 284 0.94 2520 secs 63420 secs 12:26:00 547

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
total 0 1.00 2.25 2.46 0 1 2 2 65 ▇▁▁▁▁
calves 141 0.97 0.33 0.66 0 0 0 1 6 ▇▁▁▁▁

Wrangling: add year, month, and simplified condition columns

Code
# Add the derived columns used for plotting. New columns, in order:
#   year_new      - year extracted from `date`
#   month_new     - month number extracted from `date`
#   month_name    - factor with levels February, March, April, May
#   condition_new - simplified viewing-condition label built from `condition`
whales_clean <- whales %>%
  mutate(
    # creating new columns for years and months
    year_new = lubridate::year(date),
    month_new = lubridate::month(date),
    # create a new column for month names
    # (no default branch: any month other than 2-5 becomes NA)
    month_name = case_when(
      month_new == 2 ~ "February",
      month_new == 3 ~ "March",
      month_new == 4 ~ "April",
      month_new == 5 ~ "May"
    ),
    # set the levels explicitly so the calendar order and the full set of
    # four levels never depend on which months appear in the data
    month_name = factor(
      month_name,
      levels = c("February", "March", "April", "May")
    ),
    # case_when() uses the first branch that matches, so a value naming two
    # ratings (for example both "Good" and "Excellent") gets the label listed
    # earliest here; missing or unmatched conditions become NA
    # NOTE(review): str_detect() is case sensitive - confirm that no condition
    # values are spelled in lowercase before relying on condition_new
    condition_new = case_when(
      stringr::str_detect(condition, "Excellent") ~ "excellent",
      stringr::str_detect(condition, "Good") ~ "good",
      stringr::str_detect(condition, "Fair") ~ "fair",
      stringr::str_detect(condition, "Poor") ~ "poor"
    )
  )

Create a plot and insert an image

Code
# keep only the observations from 2019 in a separate data frame
whales_2019 <- dplyr::filter(whales_clean, year_new == 2019)

# load the gray whale png with magick, then convert it to a raster
whale_image <- as.raster(
  magick::image_read(here("workshop", "images", "noaa-gray-whale.png"))
)

# make a boxplot of total whales per month in 2019, with every observation
# overlaid as a jittered point and the whale image drawn on top
whales_2019_boxplot <- ggplot(
  data = whales_2019,
  aes(x = month_name, y = total)
) +
  # the jittered points below already show every observation, so hide the
  # boxplot's own outlier markers to avoid drawing those rows twice
  geom_boxplot(aes(fill = month_name), outlier.shape = NA) +
  # jitter sideways only (height = 0) so the counts themselves are not moved;
  # the seed makes the jitter reproducible
  geom_point(position = position_jitter(width = 0.3, height = 0, seed = 1000)) +
  # one fill color per month level
  scale_fill_manual(values = natparks.pals("RockyMtn", 4)) +
  # x limits are positions on the discrete axis (levels sit at 1, 2, 3, ...)
  # NOTE(review): annotation layers do not stretch the axes, so the image is
  # only visible if the 2019 totals already extend the y axis into 30-60
  annotation_raster(whale_image, xmin = 2.5, xmax = 4.5, ymin = 30, ymax = 60)

whales_2019_boxplot

Code
# turn the static boxplot into an interactive plotly version
# NOTE(review): ggplotly() may not translate the annotation_raster() layer, so
# the whale image might be missing from the interactive plot - confirm
plotly::ggplotly(p = whales_2019_boxplot)

Citation

BibTeX citation:
@online{bui2023,
  author = {Bui, An},
  title = {Coding Workshop: {Week} 6},
  date = {2023-05-10},
  url = {https://an-bui.github.io/ES-193DS-W23/workshop/workshop-06_2023-05-10.html},
  langid = {en}
}
For attribution, please cite this work as:
Bui, An. 2023. “Coding Workshop: Week 6.” May 10, 2023. https://an-bui.github.io/ES-193DS-W23/workshop/workshop-06_2023-05-10.html.