# libraries
if (!require(librarian)){
install.packages("librarian")
library(librarian)
}
shelf(
# time-series
caTools, tools, dygraphs, xts, leaflet,
# tidyverse
fs, glue, lubridate, tidyverse)
# paths & variables
user <- Sys.info()[["user"]]

# set dir_data as filepath for robomussels data (one entry per known user);
# an unknown user stops here with a message saying what to add, instead of
# propagating NA into the paths below
dir_data <- switch(
  user,
  # bbest = "/Users/bbest/Downloads/robomusseldata20201030",
  bbest = "/Volumes/GoogleDrive/My Drive/projects/mbon-p2p/data/rocky/MARINe/robomusseldata20201030",
  cdobbelaere = "/Users/cdobbelaere/Documents/robomussels/robomusseldata20201030",
  stop(
    "No robomussel data directory configured for user '", user,
    "'; add an entry to dir_data.",
    call. = FALSE))

# averaged outputs are written to a sibling directory of dir_data
dir_avg <- file.path(dirname(dir_data), "robomusseldata20201030_avg")

# the raw data is read from dir_data, so it must already exist
if (!dir.exists(dir_data)) {
  stop("Raw data directory not found: ", dir_data, call. = FALSE)
}

# the csv outputs are written into dir_avg, so create it on first run
if (!dir.exists(dir_avg)) {
  dir.create(dir_avg, recursive = TRUE)
}
stopifnot(dir.exists(dir_avg))
# TODO: iterate over files in dir_data
# path to an individual tab-separated value (*.tsv) file
tsv <- file.path(dir_data, "BMRMUSCABD3_2012.txt")

# input file name without directory or extension; shared prefix of every output
tsv_stem <- tsv %>%
  path_ext_remove() %>%
  basename()

# output csv paths, all placed in dir_avg
csv_hourlymean   <- glue("{dir_avg}/{tsv_stem}_hourlymean.csv")
csv_movingwindow <- glue("{dir_avg}/{tsv_stem}_movingwindow.csv")
csv_dailyavg     <- glue("{dir_avg}/{tsv_stem}_dailyavg.csv")
csv_dailyq       <- glue("{dir_avg}/{tsv_stem}_dailyq.csv")

# stop early if the input file is missing
stopifnot(file.exists(tsv))
# read one tsv file into a data frame sorted by ascending time
d <- read_tsv(tsv) %>%
  mutate(
    # parse the Time_GMT strings (month/day/year hour:minute) into date-times
    time     = parse_date_time(Time_GMT, "m/d/y H:M"),
    # drop the original text column now that it has been parsed
    Time_GMT = NULL) %>%
  arrange(time)
# convert to eXtensible Time Series (xts) for dygraph:
# every column except time becomes a series, indexed by time
x <- xts(select(d, -time), order.by = d$time)

# output dygraph interactive plot (filled red area with a range selector)
x %>%
  dygraph(main = "Temp_C") %>%
  dyOptions(fillGraph = TRUE, fillAlpha = 0.4, colors = "red") %>%
  dyRangeSelector()
# create df containing hourly mean temp data
d_hourlymean <- d %>%
  # bucket each reading by its hour: time rounded down to the hourly boundary
  # (ceiling_date() would round up, round_date() to the nearest hour instead)
  group_by(time = floor_date(time, unit = "hour")) %>%
  # one row per hour holding the mean of that hour's readings
  summarize(Temp_C_hourly_mean = mean(Temp_C))

# write the hourly means, then show file size difference from original
write_csv(d_hourlymean, csv_hourlymean)
file_size(c(tsv, csv_hourlymean))
## 559K 153K
# convert to xts for dygraph: the hourly mean column indexed by time
x_hourlymean <- xts(select(d_hourlymean, -time), order.by = d_hourlymean$time)

# output dygraph interactive plot (filled red area with a range selector)
x_hourlymean %>%
  dygraph(main = "Hourly_Mean_Temp_C") %>%
  dyOptions(fillGraph = TRUE, fillAlpha = 0.4, colors = "red") %>%
  dyRangeSelector()
# (note: rolling averages can also be computed directly in dygraphs with dygraph() %>% dyRoller(rollPeriod = 6))
# 6 hour moving average, using hourly averages from earlier:
# each statistic is taken over a centered window of 6 hourly means
d_movingwindow <- d_hourlymean %>%
  mutate(
    # min for 6 hr moving window, based on hourly mean
    Temp_C_min_06_hours = runmin(
      Temp_C_hourly_mean,
      k = 6, alg = "C", endrule = "constant", align = "center"),
    # mean for 6 hr moving window, based on hourly mean
    Temp_C_mean_06_hours = runmean(
      Temp_C_hourly_mean,
      k = 6, alg = "C", endrule = "constant", align = "center"),
    # max for 6 hr moving window, based on hourly mean
    Temp_C_max_06_hours = runmax(
      Temp_C_hourly_mean,
      k = 6, alg = "C", endrule = "constant", align = "center"),
    # the hourly mean itself is not kept in the output
    Temp_C_hourly_mean = NULL)

# write the moving-window stats, then show file size difference from original
write_csv(d_movingwindow, csv_movingwindow)
file_size(c(tsv, csv_movingwindow))
## 559K 294K
# convert to xts: the three moving-window columns indexed by time
x_movingwindow <- d_movingwindow %>%
  select(-time) %>%
  xts(order.by = d_movingwindow$time)

# output dygraph plot: min / mean / max are passed together as one series
x_movingwindow %>%
  dygraph(main = "Temp_C") %>%
  dySeries(
    name = c(
      "Temp_C_min_06_hours",
      "Temp_C_mean_06_hours",
      "Temp_C_max_06_hours"),
    label = "Temp (ºC) over 6 hour moving window",
    color = "orangered") %>%
  dyOptions(
    #colors = c("orange", "orangered", "red"),
    fillGraph = FALSE,
    fillAlpha = 0.4) %>%
  dyRangeSelector()
# is there a way to display min/max data labels when using min/max as upper/lower bars?
# alternative: moving min and max using original
# but this doesn't reduce amount of data so not ideal
# NOTE(review): k = 6 here counts rows of the raw data, not hours, so the
# "_06_hours" names only hold if d has one reading per hour -- confirm the
# sampling interval. Flooring time also leaves repeated timestamps.
d_movingwindow_orig <- d %>%
  mutate(
    # label every reading with the start of its hour
    time = floor_date(time, unit = "hour"),
    Temp_C_min_06_hours = runmin(
      Temp_C,
      k = 6, alg = "C", endrule = "constant", align = "center"),
    Temp_C_max_06_hours = runmax(
      Temp_C,
      k = 6, alg = "C", endrule = "constant", align = "center"),
    Temp_C_mean_06_hours = runmean(
      Temp_C,
      k = 6, alg = "C", endrule = "constant", align = "center"),
    # the raw reading is not kept in the output
    Temp_C = NULL)
  #group_by(time) %>%
  #mutate(Temp_C_hourly_mean = mean(Temp_C)) %>%
  #ungroup()
# convert to xts: all columns except time, indexed by time
x_movingwindow_orig <- d_movingwindow_orig %>%
  select(-time) %>%
  xts(order.by = d_movingwindow_orig$time) # View(x_movingwindow_orig)

# output dygraph interactive plot
# ideally would like to plot the means with fill but plot the mins and maxes just with line
x_movingwindow_orig %>%
  dygraph(main = "Temp_C") %>%
  dySeries(
    name = c(
      "Temp_C_min_06_hours",
      "Temp_C_mean_06_hours",
      "Temp_C_max_06_hours"),
    label = "Temp (ºC) over 6 hour moving window",
    color = "orangered") %>%
  dyOptions(
    #colors = c("orange", "orangered", "red"),
    fillGraph = TRUE,
    fillAlpha = 0.4) %>%
  dyRangeSelector()