#| label: setup-threshold-sensitivity
#| include: false

# Load all packages used by the threshold sensitivity analyses
# (model fitting, data wrangling, paths, and parallel execution).
pacman::p_load(
  brms, dplyr, here, future, purrr, furrr, cmdstanr, tibble, stringr
)

Sensitivity analyses: Bloom onset thresholds
Robustness of bloom phenology inference across threshold definitions
This section evaluates whether inference about bloom phenology is robust to the chlorophyll-a threshold used to define bloom onset. We compare posterior summaries from Gaussian location–scale models fit to bloom onset defined at 5%, 15%, and 25% above the seasonal median chlorophyll-a concentration, and provide the code used to fit these models.
These models test whether inference about temporal trends in both the mean timing and variability of bloom onset remains consistent across alternative bloom-threshold definitions.
Threshold summaries
The tabs below report posterior summaries for the location–scale models fit to bloom onset defined using three alternative chlorophyll-a thresholds.
Family: gaussian
Links: mu = identity; sigma = log
Formula: delta5 ~ 1 + TIME
sigma ~ 1 + TIME
Data: data (Number of observations: 21)
Draws: 4 chains, each with iter = 6000; warmup = 2000; thin = 1;
total post-warmup draws = 16000
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept 6.90 3.27 0.98 13.77 1.00 6118 7692
sigma_Intercept 2.60 0.17 2.28 2.96 1.00 7678 8641
TIME 0.53 0.47 -0.38 1.47 1.00 5927 8014
sigma_TIME 0.11 0.03 0.05 0.18 1.00 7058 7653
Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
Family: gaussian
Links: mu = identity; sigma = log
Formula: delta15 ~ 1 + TIME
sigma ~ 1 + TIME
Data: data (Number of observations: 21)
Draws: 4 chains, each with iter = 6000; warmup = 2000; thin = 1;
total post-warmup draws = 16000
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept 18.28 5.06 8.27 28.43 1.00 6610 7594
sigma_Intercept 2.96 0.17 2.66 3.31 1.00 7771 7899
TIME 1.59 0.68 0.24 2.95 1.00 6450 6955
sigma_TIME 0.13 0.03 0.07 0.19 1.00 7458 7346
Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
Family: gaussian
Links: mu = identity; sigma = log
Formula: delta25 ~ 1 + TIME
sigma ~ 1 + TIME
Data: data (Number of observations: 21)
Draws: 4 chains, each with iter = 6000; warmup = 2000; thin = 1;
total post-warmup draws = 16000
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept 35.80 8.44 19.16 52.74 1.00 6016 7391
sigma_Intercept 3.46 0.17 3.16 3.82 1.00 7522 8074
TIME 3.07 1.22 0.62 5.41 1.00 5663 7263
sigma_TIME 0.10 0.04 0.02 0.17 1.00 6597 7284
Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
Model fitting
The following code documents the workflow used to transform bloom onset dates and fit the threshold-specific Gaussian location–scale models.
Show computational setup
# --- Parallel Processing Setup ---
# Use all available cores for Stan chains and the cmdstanr backend for brms.
options(mc.cores = parallel::detectCores(), brms.backend = "cmdstanr")
# Run model fits in parallel background R sessions (4 workers).
plan(multisession, workers = 4)

Data transformation
Bloom onset dates were converted to numeric offsets, measured as days since the previous winter solstice (21 December), matching the parameterization used in the main bloom phenology analyses.
#| label: threshold-data-transformation
#| eval: false
#| code-fold: true
#| code-summary: "Show data transformation code"

# Read the detected bloom onset dates (one row per season).
dat <- read.csv(here("..", "data", "bloom_dates.csv"))

dat <- dat %>%
  mutate(
    # --- Date parsing for detected bloom onsets (day/month/year strings) ---
    bloom_start_thr5_as_date  = as.Date(bloom_start_thr5,  format = "%d/%m/%Y"),
    bloom_start_thr15_as_date = as.Date(bloom_start_thr15, format = "%d/%m/%Y"),
    bloom_start_thr25_as_date = as.Date(bloom_start_thr25, format = "%d/%m/%Y"),
    # --- Reference date: previous winter solstice (21 December) ---
    winter_start_date = as.Date(paste0(season_year - 1, "-12-21")),
    # --- Bloom timing response variables (days since winter solstice) ---
    delta5  = as.numeric(difftime(bloom_start_thr5_as_date,  winter_start_date, units = "days")),
    delta15 = as.numeric(difftime(bloom_start_thr15_as_date, winter_start_date, units = "days")),
    delta25 = as.numeric(difftime(bloom_start_thr25_as_date, winter_start_date, units = "days")),
    # --- Year variables with distinct roles ---
    # For summaries / tables / bookkeeping only (not used as the model time trend)
    season_year_f = as.factor(season_year),
    # For location-scale models (mean and sigma trends): centered numeric year
    TIME = as.numeric(scale(season_year, center = TRUE, scale = FALSE)),
    # Optional: chlorophyll values at detected bloom onset (kept as numeric)
    value5  = as.numeric(value_at_bloom_thr5),
    value15 = as.numeric(value_at_bloom_thr15),
    value25 = as.numeric(value_at_bloom_thr25)
  ) %>%
  # Drop every bloom_start_thr* column (the raw date strings AND the parsed
  # *_as_date helpers share this prefix) plus the solstice reference column;
  # only the delta* offsets are carried forward into modelling.
  select(-starts_with("bloom_start_thr"), -winter_start_date)

Fit Gaussian location–scale models
The same Gaussian location–scale structure was fit to each bloom onset metric (delta5, delta15, delta25), with TIME included in both the location (mean) and scale (residual SD) components.
Show location–scale model function
run_location_scale_models <- function(response_vars, data) {
  # Fit a Gaussian location-scale model (mean and log-sigma each regressed
  # on TIME) to every response variable named in `response_vars`.
  #
  # Args:
  #   response_vars: character vector of column names in `data` to model.
  #   data: data frame containing the response columns and TIME.
  #
  # Side effects: creates the Rdata/ directory if needed; brm() caches each
  # fitted model to Rdata/model_<var>.rds. Returns nothing useful (walk).

  # Ensure the output directory exists; no warning if it already does.
  dir.create(here("Rdata"), showWarnings = FALSE)

  # Iterate for side effects only: each fit is written to disk by brm().
  purrr::walk(response_vars, function(var) {
    # Model both the mean (mu) and the standard deviation (sigma, log link)
    # as linear functions of TIME.
    model_formula <- bf(
      as.formula(paste(var, "~ 1 + TIME")),
      sigma ~ 1 + TIME
    )

    # Cached model path; with file_refit = "on_change", brm() re-fits only
    # when formula/data/etc. differ from the cached object.
    file_path <- here("Rdata", paste0("model_", var, ".rds"))

    model_family <- gaussian()

    # Make the default priors explicit for transparency in reporting
    # (passing get_prior()'s output is equivalent to brms' defaults).
    model_priors <- get_prior(model_formula, data = data, family = model_family)

    # tryCatch so one failed fit does not abort the remaining thresholds.
    fit <- tryCatch(
      brm(
        formula = model_formula,
        data = data,
        prior = model_priors,
        family = model_family,
        iter = 6000,
        warmup = 2000,
        chains = 4,
        seed = 123,
        init = 0,
        # Conservative sampler settings for this small-n (21 obs)
        # location-scale model.
        control = list(adapt_delta = 0.99, max_treedepth = 15),
        file = file_path,
        file_refit = "on_change"
      ),
      error = function(e) {
        # Report the failure on stderr and continue with the next variable.
        message("Error fitting model for: ", var)
        message("Error message: ", conditionMessage(e))
        NULL
      }
    )

    # Announce success only when the fit completed.
    if (!is.null(fit)) {
      message("Saved model for: ", var, " to ", file_path)
    }
  })
}

Model execution
# --- Run Analysis for Delta Variables ---
# Bloom-onset responses, one per chlorophyll-a threshold (5%, 15%, 25%).
delta_vars <- paste0("delta", c(5, 15, 25))

# Fit (or reload cached) location-scale models for each threshold definition.
run_location_scale_models(response_vars = delta_vars, data = dat)