# ---- FINAL WORKFLOW
library(dplyr)
library(ggplot2)
library(readxl)
library(forcats)

# 1. Load Packages ----
required_packages <- c(
  "metafor",
  "dplyr",
  "ggplot2",
  "tidyr",
  "readxl",
  "gridExtra",
  "forcats",
  "metaviz",
  "meta",
  "esc",
  "stringr",
  "countrycode",
  "patchwork",
  "reshape2",
  "viridis",
  "performance",
  "lme4",
  "grid",
  "gtable"
)

# Install missing packages
new_packages <- required_packages[!(required_packages %in% installed.packages()[, "Package"])]
if (length(new_packages))
  install.packages(new_packages)

# Load all packages
invisible(lapply(required_packages, library, character.only = TRUE))

# 2. Load data ----
data <- readxl::read_xlsx("data_metanalysis.xlsx")

# 3. Estimate missing standard deviations, and effect sizes -------------------

#' Cite these functions: Shinichi Nakagawa, Daniel W. A. Noble, Malgorzata Lagisz,
#' Rebecca Spake, Wolfgang Viechtbauer and Alistair M. Senior. 2022.
#' A robust and readily implementable method for the meta-analysis of response
#' ratios with and without missing standard deviations. Ecology Letters,
#' DOI: 10.1111/ele.14144
#'
#' @title cv_avg
#' @description Calculates the weighted average CV^2 within a study and the weighted average CV^2 across a study
#' @param x Mean of an experimental group
#' @param sd Standard deviation of an experimental group
#' @param n The sample size of an experimental group
#' @param group Study, grouping or cluster variable one wishes to calculate the within and between weighted CV^2. In meta-analysis this will most likely be 'study'.
#' @param data The dataframe containing the mean, sd, n and grouping variables
#' @param label A character string specifying the label one wishes to attach to columns to identify the treatment. Otherwise, if not specified it will default to the variable name for x
#' @param sub_b A logical indicating whether the between study CV^2 (b_CV2) should be appended to the data only ('TRUE') or whether both within study CV^2 (w_CV2), mean sample size (n_mean) and between study CV^2 (b_CV2) should all be appended to the data only ('FALSE')
#' @param cv2 A logical indicating whether one should take the weighted average of CV2 or the weighted average of CV followed by squaring this average. Default to FALSE.

cv_avg <- function(x,
                   sd,
                   n,
                   group,
                   data,
                   label = NULL,
                   sub_b = TRUE,
                   cv2 = FALSE) {
  # Check if the name is specified or not. If not, then assign it the name of the mean, x, variable input in the function. https://stackoverflow.com/questions/60644445/converting-tidyeval-arguments-to-string
  if (is.null(label)) {
    label <- purrr::map_chr(enquos(x), rlang::as_label)
  }
  
  # Calculate between study CV. Take weighted mean CV within study, and then take a weighted mean across studies of the within study CV. Weighted based on sample size and pooled sample size.
  b_grp_cv_data <- data %>%
    dplyr::group_by({{group}}) %>%
    dplyr::mutate(w_CV2 = weighted_CV({{sd}}, {{x}}, {{n}}, cv2 = cv2),
                  n_mean = mean({{n}}, na.rm = TRUE)) %>%
    dplyr::ungroup(.) %>%
    dplyr::mutate(b_CV2 = weighted.mean(w_CV2, n_mean, na.rm = TRUE), .keep = "used")
  
  # Make sure that label of the calculated columns is distinct from any other columns
  names(b_grp_cv_data) <- paste0(names(b_grp_cv_data), "_", label)
  
  # Append these calculated columns back to the original data and return the full dataset.
  if (sub_b) {
    b_grp_cv_data <- b_grp_cv_data %>% dplyr::select(grep("b_", names(b_grp_cv_data)))
    dat_new <- cbind(data, b_grp_cv_data)
  } else {
    dat_new <- cbind(data, b_grp_cv_data)
  }
  
  return(data.frame(dat_new))
}

#' @title weighted_CV
#' @description Calculates the weighted average CV^2 or CV followed by squaring within a study and the weighted averages CV^2 across a studies
#' @param sd Standard deviation of an experimental group
#' @param x Mean of an experimental group
#' @param n The sample size of an experimental group
#' @param cv2 Logical indicating whether the weighted average of CV^2 or CV should be taken (followed by squaring weighted average CV). Defaults to weighted average of CV.

weighted_CV <- function(sd, x, n, cv2 = FALSE) {
  if (cv2) {
    weighted.mean(na_if((sd / x) ^ 2, Inf), n, na.rm = TRUE)
  } else{
    weighted.mean(na_if((sd / x), Inf), n, na.rm = TRUE) ^ 2
  }
}


#' @title lnrr_laj
#' @description Calculates log response ratio based on Taylor expansion from Jajeunesse 2011
#' @param m1 Mean of treatment group 1
#' @param m2 Mean of treatment group 2
#' @param cv1_2 Coefficient of variation squared (CV^2) for treatment group 1
#' @param cv2_2 Coefficient of variation squared (CV^2) for treatment group 2
#' @param n1 Sample size for treatment group 1
#' @param n2 Sample size for treatment group 2
#' @param taylor A logical indicating whether to calculate point estimate with Taylor expansion.
#'
lnrr_laj <- function(m1, m2, cv1_2, cv2_2, n1, n2, taylor = TRUE) {
  if (taylor) {
    log(m1 / m2) + 0.5 * ((cv1_2 / n1) - (cv2_2 / n2))
  } else {
    log(m1 / m2)
  }
}

#' @title v_lnrr_laj
#' @description Calculates the sampling variance for log response ratio based on second order Taylor expansion proposed by Lajeunesse 2011
#' @param cv1_2 Coefficient of variation squared (CV^2) for treatment group 1
#' @param cv2_2 Coefficient of variation squared (CV^2) for treatment group 2
#' @param n1 Sample size for treatment group 1
#' @param n2 Sample size for treatment group 2
#' @param taylor A logical indicating whether to calculate point estimate with Taylor expansion.
v_lnrr_laj <- function(cv1_2, cv2_2, n1, n2, taylor = TRUE) {
  if (taylor) {
    ((cv1_2) / n1) + ((cv2_2) / n2) +
      ((cv1_2) ^ 2 / (2 * n1 ^ 2)) + ((cv2_2) ^ 2 / (2 * n2 ^ 2))
  } else {
    ((cv1_2) / n1) + ((cv2_2) / n2)
  }
}

# Replace any 0 SD with NA
data <- data |>
  mutate(sd_control   = na_if(sd_control, 0),
         sd_treatment = na_if(sd_treatment, 0))

# Give each row a unique id called "ES_ID"
data$ES_ID <- as.factor(seq_len(nrow(data)))

# Give each unique group of studies a unique id called "study_id"
data$study_id <- as.factor(as.numeric(as.factor(data$author_year)))

# Calculate logRR and associated sampling variances differently depending if observation has missing SD or not.
data$cv_control <- ifelse(
  is.infinite(data$sd_control / data$mean_control_adj),
  NA,
  data$sd_control / data$mean_control_adj
)

data$cv_treatment <- ifelse(
  is.infinite(data$sd_treatment / data$mean_treatment_adj),
  NA,
  data$sd_treatment / data$mean_treatment_adj
)

# Calculate the average between study CV, which will replace missing values.
data <- cv_avg(
  x = mean_control_adj,
  sd = sd_control,
  n = n_control,
  group = study_id,
  label = "1",
  data = data
)

data <- cv_avg(
  x = mean_treatment_adj,
  sd = sd_treatment,
  n = n_treatment,
  group = study_id,
  label = "2",
  data = data
)

# Use weighted mean CV in replacement for where CV's are missing.
# Otherwise, calculate CV^2 of data that is known.
# Replace NA values in cv_control and cv_treatment with b_CV2_1 and b_CV2_2, respectively
data$cv2_cont_new <- ifelse(is.na(data$cv_control), data$b_CV2_1, data$cv_control ^
                              2)
data$cv2_treatment_new <- ifelse(is.na(data$cv_treatment), data$b_CV2_2, data$cv_treatment ^
                                   2)

# Following Table 1 in Nakagawa et al. (2023), we can use equation 4 or 6 for the
# point estimate calculation, and equation 5 for the sampling variance when we
# have a known SD and equation 7 when we have an unknown SD.

# Now calculate new yi and vi, called lnrr_laj & v_lnrr_laj, respectively.
# This uses either the between individual CV^2 when missing or normal CV^2 when not missing.

data <- data %>%
  mutate(
    lnrr_laj = lnrr_laj(
      m1 = mean_treatment_adj,
      m2 = mean_control_adj,
      cv1_2 = cv2_treatment_new,
      cv2_2 = cv2_cont_new,
      n1 = n_treatment,
      n2 = n_control
    ),
    v_lnrr_1A = v_lnrr_laj(
      cv1_2 = cv2_treatment_new,
      n1 = n_treatment,
      cv2_2 = cv2_cont_new,
      n2 = n_control
    )
  )

data <- data |>
  mutate(
    study_id = as.factor(study_id),
    ES_ID = as.factor(ES_ID),
    author_year = as.factor(author_year)
  ) |>
  droplevels()

# 4. Multi-level meta-analysis model ------------------------------------------

# Multilevel random effects model with effect sizes nested within studies
run_multilevel_meta <- function(data) {
  # Convert author_year to a factor to represent study ID
  data$study_id <- as.factor(data$author_year)
  
  # Free memory before fitting model
  gc_often()
  
  # Model without moderators - base model
  cat("\nFitting base multilevel model. This may take some time...\n")
  
  # Model without moderators - base model
  model_base <- try(metafor::rma.mv(
    yi = lnrr_laj,
    # Log response ratio effect size
    V = v_lnrr_1A,
    # Variance of effect size
    random = ~ 1 |
      study_id / ES_ID,
    # Random effects: effect sizes nested within studies
    data = data,
    method = "REML"         # Restricted Maximum Likelihood estimation
  ))
  
  if (inherits(model_base, "try-error")) {
    cat("Error in fitting base model. Check the data and model specification.\n")
    return(NULL)
  }
  
  cat("\nBase Model Results:\n")
  print(summary(model_base))
  
  # Calculate I² statistics for heterogeneity
  W <- diag(1 / data$v_lnrr_1A)
  X <- model.matrix(model_base)
  P <- W - W %*% X %*% solve(t(X) %*% W %*% X) %*% t(X) %*% W
  I2_total <- 100 * sum(model_base$sigma2) / (sum(model_base$sigma2) + (model_base$k - model_base$p) / sum(diag(P)))
  
  cat("\nTotal heterogeneity (I²):", I2_total, "%\n")
  
  return(model_base)
}

# Run the base multilevel model
base_model <- run_multilevel_meta(data)
# Extract base model AIC for later use
base_aic_reml <- as.numeric(base_model$fit.stats["AIC", "REML"])

# 5. Test for heterogeneity ----

# Distribution of variance across levels
i2 <- dmetar::var.comp(base_model)
summary(i2)
plot(i2)

# Comparing full 3-level model and 2-level model
# Check if nesting individual effect sizes in studies has improved the model
# Set a model to where level 3 is set to zero
# This is equal to fitting a simple random-effects model in which all effect sizes are assumed independent (which we know they are not).
l3.removed <- rma.mv(
  yi = lnrr_laj,
  V = v_lnrr_1A,
  slab = paste("Study", data$author_year),
  random = ~ 1 |
    study_id / ES_ID,
  data = data,
  method = "REML",
  sigma2 =  c(0, NA)
)

# check if this result is better than the full model
model_comparison_result <- anova(base_model, l3.removed)

# 6. Publication bias analysis --------------------------------------------

# The analyses include:
# 1. Funnel plot visualization - symmetrical funnel suggests no bias
# 2. Egger's regression test - tests if small studies show different effects than large ones
# 3. Rank correlation test - another test for small-study effects
# 4. Trim-and-fill analysis - estimates how many studies might be missing and adjusts the effect
# 5. Fail-safe N - how many "null" studies would be needed to nullify the result

# Make the funnel plot of effect size vs precision i.e., inverse of SE)
# metafor::funnel(base_model, yaxis = "seinv",
#                 main = "Funnel Plot (Precision)",
#                 xlab = "Log Response Ratio",
#                 ylab = "Inverse Standard Error (1/SE)")

publication_bias_analysis <- function(data, base_model) {

  # Note: Many publication bias methods are not directly available for multilevel models (rma.mv objects)
  # Implement alternatives and adaptations where possible

  # 1. Funnel plot - works with most model types
  png(
    "funnel_plot.png",
    width = 2000,
    height = 1800,
    res = 300
  )
  metafor::funnel(base_model, main = "Funnel Plot", xlab = "Log Response Ratio")
  dev.off()

  # For multilevel models, we need to adapt the approach for testing publication bias

  # 2. Custom implementation of Egger's test for multilevel models
  # Adding precision (1/sqrt(v_lnrr_1A)) as a predictor in the model
  cat("2. Modified Egger's test for multilevel models:\n")

  # Create precision variable (1/SE)
  data$precision <- 1 / sqrt(data$v_lnrr_1A)

  # Fit model with precision as predictor
  egger_model <- try(rma.mv(
    lnrr_laj ~ precision,
    v_lnrr_1A,
    random = ~ 1 | study_id / ES_ID,
    data = data,
    method = "REML"
  ))

  if (!inherits(egger_model, "try-error")) {
    cat("\nModified Egger's test for multilevel models:\n")
    cat("Testing if small studies show different effects than larger studies\n")
    print(summary(egger_model))

    # Interpret the results
    egger_p <- coef(summary(egger_model))[2, "pval"]
    if (egger_p < 0.05) {
      cat(
        "\nThe modified Egger's test is significant (p =",
        round(egger_p, 4),
        "), suggesting possible small-study effects or publication bias.\n"
      )
    } else {
      cat(
        "\nThe modified Egger's test is not significant (p =",
        round(egger_p, 4),
        "), suggesting no clear evidence of small-study effects or publication bias.\n"
      )
    }
  } else {
    cat("\nError fitting modified Egger's test model.\n")
  }

  # 3. Alternative approach: Aggregated analysis for publication bias tests
  # For many publication bias tests, we can first aggregate effect sizes at the study level
  # and then apply standard tests to the aggregated data
  cat("Aggregating effect sizes at the study level and then applying standard tests.\n\n")

  # Aggregate effect sizes by study
  agg_data <- try({
    # Calculate study-level effects using fixed effects within studies
    study_effects <- data.frame()
    for (study in unique(data$study_id)) {
      study_data <- data[data$study_id == study, ]
      if (nrow(study_data) > 0) {
        # Simple precision-weighted mean for this study
        yi <- sum(study_data$lnrr_laj / study_data$v_lnrr_1A) / sum(1 / study_data$v_lnrr_1A)
        vi <- 1 / sum(1 / study_data$v_lnrr_1A)  # Variance of the weighted mean

        study_effects <- rbind(study_effects,
                               data.frame(
                                 study_id = study,
                                 yi = yi,
                                 vi = vi
                               ))
      }
    }
    study_effects
  }, silent = TRUE)

  if (!inherits(agg_data, "try-error") && nrow(agg_data) > 1) {
    # Run conventional random-effects meta-analysis on aggregated data
    agg_model <- try(metafor::rma(
      yi = yi,
      vi = vi,
      data = agg_data,
      method = "REML"
    ),
    silent = TRUE)

    if (!inherits(agg_model, "try-error")) {
      cat("Aggregated random-effects model results:\n")
      print(summary(agg_model))

      # Now we can apply standard publication bias tests to the aggregated data

      # Egger's regression test
      egger_agg <- try(metafor::regtest(agg_model), silent = TRUE)
      if (!inherits(egger_agg, "try-error")) {
        cat("\nEgger's regression test on aggregated data:\n")
        print(egger_agg)
      } else {
        cat("\nCould not perform Egger's test on aggregated data.\n")
      }

      # Rank correlation test
      rank_agg <- try(metafor::ranktest(agg_model), silent = TRUE)
      if (!inherits(rank_agg, "try-error")) {
        cat("\nBegg & Mazumdar's rank correlation test on aggregated data:\n")
        print(rank_agg)
      } else {
        cat("\nCould not perform rank correlation test on aggregated data.\n")
      }

      # Trim-and-fill analysis on aggregated data
      tf_agg <- try(metafor::trimfill(agg_model), silent = TRUE)
      if (!inherits(tf_agg, "try-error")) {
        cat("\nTrim-and-fill analysis on aggregated data:\n")
        cat("Estimating number of missing studies and adjusted effect size.\n\n")
        print(summary(tf_agg))

        # Save funnel plot with filled studies
        png(
          "funnel_plot_trimfill.png",
          width = 2000,
          height = 1800,
          res = 300
        )
        metafor::funnel(tf_agg, main = "Trim-and-Fill Funnel Plot", xlab = "Log Response Ratio")
        dev.off()

        # Interpret results
        if (tf_agg$k0 > 0) {
          cat("\nThe trim-and-fill analysis estimated",
              tf_agg$k0,
              "missing studies.\n")
          cat(
            "Original effect size:",
            round(agg_model$b, 4),
            "(",
            round(agg_model$ci.lb, 4),
            "to",
            round(agg_model$ci.ub, 4),
            ")\n"
          )
          cat(
            "Adjusted effect size:",
            round(tf_agg$b, 4),
            "(",
            round(tf_agg$ci.lb, 4),
            "to",
            round(tf_agg$ci.ub, 4),
            ")\n"
          )
        } else {
          cat("\nThe trim-and-fill analysis did not identify any missing studies.\n")
        }
      } else {
        cat("\nCould not perform trim-and-fill analysis on aggregated data.\n")
      }
    } else {
      cat("\nError fitting random-effects model to aggregated data.\n")
    }
  } else {
    cat(
      "\nCould not aggregate data at the study level or insufficient studies after aggregation.\n"
    )
  }

  # 4. Sensitivity analysis for multilevel model
  # Cannot directly use trimfill() with rma.mv objects
  cat("\n4. Sensitivity analysis for publication bias in multilevel model:\n")
  cat("Examining effect sizes by precision to assess potential bias patterns.\n\n")

  # Create a plot of effect sizes by precision
  png(
    "precision_plot.png",
    width = 2000,
    height = 1800,
    res = 300
  )
  plot(
    data$precision,
    data$lnrr_laj,
    xlab = "Precision (1/SE)",
    ylab = "Log Response Ratio",
    main = "Effect Sizes by Precision"
  )
  abline(h = coef(base_model), lty = 2)  # Add line for overall effect

  # Add smoothed line to show trend
  if (requireNamespace("mgcv", quietly = TRUE)) {
    try({
      smooth <- mgcv::gam(lnrr_laj ~ s(precision), data = data)
      ord <- order(data$precision)
      lines(data$precision[ord],
            predict(smooth)[ord],
            col = "red",
            lwd = 2)
    }, silent = TRUE)
  }
  dev.off()

  # 5. Return summarized results
  pub_bias_summary <- list(
    egger_model = if (exists("egger_model") &&
                      !inherits(egger_model, "try-error"))
      egger_model
    else
      NULL,
    agg_model = if (exists("agg_model") &&
                    !inherits(agg_model, "try-error"))
      agg_model
    else
      NULL,
    egger_agg = if (exists("egger_agg") &&
                    !inherits(egger_agg, "try-error"))
      egger_agg
    else
      NULL,
    rank_agg = if (exists("rank_agg") &&
                   !inherits(rank_agg, "try-error"))
      rank_agg
    else
      NULL,
    tf_agg = if (exists("tf_agg") &&
                 !inherits(tf_agg, "try-error"))
      tf_agg
    else
      NULL
  )


  # 4. Rosenberg's Fail-safe N
  cat("\n4. Rosenberg's Fail-safe N calculation:\n")
  cat(
    "Estimating how many unpublished null studies would be needed to nullify a significant result.\n\n"
  )

  # Rosenberg's fail-safe N calculation for multilevel models
  # First, we need to work with the aggregated data since the standard fsn methods
  # don't directly work with multilevel models
  calc_rosenberg_fsn <- function(agg_data) {
    # Only calculate if the overall effect is significant
    agg_model <- try(metafor::rma(
      yi = yi,
      vi = vi,
      data = agg_data,
      method = "REML"
    ),
    silent = TRUE)

    if (!inherits(agg_model, "try-error")) {
      # Check if overall effect is significant
      if (agg_model$pval < 0.05) {
        # Calculate Rosenberg's fail-safe N using metafor's fsn function
        fsn_result <- try(metafor::fsn(
          yi = yi,
          vi = vi,
          data = agg_data,
          type = "Rosenberg"
        ),
        silent = TRUE)

        if (!inherits(fsn_result, "try-error")) {
          cat("Rosenberg's fail-safe N calculation result:\n")
          print(fsn_result)

          # Interpret the results
          cat(
            "\nInterpretation: ",
            fsn_result$fsnum,
            " studies with null results would be needed to nullify the significant effect.\n"
          )

          # Apply Rosenthal's criterion: 5k + 10 (where k is number of studies)
          k <- nrow(agg_data)
          criterion <- 5 * k + 10

          if (fsn_result$fsnum > criterion) {
            cat(
              "The fail-safe N exceeds Rosenthal's criterion of 5k + 10 = ",
              criterion,
              ", suggesting the result is robust against publication bias.\n"
            )
          } else {
            cat(
              "The fail-safe N does not exceed Rosenthal's criterion of 5k + 10 = ",
              criterion,
              ", suggesting potential vulnerability to publication bias.\n"
            )
          }

          return(fsn_result)
        } else {
          cat("Could not calculate Rosenberg's fail-safe N.\n")
          return(NULL)
        }
      } else {
        cat(
          "The overall effect is not significant (p =",
          round(agg_model$pval, 4),
          "), so fail-safe N is not applicable.\n"
        )
        return(NULL)
      }
    } else {
      cat("Could not fit aggregated model for fail-safe N calculation.\n")
      return(NULL)
    }
  }

  # Calculate fail-safe N if we have aggregated data
  fsn_result <- NULL
  if (exists("agg_data") &&
      !inherits(agg_data, "try-error") && nrow(agg_data) > 1) {
    fsn_result <- calc_rosenberg_fsn(agg_data)
  } else {
    cat("Aggregated data not available for fail-safe N calculation.\n")
  }

  # Store in the publication bias summary list
  pub_bias_summary$fsn <- fsn_result

  # Overall conclusion

  has_bias_evidence <- FALSE

  if (exists("egger_p") && egger_p < 0.05) {
    has_bias_evidence <- TRUE
  }

  if (exists("egger_agg") &&
      !inherits(egger_agg, "try-error") && egger_agg$pval < 0.05) {
    has_bias_evidence <- TRUE
  }

  if (exists("rank_agg") &&
      !inherits(rank_agg, "try-error") && rank_agg$pval < 0.05) {
    has_bias_evidence <- TRUE
  }

  if (exists("tf_agg") &&
      !inherits(tf_agg, "try-error") && tf_agg$k0 > 0) {
    has_bias_evidence <- TRUE
  }

  # Add check for fail-safe N
  fsn_robust <- FALSE
  if (exists("fsn_result") && !is.null(fsn_result)) {
    # Check if fail-safe N exceeds Rosenthal's criterion
    k <- length(unique(data$study_id))
    criterion <- 5 * k + 10
    fsn_robust <- fsn_result$fsnum > criterion
  }

  if (has_bias_evidence) {
    cat(
      "There is some evidence of publication bias or small-study effects in this meta-analysis.\n"
    )

    if (fsn_robust) {
      cat(
        "However, Rosenberg's fail-safe N analysis indicates that a large number of unpublished null studies\n"
      )
      cat(
        "would be needed to nullify our significant result, suggesting some robustness to publication bias.\n"
      )
    }

    cat("Caution is warranted when interpreting the overall effects.\n")
    cat("Consider the adjusted effect sizes from the trim-and-fill analysis if available.\n")
  } else {
    cat(
      "There is limited evidence of publication bias in this meta-analysis based on the tests performed.\n"
    )

    if (fsn_robust) {
      cat(
        "This is further supported by Rosenberg's fail-safe N analysis, which indicates that a large number\n"
      )
      cat("of unpublished null studies would be needed to nullify our significant results.\n")
    }

    cat("However, these tests have limited power, especially with heterogeneous data,\n")
    cat("so publication bias cannot be completely ruled out.\n")
  }

  return(pub_bias_summary)
}

# Run publication bias analyses
publication_bias <- publication_bias_analysis(data, base_model)

# Create publication bias results table for supplementary material
create_pub_bias_table <- function(publication_bias) {
  # Extract actual values from the publication_bias object

  # Modified Egger's test results
  egger_coef <- ifelse(!is.null(publication_bias$egger_model),
                       round(coef(
                         summary(publication_bias$egger_model)
                       )[2, "estimate"], 4),
                       NA)
  egger_p <- ifelse(!is.null(publication_bias$egger_model),
                    round(coef(
                      summary(publication_bias$egger_model)
                    )[2, "pval"], 4),
                    NA)

  # Egger's test on aggregated data
  egger_agg_z <- ifelse(
    !is.null(publication_bias$egger_agg),
    round(publication_bias$egger_agg$zval, 4),
    NA
  )
  egger_agg_p <- ifelse(
    !is.null(publication_bias$egger_agg),
    round(publication_bias$egger_agg$pval, 4),
    NA
  )

  # Rank correlation test
  rank_tau <- ifelse(
    !is.null(publication_bias$rank_agg),
    round(publication_bias$rank_agg$tau, 4),
    NA
  )
  rank_p <- ifelse(
    !is.null(publication_bias$rank_agg),
    round(publication_bias$rank_agg$pval, 4),
    NA
  )

  # Trim-and-fill results
  missing_studies <- ifelse(!is.null(publication_bias$tf_agg),
                            publication_bias$tf_agg$k0,
                            NA)
  missing_se <- ifelse(
    !is.null(publication_bias$tf_agg) &&
      !is.null(publication_bias$tf_agg$se.k0),
    round(publication_bias$tf_agg$se.k0, 4),
    NA
  )

  # Original effect from aggregated model
  orig_effect <- ifelse(
    !is.null(publication_bias$agg_model),
    round(publication_bias$agg_model$b, 4),
    NA
  )
  orig_ci_lb <- ifelse(
    !is.null(publication_bias$agg_model),
    round(publication_bias$agg_model$ci.lb, 4),
    NA
  )
  orig_ci_ub <- ifelse(
    !is.null(publication_bias$agg_model),
    round(publication_bias$agg_model$ci.ub, 4),
    NA
  )
  orig_p <- ifelse(
    !is.null(publication_bias$agg_model),
    round(publication_bias$agg_model$pval, 4),
    NA
  )

  # Adjusted effect from trim-and-fill
  adj_effect <- ifelse(!is.null(publication_bias$tf_agg),
                       round(publication_bias$tf_agg$b, 4),
                       NA)
  adj_ci_lb <- ifelse(!is.null(publication_bias$tf_agg),
                      round(publication_bias$tf_agg$ci.lb, 4),
                      NA)
  adj_ci_ub <- ifelse(!is.null(publication_bias$tf_agg),
                      round(publication_bias$tf_agg$ci.ub, 4),
                      NA)
  adj_p <- ifelse(!is.null(publication_bias$tf_agg),
                  round(publication_bias$tf_agg$pval, 4),
                  NA)

  # Fail-safe N results
  fsn_value <- ifelse(!is.null(publication_bias$fsn),
                      publication_bias$fsn$fsnum,
                      NA)
  k <- length(unique(data$study_id))  # Number of studies
  rosenthal_criterion <- 5 * k + 10

  # Format results for table
  egger_result <- paste0("coefficient = ", egger_coef, ", p = ", egger_p)
  egger_agg_result <- paste0("z = ", egger_agg_z, ", p = ", egger_agg_p)
  rank_result <- paste0("Kendall's tau = ", rank_tau, ", p = ", rank_p)
  trim_fill_result <- paste0(missing_studies,
                             " missing studies estimated",
                             ifelse(!is.na(missing_se), paste0(" (SE = ", missing_se, ")"), ""))

  fsn_result <- ifelse(
    !is.na(fsn_value),
    paste0(
      fsn_value,
      " studies needed to nullify the effect (criterion = ",
      rosenthal_criterion,
      ")"
    ),
    "Not calculated or not applicable"
  )

  orig_effect_result <- paste0(
    orig_effect,
    " (95% CI: ",
    orig_ci_lb,
    " to ",
    orig_ci_ub,
    ", p = ",
    orig_p,
    ")",
    ifelse(orig_p < 0.05, "*", "")
  )

  adj_effect_result <- paste0(
    adj_effect,
    " (95% CI: ",
    adj_ci_lb,
    " to ",
    adj_ci_ub,
    ", p = ",
    adj_p,
    ")",
    ifelse(adj_p < 0.05, "*", "")
  )

  # Create data frame with results
  pub_bias_table <- data.frame(
    Method = c(
      "Modified Egger's test (multilevel model)",
      "Egger's regression test (aggregated data)",
      "Begg & Mazumdar's rank correlation test",
      "Trim-and-fill analysis",
      "Rosenberg's fail-safe N",
      "Original effect size (aggregated data)",
      "Adjusted effect size (after trim-and-fill)"
    ),
    Result = c(
      egger_result,
      egger_agg_result,
      rank_result,
      trim_fill_result,
      fsn_result,
      orig_effect_result,
      adj_effect_result
    ),
    Interpretation = c(
      ifelse(
        is.na(egger_p) || egger_p >= 0.05,
        "No significant evidence of small-study effects in multilevel model",
        "Significant evidence of small-study effects in multilevel model"
      ),

      ifelse(
        is.na(egger_agg_p) || egger_agg_p >= 0.05,
        "No significant asymmetry in study-level effects",
        "Significant asymmetry in study-level effects"
      ),

      ifelse(
        is.na(rank_p) || rank_p >= 0.05,
        "No significant correlation between effect size and precision",
        "Significant correlation between effect size and precision"
      ),

      ifelse(
        is.na(missing_studies) || missing_studies == 0,
        "No evidence of missing studies",
        ifelse(
          missing_studies > 0,
          "Evidence of potential missing studies with positive effects",
          "Evidence of potential missing studies with negative effects"
        )
      ),

      ifelse(
        is.na(fsn_value),
        "Not calculated or not applicable",
        ifelse(
          fsn_value > rosenthal_criterion,
          "Result is robust against publication bias (exceeds Rosenthal's criterion)",
          "Result may be vulnerable to publication bias (below Rosenthal's criterion)"
        )
      ),

      ifelse(
        is.na(orig_p),
        "Not available",
        ifelse(
          orig_p < 0.05,
          "Significant overall effect in original analysis",
          "Non-significant overall effect in original analysis"
        )
      ),

      ifelse(
        is.na(adj_p),
        "Not available",
        ifelse(
          adj_p < 0.05,
          "Significant effect after adjustment for potential bias",
          "Non-significant effect after adjustment for potential bias"
        )
      )
    )
  )

  # Handle NAs in Result column
  pub_bias_table$Result <- ifelse(
    grepl("NA", pub_bias_table$Result),
    "Not available or could not be calculated",
    pub_bias_table$Result
  )

  # Add a note about the pattern observed in funnel plots
  funnel_pattern <- "Visual inspection of funnel plots revealed a rectangular pattern at the top with most points outside the expected funnel shape, suggesting high heterogeneity in the dataset rather than conventional publication bias."

  trim_fill_note <- ifelse(
    !is.na(missing_studies) && missing_studies > 0,
    paste0(
      "The trim-and-fill analysis suggested ",
      missing_studies,
      " potentially missing studies",
      ifelse(
        !is.na(adj_p) && !is.na(orig_p) &&
          (adj_p < 0.05) != (orig_p < 0.05),
        paste0(
          ", which, if included, would change the overall effect from ",
          ifelse(orig_p < 0.05, "significant", "non-significant"),
          " (",
          orig_effect,
          ") to ",
          ifelse(adj_p < 0.05, "significantly ", "non-significantly "),
          adj_effect,
          ". "
        ),
        "."
      )
    ),
    ""
  )

  fsn_note <- ifelse(
    !is.na(fsn_value),
    paste0(
      "Rosenberg's fail-safe N analysis indicates that ",
      fsn_value,
      " studies with null results would be needed to nullify the significant effect. "
    ),
    ""
  )

  note <- paste0(
    "Note: ",
    funnel_pattern,
    " ",
    trim_fill_note,
    " ",
    fsn_note,
    "* indicates statistical significance at p < 0.05."
  )

  # Write table to CSV file
  # write.csv(pub_bias_table,
  #           "publication_bias_table_S1.csv",
  #           row.names = FALSE)

  # Also create a nicely formatted text version for quick reference
  cat(
    "Table S1. Summary of publication bias analyses for meta-analysis of fire effects on species abundance.\n\n"
  )

  # Print the table in a formatted way
  for (i in 1:nrow(pub_bias_table)) {
    cat("METHOD:", pub_bias_table$Method[i], "\n")
    cat("RESULT:", pub_bias_table$Result[i], "\n")
    cat("INTERPRETATION:",
        pub_bias_table$Interpretation[i],
        "\n\n")
  }

  cat(note)

  # Return the table invisibly
  invisible(pub_bias_table)
}

# Run the function to create the table
pub <- create_pub_bias_table(publication_bias)

# 7. Sensitivity analyses -------------------------------------------------

# Sensitivity analyses test how robust the results are to influential studies or methodological choices.

sensitivity_analysis <- function(data, base_model) {

  # 1. Influence diagnostics
  # Use influence.rma.mv which is designed for multilevel models
  infl <- try(metafor::influence.rma.mv(base_model))

  # Plot influence diagnostics if successful
  if (!inherits(infl, "try-error") && !is.null(infl)) {
    try({
      png(
        "influence_plot.png",
        width = 2000,
        height = 1800,
        res = 300
      )
      plot(infl, layout = c(8, 1))
      dev.off()

    }, silent = TRUE)
  } else {
    cat("\nInfluence diagnostics could not be computed for this model.\n")
    cat("This is common with complex multilevel models or large datasets.\n")
    cat("Continuing with other sensitivity analyses...\n")
  }

  # 2. Leave-one-out analysis: Removes one study at a time to see if any single study drives the results

  # Get unique study IDs
  study_ids <- unique(data$study_id)
  n_studies <- length(study_ids)

  # Initialize vectors to store results
  loo_estimates <- numeric(n_studies)
  loo_ci_lb <- numeric(n_studies)
  loo_ci_ub <- numeric(n_studies)
  p_values <- numeric(n_studies)
  study_labels <- character(n_studies)

  cat("\nRunning leave-one-out sensitivity analysis...\n")

  # Loop through each study
  for (i in 1:n_studies) {
    # Remove one study
    temp_data <- data[data$study_id != study_ids[i], ]
    study_labels[i] <- as.character(unique(data$author_year[data$study_id == study_ids[i]])[1])

    # Refit the model
    temp_model <- try(metafor::rma.mv(
      yi = lnrr_laj,
      V = v_lnrr_1A,
      random = ~ 1 | study_id / ES_ID,
      data = temp_data,
      method = "REML"
    ))

    # Store results if successful
    if (!inherits(temp_model, "try-error")) {
      loo_estimates[i] <- temp_model$b
      loo_ci_lb[i] <- temp_model$ci.lb
      loo_ci_ub[i] <- temp_model$ci.ub
      p_values[i] <- temp_model$pval
      cat("Completed leave-one-out for study",
          i,
          "of",
          n_studies,
          "\r")
    } else {
      cat("Error in leave-one-out for study", i, "\n")
      loo_estimates[i] <- NA
      loo_ci_lb[i] <- NA
      loo_ci_ub[i] <- NA
      p_values[i] <- NA
    }
  }

  # Plot leave-one-out results
  loo_data <- data.frame(
    study = study_labels,
    estimate = loo_estimates,
    ci_lb = loo_ci_lb,
    ci_ub = loo_ci_ub,
    p_value = p_values
  )

  # Remove NAs
  loo_data <- loo_data[complete.cases(loo_data), ]

  # Create leave-one-out plot
  png(
    "leave_one_out_plot.png",
    width = 2400,
    height = 1800,
    res = 300
  )
  par(mar = c(4, 10, 3, 2))

  # Sort by effect size
  loo_data <- loo_data[order(loo_data$estimate), ]

  # Plot
  metafor::forest(
    x = loo_data$estimate,
    ci.lb = loo_data$ci_lb,
    ci.ub = loo_data$ci_ub,
    slab = loo_data$study,
    refline = base_model$b,
    xlab = "Log Response Ratio",
    main = "Leave-One-Out Sensitivity Analysis"
  )

  # Add a zero reference line
  abline(v = 0, lty = 3)

  # Add significance indicators
  sig_symbols <- ifelse(loo_data$p_value < 0.001,
                        "***",
                        ifelse(
                          loo_data$p_value < 0.01,
                          "**",
                          ifelse(loo_data$p_value < 0.05, "*", "")
                        ))

  text(
    x = loo_data$ci.ub + 0.1,
    y = length(loo_data$study):1,
    labels = sig_symbols
  )

  dev.off()

  # 3. Original vs. trimmed effect size comparison
  cat("\nComparing original vs. bias-corrected effect size:\n")
  cat(
    "Original effect size: ",
    round(base_model$beta[1], 3),
    " (95% CI: ",
    round(base_model$ci.lb, 3),
    " to ",
    round(base_model$ci.ub, 3),
    "), p = ",
    format.pval(base_model$pval, digits = 3),
    "\n",
    sep = ""
  )

  if (!is.null(publication_bias$trim_fill)) {
    cat(
      "Trim-and-fill effect: ",
      round(publication_bias$trim_fill$b, 3),
      " (95% CI: ",
      round(publication_bias$trim_fill$ci.lb, 3),
      " to ",
      round(publication_bias$trim_fill$ci.ub, 3),
      "), p = ",
      format.pval(publication_bias$trim_fill$pval, digits = 3),
      "\n",
      sep = ""
    )
  }

  return(list(
    influence = if (!inherits(infl, "try-error") &&
                    !is.null(infl))
      infl
    else
      NULL,
    leave_one_out = loo_data
  ))
}

# Run sensitivity analyses
sensitivity <- sensitivity_analysis(data, base_model)

# 8. Systematic Moderator Analysis --------------------------------------------------------

# This section implements a step-wise approach to moderator analysis:
# 1. First, test each moderator individually to identify significant predictors
# 2. Only after identifying significant main effects, consider interactions
# 3. Rank moderators by their importance to guide further analysis

# Moderator analysis keeps ALL data in a single model but adds predictor
# variables to test if they explain variation in effect sizes.

# Before running any moderator analyses, convert all categorical variables to non-ordered factors
# This prevents ordered factors from creating polynomial contrasts (L, Q, C) in the output

categorical_vars <- c(
  "time_since_fire",
  "fire_type",
  "fire_severity",
  "final_group",
  "biome_wwf",
  "ecoregion_wwf"
)

for (var in categorical_vars) {
  if (var %in% names(data)) {
    data[[var]] <- factor(data[[var]], ordered = FALSE)
    cat("  Converted",
        var,
        "to non-ordered factor with",
        length(levels(data[[var]])),
        "levels\n")
  }
}

run_main_effects_analysis_simplified <- function(data, base_model) {
  moderators <- c(
    "final_group", "time_since_fire", "fire_type", "biome_wwf", "fire_severity", "ecoregion_wwf"
  )
  pretty_names <- c(
    "Taxonomic Group", "Time Since Fire", "Fire Type", "Biome", "Fire Severity", "Ecoregion"
  )
  names(pretty_names) <- moderators
  
  moderator_summary <- data.frame(
    Moderator = character(),
    Levels = numeric(),
    QM = numeric(),
    QMdf = numeric(),
    `p.value` = numeric(),
    `Pseudo.R2` = numeric(),
    AIC_REML = numeric(),
    delta_AIC_vs_base = numeric(),
    stringsAsFactors = FALSE
  )
  base_aic_reml <- as.numeric(base_model$fit.stats["AIC", "REML"])
  models_list <- list()
  all_param_est <- data.frame()
  
  # Fit all moderator models and extract parameter estimates
  for (mod in moderators) {
    if (mod %in% names(data) && length(unique(na.omit(data[[mod]]))) > 1) {
      cat("TESTING MODERATOR (REML):", mod, "\n")
      formula_int <- as.formula(paste("lnrr_laj ~", mod))
      model_int <- try(metafor::rma.mv(
        formula_int, v_lnrr_1A, random = ~ 1 | study_id / ES_ID, data = data, method = "REML"
      ))
      formula_no_int <- as.formula(paste("lnrr_laj ~ -1 +", mod))
      model_no_int <- try(metafor::rma.mv(
        formula_no_int, v_lnrr_1A, random = ~ 1 | study_id / ES_ID, data = data, method = "REML"
      ))
      if (!inherits(model_int, "try-error") && !inherits(model_no_int, "try-error")) {
        levels_n <- length(unique(na.omit(data[[mod]])))
        pseudo_r2_raw <- 100 * (sum(base_model$sigma2) - sum(model_int$sigma2)) / sum(base_model$sigma2)
        pseudo_r2 <- round(max(pseudo_r2_raw, 0), 2)
        aic_reml <- as.numeric(model_int$fit.stats["AIC", "REML"])
        delta_aic_base <- round(aic_reml - base_aic_reml, 2)
        models_list[[paste0(mod, "_intercept")]] <- model_int
        models_list[[paste0(mod, "_no_int")]] <- model_no_int
        moderator_summary <- rbind(
          moderator_summary,
          data.frame(
            Moderator = pretty_names[mod],
            Levels = levels_n,
            QM = round(model_int$QM, 2),
            QMdf = if (!is.null(model_int$k.reg) && length(model_int$k.reg) > 0) model_int$k.reg - 1 else NA,
            `p.value` = model_int$QMp,
            `Pseudo.R2` = pseudo_r2,
            AIC_REML = aic_reml,
            delta_AIC_vs_base = delta_aic_base,
            stringsAsFactors = FALSE
          )
        )
        # Extract parameter estimates for both models
        coefs_int <- coef(summary(model_int))
        for (i in 1:nrow(coefs_int)) {
          all_param_est <- rbind(
            all_param_est,
            data.frame(
              Moderator = pretty_names[mod],
              ModelType = "intercept",
              Level = rownames(coefs_int)[i],
              Estimate = coefs_int[i, "estimate"],
              SE = coefs_int[i, "se"],
              CI_lb = coefs_int[i, "ci.lb"],
              CI_ub = coefs_int[i, "ci.ub"],
              p_value = coefs_int[i, "pval"],
              PercentChange = (exp(coefs_int[i, "estimate"]) - 1) * 100,
              stringsAsFactors = FALSE
            )
          )
        }
        coefs_no_int <- coef(summary(model_no_int))
        for (i in 1:nrow(coefs_no_int)) {
          all_param_est <- rbind(
            all_param_est,
            data.frame(
              Moderator = pretty_names[mod],
              ModelType = "no_intercept",
              Level = rownames(coefs_no_int)[i],
              Estimate = coefs_no_int[i, "estimate"],
              SE = coefs_no_int[i, "se"],
              CI_lb = coefs_no_int[i, "ci.lb"],
              CI_ub = coefs_no_int[i, "ci.ub"],
              p_value = coefs_no_int[i, "pval"],
              PercentChange = (exp(coefs_no_int[i, "estimate"]) - 1) * 100,
              stringsAsFactors = FALSE
            )
          )
        }
      } else {
        cat("WARNING: One or both models for", mod, "failed to converge or returned an error. Skipping.\n")
      }
    }
  }
  
  # Save all parameter estimates to CSV
  if (nrow(all_param_est) > 0) {
    # write.csv(all_param_est, "main_moderator_level_parameters.csv", row.names = FALSE)
  }
  
  # Build ranking table and calculate delta AIC
  base_row_summary <- data.frame(
    Moderator = "Base Model (No Moderator)",
    Levels = 1,
    QM = NA,
    QMdf = NA,
    `p.value` = base_model$pval,
    `Pseudo.R2` = 0,
    AIC_REML = base_aic_reml,
    delta_AIC_vs_base = 0,
    stringsAsFactors = FALSE
  )
  final_ranking_table <- rbind(base_row_summary, moderator_summary)
  min_aic_reml <- min(final_ranking_table$AIC_REML, na.rm = TRUE)
  final_ranking_table$`Delta.AIC.vs.Min` <- round(final_ranking_table$AIC_REML - min_aic_reml, 2)
  final_ranking_table <- final_ranking_table[order(final_ranking_table$`Delta.AIC.vs.Min`), ]
  final_output_table <- final_ranking_table %>%
    dplyr::select(
      Moderator,
      `k` = Levels,
      `QM`,
      `QMdf`,
      `p.val` = `p.value`,
      `Pseudo.R2` = `Pseudo.R2`,
      AIC = AIC_REML,
      `dAIC.vs.Base` = delta_AIC_vs_base,
      `dAIC.vs.Min` = `Delta.AIC.vs.Min`
    )
  cat("FINAL MODERATOR RANKING TABLE (REML Only)\n")
  print(final_output_table, row.names = FALSE)
  return(list(summary = final_output_table, models = models_list))
}
# --- Run the analysis with the simplified function ---
main_effects_results <- run_main_effects_analysis_simplified(data, base_model)

# Save the main effects results
# save(main_effects_results, file = "main_moderator_model_results.RData")

# Save the final table
# write.csv(
#   main_effects_results$summary,
#   "main_effects_summary_final_ranking.csv",
#   row.names = FALSE
# )

# --- Step 2: Calculate Delta AIC (vs Min) for Ranking ---

base_row_summary <- data.frame(
  Moderator = "Base Model (No Moderator)",
  Levels = 1,
  QM = NA,
  QMdf = NA,
  `p.value` = base_model$pval,
  `Pseudo.R2` = 0,
  AIC_REML = base_aic_reml,
  delta_AIC_vs_base = 0,
  stringsAsFactors = FALSE
)

final_ranking_table <- rbind(base_row_summary, moderator_summary)

min_aic_reml <- min(final_ranking_table$AIC_REML, na.rm = TRUE)
final_ranking_table$`Delta.AIC.vs.Min` <- round(final_ranking_table$AIC_REML - min_aic_reml, 2)

# Sort and format the final table
final_ranking_table <- final_ranking_table[order(final_ranking_table$`Delta.AIC.vs.Min`), ]

# Select and rename columns for clean output
final_output_table <- final_ranking_table %>%
  select(
    Moderator,
    `k` = Levels,
    `QM`,
    `QMdf`,
    `p.val` = `p.value`,
    `Pseudo.R2` = `Pseudo.R2`,
    AIC = AIC_REML,
    `dAIC.vs.Base` = delta_AIC_vs_base,
    `dAIC.vs.Min` = `Delta.AIC.vs.Min`
  )

cat("FINAL MODERATOR RANKING TABLE (REML Only)\n")
print(final_output_table, row.names = FALSE)

### PErcent change in abundance
# For the base model
base_percent_change <- (exp(base_model$beta[1]) - 1) * 100

# For each moderator model
percent_change_table <- data.frame(
  Moderator = character(),
  Level = character(),
  lnRR = numeric(),
  PercentChange = numeric(),
  stringsAsFactors = FALSE
)

for (mod in names(main_effects_results$models)) {
  model <- main_effects_results$models[[mod]]
  if (!inherits(model, "try-error")) {
    coefs <- coef(model)
    for (level in names(coefs)) {
      lnrr <- coefs[level]
      pct <- (exp(lnrr) - 1) * 100
      percent_change_table <- rbind(
        percent_change_table,
        data.frame(
          Moderator = mod,
          Level = level,
          lnRR = lnrr,
          PercentChange = pct,
          stringsAsFactors = FALSE
        )
      )
    }
  }
}

# Add base model
percent_change_table <- rbind(
  percent_change_table,
  data.frame(
    Moderator = "Base Model",
    Level = "Overall",
    lnRR = base_model$beta[1],
    PercentChange = base_percent_change,
    stringsAsFactors = FALSE
  )
)

print(percent_change_table)

# write.csv(percent_change_table, "main_moderator_percent_change_table.csv", row.names = FALSE)

# Step 2: Run interaction analysis (regardless of main effects significance) ----

data <- read.csv("data_meta_analysis_clean.csv")

# Define comprehensive batches of interactions ----
interaction_batches <- list(
  # Batch 1: Taxonomic group (final_group) interactions
  batch1 = list(
    c("final_group", "fire_type"),
    c("final_group", "time_since_fire"),
    c("final_group", "fire_severity"),
    c("final_group", "biome_wwf"),
    c("final_group", "ecoregion_wwf")
  ),
  
  # Batch 2: Fire type interactions
  batch2 = list(
    c("fire_type", "fire_severity"),
    c("fire_type", "time_since_fire"),
    c("fire_type", "biome_wwf"),
    c("fire_type", "ecoregion_wwf")
  ),
  
  # Batch 3: Time since fire interactions
  batch3 = list(
    c("time_since_fire", "fire_severity"),
    c("time_since_fire", "biome_wwf"),
    c("time_since_fire", "ecoregion_wwf")
  ),
  
  # Batch 4: Fire severity interactions
  batch4 = list(
    c("fire_severity", "biome_wwf"),
    c("fire_severity", "ecoregion_wwf")
  ),
  
  # Batch 5: Geographic interactions
  batch5 = list(c("biome_wwf", "ecoregion_wwf")),
  
  # Batch 6: Three-way interactions
  batch6 = list(
    c("final_group", "fire_type", "fire_severity"),
    c("final_group", "fire_type", "time_since_fire"),
    c("final_group", "fire_severity", "time_since_fire")
  )
  
)

# =============================================================================
# FUNCTION: ANALYZE VARIABLE COMBINATIONS AND IDENTIFY GROUPING NEEDS
# =============================================================================

check_variable_combinations <- function(data, variable_pairs, min_threshold = 3) {
  cat("ANALYZING VARIABLE COMBINATIONS\n")
  cat("Identifying combinations with insufficient observations (< ",
      min_threshold,
      ")\n")
  
  # Initialize storage structures for results
  results <- list()
  summary_table <- data.frame(
    interaction = character(),
    total_combinations = integer(),
    combinations_below_threshold = integer(),
    percent_below_threshold = numeric(),
    min_obs = integer(),
    max_obs = integer(),
    median_obs = numeric(),
    stringsAsFactors = FALSE
  )
  
  # Analyze each variable pair
  for (var_pair in variable_pairs) {
    if (all(var_pair %in% names(data))) {
      pair_name <- paste(var_pair, collapse = " × ")
      cat("ANALYZING:", pair_name, "\n")
      
      var1_values <- na.omit(unique(data[[var_pair[1]]]))
      var2_values <- na.omit(unique(data[[var_pair[2]]]))
      
      # Create filtered dataset (removing NA values)
      filtered_data <- data[!is.na(data[[var_pair[1]]]) &
                              !is.na(data[[var_pair[2]]]), ]
      
      # Count observations for each combination
      combination_table <- table(filtered_data[[var_pair[1]]], filtered_data[[var_pair[2]]])
      
      # Print the full contingency table
      cat("\nCounts of observations for each combination:\n")
      print(combination_table)
      
      # Convert to data frame for easier analysis
      combo_df <- as.data.frame(combination_table)
      names(combo_df) <- c(var_pair[1], var_pair[2], "count")
      
      # Identify combinations below threshold
      below_threshold <- combo_df[combo_df$count < min_threshold, ]
      
      # Calculate summary statistics
      total_combos <- nrow(combo_df)
      below_threshold_count <- nrow(below_threshold)
      percent_below <- round((below_threshold_count / total_combos) * 100, 1)
      
      # Sort combinations by count (ascending order)
      combo_df_sorted <- combo_df[order(combo_df$count), ]
      
      # Report summary counts
      cat("\nTotal possible combinations:", total_combos, "\n")
      cat(
        "Combinations with fewer than",
        min_threshold,
        "observations:",
        below_threshold_count,
        "(",
        percent_below,
        "%)\n"
      )
      
      # If there are combinations with insufficient observations, list them
      if (below_threshold_count > 0) {
        cat("\nCombinations below threshold:\n")
        print(below_threshold)
      }
      
      # Calculate descriptive statistics
      min_obs <- min(combo_df$count)
      max_obs <- max(combo_df$count)
      median_obs <- median(combo_df$count)
      
      # Report descriptive statistics
      cat("\nSummary statistics:\n")
      cat("  Minimum observations:", min_obs, "\n")
      cat("  Maximum observations:", max_obs, "\n")
      cat("  Median observations:", median_obs, "\n")
      
      # Store detailed results for this variable pair
      results[[pair_name]] <- list(
        combination_table = combination_table,
        combo_df = combo_df,
        below_threshold = below_threshold,
        stats = c(
          min = min_obs,
          max = max_obs,
          median = median_obs
        ),
        total_combos = total_combos,
        below_threshold_count = below_threshold_count,
        percent_below = percent_below
      )
      
      # Add results to summary table
      summary_table <- rbind(
        summary_table,
        data.frame(
          interaction = pair_name,
          total_combinations = total_combos,
          combinations_below_threshold = below_threshold_count,
          percent_below_threshold = percent_below,
          min_obs = min_obs,
          max_obs = max_obs,
          median_obs = median_obs,
          stringsAsFactors = FALSE
        )
      )
      
      # Provide recommendations for grouping if many combinations have insufficient data
      # Using 30% as a threshold for concern
      if (percent_below > 30) {
        cat("RECOMMENDATION FOR", pair_name, "\n")
        cat(
          "Consider grouping levels of",
          var_pair[1],
          "or",
          var_pair[2],
          "as",
          percent_below,
          "% of combinations have insufficient data.\n"
        )
        
        # Variable-specific recommendations
        # For taxonomic groups
        if (var_pair[1] == "group2_new" ||
            var_pair[2] == "group2_new") {
          cat(
            "- Consider using higher taxonomic groupings (e.g., class level instead of order level)\n"
          )
          cat("- Focus on well-represented taxonomic groups\n")
        }
        
        # For geographic variables
        if (any(c("biome_wwf", "ecoregion_wwf", "country") %in% var_pair)) {
          cat("- Consider using broader geographic categories\n")
          cat("- Focus on specific regions with more data\n")
          cat("- Group similar ecoregions together\n")
        }
        
        # For fire variables
        if (any(c("fire_type", "fire_severity", "time_since_fire") %in% var_pair)) {
          cat("- Consider simplifying fire categories\n")
          cat("- Example: Combine moderate and high severity classes\n")
          cat("- Example: Group time periods into broader categories\n")
        }
      }
    } else {
      cat(
        "\nWARNING: One or more variables in",
        paste(var_pair, collapse = " × "),
        "not found in the dataset.\n"
      )
      cat("Skipping this variable combination.\n")
    }
  }
  
  # Sort summary table by percent of combinations below threshold (descending)
  summary_table <- summary_table[order(-summary_table$percent_below_threshold), ]
  
  # Print overall summary of all variable combinations
  cat("SUMMARY OF VARIABLE COMBINATIONS ANALYSIS\n")
  
  print(summary_table)
  
  cat("\nNOTE: Interactions with higher percentages of combinations below threshold\n")
  cat("      require grouping of factor levels or alternative analytical strategies.\n")
  cat("      Focus on interactions with <30% insufficient combinations for most robust results.\n")
  
  # Create a detailed table for all combinations (for supplementary material)
  cat("CREATING DETAILED TABLE OF ALL COMBINATIONS FOR SUPPLEMENTARY MATERIAL\n")
  
  # Initialize data frame to hold all combination details
  all_combinations <- data.frame(
    interaction = character(),
    var1_name = character(),
    var1_level = character(),
    var2_name = character(),
    var2_level = character(),
    observations = numeric(),
    sufficient = logical(),
    stringsAsFactors = FALSE
  )
  
  # Compile data from all interactions into a single data frame
  for (int_name in names(results)) {
    # Extract variable names from interaction name
    var_names <- strsplit(int_name, " × ")[[1]]
    combo_df <- results[[int_name]]$combo_df
    names(combo_df) <- c("var1_level", "var2_level", "observations")
    
    # Create standardized data frame for this interaction
    
    this_combo <- data.frame(
      interaction = int_name,
      var1_name = var_names[1],
      var1_level = as.character(combo_df$var1_level),
      var2_name = var_names[2],
      var2_level = as.character(combo_df$var2_level),
      observations = combo_df$observations,
      sufficient = combo_df$observations >= min_threshold,
      stringsAsFactors = FALSE
    )
    
    # Add to the master data frame
    all_combinations <- rbind(all_combinations, this_combo)
  }
  
  # Sort by interaction name, then by observation count (descending)
  all_combinations <- all_combinations[order(all_combinations$interaction,
                                             -all_combinations$observations), ]
  
  # Export to CSV file for supplementary material
  # output_file <- "variable_combinations_table_S1.csv"
  # write.csv(all_combinations,
  #           file.path("rerun_22oct2025", output_file),
  #           row.names = FALSE)

  
  # Return comprehensive results
  return(
    list(
      detailed_results = results,
      # Complete analysis for each variable pair
      summary = summary_table,
      # Summary table for all variable pairs
      all_combinations = all_combinations  # Detailed table for supplementary material
    )
  )
}

# FUNCTION: INTERACTION ANALYSIS ----
run_interaction_analysis_improved <- function(data, base_model, priority_interactions = NULL) {
  min_obs_threshold <- 3
  if (is.null(priority_interactions) || length(priority_interactions) == 0) {
    cat("\nNo valid interactions to test. Returning empty results.\n")
    return(list())
  }
  interactions_to_test <- priority_interactions
  interaction_summary <- data.frame(
    interaction = character(),
    `k.ES` = numeric(),
    `k.Studies` = numeric(),
    QM = numeric(),
    QMdf = numeric(),
    QMp = numeric(),
    AIC_REML = numeric(),
    `Pseudo.R2..` = numeric(),
    `Delta.AIC.vs.FilteredBase` = numeric(),
    stringsAsFactors = FALSE
  )
  models_list <- list()
  for (int in interactions_to_test) {
    if (all(int %in% names(data))) {
      interaction_name <- paste(int, collapse = " x ")
      sufficient_rows <- rep(FALSE, nrow(data))
      if (length(int) == 2) {
        combo_table <- table(data[[int[1]]], data[[int[2]]])
        sufficient_combos <- which(combo_table >= min_obs_threshold, arr.ind = TRUE)
        for (i in 1:nrow(sufficient_combos)) {
          level1 <- rownames(combo_table)[sufficient_combos[i, 1]]
          level2 <- colnames(combo_table)[sufficient_combos[i, 2]]
          sufficient_rows <- sufficient_rows |
            (data[[int[1]]] == level1 & data[[int[2]]] == level2)
        }
      } else if (length(int) == 3) {
        combo_table <- table(data[[int[1]]], data[[int[2]]], data[[int[3]]])
        sufficient_combos <- which(combo_table >= min_obs_threshold, arr.ind = TRUE)
        for (i in 1:nrow(sufficient_combos)) {
          level1 <- dimnames(combo_table)[[1]][sufficient_combos[i, 1]]
          level2 <- dimnames(combo_table)[[2]][sufficient_combos[i, 2]]
          level3 <- dimnames(combo_table)[[3]][sufficient_combos[i, 3]]
          sufficient_rows <- sufficient_rows |
            (data[[int[1]]] == level1 &
               data[[int[2]]] == level2 & data[[int[3]]] == level3)
        }
      } else {
        next
      }
      filtered_data <- data[sufficient_rows, ]
      for (var in int) {
        if (var %in% names(filtered_data)) {
          filtered_data[[var]] <- droplevels(factor(filtered_data[[var]], ordered = FALSE))
        }
      }
      N_filtered <- nrow(filtered_data)
      k_studies_filtered <- length(unique(filtered_data$study_id))
      if (N_filtered < 2) {
        cat("Skipping", interaction_name, ": Insufficient total observations after filtering (N =", N_filtered, ").\n")
        next
      }
      cat("\nTESTING:", interaction_name, "(N =", N_filtered, ", k =", k_studies_filtered, ")\n")
      # Intercept model (for omnibus QM)
      formula_base_int <- as.formula(paste("lnrr_laj ~", paste(int, collapse = " + ")))
      base_model_filtered_int <- try(rma.mv(
        formula_base_int,
        v_lnrr_1A,
        random = ~ 1 |
          study_id / ES_ID,
        data = filtered_data,
        method = "REML",
        control = list(stepup = FALSE)
      ))
      if (length(int) == 2) {
        formula_int <- as.formula(paste("lnrr_laj ~", paste(int[1], "*", int[2], sep = " ")))
      } else {
        formula_int <- as.formula(paste("lnrr_laj ~", paste(int[1], "*", int[2], "*", int[3], sep = " ")))
      }
      model_int <- try(rma.mv(
        formula_int,
        v_lnrr_1A,
        random = ~ 1 |
          study_id / ES_ID,
        data = filtered_data,
        method = "REML",
        control = list(stepup = FALSE)
      ))
      # No-intercept model (for level-specific estimates)
      if (length(int) == 2) {
        formula_no_int <- as.formula(paste("lnrr_laj ~ 0 +", paste(int[1], ":", int[2], sep = "")))
      } else {
        formula_no_int <- as.formula(paste("lnrr_laj ~ 0 +", paste(int[1], ":", int[2], ":", int[3], sep = "")))
      }
      model_no_int <- try(rma.mv(
        formula_no_int,
        v_lnrr_1A,
        random = ~ 1 |
          study_id / ES_ID,
        data = filtered_data,
        method = "REML",
        control = list(stepup = FALSE)
      ))
      if (!inherits(model_int, "try-error") && !inherits(base_model_filtered_int, "try-error") && !inherits(model_no_int, "try-error")) {
        models_list[[paste0(interaction_name, "_intercept")]] <- model_int
        models_list[[paste0(interaction_name, "_no_int")]] <- model_no_int
        sigma2_base_sum <- sum(base_model_filtered_int$sigma2, na.rm = TRUE)
        sigma2_int_sum <- sum(model_int$sigma2, na.rm = TRUE)
        pseudo_r2 <- ifelse(sigma2_base_sum > 0,
                            (sigma2_base_sum - sigma2_int_sum) / sigma2_base_sum,
                            NA)
        pseudo_r2_final <- round(ifelse(is.na(pseudo_r2) || pseudo_r2 < 0, 0, pseudo_r2 * 100), 2)
        base_aic <- base_model_filtered_int$fit.stats["AIC", "REML"]
        int_aic <- model_int$fit.stats["AIC", "REML"]
        delta_aic <- round(int_aic - base_aic, 2)
        QMdf = if (!is.null(model_int$k.reg) && length(model_int$k.reg) > 0) model_int$k.reg else NA
        interaction_summary <- rbind(
          interaction_summary,
          data.frame(
            interaction = interaction_name,
            `k.ES` = N_filtered,
            `k.Studies` = k_studies_filtered,
            QM = round(model_int$QM, 2),
            QMdf = QMdf,
            QMp = model_int$QMp,
            AIC_REML = round(int_aic, 2),
            `Pseudo.R2..` = pseudo_r2_final,
            `Delta.AIC.vs.FilteredBase` = delta_aic,
            stringsAsFactors = FALSE
          )
        )
        cat("Summary metrics calculated successfully.\n")
      } else {
        cat("ERROR: Failed to fit one or more interaction models.\n")
      }
    }
  }
  
  # --- 5. Final Ranking and Formatting ---
  
  if (nrow(interaction_summary) > 0) {
    # Sort by p-value
    combined_summary_by_p <- interaction_summary[order(interaction_summary$QMp), ]
    
    # Sort by Delta AIC (Best fit)
    combined_summary_by_aic <- interaction_summary[order(interaction_summary$`Delta.AIC.vs.FilteredBase`), ]
    
    cat("INTERACTION MODEL SUMMARY (Sorted by Delta AIC vs Filtered Base)\n")
    
    print(combined_summary_by_aic, row.names = FALSE)
    
    cat(
      "\nNote: Negative Delta AIC values indicate a better fit than the Base Model on the same filtered data.\n"
    )
    cat(
      "Pseudo-R² is based on the reduction in the SUM of all variance components (Level 2 + Level 3).\n"
    )
    
  } else {
    cat("\nNo interaction models were successfully fitted to produce a summary.\n")
  }
  
  # Return both summary and fitted models
  return(list(
    summary = if (exists("combined_summary_by_aic")) combined_summary_by_aic else interaction_summary,
    models = models_list
  ))
  
}

# Collect all interaction pairs from all batches defined earlier (using the same structure)
all_interaction_pairs <- unlist(interaction_batches, recursive = FALSE)

# # Run the final interaction analysis
# interaction_analysis_results <- run_interaction_analysis_improved(
#   data = data,
#   base_model = base_model,
#   priority_interactions = all_interaction_pairs
# )

# # Save the results
# if (!is.null(interaction_analysis_results) &&
#     !is.null(interaction_analysis_results$summary)) {
#   write.csv(
#     interaction_analysis_results$summary,
#     "interaction_models_final_ranking.csv",
#     row.names = FALSE
#   )
#   cat(
#     "\nFinal interaction model summary saved to 'interaction_models_final_ranking.csv'\n"
#   )
# }


# Analyze variable combinations before running interaction analysis ----
# Collect all interaction pairs from all batches
all_interaction_pairs <- unique(do.call(c, interaction_batches))

# Set minimum observation threshold
min_obs_threshold <- 3

# Run the combination analysis with the threshold
combo_analysis <- check_variable_combinations(data, all_interaction_pairs, min_threshold = min_obs_threshold)

# Save the analysis results for reference
# save(combo_analysis, file = "variable_combinations_analysis_lessagrregated.RData")

# Display detailed counts of observations per combination type
# Function to pretty print a count matrix and return a tidy version
pretty_print_counts <- function(combination_name, count_matrix) {

  # Print the raw count matrix
  print(count_matrix)
  
  # Convert to a tidy data frame for easier visualization
  tidy_counts <- as.data.frame(count_matrix)
  names(tidy_counts) <- c("Var1", "Var2", "Count")
  
  # Add the combination name
  tidy_counts$Combination <- combination_name
  
  # Add sufficiency indicator
  tidy_counts$Sufficient <- ifelse(tidy_counts$Count >= min_obs_threshold,
                                   "Sufficient",
                                   "Insufficient")
  
  # Print summary of sufficient vs insufficient
  cat("\nSummary for", combination_name, ":\n")
  sufficiency_summary <- table(tidy_counts$Sufficient)
  cat(
    "- Combinations with sufficient data (≥",
    min_obs_threshold,
    "observations):",
    sufficiency_summary["Sufficient"],
    "\n"
  )
  cat(
    "- Combinations with insufficient data (<",
    min_obs_threshold,
    "observations):",
    sufficiency_summary["Insufficient"],
    "\n"
  )
  
  return(tidy_counts)
}

# Process and display all combinations
all_tidy_counts <- data.frame()

for (combo_name in names(combo_analysis$detailed_results)) {
  # Get the count matrix
  count_matrix <- combo_analysis$detailed_results[[combo_name]]$combination_table
  
  # Pretty print and get tidy version
  tidy_counts <- pretty_print_counts(combo_name, count_matrix)
  
  # Add to combined data frame
  all_tidy_counts <- rbind(all_tidy_counts, tidy_counts)
}

# Save the tidy counts for future reference
# save(all_tidy_counts, file = "variable_combination_counts_tidy.RData")

# Summarize the combinations by percent below threshold ----

# Create a formatted version of the summary table
summary_formatted <- combo_analysis$summary %>%
  arrange(desc(percent_below_threshold)) %>%
  mutate(
    # Format percentages with % symbol
    percent_below = paste0(round(percent_below_threshold, 1), "%"),
    # Create a coverage ratio column (combinations with sufficient data)
    coverage_ratio = paste0(
      total_combinations - combinations_below_threshold,
      "/",
      total_combinations
    ),
    # Calculate percentage of combinations with sufficient data
    percent_covered = paste0(round(100 - percent_below_threshold, 1), "%")
  ) %>%
  select(
    Interaction = interaction,
    `Min Obs` = min_obs,
    `Max Obs` = max_obs,
    `Median Obs` = median_obs,
    `Coverage` = coverage_ratio,
    `% Below Threshold` = percent_below,
    `% With Sufficient Data` = percent_covered
  )

# Display the formatted table
print(summary_formatted, row.names = FALSE)

# Create a more detailed table showing the interactions that need attention ----
# Filter for interactions where more than 30% of combinations are below threshold
problem_interactions <- summary_formatted %>%
  filter(as.numeric(gsub("%", "", `% Below Threshold`)) > 30) %>%
  arrange(desc(as.numeric(gsub(
    "%", "", `% Below Threshold`
  ))))

if (nrow(problem_interactions) > 0) {
  cat(
    "\nThe following interactions have high percentages of combinations below threshold:\n"
  )
  print(problem_interactions, row.names = FALSE)
  cat(
    "\nConsider grouping levels for these interactions or focusing analysis on combinations with sufficient data.\n"
  )
} else {
  cat("All interactions have sufficient data for analysis.\n")
}


# # Process interactions in batches INTERACTION ANALYSIS ----
# Debug: Print interaction batches to verify they're defined
for (batch_name in names(interaction_batches)) {
  cat(
    "Batch:",
    batch_name,
    "- Number of interactions:",
    length(interaction_batches[[batch_name]]),
    "\n"
  )
  if (length(interaction_batches[[batch_name]]) > 0) {
    cat("  First interaction:",
        paste(interaction_batches[[batch_name]][[1]], collapse = " × "),
        "\n")
  }
}

# RUN INTERACTION ANALYSIS: Process each batch separately ----
all_results <- list()
for (batch_name in names(interaction_batches)) {
  cat("\n\nProcessing batch:", batch_name, "\n")
  
  # Verify interactions in this batch are valid
  valid_batch_interactions <- list()
  for (interaction in interaction_batches[[batch_name]]) {
    if (all(interaction %in% names(data))) {
      valid_batch_interactions <- c(valid_batch_interactions, list(interaction))
    } else {
      cat(
        "WARNING: Skipping interaction with invalid variable(s):",
        paste(interaction, collapse = " × "),
        "\n"
      )
    }
  }
  
  if (length(valid_batch_interactions) == 0) {
    cat("No valid interactions in batch",
        batch_name,
        ". Skipping this batch.\n")
    next
  }
  
  cat("Processing",
      length(valid_batch_interactions),
      "interactions in this batch\n")
  
  # Run the batch with improved error handling
  batch_results <- run_interaction_analysis_improved(
    data = data,
    base_model = base_model,
    priority_interactions = valid_batch_interactions
  )
  
  # Debug: Check what batch_results contains
  cat("\nDEBUG: batch_results content for", batch_name, ":\n")
  cat("Is NULL?", is.null(batch_results), "\n")
  cat("Is empty list?", length(batch_results) == 0, "\n")
  if (!is.null(batch_results) && length(batch_results) > 0) {
    cat("Contains keys:", paste(names(batch_results), collapse = ", "), "\n")
    if ("summary" %in% names(batch_results)) {
      cat("Summary rows:", nrow(batch_results$summary), "\n")
    }
    if ("models" %in% names(batch_results)) {
      cat("Number of models:", length(batch_results$models), "\n")
    }
  }
  
  # Store results
  all_results[[batch_name]] <- list(
    summary = batch_results$summary,
    models = batch_results$models
  )
}

# Combine results from all batches ----
combined_models <- list()
combined_summary <- data.frame()

cat("\n\nCombining results from all batches:\n")
for (batch_name in names(all_results)) {
  if (!is.null(all_results[[batch_name]]$models)) {
    batch_model_count <- length(all_results[[batch_name]]$models)
    combined_models <- c(combined_models, all_results[[batch_name]]$models)
    cat("Added",
        batch_model_count,
        "models from batch",
        batch_name,
        "\n")
  }
  if (!is.null(all_results[[batch_name]]$summary) &&
      nrow(all_results[[batch_name]]$summary) > 0) {
    batch_summary_rows <- nrow(all_results[[batch_name]]$summary)
    combined_summary <- rbind(combined_summary, all_results[[batch_name]]$summary)
    cat("Added",
        batch_summary_rows,
        "summary rows from batch",
        batch_name,
        "\n")
  }
}

# Save the combined results to CSV files
if (nrow(combined_summary) > 0) {
  # Sort by p-value (significance)
  combined_summary_by_p <- combined_summary[order(combined_summary$QMp), ]
  
  # Sort by AIC (model fit - lower is better)
  combined_summary_by_aic <- combined_summary[order(combined_summary$AIC), ]
  
  # Save the sorted results, now including Pseudo-R²
  # write.csv(
  #   combined_summary_by_p,
  #   "interaction_models_by_significance.csv",
  #   row.names = FALSE
  # )
  # write.csv(
  #   combined_summary_by_aic,
  #   "interaction_models_by_AIC.csv",
  #   row.names = FALSE
  # )
  
  # Create a detailed results table with model parameters
  detailed_results <- data.frame()
  percent_change_table <- data.frame()
  # Extract parameter estimates from each model
  for (model_name in names(combined_models)) {
    model <- combined_models[[model_name]]
    if (!inherits(model, "try-error") && !is.null(model)) {
      coefs <- coef(summary(model))
      for (i in 1:nrow(coefs)) {
        detailed_results <- rbind(
          detailed_results,
          data.frame(
            interaction = model_name,
            parameter = rownames(coefs)[i],
            estimate = coefs[i, "estimate"],
            se = coefs[i, "se"],
            ci_lower = coefs[i, "ci.lb"],
            ci_upper = coefs[i, "ci.ub"],
            p_value = coefs[i, "pval"],
            stringsAsFactors = FALSE
          )
        )
        # Only calculate percent change for no-intercept models
        if (grepl("_no_int$", model_name)) {
          percent_change_table <- rbind(
            percent_change_table,
            data.frame(
              interaction = model_name,
              parameter = rownames(coefs)[i],
              estimate = coefs[i, "estimate"],
              PercentChange = (exp(coefs[i, "estimate"]) - 1) * 100,
              se = coefs[i, "se"],
              ci_lower = coefs[i, "ci.lb"],
              ci_upper = coefs[i, "ci.ub"],
              p_value = coefs[i, "pval"],
              stringsAsFactors = FALSE
            )
          )
        }
      }
    }
  }
  # Save the detailed parameters table
  if (nrow(detailed_results) > 0) {
    # write.csv(
    #   detailed_results,
    #   "interaction_model_parameters.csv",
    #   row.names = FALSE
    # )
  }
  # Save the percent change table for interaction levels
  if (nrow(percent_change_table) > 0) {
    # write.csv(
    #   percent_change_table,
    #   "interaction_model_percent_change_table.csv",
    #   row.names = FALSE
    # )
  }
} else {
  cat("\nNo interaction models to save. Check that your models ran successfully.\n")
}

# Save the full results object for later use
# save(all_results, combined_models, combined_summary, file = "interaction_analysis_results.RData")
