amb26/estimate_kernels.R

## estimate_kernels.R
library(stringr)
library(dplyr)
library(HDInterval)
library(brms)


source("scripts/utils.R")

recordfiles <- c("A_formosa_detections_distances_to_first_records.csv",
             "C_attenuata_detections_distances_to_first_records.csv",
             "L_virginicum_detections_distances_to_first_records.csv",
             "P_montana_detections_distances_to_first_records.csv",
             "P_unalascensis_detections_distances_to_first_records.csv",
             "T_dichotomum_detections_distances_to_first_records.csv")

bin_by_distance <- function (records, bin_width) {
  # Bin the distance column
  breaks <- seq(0, 24, by = bin_width)  # Define bin edges
  bin_centers <- (head(breaks, -1) + tail(breaks, -1)) / 2

  # Bin the distances and summarize
  result <- records %>%
    mutate(
      bin = cut(distance, breaks = breaks, include.lowest = TRUE),
      distance = bin_centers[as.numeric(bin)]
    ) %>%
    group_by(bin, distance) %>%
    summarise(poccupancy = mean(occupancy == 1), .groups = "drop")

  result
}

# Define the formula
formula <- bf(poccupancy ~ q * exp(-decay * distance),
              q + decay ~ 1,
              nl = TRUE)

# Define priors for the parameters
priors <- c(prior(normal(0, 1), nlpar = "q", lb = 0),
            prior(normal(0, 1), nlpar = "decay", lb = 0))

fit <- brm(
  formula = formula,
  data = bins,
  prior = priors,
  iter = 2000,  # Number of iterations
  warmup = 1000,  # Warmup period for sampling
  chains = 4,  # Number of chains
  cores = 4,  # Number of cores for parallel computation
  seed = 123  # For reproducibility
)


estimate_kernel <- function (recordfile) {

  # Load observed data
  records <- read.csv(str_glue("scripts/distance_decay_hypothesis_tests/{recordfile}"))

  # Ensure data are formatted correctly
  records$distance <- as.numeric(records$distance)
  records$occupancy <- as.numeric(records$occupancy)

  # Remove non-detections that don't meet the search effort threshold
  records <- records %>%
    filter(occupancy == 1 | search_eff >= 1.2)

  # Define distance bins
  records$distance_bin <- cut(records$distance, breaks = seq(0, 24, by = 2), include.lowest = TRUE)

  binned <- records %>%
    group_by(distance_bin) %>%
    summarise(
      total = n(),
      occupancy_1_count = sum(occupancy == 1),
      proportion_occupancy_1 = occupancy_1_count / total
    )

  # Compute likelihood of detection or nondetection given a particular decay rate

  exp_like <- function (mu, decay_rate, data) {
    rates <- mu * exp(-decay_rate * data$distance)
    probs <- ifelse(data$occupancy == 1, rates, 1 - rates)
    prod(probs)
  }

  # Define decay rates including steeper values for simulation
  decay_rates <- seq(from = 0, to = 20, by = 0.01)

  mus <- seq(from = 0.2, to = 2, by = 0.2)

  results <- data.frame(matrix(ncol = length(decay_rates), nrow = length(mus)))
  colnames(results) <- decay_rates
  rownames(results) <- mus

  # Compute exp_like for each combination of mu and decay_rate
  for (i in seq_along(mus)) {
    for (j in seq_along(decay_rates)) {
      results[i, j] <- exp_like(mus[i], decay_rates[j], records)
    }
  }


  likes <- sapply(decay_rates, exp_like, data = records)
  likedensity <- data.frame(x = decay_rates, y = likes)
  # https://adv-r.hadley.nz/s3.html#s3-classes - tell R that this is a density
  class(likedensity) <- "density"

  likerange <- HDInterval::hdi(likedensity)
  maxlike = likedensity$x[which.max(likedensity$y)]
  wg("Estimating kernel density for {recordfile}")

  wg("Maximum likelihood decay rate is {maxlike}, 95% HPD interval between [{likerange[['lower']]}, {likerange[['upper']]}]")

  round_decay_rates <- c(0, 0.01, 0.05, 0.1, 0.5, 1, 1.5, 2, 3, 5, 10, 20)

  # Convert back to a dataframe so we can filter it
  class(likedensity) <- "data.frame"
  rounddensity <- likedensity %>% filter(x %in% round_decay_rates)
  maxroundlike = rounddensity$x[which.max(rounddensity$y)]

  wg("Maximum likelihood rounded decay rate: {maxroundlike}")

  m = strsplit(recordfile, '_')
  target <- paste(m[[1]][1], m[[1]][2], sep='_')

  plot(likedensity$x, likedensity$y, type="l", xlab = "Kernel value in km", ylab = "Kernel likelihood", main = str_glue("Posterior density for search effort distance kernel for {target}"))

  wg("")

}

for (recordfile in recordfiles) {
  estimate_kernel(recordfile)
}
	library(stringr)
	library(dplyr)
	library(HDInterval)
	library(brms)


	source("scripts/utils.R")

	recordfiles <- c("A_formosa_detections_distances_to_first_records.csv",
	"C_attenuata_detections_distances_to_first_records.csv",
	"L_virginicum_detections_distances_to_first_records.csv",
	"P_montana_detections_distances_to_first_records.csv",
	"P_unalascensis_detections_distances_to_first_records.csv",
	"T_dichotomum_detections_distances_to_first_records.csv")

	bin_by_distance <- function (records, bin_width) {
	# Bin the distance column
	breaks <- seq(0, 24, by = bin_width) # Define bin edges
	bin_centers <- (head(breaks, -1) + tail(breaks, -1)) / 2

	# Bin the distances and summarize
	result <- records %>%
	mutate(
	bin = cut(distance, breaks = breaks, include.lowest = TRUE),
	distance = bin_centers[as.numeric(bin)]
	) %>%
	group_by(bin, distance) %>%
	summarise(poccupancy = mean(occupancy == 1), .groups = "drop")

	result
	}

	# Define the formula
	formula <- bf(poccupancy ~ q * exp(-decay * distance),
	q + decay ~ 1,
	nl = TRUE)

	# Define priors for the parameters
	priors <- c(prior(normal(0, 1), nlpar = "q", lb = 0),
	prior(normal(0, 1), nlpar = "decay", lb = 0))

	fit <- brm(
	formula = formula,
	data = bins,
	prior = priors,
	iter = 2000, # Number of iterations
	warmup = 1000, # Warmup period for sampling
	chains = 4, # Number of chains
	cores = 4, # Number of cores for parallel computation
	seed = 123 # For reproducibility
	)


	estimate_kernel <- function (recordfile) {

	# Load observed data
	records <- read.csv(str_glue("scripts/distance_decay_hypothesis_tests/{recordfile}"))

	# Ensure data are formatted correctly
	records$distance <- as.numeric(records$distance)
	records$occupancy <- as.numeric(records$occupancy)

	# Remove non-detections that don't meet the search effort threshold
	records <- records %>%
	filter(occupancy == 1 \| search_eff >= 1.2)

	# Define distance bins
	records$distance_bin <- cut(records$distance, breaks = seq(0, 24, by = 2), include.lowest = TRUE)

	binned <- records %>%
	group_by(distance_bin) %>%
	summarise(
	total = n(),
	occupancy_1_count = sum(occupancy == 1),
	proportion_occupancy_1 = occupancy_1_count / total
	)

	# Compute likelihood of detection or nondetection given a particular decay rate

	exp_like <- function (mu, decay_rate, data) {
	rates <- mu * exp(-decay_rate * data$distance)
	probs <- ifelse(data$occupancy == 1, rates, 1 - rates)
	prod(probs)
	}

	# Define decay rates including steeper values for simulation
	decay_rates <- seq(from = 0, to = 20, by = 0.01)

	mus <- seq(from = 0.2, to = 2, by = 0.2)

	results <- data.frame(matrix(ncol = length(decay_rates), nrow = length(mus)))
	colnames(results) <- decay_rates
	rownames(results) <- mus

	# Compute exp_like for each combination of mu and decay_rate
	for (i in seq_along(mus)) {
	for (j in seq_along(decay_rates)) {
	results[i, j] <- exp_like(mus[i], decay_rates[j], records)
	}
	}


	likes <- sapply(decay_rates, exp_like, data = records)
	likedensity <- data.frame(x = decay_rates, y = likes)
	# https://adv-r.hadley.nz/s3.html#s3-classes - tell R that this is a density
	class(likedensity) <- "density"

	likerange <- HDInterval::hdi(likedensity)
	maxlike = likedensity$x[which.max(likedensity$y)]
	wg("Estimating kernel density for {recordfile}")

	wg("Maximum likelihood decay rate is {maxlike}, 95% HPD interval between [{likerange[['lower']]}, {likerange[['upper']]}]")

	round_decay_rates <- c(0, 0.01, 0.05, 0.1, 0.5, 1, 1.5, 2, 3, 5, 10, 20)

	# Convert back to a dataframe so we can filter it
	class(likedensity) <- "data.frame"
	rounddensity <- likedensity %>% filter(x %in% round_decay_rates)
	maxroundlike = rounddensity$x[which.max(rounddensity$y)]

	wg("Maximum likelihood rounded decay rate: {maxroundlike}")

	m = strsplit(recordfile, '_')
	target <- paste(m[[1]][1], m[[1]][2], sep='_')

	plot(likedensity$x, likedensity$y, type="l", xlab = "Kernel value in km", ylab = "Kernel likelihood", main = str_glue("Posterior density for search effort distance kernel for {target}"))

	wg("")

	}

	for (recordfile in recordfiles) {
	estimate_kernel(recordfile)
	}
No results found