# DavisVaughan/filter-out.R

## filter-out.R
# Why filter_out?

library(dplyr)

# Example data: one row per patient. Both `deceased` and `date` contain
# missing values, which is what makes the negated filters below tricky.
# `date` holds a year as a plain number, not a Date.
patients <- tibble(
  name = c("Anne", "Mark", "Sarah", "Davis", "Max", "Derek", "Tina"),
  deceased = c(FALSE, TRUE, NA, TRUE, NA, FALSE, TRUE),
  date = c(2005, 2010, NA, 2020, 2010, NA, NA)
)

patients

# Filter out rows where the patient is deceased and the year was before 2012.
# In this data only Mark (deceased = TRUE, date = 2010) meets both conditions.

# Direct translation...
# `filter()` keeps only the rows where the condition is TRUE. Whenever
# `deceased & date < 2012` evaluates to NA, its negation is NA as well, so
# the row is dropped too.
patients |>
  filter(!(deceased & date < 2012))

# ...doesn't work!
# We drop "too many rows"
# The anti-join lists every row of `patients` that the filter above removed:
# Mark (intended), plus Sarah, Max, and Tina (their condition is NA).
# Rows that were kept still match themselves even when a key is NA, because
# dplyr joins treat NA keys as equal by default.
anti_join(
  patients,
  patients |> filter(!(deceased & date < 2012)),
  join_by(name, deceased, date)
)

# You end up with this madness. This way lies pain.
# Each condition is paired with `!is.na()` so that a missing value becomes
# FALSE instead of NA before the negation; only Mark is dropped.
patients |>
  filter(
    !((deceased & !is.na(deceased)) &
      (date < 2012 & !is.na(date)))
  )

# `filter_out()` lets you translate your intent directly to code
# The comma-separated conditions are the two that must both hold for a row
# to be dropped.
# NOTE(review): `filter_out()` only exists in recent dplyr releases; confirm
# the installed version provides it.
patients |>
  filter_out(deceased, date < 2012)

# Guidelines:
# If you want to "keep rows", you want `filter()`
# If you want to "drop rows", you want `filter_out()`
	# Why filter_out?

	library(dplyr)

	# Example data: one row per patient. Both `deceased` and `date` contain
	# missing values, which is what makes the negated filters below tricky.
	# `date` holds a year as a plain number, not a Date.
	patients <- tibble(
	  name = c("Anne", "Mark", "Sarah", "Davis", "Max", "Derek", "Tina"),
	  deceased = c(FALSE, TRUE, NA, TRUE, NA, FALSE, TRUE),
	  date = c(2005, 2010, NA, 2020, 2010, NA, NA)
	)

	patients

	# Filter out rows where the patient is deceased and the year was before 2012.
	# In this data only Mark (deceased = TRUE, date = 2010) meets both conditions.

	# Direct translation...
	# `filter()` keeps only the rows where the condition is TRUE. Whenever
	# `deceased & date < 2012` evaluates to NA, its negation is NA as well, so
	# the row is dropped too.
	patients |>
	  filter(!(deceased & date < 2012))

	# ...doesn't work!
	# We drop "too many rows"
	# The anti-join lists every row of `patients` that the filter above removed:
	# Mark (intended), plus Sarah, Max, and Tina (their condition is NA).
	anti_join(
	  patients,
	  patients |> filter(!(deceased & date < 2012)),
	  join_by(name, deceased, date)
	)

	# You end up with this madness. This way lies pain.
	# Each condition is paired with `!is.na()` so that a missing value becomes
	# FALSE instead of NA before the negation; only Mark is dropped.
	patients |>
	  filter(
	    !((deceased & !is.na(deceased)) &
	      (date < 2012 & !is.na(date)))
	  )

	# `filter_out()` lets you translate your intent directly to code
	# NOTE(review): `filter_out()` only exists in recent dplyr releases; confirm
	# the installed version provides it.
	patients |>
	  filter_out(deceased, date < 2012)

	# Guidelines:
	# If you want to "keep rows", you want `filter()`
	# If you want to "drop rows", you want `filter_out()`
# No results found