Skip to content

Instantly share code, notes, and snippets.

@DavisVaughan
Created February 20, 2026 13:55
Show Gist options
  • Select an option

  • Save DavisVaughan/ec4583cff2e6fa0fdbe9ccdc0d1496b4 to your computer and use it in GitHub Desktop.

Select an option

Save DavisVaughan/ec4583cff2e6fa0fdbe9ccdc0d1496b4 to your computer and use it in GitHub Desktop.
filter-out
# Why filter_out? ----
#
# Walk-through of three ways to drop the rows of `patients` where the patient
# is deceased AND `date` is before 2012, when both columns contain `NA`.
library(dplyr)

# Toy data: `deceased` and `date` each have missing values, on their own and
# together.
patients <- tibble(
  name = c("Anne", "Mark", "Sarah", "Davis", "Max", "Derek", "Tina"),
  deceased = c(FALSE, TRUE, NA, TRUE, NA, FALSE, TRUE),
  date = c(2005, 2010, NA, 2020, 2010, NA, NA)
)
patients

# Goal: filter out rows where the patient is deceased and the year was before
# 2012.

# Attempt 1: negate the "drop" condition and hand it to `filter()`.
filter(patients, !(deceased & date < 2012))

# ...which doesn't work: `filter()` keeps only the rows where its condition is
# TRUE, so every row where the condition evaluates to `NA` disappears as well.
# The anti-join lists the rows that went missing. Only Mark actually meets the
# drop condition; Sarah, Max and Tina are lost because of the `NA`s.
patients |>
  anti_join(
    filter(patients, !(deceased & date < 2012)),
    by = join_by(name, deceased, date)
  )

# Attempt 2: guard each half of the condition with `!is.na()` so that a missing
# value counts as "does not match". Correct, but painful to read and write.
filter(
  patients,
  !(
    (deceased & !is.na(deceased)) &
      (date < 2012 & !is.na(date))
  )
)

# Attempt 3: `filter_out()` lets you state the rows to drop directly, with no
# negation and no `is.na()` guards.
filter_out(patients, deceased, date < 2012)

# Guidelines:
# If you want to "keep rows", you want `filter()`
# If you want to "drop rows", you want `filter_out()`
@DavisVaughan
Copy link
Author

# Why filter_out?
#
# Rendered reprex: each chunk of code is followed by its recorded output on the
# `#>` lines. It compares three ways of dropping the rows where a patient is
# deceased and `date` is before 2012 when both columns contain `NA`.

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union

# Toy data: `deceased` and `date` each contain missing values, on their own
# (Max, Derek, Tina) and together (Sarah).
patients <- tibble(
  name = c("Anne", "Mark", "Sarah", "Davis", "Max", "Derek", "Tina"),
  deceased = c(FALSE, TRUE, NA, TRUE, NA, FALSE, TRUE),
  date = c(2005, 2010, NA, 2020, 2010, NA, NA)
)

patients
#> # A tibble: 7 × 3
#>   name  deceased  date
#>   <chr> <lgl>    <dbl>
#> 1 Anne  FALSE     2005
#> 2 Mark  TRUE      2010
#> 3 Sarah NA          NA
#> 4 Davis TRUE      2020
#> 5 Max   NA        2010
#> 6 Derek FALSE       NA
#> 7 Tina  TRUE        NA

# Filter out rows where the patient is deceased and the year was before 2012.

# Direct translation: negate the "drop" condition and pass it to `filter()`...
patients |>
  filter(!(deceased & date < 2012))
#> # A tibble: 3 × 3
#>   name  deceased  date
#>   <chr> <lgl>    <dbl>
#> 1 Anne  FALSE     2005
#> 2 Davis TRUE      2020
#> 3 Derek FALSE       NA

# ...doesn't work!
# We drop "too many rows": the anti-join below lists every row missing from the
# result above. Only Mark (deceased, 2010) actually meets the drop condition;
# for Sarah, Max and Tina the condition evaluates to `NA`, and `filter()` only
# keeps rows where its condition is TRUE.
anti_join(
  patients,
  patients |> filter(!(deceased & date < 2012)),
  join_by(name, deceased, date)
)
#> # A tibble: 4 × 3
#>   name  deceased  date
#>   <chr> <lgl>    <dbl>
#> 1 Mark  TRUE      2010
#> 2 Sarah NA          NA
#> 3 Max   NA        2010
#> 4 Tina  TRUE        NA

# You end up with this madness. This way lies pain.
# Each half of the condition is guarded with `!is.na()` so that a missing value
# counts as "does not match"; now only Mark is dropped.
patients |>
  filter(
    !((deceased & !is.na(deceased)) &
      (date < 2012 & !is.na(date)))
  )
#> # A tibble: 6 × 3
#>   name  deceased  date
#>   <chr> <lgl>    <dbl>
#> 1 Anne  FALSE     2005
#> 2 Sarah NA          NA
#> 3 Davis TRUE      2020
#> 4 Max   NA        2010
#> 5 Derek FALSE       NA
#> 6 Tina  TRUE        NA

# `filter_out()` lets you translate your intent directly to code
# The conditions describe the rows to drop; the result matches the guarded
# `filter()` call above without any negation or `is.na()` checks.
patients |>
  filter_out(deceased, date < 2012)
#> # A tibble: 6 × 3
#>   name  deceased  date
#>   <chr> <lgl>    <dbl>
#> 1 Anne  FALSE     2005
#> 2 Sarah NA          NA
#> 3 Davis TRUE      2020
#> 4 Max   NA        2010
#> 5 Derek FALSE       NA
#> 6 Tina  TRUE        NA

# Guidelines:
# If you want to "keep rows", you want `filter()`
# If you want to "drop rows", you want `filter_out()`

Created on 2026-02-20 with reprex v2.1.1

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment