How to find rows with duplicate values in R

· 7 min read · Updated March 14, 2026 · beginner
r duplicates dplyr data.table data-frame

Finding duplicate rows is a common data cleaning task. This guide shows you multiple approaches using base R, dplyr, and data.table.

Finding Duplicates in a Vector

Base R: duplicated()

The duplicated() function returns a logical vector indicating which elements have been seen before:

x <- c(1, 2, 2, 3, 3, 3, 4, 5, 5)

duplicated(x)
# [1] FALSE FALSE  TRUE FALSE  TRUE  TRUE FALSE FALSE  TRUE

To flag every occurrence of a duplicated value (not just the second and later occurrences), combine duplicated() with its fromLast = TRUE counterpart:

x <- c(1, 2, 2, 3, 3, 3, 4, 5, 5)

duplicated(x) | duplicated(x, fromLast = TRUE)
# [1] FALSE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE

Which elements are duplicates?

x <- c(1, 2, 2, 3, 3, 3, 4, 5, 5)

# Find positions of duplicated values
which(duplicated(x))
# [1] 3 5 6 9

# Get the actual duplicate values
unique(x[duplicated(x)])
# [1] 2 3 5

Finding Duplicate Rows in a Data Frame

Base R

df <- data.frame(
  id = c(1, 2, 2, 3, 4, 1),
  name = c("A", "B", "B", "C", "D", "A"),
  score = c(85, 90, 90, 78, 88, 85)
)

duplicated(df)
# [1] FALSE FALSE  TRUE FALSE FALSE  TRUE

Get the row numbers of duplicates:

which(duplicated(df))
# [1] 3 6

Extract the duplicate rows:

df[duplicated(df), ]
#   id name score
# 3  2    B     90
# 6  1    A     85

Find all duplicates including first occurrence

df[duplicated(df) | duplicated(df, fromLast = TRUE), ]
#   id name score
# 1  1    A     85
# 2  2    B     90
# 3  2    B     90
# 6  1    A     85

dplyr

Use duplicated() or the group_by() approach:

library(dplyr)

df <- data.frame(
  id = c(1, 2, 2, 3, 4, 1),
  name = c("A", "B", "B", "C", "D", "A"),
  score = c(85, 90, 90, 78, 88, 85)
)

# Find duplicate rows
df %>% 
  group_by_all() %>% 
  filter(n() > 1) %>%
  ungroup()
# # A tibble: 4 × 3
#      id name  score
#   <dbl> <chr> <dbl>
# 1     1 A        85
# 2     2 B        90
# 3     2 B        90
# 4     1 A        85

Find duplicates in specific columns only

library(dplyr)

df <- data.frame(
  id = c(1, 2, 2, 3, 4, 1),
  name = c("A", "B", "B", "C", "D", "A"),
  score = c(85, 90, 90, 78, 88, 85)
)

# Find duplicates based on id column only
df %>%
  group_by(id) %>%
  filter(n() > 1) %>%
  ungroup()
# # A tibble: 4 × 3
#      id name  score
#   <dbl> <chr> <dbl>
# 1     1 A        85
# 2     2 B        90
# 3     2 B        90
# 4     1 A        85

# Or using the id and name columns
df %>%
  group_by(id, name) %>%
  filter(n() > 1) %>%
  ungroup()

data.table

library(data.table)

dt <- data.table(
  id = c(1, 2, 2, 3, 4, 1),
  name = c("A", "B", "B", "C", "D", "A"),
  score = c(85, 90, 90, 78, 88, 85)
)

# Find duplicate rows
duplicated(dt)
# [1] FALSE FALSE  TRUE FALSE FALSE  TRUE

# Get duplicate rows
dt[duplicated(dt)]
#    id name score
# 1:  2    B     90
# 2:  1    A     85

# Using the anyDuplicated() function - returns index of first duplicate
anyDuplicated(dt)
# [1] 3

# Find duplicates by specific columns
dt[duplicated(dt, by = c("id", "name"))]
#    id name score
# 1:  2    B     90
# 2:  1    A     85

Counting Duplicates

Count total duplicate rows

df <- data.frame(
  id = c(1, 2, 2, 3, 4, 1),
  name = c("A", "B", "B", "C", "D", "A"),
  score = c(85, 90, 90, 78, 88, 85)
)

# Base R
sum(duplicated(df))
# [1] 2

# dplyr
library(dplyr)
df %>%
  summarise(n_dup = sum(duplicated(.)))
#   n_dup
# 1     2

Count duplicates per group

library(dplyr)

df <- data.frame(
  id = c(1, 2, 2, 3, 4, 1),
  name = c("A", "B", "B", "C", "D", "A"),
  score = c(85, 90, 90, 78, 88, 85)
)

# Count how many times each combination appears
df %>%
  group_by(id, name) %>%
  mutate(count = n()) %>%
  filter(count > 1)
# # A tibble: 4 × 4
# # Groups:   id, name [2]
#      id name  score count
#   <dbl> <chr> <dbl> <int>
# 1     1 A        85     2
# 2     2 B        90     2
# 3     2 B        90     2
# 4     1 A        85     2

Count unique vs total rows

df <- data.frame(
  id = c(1, 2, 2, 3, 4, 1),
  name = c("A", "B", "B", "C", "D", "A"),
  score = c(85, 90, 90, 78, 88, 85)
)

nrow(df)                           # Total rows: 6
# [1] 6

nrow(unique(df))                  # Unique rows: 4
# [1] 4

nrow(df) - nrow(unique(df))      # Duplicate rows: 2
# [1] 2

Removing Duplicate Rows

Base R

df <- data.frame(
  id = c(1, 2, 2, 3, 4, 1),
  name = c("A", "B", "B", "C", "D", "A"),
  score = c(85, 90, 90, 78, 88, 85)
)

# Remove all duplicates (keeps first occurrence)
unique(df)
#   id name score
# 1  1    A     85
# 2  2    B     90
# 4  3    C     78
# 5  4    D     88

# Or use duplicated()
df[!duplicated(df), ]
#   id name score
# 1  1    A     85
# 2  2    B     90
# 4  3    C     78
# 5  4    D     88

# Keep last occurrence instead
df[!duplicated(df, fromLast = TRUE), ]
#   id name score
# 3  2    B     90
# 4  3    C     78
# 5  4    D     88
# 6  1    A     85

dplyr

library(dplyr)

df <- data.frame(
  id = c(1, 2, 2, 3, 4, 1),
  name = c("A", "B", "B", "C", "D", "A"),
  score = c(85, 90, 90, 78, 88, 85)
)

# Remove duplicate rows
distinct(df)
#   id name score
# 1  1    A     85
# 2  2    B     90
# 3  3    C     78
# 4  4    D     88

# Remove duplicates based on specific columns
distinct(df, id, .keep_all = TRUE)
#   id name score
# 1  1    A     85
# 2  2    B     90
# 3  3    C     78
# 4  4    D     88

data.table

library(data.table)

dt <- data.table(
  id = c(1, 2, 2, 3, 4, 1),
  name = c("A", "B", "B", "C", "D", "A"),
  score = c(85, 90, 90, 78, 88, 85)
)

unique(dt)
#    id name score
# 1:  1    A     85
# 2:  2    B     90
# 3:  3    C     78
# 4:  4    D     88

# Remove duplicates by specific columns
unique(dt, by = c("id", "name"))
#    id name score
# 1:  1    A     85
# 2:  2    B     90
# 3:  3    C     78
# 4:  4    D     88

Practical Examples

Find customers with multiple orders

library(dplyr)

orders <- data.frame(
  customer_id = c(101, 101, 102, 103, 103, 103),
  order_id = c(1, 2, 3, 4, 5, 6),
  amount = c(50, 75, 100, 25, 30, 45)
)

# Find customers with more than one order
orders %>%
  group_by(customer_id) %>%
  filter(n() > 1) %>%
  ungroup()
# # A tibble: 5 × 3
#   customer_id order_id amount
#         <dbl>    <dbl>  <dbl>
# 1         101        1     50
# 2         101        2     75
# 3         103        4     25
# 4         103        5     30
# 5         103        6     45

Find records modified more than once

library(dplyr)

logs <- data.frame(
  record_id = c("A", "A", "B", "C", "C"),
  action = c("create", "update", "create", "create", "delete"),
  timestamp = c(1, 2, 3, 4, 5)
)

# Find records that were touched multiple times
logs %>%
  group_by(record_id) %>%
  filter(n() > 1) %>%
  ungroup()

Keep the row with maximum value per group

library(dplyr)

df <- data.frame(
  id = c(1, 1, 2, 2, 3, 3),
  time = c(10, 20, 15, 25, 30, 35),
  value = c(100, 150, 200, 180, 90, 120)
)

# Keep row with maximum value per id
df %>%
  group_by(id) %>%
  slice_max(value, n = 1) %>%
  ungroup()
# # A tibble: 3 × 3
#      id  time value
#   <dbl> <dbl> <dbl>
# 1     1    20   150
# 2     2    15   200
# 3     3    35   120

Performance Comparison

For large datasets, data.table is fastest:

library(data.table)

dt <- data.table(
  id = rep(1:1e6, each = 3),
  value = rnorm(3e6)
)

system.time(dt[!duplicated(dt)])
#    user  system elapsed 
#   0.089   0.010   0.099

Base R unique() is fastest for vectors. Use dplyr::distinct() for readability in pipelines, and data.table::unique() for large data.

See Also

  • duplicated() — Find duplicate elements in a vector or rows in a data frame
  • unique() — Extract unique elements or rows
  • table() — Count frequency of values