dplyr::*_join()

left_join(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...)

Returns: tibble · Updated March 13, 2026 · Tidyverse

dplyr join tidyverse data-wrangling

The dplyr join functions combine two data frames by matching rows based on one or more key columns. Each join type produces different results depending on which rows you want to keep from each table.

Syntax

left_join(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...)
right_join(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...)
inner_join(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...)
full_join(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...)

Parameters

Parameter	Type	Default	Description
`x`	tibble / data.frame	—	The primary table (left table for left_join)
`y`	tibble / data.frame	—	The table to join (right table for left_join)
`by`	character / NULL	NULL	Column name(s) to join on. Use a character vector for multiple keys, or `c("key1" = "key2")` for different column names
`copy`	logical	FALSE	If TRUE, copy y to the same source as x for database-like operations
`suffix`	character	c(“.x”, “.y”)	Suffix to add to overlapping column names
`...`	arguments	—	Additional arguments passed to underlying methods

Examples

Basic usage

library(dplyr)

# Create two sample data frames
employees <- data.frame(
  id = c(1, 2, 3, 4),
  name = c("Alice", "Bob", "Charlie", "Diana"),
  department_id = c(10, 20, 10, 30)
)

departments <- data.frame(
  department_id = c(10, 20, 30),
  department_name = c("Engineering", "Marketing", "Sales")
)

# Left join: keep all rows from employees
left_join(employees, departments, by = "department_id")
#   id    name department_id department_name
# 1  1    Alice             10     Engineering
# 2  2      Bob             20      Marketing
# 3  3  Charlie             10     Engineering
# 4  4    Diana             30          Sales

Right join

# Right join: keep all rows from departments
right_join(employees, departments, by = "department_id")
#   id    name department_id department_name
# 1  1    Alice             10     Engineering
# 2  3  Charlie             10     Engineering
# 3  2      Bob             20      Marketing
# 4  4    Diana             30          Sales

Inner join

# Inner join: keep only matching rows
inner_join(employees, departments, by = "department_id")
#   id    name department_id department_name
# 1  1    Alice             10     Engineering
# 2  3  Charlie             10     Engineering
# 3  2      Bob             20      Marketing
# 4  4    Diana             30          Sales

Full join

# Full join: keep all rows from both tables
full_join(employees, departments, by = "department_id")
#   id    name department_id department_name
# 1  1    Alice             10     Engineering
# 2  3  Charlie             10     Engineering
# 3  2      Bob             20      Marketing
# 4  4    Diana             30          Sales

Multiple key columns

# Join on multiple columns
df1 <- data.frame(
  year = c(2023, 2023, 2024),
  quarter = c(1, 2, 1),
  sales = c(100, 150, 200)
)

df2 <- data.frame(
  year = c(2023, 2023, 2024),
  quarter = c(1, 2, 1),
  profit = c(20, 35, 45)
)

inner_join(df1, df2, by = c("year", "quarter"))
#   year quarter sales profit
# 1 2023       1   100     20
# 2 2023       2   150     35
# 3 2024       1   200     45

Different column names

# Join when column names differ
products <- data.frame(
  product_id = c(101, 102, 103),
  price = c(19.99, 29.99, 39.99)
)

orders <- data.frame(
  id = c(1, 2, 3),
  product_id = c(101, 102, 104),
  quantity = c(2, 1, 3)
)

left_join(orders, products, by = c("product_id" = "product_id"))
#   id product_id quantity  price
# 1  1        101        2  19.99
# 2  2        102        1  29.99
# 3  3        104        3     NA

Common Patterns

Filter with joins: Use semi_join() to keep rows in x that have a match in y, or anti_join() to remove them.

# Find employees in the Engineering department
dept_eng <- departments %>% filter(department_name == "Engineering")
semi_join(employees, dept_eng, by = "department_id")

# Find employees NOT in Sales
dept_sales <- departments %>% filter(department_name == "Sales")
anti_join(employees, dept_sales, by = "department_id")

Multiple joins in sequence: Chain multiple joins for complex data transformations.

# Join multiple tables
result <- orders %>%
  left_join(customers, by = "customer_id") %>%
  left_join(products, by = "product_id") %>%
  left_join(shipping, by = "order_id")