factor()

factor(x = character(), levels, labels = levels, exclude = NA, ordered = is.ordered(x), nmax = NA)
Returns: factor · Updated March 13, 2026 · Data Types
data-types categorical factors r

Factors are R’s primary data structure for representing categorical data. They store values as integers internally, with a mapping to human-readable level names, making them more memory-efficient than character vectors for repeated categories.

Syntax

factor(x = character(), levels, labels = levels, exclude = NA, ordered = is.ordered(x), nmax = NA)

Parameters

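| Parameter | Type | Default | Description |
|---|---|---|---|
| x | character/numeric | character() | Input vector to convert to a factor |
| levels | character | sort(unique(x)) | Vector of unique values that will form the factor levels; by default, the sorted unique values of x |
| labels | character | levels | Labels for the levels (either the same length as levels, or a single string that is used as a prefix and numbered) |
| exclude | character | NA | Values to exclude from being valid levels |
| ordered | logical | is.ordered(x) | If TRUE, creates an ordered factor for ordinal data; the default is FALSE unless x is already an ordered factor |
| nmax | numeric | NA | Upper bound on number of levels |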

Examples

Creating a basic factor

# Create a factor from a character vector
colors <- c("red", "blue", "red", "green", "blue", "red")
color_factor <- factor(colors)

color_factor
# [1] red   blue  red   green blue  red  
# Levels: blue green red

# Check the internal representation
as.integer(color_factor)
# [1] 3 1 3 2 1 3

Specifying levels explicitly

# Ensure all expected levels appear, even if not in data
survey <- c("Yes", "No", "Yes", "No")
response <- factor(survey, levels = c("Yes", "No", "Maybe"))

response
# [1] Yes No  Yes No 
# Levels: Yes No Maybe

# Note: "Maybe" appears in levels but not in data
levels(response)
# [1] "Yes"   "No"    "Maybe"

Ordered factors for ordinal data

# Create an ordered factor for rating scale
ratings <- c("Low", "High", "Medium", "Low", "High")
ordered_ratings <- factor(ratings, levels = c("Low", "Medium", "High"), ordered = TRUE)

ordered_ratings
# [1] Low    High   Medium Low    High  
# Levels: Low < Medium < High

# Ordered factors can be compared
ordered_ratings[1] < ordered_ratings[2]
# [1] TRUE

Using labels instead of levels

# Rename levels with labels parameter
gender <- c(1, 2, 1, 1, 2)
gender_factor <- factor(gender, levels = c(1, 2), labels = c("Male", "Female"))

gender_factor
# [1] Male   Female Male   Male   Female
# Levels: Male Female

Common Patterns

Releveling factors

# Change the reference level
education <- c("Bachelor", "Master", "Bachelor", "PhD", "Master")
edu_factor <- factor(education)

# By default the first level in alphabetical order ("Bachelor") is the reference;
# relevel() returns a copy with "Master" moved to the front
relevel(edu_factor, ref = "Master")
# [1] Bachelor Master   Bachelor PhD      Master  
# Levels: Master Bachelor PhD

Counting levels

# Get number of levels
colors <- c("red", "blue", "red", "green", "blue", "red")
color_factor <- factor(colors)

nlevels(color_factor)
# [1] 3

levels(color_factor)
# [1] "blue"  "green" "red"

Dropping unused levels

# Subsetting may leave unused levels
colors <- c("red", "blue", "red", "green", "blue", "red")
color_factor <- factor(colors)

# Subset to only red values
red_only <- color_factor[color_factor == "red"]

# Unused levels still appear
levels(red_only)
# [1] "blue"  "green" "red"

# Drop unused levels
red_only <- droplevels(red_only)
levels(red_only)
# [1] "red"

Factors in data frames

# Creating a data frame with factors
df <- data.frame(
  name = c("Alice", "Bob", "Charlie"),
  department = factor(c("Sales", "Engineering", "Sales"))
)

# Check structure
str(df)
# 'data.frame':  3 obs. of  2 variables:
#  $ name      : chr  "Alice" "Bob" "Charlie"
#  $ department: Factor w/ 2 levels "Engineering","Sales": 2 1 2

See Also