Interaction Terms

Josh Allen

Department of Political Science at Georgia State University

3/2/23

Toy Data

  • How do we implement interaction terms in R?
# Lets load in the data via the VDEM package 
# and clean it a bit

vdem <- readRDS("data/vdem12.rds")

vdem <- vdem |>
filter(country_name == "United States of America") |>
rename(democracy = v2x_polyarchy,
       gdp_per_cap = e_gdppc,
       urbanization = e_miurbani,
       polarization = v2cacamps,
       polarization_ord = v2cacamps_ord) |>
       mutate(high_polarization = ifelse(polarization >= -1, 1, 0))

Estimating Interaction Terms in R

  • As with all things in R there are several ways to do this
    • : and * are the most popular
    • / is a nice trick to get the full marginal effect
    • I personally prefer * since it involves less typing
star_way <- lm(democracy ~ gdp_per_cap + urbanization * high_polarization, data = vdem) 


colon_way <- lm(democracy ~ gdp_per_cap + urbanization + high_polarization + urbanization:high_polarization,
     data = vdem)

slash_way <- lm(democracy ~ gdp_per_cap + urbanization/high_polarization, data = vdem) 

Output

star_way |>
tidy() |>
filter(str_detect(term, ":"))
# A tibble: 1 × 5
  term                           estimate std.error statistic p.value
  <chr>                             <dbl>     <dbl>     <dbl>   <dbl>
1 urbanization:high_polarization    0.377     0.460     0.819   0.415
colon_way |>
tidy() |>
filter(str_detect(term, ":"))
# A tibble: 1 × 5
  term                           estimate std.error statistic p.value
  <chr>                             <dbl>     <dbl>     <dbl>   <dbl>
1 urbanization:high_polarization    0.377     0.460     0.819   0.415

Make plots when you can

remove_stuff <- c("1", "_", ":")

plot_data <- star_way |>
tidy(conf.int = TRUE) |>
mutate(term = str_remove_all(term, paste(remove_stuff, collapse = "|", sep = " ")),
       term = case_when(term == "gdppercap" ~ "GDP Per Captia",
                        term == "urbanization" ~ "Urbanization",
                        term == "highpolarization" ~ "High Polarization",
                        term == "urbanizationhighpolarization" ~ "Urbanization x High Polarization")) |>
    filter(term != "(Intercept)")



ggplot(plot_data,
      aes(x = estimate,
      y = term,
     xmin = conf.low,
     xmax = conf.high)) +
geom_pointrange() +
geom_vline(xintercept = 0) +
labs(x = "coefficient",
    y = NULL,
    caption = "Bars Denote 95% confidence intervals") +
theme_allen_minimal()

Functionally What Does This Look like

library(modelsummary)

gof_names <- tribble(  ~raw, ~clean,  ~fmt,  ~omit,
                    "nobs", "Observations",  0,  FALSE,
                 "r.squared", "R<sup>2</sup>",  3,  FALSE, 
                "rmse", "RMSE", 3, FALSE)

non_interact <- lm(democracy ~ gdp_per_cap + urbanization + high_polarization, data = vdem)

tab_2 <- modelsummary(list("Non Interacted Model" = non_interact, "Interacted Model" = star_way),
             coef_map = c("gdp_per_cap" = "GDP Per Cap",
                            "urbanization" = "Urbanization",
                             "high_polarization" = "High Polarization",
                            "urbanization:high_polarization" = "Urbanization × High Polarization",
                            "(Intercept)" = "Intercept"),
            gof_map = gof_names,
            stars = c('⭐' = 0.05))

Output

 Non Interacted Model  Interacted Model
GDP Per Cap 0.012⭐ 0.012⭐
(0.000) (0.000)
Urbanization 1.284⭐ 1.189⭐
(0.178) (0.213)
High Polarization 0.012 −0.100
(0.009) (0.137)
Urbanization × High Polarization 0.377
(0.460)
Intercept 0.003 0.028
(0.051) (0.059)
Observations 101 101
R2 0.946 0.947
RMSE 0.037 0.037
⭐ p < 0.05

Assessing Interaction Terms

  • We need to check our LIE assumptions (Hainmueller, Mummolo, Xu 2019)
library(interflex)

interflex(estimator = "raw",
         Y = "democracy",
         X = "urbanization",
         D = "high_polarization",
         Ylabel = "Democracy",
         Xlabel = "Urbanization",
         Dlabel = "High Polarization",
        theme.bw = TRUE,
        data  = vdem,
        na.rm = TRUE, treat.type = "discrete")

Lets Pretend That Diagnostic Plot Is Not Bad

  • Interaction terms are tricky to interpret.
    • Use Marginal Effects plots to make it easier on yourself and your audience
interflex(estimator = "kernel",
          Y = "democracy",
          X = "urbanization",
          D = "high_polarization",
          Ylabel = "Democracy",
          Xlabel = "Urbanization",
          theme.bw = TRUE, data  = vdem,
          na.rm = TRUE,
          treat.type = "discrete")

Plotting the Predictive Values

library(marginaleffects)
library(MetBrewer)

plot_predictions(star_way,
condition = c("urbanization",
           "high_polarization")) +
theme_allen_bw() +
labs(color = "High Polarization",
     fill = "High Polarization",
     x = "Urbanization",
     y = "Democracy") +
guides(fill = guide_legend(reverse = TRUE),
       color = guide_legend(reverse = TRUE)) +
scale_color_met_d(name = "Demuth") +
scale_fill_met_d(name = "Demuth")