Supplement A: Exploratory Data Analysis

COMPILED

August 26, 2025

1 Introduction

Goal: look for general trends in the data.

Data: archaeological survey data collected by Don Schiffler in the Rio Puerco region just west of the Jemez Mountains in New Mexico.

Method: standard methods for analyzing archaeological survey data from the US Southwest, notably seriation.

Questions: here’s a list of questions we answer with this EDA.

2 R Preamble

library(broom)
library(ca)
library(colorspace)
library(ggeffects)
library(ggfx)
library(ggnewscale)
library(ggrepel)
library(ggspatial)
library(ggtext)
library(here)
library(kableExtra)
library(magick)
library(MASS)
library(patchwork)
library(performance)
library(sf)
library(slider)
library(terra)
library(tidyterra)
library(tidyverse)

Set seed

set.seed(1701) # USS Enterprise NCC-1701 🖖

Some ggplot defaults

Code
# region key
region_colors <- c(
  "es" = "#ED7D3A",
  "jch" = "#75485E",
  "mp" = "#9DBF9E",
  "rp" = "#006BA6"
)

region_labels <- c(
  "es" = "Elk Springs",
  "jch" = "Jones Canyon",
  "mp" = "Mesa Portales",
  "rp" = "Rio Puerco"
)

# phase key
phase_colors <- c(
  "Early PII" = "#E63946",
  "Late PII" = "#F9C846",
  "Early PIII" = "#157A6E",
  "Middle PIII" = "#C0FDFB",
  "Late PIII" = "#735CDD"
)

# a clunky function to assign colors to table cells
heat <- function(x, r = c(0, 1)) {
  colors <- sequential_hcl(256, "YlOrBr", rev = TRUE)
  i <- scales::rescale(c(x, r), c(1, 256)) |> round()
  i <- i[1:length(x)]
  colors[i]
}

# ggplot2 stuff
pretty_labels <- as_labeller(c(
  "ratio" = "Ratio of Decorated to Undecorated Ceramics",
  "relative_rate" = "Relative Rate of Decorated Ceramics"
))

squished_labels <- as_labeller(c(
  "ratio" = "Ratio of Decorated\nto Undecorated Ceramics",
  "relative_rate" = "Relative Rate of\nDecorated Ceramics"
))

theme_set(theme_bw(12))

theme_update(
  axis.text = element_text(size = rel(0.75), color = "gray50"),
  axis.ticks = element_line(linewidth = 0.2, color = "gray85"),
  axis.title = element_text(size = rel(1)),
  legend.text = element_text(size = rel(0.9)),
  legend.title = element_text(size = rel(0.9)),
  panel.grid = element_blank(),
  plot.title = element_text(size = rel(1), margin = margin(b = 2)),
  strip.background = element_blank(),
  strip.text = element_text(size = rel(1))
)

# trim all white space around image,
# but then add a few white pixels back (dx, dy)
# all done with the magic of {magick}
prepare_image <- function(x, dx = 2, dy = 30, color = "white") {
  img <- image_read(path = x)

  img <- image_trim(img)

  info <- image_info(img)

  new_width <- info[["width"]] + dx
  new_height <- info[["height"]] + dy

  img <- image_extent(
    img,
    geometry_area(new_width, new_height),
    color = color
  )

  image_write(img, path = x)
}

3 Data

gpkg <- here("data", "rio-puerco.gpkg")

survey_area <- read_sf(gpkg, "survey")

sites <- read_sf(
  gpkg,
  query = paste(
    "SELECT site_id, region, fnum, burn,",
    "marea AS mound_area,",
    "ifnull(karea, 0) AS kiva_area,",
    "ca AS orientation,",
    "geom AS geometry",
    "FROM sites"
  )
)

ceramics <- read_sf(
  gpkg,
  query = paste(
    "SELECT ceramics.*, sites.site_id, sites.region, 'ceramic-key'.*",
    "FROM ceramics",
    "LEFT JOIN sites",
    "ON ceramics.site_id = sites.site_id",
    "LEFT JOIN 'ceramic-key'",
    "ON ceramics.key = 'ceramic-key'.key",
    "WHERE ceramics.key NOT IN ('gpc', 'dg')"
  )
)

lithics <- read_sf(gpkg, "lithics")

rio_puerco <- read_sf(gpkg, "rio puerco")

# important places in the region (chaco, santa fe, albuquerque)
# useful for orientation in overview map
places <- read_sf(gpkg, "places")

elevation <- here("data", "elevation.tif") |> rast()

climate <- read_sf(gpkg, "climate")

Save attribute data without spatial locations.

sites |>
  st_drop_geometry() |>
  write_csv(here("data", "rp-sites.csv"))

ceramics |> write_csv(here("data", "rp-ceramics.csv"))

lithics |> write_csv(here("data", "rp-lithics.csv"))
Code
# labels -----------------------------------------------------------------------
survey_area <- survey_area |>
  mutate(name = "Rio Puerco\nSurvey Area") |>
  select(name)

lbls <- places |>
  bind_rows(survey_area) |>
  mutate(
    xmin = map_dbl(geom, \(x) {
      st_bbox(x)[["xmin"]]
    }),
    xmax = map_dbl(geom, \(x) {
      st_bbox(x)[["xmax"]]
    }),
    ymin = map_dbl(geom, \(x) {
      st_bbox(x)[["ymin"]]
    }),
    ymax = map_dbl(geom, \(x) {
      st_bbox(x)[["ymax"]]
    }),
    x = case_when(
      name == "Chaco Canyon" ~ xmin + 7500,
      name == "Rio Puerco\nSurvey Area" ~ xmax + 3500,
      name == "Santa Fe" ~ xmin + 6000,
      name == "Albuquerque" ~ xmin - 2500,
      TRUE ~ 0
    ),
    y = case_when(
      name == "Chaco Canyon" ~ ymax - 14500,
      name == "Rio Puerco\nSurvey Area" ~ ymax - 2000,
      name == "Santa Fe" ~ ymax - 5000,
      name == "Albuquerque" ~ ymax - 5000,
      TRUE ~ 0
    ),
    hjust = ifelse(name %in% c("Santa Fe", "Albuquerque"), 1, 0)
  ) |>
  st_drop_geometry() |>
  select(name, x, y, hjust)

# basemap for region -----------------------------------------------------------
get_basemap <- function(
  x,
  map = "physical",
  size = c(16000, 9000),
  dpi = 300,
  imageSR = 4326,
  path
) {
  x <- st_bbox(x)

  old_ratio <- (x[["xmax"]] - x[["xmin"]]) / (x[["ymax"]] - x[["ymin"]])
  new_ratio <- (size[[1]] / size[[2]])

  if (!all.equal(old_ratio, new_ratio)) {
    msg <- paste0(
      "Extent of image (size) differs from extent of x (bbox). ",
      "Map may be warped."
    )

    warning(msg, call. = FALSE)
  }

  req <- httr2::request("http://services.arcgisonline.com/arcgis/rest/services")

  req <- httr2::req_url_path_append(req, map, "MapServer", "export")

  req <- httr2::req_url_query(
    req,
    bbox = paste(x, collapse = ","),
    bboxSR = st_crs(x)$epsg,
    imageSR = imageSR,
    format = "png",
    dpi = dpi,
    size = paste(size, collapse = ","),
    pixelType = "U8",
    noDataInterpretation = "esriNoDataMatchAny",
    interpolation = "+RSP_BilinearInterpolation",
    f = "image"
  )

  resp <- httr2::req_perform(req, path = path)

  png::readPNG(path, native = TRUE)
}

bb8 <- places |>
  st_buffer(9000) |>
  st_bbox()

dy <- bb8[["ymax"]] - bb8[["ymin"]]
dx <- bb8[["xmax"]] - bb8[["xmin"]]

fn <- here("figures", "basemap.png")

if (file.exists(fn)) {
  basemap <- png::readPNG(fn)
} else {
  basemap <- get_basemap(
    bb8,
    map = "World_Imagery",
    size = c(6000, 6000 * (dy / dx)),
    dpi = 600,
    imageSR = 26913,
    path = fn
  )
}

# regional map -----------------------------------------------------------------
gg_region <- ggplot() +
  annotation_raster(
    basemap,
    bb8[["xmin"]],
    bb8[["xmax"]],
    bb8[["ymin"]],
    bb8[["ymax"]]
  ) +
  geom_sf(
    data = st_as_sfc(bb8),
    fill = "transparent",
    color = "black"
  ) +
  geom_sf(
    data = places,
    fill = alpha("white", 0.85),
    color = "gray30",
    linewidth = 0.3
  ) +
  with_outer_glow(
    geom_sf(
      data = survey_area,
      fill = alpha("#006BA6", 0.5),
      color = "#006BA6",
      linewidth = 0.3
    ),
    colour = "white",
    expand = 1,
    sigma = 2
  ) +
  with_outer_glow(
    geom_text(
      data = lbls,
      aes(x, y, label = name),
      size = 12 / .pt,
      color = "white",
      fontface = "bold",
      hjust = lbls[["hjust"]],
      vjust = 1,
      lineheight = 0.9
    ),
    colour = "black",
    expand = 2,
    sigma = 1
  ) +
  coord_sf(
    crs = 26913,
    datum = 26913,
    expand = FALSE,
    xlim = bb8[c("xmin", "xmax")],
    ylim = bb8[c("ymin", "ymax")]
  ) +
  theme_void() +
  theme(
    legend.position = "none",
    plot.margin = margin(l = 3)
  )

# local map --------------------------------------------------------------------
bb8 <- survey_area |>
  st_buffer(1000) |>
  st_bbox()

dx <- bb8[["xmax"]] - bb8[["xmin"]]
dy <- bb8[["ymax"]] - bb8[["ymin"]]

cover <- st_sym_difference(survey_area, st_as_sfc(bb8))

lbls <- tibble(
  x = st_bbox(survey_area)[["xmax"]] - 500,
  y = st_bbox(survey_area)[["ymin"]] + seq(2000, 5800, length = 4),
  region = rev(names(region_labels))
)

gg_local <- ggplot() +
  geom_spatraster_contour_filled(
    data = elevation,
    bins = 10,
    color = "gray30",
    linewidth = 0.1
  ) +
  scale_fill_cross_blended_d(
    palette = "arid",
    alpha = 0.7,
    guide = "none"
  ) +
  ggnewscale::new_scale_fill() +
  geom_sf(
    data = survey_area,
    fill = "transparent",
    color = "black"
  ) +
  geom_sf(
    data = cover,
    fill = alpha("white", 0.7),
    color = "transparent"
  ) +
  geom_sf(
    data = rio_puerco |> st_intersection(survey_area),
    color = "#7A96C2",
    linewidth = 0.5
  ) +
  with_outer_glow(
    geom_sf(
      data = sites,
      aes(fill = region),
      size = 2,
      shape = 21,
      color = "black",
      stroke = 0.2
    ),
    colour = "white",
    expand = 3.5,
    sigma = 2
  ) +
  scale_fill_manual(
    name = NULL,
    values = region_colors
  ) +
  geom_sf(
    data = st_as_sfc(bb8),
    fill = "transparent",
    color = "black"
  ) +
  geom_point(
    data = lbls,
    aes(x, y, fill = region),
    size = 2,
    shape = 21,
    color = "black",
    stroke = 0.2
  ) +
  geom_text(
    data = lbls,
    aes(x, y, label = region_labels[region], color = region),
    size = 9 / .pt,
    hjust = 1,
    vjust = 0.4,
    nudge_x = -500
  ) +
  scale_color_manual(
    name = NULL,
    values = darken(region_colors, 0.2)
  ) +
  annotate(
    "text",
    x = st_bbox(survey_area)[["xmin"]],
    y = st_bbox(survey_area)[["ymax"]],
    label = "Rio Puerco\nProject",
    size = 11 / .pt,
    hjust = 0,
    vjust = 1,
    fontface = "bold",
    lineheight = 0.85
  ) +
  annotation_scale(
    aes(location = "br", width_hint = 0.17),
    text_cex = 0.7,
    pad_x = unit(0.35, "cm"),
    pad_y = unit(0.37, "cm")
  ) +
  coord_sf(
    crs = 26913,
    datum = 26913,
    expand = FALSE,
    xlim = bb8[c("xmin", "xmax")],
    ylim = bb8[c("ymin", "ymax")]
  ) +
  theme_void() +
  theme(
    legend.position = "none",
    plot.margin = margin(r = 1)
  )

# combine and save -------------------------------------------------------------
fn <- here("figures", "overview-map.png")

ggsave(
  plot = gg_local + gg_region,
  filename = fn,
  width = 8,
  height = 5,
  dpi = 300,
  bg = "white"
)

prepare_image(fn)

remove(lbls, get_basemap, basemap, bb8, dx, dy, cover, gg_region, gg_local, fn)
Spatial distribution of sites.
Figure 1: Overview map showing location of sites and their spatial grouping (left) and location of survey area in the wider region (right).

4 Analysis

What does the combined ceramic assemblage look like in each region?

type_distributions <- ceramics |>
  group_by(region, ware, type) |>
  summarize(count = sum(count)) |>
  ungroup()
Code
ceramic_dates <- ceramics |>
  group_by(type) |>
  slice(1) |>
  mutate(range = paste(start, end, sep = "-")) |>
  select(type, range)

tbl <- type_distributions |>
  pivot_wider(
    names_from = region,
    values_from = count,
    values_fill = 0
  ) |>
  arrange(ware, type) |>
  ungroup()

tbl_region_ceramics <- tbl |>
  left_join(ceramic_dates, by = "type") |>
  mutate(
    type = str_remove(type, " Ware"),
    type = str_remove(type, " panel design")
  ) |>
  select(type, range, es, jch, mp, rp) |>
  kbl(
    col.names = c(" ", " ", unname(region_labels)),
    align = c("lrrrrr"),
    table.attr = "class='kibble'",
    escape = FALSE
  ) |>
  pack_rows(index = table(tbl[["ware"]])) |>
  column_spec(2:6, width = "12%")

remove(tbl)

tbl_region_ceramics |>
  kable_styling(bootstrap_options = c("hover", "condensed")) |>
  scroll_box(height = "450px")
Elk Springs Jones Canyon Mesa Portales Rio Puerco
Cibola White Ware
Cebolleta Black-on-white 950-1150 0 25 5 17
Chaco Black-on-white 1075-1150 0 0 8 28
Gallup Black-on-white 980-1150 0 17 12 95
Puerco Black-on-white 1000-1150 0 56 53 92
Red Mesa Black-on-white 875-1050 0 13 3 40
Reserve Black-on-white 1000-1200 2 90 406 27
Tularosa Black-on-white 1150-1300 0 84 0 0
Undiagnostic Cibola White 875-1200 0 102 57 40
Gallina Gray Ware
Gallina Utility 1050-1300 0 38 6 378
Gallina White Ware
Gallina Black-on-white 1050-1300 0 24 0 107
Jemez White Ware
Jemez Black-on-white 1300-1700 0 45 80 5
Vallecitos Black-on-white 1250-1400 0 53 35 4
Mogollon Brown Ware
Alma Plain 200-1250 0 27 390 12
Reserve Corrugated 1050-1300 0 35 268 29
Reserve Indented Corrugated 1050-1300 3 95 186 10
Reserve Indented Corrugated Smeared 1050-1300 0 53 57 4
Tularosa Fillet Rim 1050-1200 0 20 5 0
Northern Rio Grande Gray Ware
Smeared Indented Corrugated 1250-1450 0 1079 725 14
Tesuque Gray 1250-1450 28 42 9 56
Northern Rio Grande White Ware
Biscuit A 1350-1450 0 14 0 0
Kwahe'e Black-on-white 1050-1200 16 82 290 32
Santa Fe Black-on-white 1150-1350 276 469 372 32
Rio Abajo White Ware
Socorro Black-on-white 900-1350 0 139 84 11
Rio Puerco Gray Ware
Corrugated 1000-1350 20 363 620 168
Exuberant Indented Corrugated 950-1300 0 41 18 246
Indented Corrugated 950-1300 577 1688 2084 247
Plain Gray 700-1450 61 1918 2598 524
Rio Puerco White Ware
Casa Salazar Black-on-white 1150-1225 0 109 161 10
Loma Fria Black-on-white 1150-1280 0 425 427 7
Loma Fria Black-on-white, ext. paint 1225-1280 0 59 60 0
San Juan Gray Ware
Mummy Lake Gray 1050-1200 0 195 101 0
San Juan White Ware
McElmo Black-on-white 1075-1250 0 381 1067 54
Mesa Verde Black-on-white 1150-1280 0 601 481 9
Mesa Verde Black-on-white, ext. paint 1225-1280 0 83 71 0
Undiagnostic San Juan White 1075-1280 4 385 193 19
White Mountain Red Ware
North Plains Black-on-red 1050-1150 0 13 10 9
Puerco Black-on-red 1030-1150 0 17 56 21
Saint Johns Black-on-red 1150-1300 10 145 200 2
Saint Johns Polychrome 1150-1300 0 98 122 2
Wingate Black-on-red 1030-1175 2 121 132 50
Wingate Polychrome 1030-1175 0 56 26 4

How abundant are decorated ceramics relative to undecorated ceramics?

There are a number of ways to estimate abundance. Here we use the ratio \(\lambda_{i}\) of decorated to undecorated ceramics at site \(i\):

\[\lambda_{i} = \frac{d_{i}}{u_{i}}\]

where \(d_{i}\) and \(u_{i}\) are the number of decorated and undecorated ceramics at \(i\).

abundance <- ceramics |>
  select(site_id, region, use, count) |>
  group_by(site_id, use) |>
  summarize(
    count = sum(count),
    region = unique(region)
  ) |>
  pivot_wider(names_from = "use", values_from = "count") |>
  ungroup() |>
  mutate(ratio = decorated / utility) |>
  select(site_id, region, ratio)
Code
lbls <- tibble(
  y = 0.98,
  label = unname(region_labels),
  region = names(region_labels)
)

gg <- ggplot() +
  geom_histogram(
    data = abundance |> filter(ratio <= 2),
    aes(ratio, after_stat(ncount), fill = region, color = region),
    binwidth = 0.1,
    center = 0.05,
    linewidth = 0.2
  ) +
  geom_text(
    data = lbls,
    aes(x = 2, y = y, label = label),
    size = 12 / .pt,
    hjust = 1,
    vjust = 1,
    color = darken(region_colors, 0.2)
  ) +
  scale_color_manual(
    name = NULL,
    values = region_colors
  ) +
  scale_fill_manual(
    name = NULL,
    values = alpha(region_colors, 0.8)
  ) +
  scale_y_continuous(breaks = c(0, 0.5, 1)) +
  facet_wrap(
    vars(region),
    ncol = 2
  ) +
  labs(
    x = pretty_labels("ratio"),
    y = "Normalized Count of Sites"
  ) +
  theme(
    legend.position = "none",
    panel.grid.major = element_line(color = "grey92", linewidth = 0.4),
    strip.text = element_blank()
  )

fn <- here("figures", "abundance-of-decorated.png")

ggsave(
  plot = gg,
  filename = fn,
  width = 5.5,
  height = 5,
  dpi = 300
)

prepare_image(fn)

remove(lbls, gg, fn)
Figure shows abundance of decorated ceramics relative to utility types.
Figure 2: Figure shows abundance of decorated ceramics relative to utility types.

Is relative abundance a function of site size?

We estimate the relationship using a log-log linear model, with site size measured in units of area, specifically m\(^2\).

abundance <- abundance |>
  left_join(
    sites |> st_drop_geometry() |> select(site_id, mound_area, kiva_area),
    by = "site_id"
  ) |>
  mutate(site_area = mound_area + kiva_area) |>
  select(region, site_id, ratio, site_area)

abundance_model <- lm(
  log(ratio) ~ log(site_area),
  data = abundance
)
Code
tbl <- abundance_model |>
  tidy() |>
  rename_with(str_replace, pattern = "\\.", replacement = " ") |>
  rename_with(str_to_title) |>
  rename(
    "t Value" = Statistic,
    "p Value" = "P Value"
  ) |>
  mutate(across(2:5, \(x) {
    round(x, 4)
  }))

tbl_regression <- tbl |> kbl(table.attr = "class='kibble'")

remove(tbl)

tbl_regression |>
  kable_styling(
    bootstrap_options = "hover",
    full_width = FALSE,
    position = "left"
  )
Term Estimate Std Error t Value p Value
(Intercept) -2.0534 0.1875 -10.9527 0
log(site_area) 0.3370 0.0421 8.0085 0

Here is the shape of that dependence.

Code
site_area <- with(
  abundance,
  seq(min(site_area), max(site_area), length = 100)
)

dependence <- predict_response(
  abundance_model,
  terms = list("site_area" = site_area),
  back_transform = FALSE
)

gg <- ggplot() +
  geom_point(
    data = abundance,
    aes(x = log(site_area), y = log(ratio), color = region),
    size = 1.3,
    alpha = 0.9
  ) +
  scale_color_manual(
    name = NULL,
    values = region_colors,
    labels = paste(
      "<span style='color: ",
      darken(region_colors, 0.2),
      "'>",
      region_labels,
      "</span>"
    )
  ) +
  geom_ribbon(
    data = dependence,
    aes(x = log(x), ymin = conf.low, ymax = conf.high),
    color = "#fd4d00",
    fill = lighten("#fd4d00", 0.2),
    linewidth = 0.3,
    alpha = 0.5
  ) +
  geom_line(
    data = dependence,
    aes(x = log(x), y = predicted),
    color = "#fd4d00",
    linewidth = 1
  ) +
  labs(
    x = "Natural Log of Site Area [log sq.m]",
    y = "Natural Log of Ratio of\nDecorated to Undecorated Ceramics"
  ) +
  theme(
    legend.box.margin = margin(),
    # legend.justification = c(0, 1),
    legend.margin = margin(),
    legend.text = element_markdown(),
    panel.grid.major = element_line(color = "grey92", linewidth = 0.4)
  )

fn <- here("figures", "abundance-dependence.png")

ggsave(
  plot = gg,
  filename = fn,
  width = 5.25,
  height = 3.8,
  dpi = 300
)

prepare_image(fn)

remove(dependence, gg, fn)
Figure shows response of abundance of decorated ceramics to site size.
Figure 3: Figure shows response of abundance of decorated ceramics to site size. Note that the x and y axes both have log scales.

And here are some linear model diagnostics.

check_model(
  abundance_model,
  check = c("qq", "linearity", "homogeneity", "outliers"),
  detrend = FALSE
)

Looks like there’s some non-linearity and heteroskedasticity in this model. This is probably owing to the fact that we are modeling the log of a ratio and probably also the fact that there’s likely spatial and temporal structure in the data.

Does the mean of that relationship vary by region?

This is basically an ANOVA to test for significant differences in the mean of each group while controlling for mound area. The reference group in this model is Elk Springs.

regional_abundance_model <- lm(
  log(ratio) ~ log(site_area) + region,
  data = abundance
)
Code
regional_abundance_model |>
  tidy() |>
  mutate(
    term = case_when(
      term == "regionjch" ~ "region: Jones Canyon",
      term == "regionmp" ~ "region: Mesa Portales",
      term == "regionrp" ~ "region: Rio Puerco",
      TRUE ~ term
    )
  ) |>
  rename_with(str_replace, pattern = "\\.", replacement = " ") |>
  rename_with(str_to_title) |>
  rename(
    "t Value" = Statistic,
    "p Value" = "P Value"
  ) |>
  mutate(across(2:5, \(x) {
    round(x, 4)
  })) |>
  kbl(
    caption = "Coefficient Estimates",
    table.attr = "class='kibble'"
  ) |>
  kable_styling(
    bootstrap_options = "hover",
    full_width = FALSE,
    position = "left"
  )
Coefficient Estimates
Term Estimate Std Error t Value p Value
(Intercept) -2.1212 0.1981 -10.7064 0.0000
log(site_area) 0.3089 0.0416 7.4278 0.0000
region: Jones Canyon 0.2704 0.1024 2.6399 0.0087
region: Mesa Portales 0.2309 0.0971 2.3771 0.0181
region: Rio Puerco -0.1166 0.1266 -0.9208 0.3579

Here is the shape of that dependence.

Code
dependence <- predict_response(
  regional_abundance_model,
  terms = list(
    "site_area" = site_area,
    region = unique(abundance[["region"]])
  ),
  back_transform = FALSE
)

lbls <- tibble(
  x = min(log(dependence[["x"]])),
  y = log(max(abundance[["ratio"]])),
  label = unname(region_labels),
  group = names(region_labels)
)

gg <- ggplot(dependence, aes(log(x), color = group, fill = group)) +
  geom_point(
    data = abundance |> rename("group" = region),
    aes(x = log(site_area), y = log(ratio), color = group),
    size = 1.3,
    alpha = 0.7
  ) +
  geom_ribbon(
    aes(ymin = conf.low, ymax = conf.high),
    linewidth = 0.3,
    alpha = 0.5
  ) +
  geom_line(
    aes(y = predicted),
    linewidth = 1
  ) +
  geom_text(
    data = lbls,
    aes(x, y, label = label),
    size = 13 / .pt,
    hjust = 0,
    vjust = 1,
    color = darken(region_colors, 0.2)
  ) +
  scale_color_manual(name = NULL, values = region_colors) +
  scale_fill_manual(name = NULL, values = lighten(region_colors, 0.2)) +
  facet_wrap(
    vars(group),
    ncol = 2
  ) +
  labs(
    x = "Natural Log of Mound Area [log sq.m]",
    y = "Natural Log of Ratio of\nDecorated to Undecorated Ceramics"
  ) +
  theme(
    legend.position = "none",
    panel.grid.major = element_line(color = "grey92", linewidth = 0.3),
    strip.text = element_blank()
  )

fn <- here("figures", "regional-abundance-dependence.png")

ggsave(
  plot = gg,
  filename = fn,
  width = 5.5,
  height = 5,
  dpi = 300
)

prepare_image(fn)

remove(dependence, lbls, gg, fn)
Figure shows response of abundance of decorated ceramics to site size by region.
Figure 4: Figure shows response of abundance of decorated ceramics to site size by region. Note that the x and y axes both have log scales.

And the linear model diagnostics.

check_model(
  regional_abundance_model,
  check = c("qq", "linearity", "homogeneity", "outliers"),
  detrend = FALSE
)

Still quite a bit of non-linearity and heteroskedasticity, but you will notice that a lot of that has been tamed relative to the simpler model.

But is the added complexity in the model actually worth it?

complexity <- anova(
  abundance_model,
  regional_abundance_model
)
Code
complexity |>
  tidy() |>
  rename_with(str_replace, pattern = "\\.", replacement = " ") |>
  rename_with(str_to_title) |>
  rename(
    "F" = Statistic,
    "p Value" = "P Value"
  ) |>
  mutate(
    across(2:7, \(x) {
      round(x, 4)
    }),
    across(4:7, \(x) {
      ifelse(is.na(x), "--", x)
    })
  ) |>
  kbl(
    caption = "Analysis of Variance Table",
    align = c("lrrrrrr"),
    table.attr = "class='kibble'"
  ) |>
  kable_styling(
    bootstrap_options = "hover",
    full_width = FALSE,
    position = "left"
  )
Analysis of Variance Table
Term Df Residual Rss Df Sumsq F p Value
log(ratio) ~ log(site_area) 290 63.9329 -- -- -- --
log(ratio) ~ log(site_area) + region 287 59.8156 3 4.1173 6.585 3e-04

How do sites correspond in terms of their decorated ceramic assemblage?

This is estimated using correspondence analysis applied to the count matrix of decorated ceramic types (rows are sites, columns are ceramic types).

count_matrix <- ceramics |>
  filter(use == "decorated") |>
  select(site_id, key, count) |>
  pivot_wider(
    names_from = "key",
    values_from = "count",
    values_fill = 0
  )

correspondence <- ca(count_matrix[, -1])
Code
rowcoord <- correspondence[["rowcoord"]]
colcoord <- correspondence[["colcoord"]]

i <- nrow(rowcoord)
j <- ncol(rowcoord)
k <- nrow(colcoord)

sv <- correspondence[["sv"]]

svF <- matrix(
  rep(sv[1:j], i),
  nrow = i,
  ncol = j,
  byrow = TRUE
)

svG <- matrix(
  rep(sv[1:j], k),
  nrow = k,
  ncol = j,
  byrow = TRUE
)

rpc <- rowcoord * svF
cpc <- colcoord * svG

row_data <- tibble(
  site_id = count_matrix[, 1, drop = TRUE],
  x = rpc[, 1],
  y = rpc[, 2]
)

row_data <- row_data |>
  left_join(
    sites |> st_drop_geometry() |> select(site_id, region),
    by = "site_id"
  )

col_data <- tibble(
  x = cpc[, 1],
  y = cpc[, 2]
)

col_labels <- tibble(
  x = cpc[, 1],
  y = cpc[, 2],
  label = correspondence[["colnames"]]
) |>
  mutate(
    gx = ifelse(x <= median(x), -0.5, 0.5),
    gy = ifelse(y <= median(y), -0.5, 0.5)
  )

pct <- round(100 * (sv^2) / sum(sv^2), 1)[c(1, 2)]

remove(rowcoord, colcoord, i, j, k, sv, svF, svG, rpc, cpc)

gg <- ggplot() +
  geom_hline(
    yintercept = 0,
    color = "gray70",
    linewidth = 0.3
  ) +
  geom_vline(
    xintercept = 0,
    color = "gray70",
    linewidth = 0.3
  ) +
  geom_point(
    data = row_data,
    aes(x, y),
    shape = 21,
    color = "gray30",
    fill = alpha("gray70", 0.7),
    size = 2,
    stroke = 0.1
  ) +
  geom_label_repel(
    data = col_labels,
    aes(x, y, label = label),
    size = 10 / .pt,
    force = 5,
    nudge_x = col_labels[["gx"]],
    nudge_y = col_labels[["gy"]]
  ) +
  geom_point(
    data = col_data,
    aes(x, y),
    shape = 24,
    color = "gray30",
    fill = "#fd4d00",
    size = 2.5,
    stroke = 0.1
  ) +
  labs(
    x = paste0("Dimension 1 [", pct[1], "%]"),
    y = paste0("Dimension 2 [", pct[2], "%]")
  ) +
  theme_bw(12) +
  theme(
    legend.position = "none",
    panel.grid = element_blank()
  )

fn <- here("figures", "correspondence.png")

ggsave(
  plot = gg,
  filename = fn,
  width = 5,
  height = 4,
  dpi = 300
)

prepare_image(fn)

remove(col_data, col_labels, gg, fn)
Figure shows results of correspondence analysis.
Figure 5: Figure shows results of correspondence analysis for first two dimensions.

Here is a key to help interpret that figure.

Code
keys <- ceramics |>
  filter(use == "decorated") |>
  group_by(type) |>
  summarize(key = unique(key)) |>
  relocate(key)

keys <- bind_cols(
  keys |> slice(1:14),
  keys |> slice(15:28)
)

keys |>
  kbl(
    col.names = rep(" ", 4),
    table.attr = "class='kibble'",
  ) |>
  kable_styling(bootstrap_options = "condensed") |>
  scroll_box(height = "450px")
bisca Biscuit A pbr Puerco Black-on-red
csbw Casa Salazar Black-on-white pbw Puerco Black-on-white
cebbw Cebolleta Black-on-white rmbw Red Mesa Black-on-white
chbw Chaco Black-on-white rbw Reserve Black-on-white
gbw Gallina Black-on-white sjbo Saint Johns Black-on-red
gabw Gallup Black-on-white sjp Saint Johns Polychrome
jbw Jemez Black-on-white sfbw Santa Fe Black-on-white
kbw Kwahe'e Black-on-white sbw Socorro Black-on-white
lfbw Loma Fria Black-on-white tbw Tularosa Black-on-white
lfbwep Loma Fria Black-on-white, ext. paint ucww Undiagnostic Cibola White Ware
mcembw McElmo Black-on-white usjww Undiagnostic San Juan White Ware
mvbw Mesa Verde Black-on-white vbw Vallecitos Black-on-white
mvbwep Mesa Verde Black-on-white, ext. paint winbr Wingate Black-on-red
npbr North Plains Black-on-red winp Wingate Polychrome

What does the correspondence look like by region?

Code
gg_regions <- ggplot() +
  geom_hline(
    yintercept = 0,
    color = "gray70",
    linewidth = 0.3
  ) +
  geom_vline(
    xintercept = 0,
    color = "gray70",
    linewidth = 0.3
  ) +
  geom_point(
    data = row_data,
    aes(x, y, fill = region),
    shape = 21,
    color = "gray30",
    stroke = 0.1,
    size = 3
  ) +
  geom_point(
    data = tibble(
      x = 3.8,
      y = c(1.50, 1.25, 1.00, 0.75),
      region = c("es", "jch", "mp", "rp")
    ),
    aes(x, y, fill = region),
    shape = 21,
    color = "gray30",
    stroke = 0.1,
    size = 2.5
  ) +
  geom_text(
    data = tibble(
      x = 3.67,
      y = c(1.50, 1.25, 1.00, 0.75),
      region = c("es", "jch", "mp", "rp"),
      label = region_labels
    ),
    aes(x, y, label = label, color = region),
    size = 11 / .pt,
    hjust = 1,
    vjust = 0.4
  ) +
  scale_color_manual(
    name = NULL,
    values = darken(region_colors, 0.2)
  ) +
  scale_fill_manual(
    name = NULL,
    values = alpha(region_colors, 0.85)
  ) +
  labs(
    x = paste0("Dimension 1 [", pct[1], "%]"),
    y = paste0("Dimension 2 [", pct[2], "%]")
  ) +
  scale_x_continuous(limits = c(-0.8, 3.9), breaks = -0:3) +
  scale_y_continuous(breaks = -2:1) +
  theme_bw(12) +
  theme(
    legend.position = "none",
    panel.grid = element_blank()
  )

fn <- here("figures", "correspondence-regions.png")

ggsave(
  plot = gg_regions,
  filename = fn,
  width = 5,
  height = 4,
  dpi = 300
)

prepare_image(fn)

remove(row_data, fn)
Figure shows results of correspondence analysis color-coded by region.
Figure 6: Figure shows results of correspondence analysis color-coded by region.

What temporal groups emerge from the correspondence?

# calculate principal coordinates for rows (i.e., sites)
rowcoord <- correspondence[["rowcoord"]]

# singular values
sv <- correspondence[["sv"]]

# row principal coordinates
rpc <- rowcoord %*% diag(sv)

# how many groups?
k <- 5

# kmeans just on the first two dimensions
clusters <- kmeans(
  rpc[, 1:2],
  centers = k
)

# dimension 1 is basically time
phase_centers <- clusters[["centers"]][, 1, drop = TRUE]

names(phase_centers) <- 1:k

# oldest to the right, youngest to the left
phase_centers <- sort(phase_centers, decreasing = TRUE)

# the names of phase_centers are the integer IDs of the clusters
# now we set the values to the Pecos phase names
phase_centers[1:k] <- names(phase_colors)

# the cluster vector is the integer IDs of the cluster,
# of length equal to the number of rows (i.e., sites)
# by converting this to character, we can subset the phase_centers vector
# based on its names, the result being a vector of Pecos phase names
clusters <- as.character(clusters[["cluster"]])

phases <- tibble(
  site_id = count_matrix[, 1, drop = TRUE],
  x = rpc[, 1],
  y = rpc[, 2],
  phase = phase_centers[clusters]
)

remove(rowcoord, sv, rpc, k, clusters, phase_centers)
Code
ceramic_counts <- ceramics |>
  group_by(site_id) |>
  summarize(count = sum(count))

phases <- phases |> left_join(ceramic_counts, by = "site_id")

gg_groups <- ggplot() +
  geom_hline(
    yintercept = 0,
    color = "gray70",
    linewidth = 0.3
  ) +
  geom_vline(
    xintercept = 0,
    color = "gray70",
    linewidth = 0.3
  ) +
  geom_point(
    data = phases,
    aes(x, y, fill = phase, size = count),
    shape = 21,
    color = "gray30",
    stroke = 0.2
  ) +
  geom_point(
    data = tibble(
      x = 3.8,
      y = c(1.50, 1.25, 1.00, 0.75, 0.50),
      phase = names(phase_colors)
    ),
    aes(x, y, fill = phase),
    shape = 21,
    color = "gray30",
    stroke = 0.1,
    size = 2.5
  ) +
  geom_text(
    data = tibble(
      x = 3.67,
      y = c(1.50, 1.25, 1.00, 0.75, 0.50),
      phase = names(phase_colors)
    ),
    aes(x, y, label = phase, color = phase),
    size = 11 / .pt,
    hjust = 1,
    vjust = 0.4
  ) +
  scale_color_manual(
    name = NULL,
    values = darken(phase_colors, 0.2)
  ) +
  scale_fill_manual(
    name = NULL,
    values = alpha(phase_colors, 0.8)
  ) +
  annotate(
    "text",
    x = -0.8,
    y = -2.6,
    label = "*Point size represents relative ceramic count.",
    hjust = 0,
    vjust = 0,
    color = "gray35",
    size = 10 / .pt
  ) +
  labs(
    x = paste0("Dimension 1 [", pct[1], "%]"),
    y = paste0("Dimension 2 [", pct[2], "%]")
  ) +
  scale_x_continuous(limits = c(-0.8, 3.9), breaks = -0:3) +
  scale_y_continuous(breaks = -2:1) +
  theme_bw(12) +
  theme(
    legend.position = "none",
    panel.grid = element_blank()
  )

fn <- here("figures", "correspondence-groups.png")

ggsave(
  plot = gg_groups,
  filename = fn,
  width = 5,
  height = 4,
  dpi = 300
)

prepare_image(fn)

remove(ceramic_counts, pct)
Figure shows groups defined by kmeans applied to correspondence analysis.
Figure 7: Figure shows groups defined by kmeans applied to correspondence analysis.
Code
# combining the previous two figures
gg_regions <- gg_regions + theme(plot.margin = margin(r = 2))

gg_groups <- gg_groups +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.title.y = element_blank(),
    plot.margin = margin(l = 2)
  )

fn <- here("figures", "correspondence-combined.png")

ggsave(
  plot = gg_regions + gg_groups + plot_layout(axis_titles = "collect"),
  filename = fn,
  width = 7.5,
  height = 4,
  dpi = 300
)

prepare_image(fn)

remove(gg_regions, gg_groups)

What does the combined ceramic assemblage look like in each time period?

type_distributions <- phases |>
  select(site_id, phase) |>
  left_join(ceramics, by = "site_id") |>
  group_by(phase, ware, type) |>
  summarize(count = sum(count)) |>
  ungroup()
Code
tbl <- type_distributions |>
  pivot_wider(
    names_from = phase,
    values_from = count,
    values_fill = 0
  ) |>
  arrange(ware, type) |>
  relocate(
    `Early PII`,
    `Late PII`,
    `Early PIII`,
    `Middle PIII`,
    `Late PIII`,
    .after = type
  ) |>
  left_join(ceramic_dates, by = "type") |>
  mutate(
    type = str_remove(type, " Ware"),
    type = str_remove(type, " panel design")
  )

col_names <- c(
  " ",
  " ",
  str_split_i(names(phase_colors), pattern = " ", i = 1)
)

tbl_period_ceramics <- tbl |>
  select(-ware) |>
  relocate(type, range) |>
  kbl(
    col.names = col_names,
    align = c("lrrrrrr"),
    table.attr = "class='kibble two-head'",
    escape = FALSE
  ) |>
  pack_rows(index = table(tbl[["ware"]])) |>
  column_spec(2, width = "12%") |>
  column_spec(3:7, width = "9.4%") |>
  add_header_above(
    header = c(" " = 2, "Pueblo II" = 2, "Pueblo III" = 3),
    align = "right",
    bold = TRUE,
    line_sep = 3
  )

remove(tbl, col_names)

tbl_period_ceramics |>
  kable_styling(bootstrap_options = c("hover", "condensed")) |>
  scroll_box(height = "450px")
Pueblo II
Pueblo III
Early Late Early Middle Late
Cibola White Ware
Cebolleta Black-on-white 950-1150 0 17 5 25 0
Chaco Black-on-white 1075-1150 27 9 0 0 0
Gallup Black-on-white 980-1150 66 47 3 8 0
Puerco Black-on-white 1000-1150 36 64 47 52 2
Red Mesa Black-on-white 875-1050 30 23 0 3 0
Reserve Black-on-white 1000-1200 0 6 419 84 16
Tularosa Black-on-white 1150-1300 0 0 0 64 20
Undiagnostic Cibola White 875-1200 14 30 58 92 5
Gallina Gray Ware
Gallina Utility 1050-1300 283 123 0 16 0
Gallina White Ware
Gallina Black-on-white 1050-1300 83 32 0 16 0
Jemez White Ware
Jemez Black-on-white 1300-1700 0 2 0 10 118
Vallecitos Black-on-white 1250-1400 0 0 7 37 48
Mogollon Brown Ware
Alma Plain 200-1250 0 6 375 40 8
Reserve Corrugated 1050-1300 0 0 292 25 15
Reserve Indented Corrugated 1050-1300 0 4 154 94 42
Reserve Indented Corrugated Smeared 1050-1300 0 3 25 29 57
Tularosa Fillet Rim 1050-1200 0 5 0 11 9
Northern Rio Grande Gray Ware
Smeared Indented Corrugated 1250-1450 0 7 9 409 1393
Tesuque Gray 1250-1450 0 62 3 11 59
Northern Rio Grande White Ware
Biscuit A 1350-1450 0 0 0 9 5
Kwahe'e Black-on-white 1050-1200 2 16 294 51 57
Santa Fe Black-on-white 1150-1350 0 25 53 276 795
Rio Abajo White Ware
Socorro Black-on-white 900-1350 0 0 100 109 25
Rio Puerco Gray Ware
Corrugated 1000-1350 90 67 566 252 196
Exuberant Indented Corrugated 950-1300 173 98 3 31 0
Indented Corrugated 950-1300 47 149 1502 1098 1800
Plain Gray 700-1450 167 213 2495 1138 1088
Rio Puerco White Ware
Casa Salazar Black-on-white 1150-1225 0 9 184 57 30
Loma Fria Black-on-white 1150-1280 0 10 0 132 717
Loma Fria Black-on-white, ext. paint 1225-1280 0 0 0 13 106
San Juan Gray Ware
Mummy Lake Gray 1050-1200 0 0 6 129 161
San Juan White Ware
McElmo Black-on-white 1075-1250 0 12 1017 272 201
Mesa Verde Black-on-white 1150-1280 0 3 2 258 828
Mesa Verde Black-on-white, ext. paint 1225-1280 0 0 0 12 142
Undiagnostic San Juan White 1075-1280 0 11 112 214 264
White Mountain Red Ware
North Plains Black-on-red 1050-1150 0 5 10 0 17
Puerco Black-on-red 1030-1150 18 11 57 8 0
Saint Johns Black-on-red 1150-1300 0 2 0 82 273
Saint Johns Polychrome 1150-1300 0 2 0 59 161
Wingate Black-on-red 1030-1175 18 45 82 109 51
Wingate Polychrome 1030-1175 2 2 13 37 32

What does that correspondence look like geographically?

site_phases <- sites |>
  left_join(phases, by = "site_id") |>
  select(site_id, region, fnum, phase) |>
  filter(!is.na(phase))
Code
bb8 <- survey_area |>
  st_buffer(1000) |>
  st_bbox()

dx <- bb8[["xmax"]] - bb8[["xmin"]]
dy <- bb8[["ymax"]] - bb8[["ymin"]]

cover <- st_sym_difference(survey_area, st_as_sfc(bb8))

lbls <- tibble(
  x = st_bbox(survey_area)[["xmax"]] - 500,
  y = st_bbox(survey_area)[["ymin"]] + seq(1950, 5950, length = 5),
  phase = rev(names(phase_colors))
)

gg <- ggplot() +
  geom_spatraster_contour_filled(
    data = elevation,
    bins = 10,
    color = "gray30",
    linewidth = 0.1
  ) +
  scale_fill_cross_blended_d(
    palette = "arid",
    alpha = 0.7,
    guide = "none"
  ) +
  ggnewscale::new_scale_fill() +
  geom_sf(
    data = survey_area,
    fill = "transparent",
    color = "black"
  ) +
  geom_sf(
    data = cover,
    fill = alpha("white", 0.7),
    color = "transparent"
  ) +
  geom_sf(
    data = rio_puerco |> st_intersection(survey_area),
    color = "#7A96C2",
    linewidth = 0.5
  ) +
  with_outer_glow(
    geom_sf(
      data = site_phases,
      aes(fill = fct_relevel(phase, "Early PII", "Middle PII", "Late PII")),
      size = 2.75,
      shape = 21,
      color = "black",
      stroke = 0.2
    ),
    colour = "white",
    expand = 3.5,
    sigma = 2
  ) +
  scale_fill_manual(
    name = NULL,
    values = alpha(phase_colors, 0.85)
  ) +
  geom_sf(
    data = st_as_sfc(bb8),
    fill = "transparent",
    color = "black"
  ) +
  geom_point(
    data = lbls,
    aes(x, y, fill = phase),
    size = 2.75,
    shape = 21,
    color = "black",
    stroke = 0.2
  ) +
  geom_text(
    data = lbls,
    aes(x, y, label = phase, color = phase),
    size = 12 / .pt,
    hjust = 1,
    vjust = 0.45,
    nudge_x = -500
  ) +
  scale_color_manual(
    name = NULL,
    values = darken(phase_colors, 0.2)
  ) +
  annotate(
    "text",
    x = st_bbox(survey_area)[["xmin"]],
    y = st_bbox(survey_area)[["ymax"]],
    label = "Rio Puerco Project",
    size = 14 / .pt,
    hjust = 0,
    vjust = 1,
    fontface = "bold"
  ) +
  annotation_scale(
    aes(location = "br"),
    text_cex = 0.9,
    pad_x = unit(0.7, "cm"),
    pad_y = unit(0.65, "cm")
  ) +
  coord_sf(
    crs = 26913,
    datum = 26913,
    expand = FALSE,
    xlim = bb8[c("xmin", "xmax")],
    ylim = bb8[c("ymin", "ymax")]
  ) +
  theme_void(12) +
  theme(legend.position = "none")

ggsave(
  plot = gg,
  filename = here("figures", "correspondence-map.png"),
  width = 5,
  height = 5 * (dy / dx),
  dpi = 300,
  bg = "white"
)

remove(bb8, dx, dy, cover, gg, lbls)
Figure shows geographic distribution of correspondence groups.
Figure 8: Figure shows geographic distribution of correspondence groups.

How do site characteristics vary by region and time period?

Here, we measure the following site characteristics for each time period and region:

  • Site count
  • Site area or size (mound_area + kiva_area), total and median
  • Fraction of burned sites
  • Site orientation
site_characteristics <- sites |>
  st_drop_geometry() |>
  mutate(
    area = mound_area + kiva_area,
    burn = ifelse(burn == "Yes", 1L, 0L)
  ) |>
  inner_join(phases[c("site_id", "phase")], by = "site_id") |>
  group_by(region, phase) |>
  summarize(
    count = n(),
    area_t = sum(area),
    area_m = median(area),
    burn = mean(burn),
    orientation = median(orientation)
  ) |>
  ungroup() |>
  complete(
    region,
    phase,
    fill = list(area_t = 0, area_m = 0, burn = 0, count = 0)
  )
Code
tbl <- site_characteristics |>
  mutate(
    across(count:orientation, \(x) {
      round(x, digits = 2)
    }),
    across(count:orientation, \(x) {
      ifelse(is.na(x), "--", format(x, nsmall = 2))
    }),
    count = gsub(".00", "", count),
    region = region_labels[region]
  ) |>
  pivot_longer(
    count:orientation,
    names_to = "characteristic",
    values_to = "value"
  ) |>
  group_by(characteristic, phase) |>
  summarize(
    region = paste0(region, collapse = "<br>"),
    value = paste0(value, collapse = "<br>")
  ) |>
  ungroup() |>
  pivot_wider(
    names_from = "phase",
    values_from = "value"
  ) |>
  relocate(
    `Early PII`,
    `Late PII`,
    `Early PIII`,
    `Middle PIII`,
    `Late PIII`,
    .after = region
  ) |>
  mutate(
    i = case_when(
      characteristic == "count" ~ 1,
      characteristic == "area_t" ~ 2,
      characteristic == "area_m" ~ 3,
      characteristic == "burn" ~ 4,
      characteristic == "orientation" ~ 5
    ),
    characteristic = case_when(
      characteristic == "count" ~ "<strong>A. N Sites</strong>",
      characteristic == "area_t" ~
        "<strong>B. Area</strong><br>[total m&#178;]",
      characteristic == "area_m" ~
        "<strong>C. Area</strong><br>[median m&#178;]",
      characteristic == "burn" ~
        "<strong>D. Burned</strong><br>[mean] (Yes, No)",
      characteristic == "orientation" ~
        "<strong>E. Orientation<br>of Roomblock</strong><br>[median degrees]"
    )
  ) |>
  arrange(i) |>
  select(-i)


tbl_site_characteristics <- tbl |>
  kbl(
    col.names = c(" ", " ", names(phase_colors)),
    align = c("llrrrrr"),
    table.attr = "class='kibble lines'",
    escape = FALSE
  )

remove(tbl)

tbl_site_characteristics |>
  kable_styling(bootstrap_options = c("hover", "condensed"))
Early PII Late PII Early PIII Middle PIII Late PIII
A. N Sites Elk Springs
Jones Canyon
Mesa Portales
Rio Puerco
0
0
1
10
0
2
1
8
0
4
94
7
3
44
10
0
23
40
44
1
B. Area
[total m²]
Elk Springs
Jones Canyon
Mesa Portales
Rio Puerco
0.00
0.00
14.00
794.46
0.00
284.96
143.04
600.09
0.00
448.06
7296.94
533.43
211.12
4575.03
669.78
0.00
1648.79
5531.77
5878.69
101.24
C. Area
[median m²]
Elk Springs
Jones Canyon
Mesa Portales
Rio Puerco
0.00
0.00
14.00
77.94
0.00
142.48
143.04
91.36
0.00
80.93
65.42
62.81
61.26
86.47
74.76
0.00
74.47
124.06
99.74
101.24
D. Burned
[mean] (Yes, No)
Elk Springs
Jones Canyon
Mesa Portales
Rio Puerco
0.00
0.00
0.00
0.50
0.00
0.00
1.00
0.25
0.00
0.00
0.93
0.86
0.00
0.07
0.20
0.00
0.00
0.00
0.00
0.00
E. Orientation
of Roomblock

[median degrees]
Elk Springs
Jones Canyon
Mesa Portales
Rio Puerco
--
--
90.00
180.50
--
92.50
180.00
99.00
--
93.00
175.50
176.00
180.00
95.00
92.00
--
185.00
95.50
180.00
101.00

How are sites oriented through time?

And now for some trigonometry…

Code
bb8 <- survey_area |>
  st_buffer(1000) |>
  st_bbox()

cover <- st_sym_difference(survey_area, st_as_sfc(bb8))

lbls <- tibble(
  x = st_bbox(survey_area)[["xmin"]],
  y = st_bbox(survey_area)[["ymax"]],
  phase = factor(names(phase_colors), levels = names(phase_colors))
)

r <- 1750

circles <- sites |>
  inner_join(phases[c("site_id", "phase")], by = "site_id") |>
  group_by(region, phase) |>
  summarize() |>
  ungroup() |>
  st_centroid() |>
  st_buffer(r) |>
  mutate(phase = factor(phase, levels = names(phase_colors)))

xy <- circles |>
  st_centroid() |>
  st_coordinates() |>
  as_tibble() |>
  rename_with(str_to_lower) |>
  mutate(
    region = circles[["region"]],
    phase = circles[["phase"]]
  )

xy <- site_characteristics |>
  select(region, phase, orientation) |>
  filter(!is.na(orientation)) |>
  left_join(xy, by = c("region", "phase")) |>
  mutate(
    orientation = ((-orientation + 90) * pi) / 180,
    xend = x + ((r - 200) * cos(orientation)),
    yend = y + ((r - 200) * sin(orientation)),
    phase = factor(phase, levels = names(phase_colors))
  )

gg <- ggplot() +
  geom_spatraster_contour_filled(
    data = elevation,
    bins = 10,
    color = "gray30",
    linewidth = 0.1
  ) +
  scale_fill_cross_blended_d(
    palette = "arid",
    alpha = 0.7,
    guide = "none"
  ) +
  ggnewscale::new_scale_fill() +
  geom_sf(
    data = survey_area,
    fill = "transparent",
    color = "black"
  ) +
  geom_sf(
    data = cover,
    fill = alpha("white", 0.7),
    color = "transparent"
  ) +
  geom_sf(
    data = rio_puerco |> st_intersection(survey_area),
    color = "#7A96C2",
    linewidth = 0.5
  ) +
  geom_sf(
    data = circles,
    color = "black",
    fill = alpha("white", 0.5),
    linewidth = 0.25
  ) +
  with_outer_glow(
    geom_segment(
      data = xy,
      aes(x, y, xend = xend, yend = yend, color = region),
      linewidth = 0.65,
      lineend = "round",
      arrow = arrow(angle = 35, type = "closed", length = unit(0.05, "inches"))
    ),
    colour = "black",
    expand = 1.5,
    sigma = 0.5
  ) +
  geom_sf(
    data = st_as_sfc(bb8),
    fill = "transparent",
    color = "black"
  ) +
  geom_text(
    data = lbls,
    aes(x, y, label = phase),
    color = "black",
    size = 11 / .pt,
    hjust = 0,
    vjust = 1
  ) +
  scale_color_manual(
    name = NULL,
    values = region_colors,
    labels = paste(
      "<span style='color: ",
      darken(region_colors, 0.2),
      "'>",
      region_labels,
      "</span>"
    )
  ) +
  facet_wrap(
    vars(phase),
    ncol = 3
  ) +
  coord_sf(
    crs = 26913,
    datum = 26913,
    expand = FALSE,
    xlim = bb8[c("xmin", "xmax")],
    ylim = bb8[c("ymin", "ymax")]
  ) +
  theme_void(12) +
  theme(
    legend.justification.inside = c("right", "bottom"),
    legend.position = "inside",
    legend.position.inside = c(0.91, 0.15),
    legend.text = element_markdown(),
    strip.text = element_blank()
  ) +
  guides(color = guide_legend(label.position = "left"))

fn <- here("figures", "orientation-map.png")

ggsave(
  plot = gg,
  filename = fn,
  width = 7,
  height = 6,
  dpi = 300,
  bg = "white"
)

prepare_image(fn)

remove(bb8, circles, xy, cover, gg, lbls, fn, r)
Figure shows median orientation of sites across regions and time periods.
Figure 9: Figure shows median orientation of sites across regions and time periods. The circles are positioned so that their centers roughly coincide with the centroid of site points for each region and time period.

What does population do over time?

We include climate estimates from SKOPE here.

Code
phase_key <- 1L:5L
names(phase_key) <- names(phase_colors)

start_key <- c(950, 1050, 1150, 1200, 1250)
names(start_key) <- names(phase_colors)

end_key <- c(1050, 1150, 1200, 1250, 1300)
names(end_key) <- names(phase_colors)

ts_population_regions <- sites |>
  st_drop_geometry() |>
  inner_join(phases[c("site_id", "phase")], by = "site_id") |>
  group_by(region, phase) |>
  summarize(
    population = sum(mound_area + kiva_area),
    count = n()
  ) |>
  ungroup() |>
  complete(
    region,
    phase,
    fill = list(population = 0, count = 0, p_pop = 0, p_cnt = 0)
  ) |>
  mutate(phase_id = phase_key[phase]) |>
  arrange(region, phase_id)

ts_population_project <- ts_population_regions |>
  group_by(phase) |>
  summarize(
    phase_id = unique(phase_id),
    region = "project",
    population = sum(population),
    count = sum(count)
  ) |>
  ungroup() |>
  mutate(
    start = start_key[phase],
    end = end_key[phase]
  ) |>
  pivot_longer(
    c(start, end),
    names_to = "z",
    values_to = "year"
  ) |>
  arrange(phase_id, year)

ts_population_regions <- ts_population_regions |>
  mutate(
    region = factor(region, levels = names(region_labels)),
    start = start_key[phase],
    end = end_key[phase]
  ) |>
  group_by(phase) |>
  arrange(region) |>
  mutate(
    population = cumsum(population),
    count = cumsum(count)
  ) |>
  ungroup() |>
  pivot_longer(
    c(population, count),
    names_to = "variable",
    values_to = "value"
  ) |>
  group_by(variable, phase) |>
  arrange(region) |>
  mutate(
    variable = factor(
      variable,
      levels = c("population", "count", "gdd", "ppt")
    ),
    ymin = lag(value, n = 1L, default = 0)
  ) |>
  ungroup()

ts_population_project <- ts_population_project |>
  pivot_longer(
    c(population, count),
    names_to = "variable",
    values_to = "value"
  ) |>
  mutate(
    variable = factor(variable, levels = c("population", "count", "gdd", "ppt"))
  )

ts_climate <- climate |>
  filter(year >= min(start_key), year <= max(end_key)) |>
  mutate(
    region = "project",
    gdd = (5 / 9) * gdd
  ) |>
  pivot_longer(
    c(gdd, ppt),
    names_to = "variable",
    values_to = "value"
  ) |>
  mutate(
    variable = factor(
      variable,
      levels = c("population", "count", "gdd", "ppt")
    ),
  )

brks <- unique(c(start_key, end_key))

gg <- ggplot() +
  geom_rect(
    data = ts_population_regions,
    aes(xmin = start, xmax = end, ymin = ymin, ymax = value, fill = region),
    color = "transparent"
  ) +
  geom_vline(
    xintercept = brks[2:5],
    color = "gray40",
    linewidth = 0.1
  ) +
  geom_line(
    data = ts_population_project,
    aes(year, value),
    linewidth = 0.6
  ) +
  geom_line(
    data = ts_climate,
    aes(year, value),
    linewidth = 0.4,
    alpha = 0.4
  ) +
  geom_line(
    data = ts_climate |>
      group_by(variable) |>
      arrange(year) |>
      mutate(value = slide_dbl(value, mean, .before = 12L, .after = 12L)) |>
      ungroup(),
    aes(year, value),
    linewidth = 0.6
  ) +
  facet_wrap(
    vars(variable),
    ncol = 1,
    scales = "free_y",
    labeller = as_labeller(c(
      "population" = "Site Area\n[sq.m]",
      "count" = "Site Count\n[N]",
      "gdd" = "Maize GDD\n[°C days]",
      "ppt" = "Summer\nPrecipitation\n[mm]"
    )),
    strip.position = "left"
  ) +
  scale_fill_manual(
    name = NULL,
    values = region_colors,
    labels = paste(
      "<span style='color: ",
      darken(region_colors, 0.2),
      "'>",
      region_labels,
      "</span>"
    )
  ) +
  scale_x_continuous(
    breaks = brks,
    expand = c(0.01, 0.01),
    sec.axis = sec_axis(
      transform = \(x) {
        x
      },
      breaks = brks[1:5] + diff(brks) / 2,
      labels = names(phase_colors)
    )
  ) +
  labs(
    x = "Year [CE]",
    y = NULL
  ) +
  theme(
    axis.ticks.x.top = element_blank(),
    legend.box.background = element_blank(),
    legend.justification.inside = c("left", "top"),
    legend.key.size = unit(0.5, "cm"),
    legend.position = "inside",
    legend.position.inside = c(0.002, 0.99),
    legend.text = element_markdown(),
    strip.placement = "outside",
    strip.text = element_text(vjust = 0)
  )

fn <- here("figures", "time-series.png")

ggsave(
  plot = gg,
  filename = fn,
  width = 8,
  height = 7
)

prepare_image(fn)

remove(
  ts_climate,
  ts_population_regions,
  ts_population_project,
  ts_population,
  time_series,
  gg,
  fn,
  alpha_values,
  line_values,
  start_key,
  end_key,
  phase_key,
  brks
)
Figure shows time series for climate and population.
Figure 10: Figure shows time series for climate variables (precipitation and maize GDD) and two proxies for population size (total combined site area and site count). Note that the height of the bars in the population panels represent the total estimate for the project area. The filled and stacked bars are the distributions across regions for each time period.

Does the relationship between abundance and site size vary through time?

abundance <- left_join(
  abundance,
  phases[c("site_id", "phase")],
  by = "site_id"
)

temporal_abundance_model <- lm(
  log(ratio) ~ log(site_area) + phase,
  data = abundance
)
Code
temporal_abundance_model |>
  tidy() |>
  mutate(
    term = str_replace(term, pattern = "phase", replacement = "phase: ")
  ) |>
  rename_with(str_replace, pattern = "\\.", replacement = " ") |>
  rename_with(str_to_title) |>
  rename(
    "t Value" = Statistic,
    "p Value" = "P Value"
  ) |>
  mutate(across(2:5, \(x) {
    round(x, 4)
  })) |>
  kbl(
    caption = "Coefficient Estimates",
    table.attr = "class='kibble'"
  ) |>
  kable_styling(
    bootstrap_options = "hover",
    full_width = FALSE,
    position = "left"
  )
Coefficient Estimates
Term Estimate Std Error t Value p Value
(Intercept) -2.0825 0.2055 -10.1345 0.0000
log(site_area) 0.2361 0.0396 5.9663 0.0000
phase: Early PIII 0.2342 0.1324 1.7693 0.0779
phase: Late PII 0.4284 0.1780 2.4064 0.0167
phase: Late PIII 0.7198 0.1341 5.3685 0.0000
phase: Middle PIII 0.5485 0.1376 3.9852 0.0001

We already have a sense of what the slope looks like, but how does the mean response vary over time given the average mound area?

Code
dependence <- predict_response(
  temporal_abundance_model,
  terms = list(
    "site_area" = mean(abundance[["site_area"]]),
    phase = names(phase_colors)
  ),
  back_transform = FALSE
)

lbls <- tibble(
  group = names(phase_colors),
  y = max(dependence[["conf.high"]]) + 0.1
)

gg <- ggplot(dependence, aes(group, color = group, fill = group)) +
  geom_errorbar(
    aes(ymin = conf.low, ymax = conf.high),
    linewidth = 0.8,
    width = 0.13
  ) +
  geom_point(
    aes(y = predicted),
    shape = 21,
    size = 3.5,
    stroke = 0.6
  ) +
  geom_text(
    data = lbls,
    aes(group, y, label = group, color = group),
    size = 13 / .pt,
    hjust = 0.5,
    vjust = 1
  ) +
  annotate(
    "text",
    x = 5.4,
    y = min(dependence[["conf.low"]]),
    label = paste0(
      "*Assuming average site size: log(",
      round(mean(abundance[["site_area"]]), 2),
      " sq.m)"
    ),
    hjust = 1,
    vjust = 0,
    color = "gray35",
    size = 10 / .pt
  ) +
  scale_color_manual(name = NULL, values = darken(phase_colors, 0.2)) +
  scale_fill_manual(name = NULL, values = lighten(phase_colors, 0.2)) +
  labs(
    x = NULL,
    y = "Natural Log of Ratio of\nDecorated to Undecorated Ceramics"
  ) +
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.title.x = element_blank(),
    legend.position = "none",
    panel.grid.major = element_line(color = "grey92", linewidth = 0.3)
  )

fn <- here("figures", "temporal-abundance-dependence.png")

ggsave(
  plot = gg,
  filename = fn,
  width = 6,
  height = 4.25,
  dpi = 300
)

prepare_image(fn)

remove(dependence, lbls, gg, fn)
Figure shows response of abundance of decorated ceramics to site size by time period.
Figure 11: Figure shows response of abundance of decorated ceramics to site size by time period. Note that the y axis has a log scale.

Here are the linear model diagnostics.

check_model(
  temporal_abundance_model,
  check = c("qq", "linearity", "homogeneity", "outliers"),
  detrend = FALSE
)

The non-linearity is almost completely gone in this model, but heteroskedasticity appears to have increased. Incorporating an intercept offset for region and time period might address this issue, but that’s asking a lot of a linear model applied to this small dataset.

How are burned sites distributed across the region?

There is an interesting pattern of burned sites that may have something to do with the pattern of settlement in the region.

Code
bb8 <- survey_area |>
  st_buffer(1000) |>
  st_bbox()

dx <- bb8[["xmax"]] - bb8[["xmin"]]
dy <- bb8[["ymax"]] - bb8[["ymin"]]

cover <- st_sym_difference(survey_area, st_as_sfc(bb8))

lbls <- tibble(
  x = st_bbox(survey_area)[["xmax"]] - 500,
  y = st_bbox(survey_area)[["ymin"]] + seq(2000, 5800, length = 4)[3:4],
  burn = c("Yes", "No"),
  label = c("Burned", "Not Burned")
)

gg <- ggplot() +
  geom_spatraster_contour_filled(
    data = elevation,
    bins = 10,
    color = "gray30",
    linewidth = 0.1
  ) +
  scale_fill_cross_blended_d(
    palette = "arid",
    alpha = 0.7,
    guide = "none"
  ) +
  ggnewscale::new_scale_fill() +
  geom_sf(
    data = survey_area,
    fill = "transparent",
    color = "black"
  ) +
  geom_sf(
    data = cover,
    fill = alpha("white", 0.7),
    color = "transparent"
  ) +
  geom_sf(
    data = rio_puerco |> st_intersection(survey_area),
    color = "#7A96C2",
    linewidth = 0.5
  ) +
  with_outer_glow(
    geom_sf(
      data = sites,
      aes(fill = burn),
      size = 2.75,
      shape = 21,
      color = "black",
      stroke = 0.2
    ),
    colour = "white",
    expand = 3.5,
    sigma = 2
  ) +
  scale_fill_manual(
    name = NULL,
    values = alpha(c("Yes" = "#fd4d00", "No" = "#523800"), 0.9)
  ) +
  geom_sf(
    data = st_as_sfc(bb8),
    fill = "transparent",
    color = "black"
  ) +
  geom_point(
    data = lbls,
    aes(x, y, fill = burn),
    size = 2.75,
    shape = 21,
    color = "black",
    stroke = 0.2
  ) +
  geom_text(
    data = lbls,
    aes(x, y, label = label, color = burn),
    size = 12 / .pt,
    hjust = 1,
    vjust = 0.4,
    nudge_x = -500
  ) +
  scale_color_manual(
    name = NULL,
    values = darken(c("Yes" = "#fd4d00", "No" = "#523800"), 0.2)
  ) +
  annotate(
    "text",
    x = st_bbox(survey_area)[["xmin"]],
    y = st_bbox(survey_area)[["ymax"]],
    label = "Rio Puerco Project",
    size = 14 / .pt,
    hjust = 0,
    vjust = 1,
    fontface = "bold",
    lineheight = 0.85
  ) +
  annotation_scale(
    aes(location = "br"),
    text_cex = 0.9,
    pad_x = unit(0.7, "cm"),
    pad_y = unit(0.65, "cm")
  ) +
  coord_sf(
    crs = 26913,
    datum = 26913,
    expand = FALSE,
    xlim = bb8[c("xmin", "xmax")],
    ylim = bb8[c("ymin", "ymax")]
  ) +
  theme_void() +
  theme(
    legend.position = "none",
    plot.margin = margin(r = 1)
  )

ggsave(
  plot = gg,
  filename = here("figures", "burned-sites-map.png"),
  width = 5,
  height = 5 * (dy / dx),
  dpi = 300,
  bg = "white"
)

remove(bb8, dx, dy, cover, gg, lbls)
Figure shows geographic distribution of sites with evidence of burning.
Figure 12: Figure shows geographic distribution of sites with evidence of burning.

How are lithics distributed across survey areas and time periods?

lithic_summary <- site_phases |>
  st_drop_geometry() |>
  left_join(lithics, by = "site_id") |>
  group_by(region, phase, type) |>
  summarize(count = sum(count)) |>
  ungroup() |>
  complete(region, phase, type, fill = list(count = 0))
Code
tbl <- lithic_summary |>
  mutate(
    phase_id = case_when(
      phase == "Early PII" ~ 1L,
      phase == "Late PII" ~ 2L,
      phase == "Early PIII" ~ 3L,
      phase == "Middle PIII" ~ 4L,
      TRUE ~ 5L
    ),
    region = region_labels[region]
  ) |>
  pivot_wider(
    names_from = region,
    values_from = count,
    values_fill = 0
  ) |>
  arrange(phase_id, type) |>
  select(-phase_id)

idx <- rep(10, 5)
names(idx) <- names(phase_colors)

tbl_lithic_summary <- tbl |>
  select(-phase) |>
  kbl(
    col.names = c("\u200B", unname(region_labels)),
    align = c("lrrrrr"),
    table.attr = "class='kibble'",
    escape = FALSE
  ) |>
  column_spec(2:5, width = "17%") |>
  pack_rows(index = idx)

remove(tbl)

tbl_lithic_summary |>
  kable_styling(bootstrap_options = c("hover", "condensed")) |>
  scroll_box(height = "450px")
Elk Springs Jones Canyon Mesa Portales Rio Puerco
Early PII
Chinle Chert 0 0 6 0
Dacite 0 0 0 0
Gray Chert 0 0 0 0
Obsidian 0 0 3 0
Petrified Wood 0 0 0 19
Red Chert 0 0 0 27
Red/White Chert 0 0 6 17
Rhyolite 0 0 0 0
White Chert 0 0 10 222
Yellow Chert 0 0 0 4
Late PII
Chinle Chert 0 0 4 7
Dacite 0 0 0 0
Gray Chert 0 8 2 31
Obsidian 0 5 0 44
Petrified Wood 0 3 0 14
Red Chert 0 9 3 39
Red/White Chert 0 0 4 27
Rhyolite 0 0 0 0
White Chert 0 22 4 92
Yellow Chert 0 3 4 11
Early PIII
Chinle Chert 0 0 14 3
Dacite 0 11 19 0
Gray Chert 0 30 488 32
Obsidian 0 9 63 7
Petrified Wood 0 3 514 35
Red Chert 0 15 546 20
Red/White Chert 0 18 341 26
Rhyolite 0 3 10 7
White Chert 0 36 670 41
Yellow Chert 0 0 179 11
Middle PIII
Chinle Chert 0 85 0 0
Dacite 0 58 7 0
Gray Chert 0 130 13 0
Obsidian 27 233 42 0
Petrified Wood 0 78 13 0
Red Chert 0 164 44 0
Red/White Chert 0 106 16 0
Rhyolite 0 30 0 0
White Chert 0 371 82 0
Yellow Chert 0 64 9 0
Late PIII
Chinle Chert 0 21 2 0
Dacite 36 49 46 0
Gray Chert 3 82 53 0
Obsidian 250 283 284 6
Petrified Wood 0 36 8 0
Red Chert 2 139 46 3
Red/White Chert 0 77 17 4
Rhyolite 0 4 0 0
White Chert 8 339 445 9
Yellow Chert 14 48 2 0

Additional figures requested by reviewers

Ware disributions across regions

Code
tmp_labels <- paste0(
  "<span style='color: ",
  darken(region_colors, 0.2),
  "'>",
  region_labels,
  "</span>"
)

names(tmp_labels) <- names(region_labels)

tmp_data <- ceramics |>
  group_by(region, ware) |>
  summarize(count = sum(count)) |>
  mutate(ware = sub(" Ware", "", ware)) |>
  filter(ware != "Rio Puerco Gray") |>
  ungroup() |>
  tidyr::complete(region, ware, fill = list(count = 0)) |>
  mutate(white = ifelse(count == 0, 1L, 2L))

p <- ggplot(tmp_data) +
  geom_col(
    aes(count, ware, fill = region),
    position = position_dodge2(0.9, reverse = TRUE)
  ) +
  geom_text(
    aes(count, ware, label = count, color = as.factor(white), group = region),
    position = position_dodge2(0.9, reverse = TRUE),
    hjust = -0.2,
    size = 7 / .pt
  ) +
  scale_color_manual(
    values = c("white", "gray65"),
    guide = "none"
  ) +
  scale_fill_manual(
    name = NULL,
    values = region_colors,
    labels = paste(
      "<span style='color: ",
      darken(region_colors, 0.2),
      "'>",
      region_labels,
      "</span>"
    )
  ) +
  scale_x_continuous(limits = c(0, 2050)) +
  geom_hline(
    yintercept = seq(1.5, 11.5, by = 1),
    color = "gray85",
    linewidth = 0.1
  ) +
  labs(x = NULL, y = NULL) +
  theme(
    legend.background = element_blank(),
    legend.justification.inside = c("right", "bottom"),
    legend.position = "inside",
    legend.position.inside = c(1, 0),
    legend.text = element_markdown(size = rel(0.8)),
    plot.margin = margin()
  )

tmp_data <- ceramics |>
  left_join(phases[c("site_id", "phase")], by = "site_id") |>
  group_by(phase, ware) |>
  summarize(count = sum(count)) |>
  mutate(ware = sub(" Ware", "", ware)) |>
  filter(ware != "Rio Puerco Gray") |>
  ungroup() |>
  tidyr::complete(phase, ware, fill = list(count = 0)) |>
  mutate(
    phase = as.factor(phase),
    phase = fct_relevel(
      phase,
      "Early PII",
      "Late PII",
      "Early PIII",
      "Middle PIII"
    ),
    white = ifelse(count == 0, 1L, 2L)
  )

p2 <- ggplot(tmp_data) +
  geom_col(
    aes(count, ware, fill = phase),
    position = position_dodge2(0.9, reverse = TRUE)
  ) +
  geom_text(
    aes(count, ware, label = count, color = as.factor(white), group = phase),
    position = position_dodge2(0.9, reverse = TRUE),
    hjust = -0.2,
    size = 7 / .pt
  ) +
  scale_color_manual(
    values = c("white", "gray65"),
    guide = "none"
  ) +
  scale_fill_manual(
    name = NULL,
    values = phase_colors,
    labels = paste(
      "<span style='color: ",
      darken(phase_colors, 0.2),
      "'>",
      names(phase_colors),
      "</span>"
    )
  ) +
  scale_x_continuous(limits = c(0, 1700)) +
  geom_hline(
    yintercept = seq(1.5, 11.5, by = 1),
    color = "gray85",
    linewidth = 0.1
  ) +
  labs(x = NULL, y = NULL) +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    legend.background = element_blank(),
    legend.justification.inside = c("right", "bottom"),
    legend.position = "inside",
    legend.position.inside = c(1, 0),
    legend.text = element_markdown(size = rel(0.8)),
    plot.margin = margin(l = 4)
  )

ggsave(
  plot = p + p2,
  filename = here("figures", "wares-by-region-and-phase.png"),
  width = 6.5,
  height = 6,
  dpi = 300,
  bg = "white"
)
Figure shows spatial and temporal distribution of ceramic wares.
Figure 13: Figure shows spatial and temporal distribution of ceramic wares

5 Store Tables

Tables are still a mess in Quarto, so we store everything we need to recreate them in the manuscript. To be fair, that’s mostly because everyone still wants to support LaTeX, and LaTeX is terrible.

save(
  list = ls(pattern = "tbl_"),
  file = here("data", "tables.Rdata")
)

6 Session Info

Code
# save the session info as an object
pkg_sesh <- sessioninfo::session_info(pkgs = "attached")

# inject the quarto info
pkg_sesh$platform$quarto <- paste(
  quarto::quarto_version(),
  "@",
  quarto::quarto_path()
)

# print it out
pkg_sesh
─ Session info ───────────────────────────────────────────────────────────────
 setting  value
 version  R version 4.5.0 (2025-04-11 ucrt)
 os       Windows 11 x64 (build 26100)
 system   x86_64, mingw32
 ui       RTerm
 language (EN)
 collate  English_United States.utf8
 ctype    English_United States.utf8
 tz       America/Denver
 date     2025-08-26
 pandoc   3.6.3 @ c:\\Program Files\\Positron\\resources\\app\\quarto\\bin\\tools/ (via rmarkdown)
 quarto   1.7.32 @ C:\\PROGRA~1\\Quarto\\bin\\quarto.exe

─ Packages ───────────────────────────────────────────────────────────────────
 package     * version    date (UTC) lib source
 broom       * 1.0.9      2025-07-28 [1] RSPM
 ca          * 0.71.1     2020-01-24 [1] RSPM
 colorspace  * 2.1-1      2024-07-26 [1] RSPM
 dplyr       * 1.1.4      2023-11-17 [1] RSPM
 forcats     * 1.0.0      2023-01-29 [1] RSPM
 ggeffects   * 2.3.1      2025-08-20 [1] RSPM
 ggfx        * 1.0.2      2025-07-24 [1] RSPM
 ggnewscale  * 0.5.2      2025-06-20 [1] RSPM
 ggplot2     * 3.5.2.9000 2025-05-01 [1] Github (tidyverse/ggplot2@295f5cb)
 ggrepel     * 0.9.6      2024-09-07 [1] RSPM
 ggspatial   * 1.1.10     2025-08-24 [1] RSPM
 ggtext      * 0.1.2      2022-09-16 [1] RSPM
 here        * 1.0.1      2020-12-13 [1] CRAN (R 4.5.0)
 kableExtra  * 1.4.0      2024-01-24 [1] RSPM
 lubridate   * 1.9.4      2024-12-08 [1] RSPM
 magick      * 2.8.7      2025-06-06 [1] RSPM
 MASS        * 7.3-65     2025-02-28 [2] CRAN (R 4.5.0)
 patchwork   * 1.3.2      2025-08-25 [1] CRAN (R 4.5.0)
 performance * 0.15.0     2025-07-10 [1] RSPM
 purrr       * 1.0.4      2025-02-05 [1] RSPM
 readr       * 2.1.5      2024-01-10 [1] RSPM
 sf          * 1.0-21     2025-05-15 [1] RSPM
 slider      * 0.3.2      2024-10-25 [1] RSPM
 stringr     * 1.5.1      2023-11-14 [1] RSPM
 terra       * 1.8-60     2025-07-21 [1] RSPM
 tibble      * 3.2.1      2023-03-20 [1] RSPM
 tidyr       * 1.3.1      2024-01-24 [1] RSPM
 tidyterra   * 0.7.2      2025-04-14 [1] RSPM
 tidyverse   * 2.0.0      2023-02-22 [1] RSPM

 [1] C:/Users/kenne/AppData/Local/R/win-library/4.5
 [2] C:/Program Files/R/R-4.5.0/library
 * ── Packages attached to the search path.

──────────────────────────────────────────────────────────────────────────────