Lab 04: Tesselations and Point-in-Polygon

Question 1

#Create conus counties and county centroid (1.1 and 1.2)

USAboundaries::us_counties() %>%
  filter(!state_name %in% c("Alaska", "Hawaii", "Puerto Rico")) %>%
  st_transform(5070) ->
  counties

st_centroid(counties) %>%
  st_union() ->
  cent_counties

#Tessellation and coverage (1.3 and 1.4)

#voronoi tessellation
st_voronoi(cent_counties) %>%
  st_cast() %>%
  st_as_sf() %>%
  mutate(id = 1:n()) ->
  v_grid

st_intersection(v_grid, st_union(counties)) ->
  v_grid

mapview::npts(v_grid)

## [1] 24858

rmapshaper::ms_simplify(v_grid, keep = .01) ->
  v_grid_simp

mapview::npts(v_grid_simp)

## [1] 21567

#triangulated tessellation
st_triangulate(cent_counties) %>%
  st_cast() %>%
  st_as_sf() %>%
  mutate(id = 1:n()) ->
  t_grid

st_intersection(t_grid, st_union(counties)) ->
  t_grid

mapview::npts(t_grid)

## [1] 28800

rmapshaper::ms_simplify(t_grid, keep = .01) ->
  t_grid_simp

mapview::npts(t_grid_simp)

## [1] 26011

#grid coverage
st_make_grid(counties, n = c(70, 50)) %>%
  st_as_sf() %>%
  mutate(id = 1:n()) ->
  sq_grid

#hex coverage
st_make_grid(counties, n = c(70, 50), square = FALSE) %>%
  st_as_sf() %>%
  mutate(id = 1:n()) ->
  hex_grid

#Create a function (1.6)

#ggplot function
plot_tess = function(data, title){
  ggplot() +
    geom_sf(data = data, fill = "white", col = "navy", size = .2) +
    theme_void() +
    labs(title = title, caption = paste("This tesselation has:", nrow(data), "tiles" ))
}

#Plot (1.7)

#original county data 
plot_tess(counties, "counties")

#grid coverage 
plot_tess(sq_grid, "square grid coverage")

#hex coverage
plot_tess(hex_grid, "hexagonal grid coverage")

#voronoi 
plot_tess(v_grid_simp, "voronoi tessellation")

#triangulated
plot_tess(t_grid_simp, "triangulated tessellation")

Question 2

#create a function to summarize tessellations

tess_features = function(data, title){
  df = data.frame(row.names = title,
                  mean_area = units::set_units((st_area(data)), "km^2") %>%
                    units::drop_units() %>%
                    head(n = 1),
                  total_area = units::set_units((sum(st_area(data))), "km^2") %>%
                    units::drop_units() %>%
                    head(n = 1),
                  n_features = count(data, "id") %>%
                    st_drop_geometry() %>%
                    select(n),
                  standard_dev = sd(st_area(data)))
  return(df)
}

#2.2
tess_features(counties, "original counties summary")

##                           mean_area total_area    n standard_dev
## original counties summary  1157.884    7837583 3108   3404325220

tess_features(sq_grid, "square grid summary")

##                     mean_area total_area    n standard_dev
## square grid summary  3819.376    8563041 2242  1.45713e-05

tess_features(hex_grid, "hexagonal grid summary")

##                        mean_area total_area    n standard_dev
## hexagonal grid summary  3763.052    8545891 2271 1.413761e-05

tess_features(v_grid_simp, "voronoi tessellation summary")

##                              mean_area total_area    n standard_dev
## voronoi tessellation summary  3946.585    7822549 3105   2891630506

tess_features(t_grid_simp, "triangulated tessellation summary")

##                                   mean_area total_area    n standard_dev
## triangulated tessellation summary  674.9631    7754447 6196   1578538715

#2.3 
tess_summary = bind_rows(
  tess_features(counties, "original counties summary"), 
  tess_features(sq_grid, "square grid summary"), 
  tess_features(hex_grid, "hexagonal grid summary"), 
  tess_features(v_grid_simp, "voronoi tessellation summary"), 
  tess_features(t_grid_simp, "triangulated tessellation summary"))

#table with the five summaries (2.4)
knitr::kable(tess_summary,
             caption = "Tessellation Summary",
             col.names = c("Mean Area in km^2", "Total Area in km^2", "Number of Features", "Standard Deviation")) %>%
  kable_styling(bootstrap_options = "striped", font_size = 15)

Tessellation Summary
	Mean Area in km^2	Total Area in km^2	Number of Features	Standard Deviation
original counties summary	1157.8837	7837583	3108	3.404325e+09
square grid summary	3819.3758	8563041	2242	1.460000e-05
hexagonal grid summary	3763.0520	8545891	2271	1.410000e-05
voronoi tessellation summary	3946.5852	7822549	3105	2.891631e+09
triangulated tessellation summary	674.9631	7754447	6196	1.578539e+09

Comment on the traits of each tessellation (2.5)

Square grid and hexagonal grid has the least amount of features since they are both pretty simple, easy to assemble and equally spread out. Hexagonal grid would be better for mapping large areas since it fits curve surface better.
The voronoi tessellation and triangulated tessellation above are both simplified. The voronoi tessellation is the most similar to the original counties coverage. Triangulated tessellation has the most amount of features but is more accurate.

##Question 3

#loading dam data (3.1)
read_excel("~/github/geog-176A-labs/data/NID2019_U.xlsx") %>%
  filter(!is.na(LATITUDE)) %>%
  st_as_sf(coords = c("LONGITUDE", "LATITUDE"), crs = 4326) %>%
  st_transform(5070) %>%
  st_filter(counties) ->
  NID

#point-in-polygon function (3.2)
point_in_polygon = function(points, polygon, group){
  st_join(polygon, points) %>%
    st_drop_geometry() %>%
    count(get(group)) %>%
    setNames(c(group, "n")) %>%
    left_join(polygon, by = group) %>%
    st_as_sf()
}

#apply PIP to five tessellations (3.3)
point_in_polygon(NID, counties, "name")

point_in_polygon(NID, v_grid_simp, "id")

point_in_polygon(NID, t_grid_simp, "id")

point_in_polygon(NID, sq_grid, "id")

point_in_polygon(NID, hex_grid, "id")

#function for plotting PIP (3.4)
plot_pip = function(data, title){
  ggplot() +
    geom_sf(data = data, aes(fill = log(n)), col = NA) +
    scale_fill_viridis_b() +
    theme_void() +
    labs(title = title,
         caption = paste("Dams:", sum(data$n)))
}

#plot the five tessellations (3.5)
point_in_polygon(NID, counties, "name") %>%
  plot_pip(., "Dam Locations on Original Counties Coverage")

point_in_polygon(NID, v_grid_simp, "id") %>%
  plot_pip(., "Dam Locations on Voronoi Tessellation")

point_in_polygon(NID, t_grid_simp, "id") %>%
  plot_pip(., "Dam Locations on Triangulated Tessellation")

point_in_polygon(NID, sq_grid, "id") %>%
  plot_pip(., "Dam Locations on Square Grid")

point_in_polygon(NID, hex_grid, "id") %>%
  plot_pip(., "Dam Locations on Hexagonal Grid")

Comment on the influence of the tessellated surface in the visualization of point counts. How does this related to the MAUP problem. Moving forward you will only use one tessellation, which will you chose and why? (3.6)

Being able to visualize the point counts on different tessellated surfaces show how the object coverage can look significantly different based on how polygons are shown.
MAUP problem is of statistical bias, where data can be interpreted in different ways based on what spatial shape and layout is given.
I would use the hexagonal grid because it’s represented on equal area instead of relying on points for calculating the polygons. Unlike the square grid, it fits curved surfaces better and reduce edge effects.

Question 4

The four dam purposes I choose are recreation (R), navigation (N), debris control (D), and water supply(S). I am interested in analyzing dams with recreation as its main purpose because I was surprised by how it is the most common. I am also interested in analyzing dams with navigation, debris control, and water supply as its purpose because I thought they would be more common.

#filtering dam purposes (4.1)
NID %>%
 filter(grepl("S", PURPOSES)) ->
  water_supply

NID %>%
  filter(grepl("R", PURPOSES)) ->
  recreation

NID %>%
  filter(grepl("N", PURPOSES)) ->
  navigation

NID %>%
  filter(grepl("D", PURPOSES)) ->
  debris_control

#apply dam purposes through PIP. My elected tessellation is hexagonal grid. (4.2)
point_in_polygon(water_supply, hex_grid, "id") 

point_in_polygon(recreation, hex_grid, "id") 

point_in_polygon(navigation, hex_grid, "id") 

point_in_polygon(debris_control, hex_grid, "id")

#function for plotting (4.2)
plot_pip_2 = function(data, title){
  ggplot() +
    geom_sf(data = data, aes(fill = log(n)), col = NA) +
    scale_fill_viridis_b() +
    gghighlight::gghighlight(n > mean(n) + sd(n)) + 
    theme_void() +
    labs(title = title,
         caption = paste("Dams:", sum(data$n)))
}

#plotting dam purposes on hexagonal grid tessellation 
point_in_polygon(water_supply, hex_grid, "id") %>%
    plot_pip_2(., "Dam Locations with Water Supply Purpose")

point_in_polygon(recreation, hex_grid, "id") %>%
    plot_pip_2(., "Dam Locations with Recreation Purpose")

point_in_polygon(navigation, hex_grid, "id") %>%
    plot_pip_2(., "Dam Locations with Navigation Purpose")

point_in_polygon(debris_control, hex_grid, "id") %>%
    plot_pip_2(., "Dam Locations with Debris Control Purpose")

Comment of geographic distribution of dams you found. Does it make sense? How might the tessellation you chose impact your findings? How does the distribution of dams coiencide with other geogaphic factors such as river systems, climate, ect?

I chose the hexagonal grid as my tessellation to plot the dam locations. The dam locations make sense to me because I can see how geographical and climate features can affect these locations. For example, dams built with navigation purposes can be found mainly near the Mississippi River.

Lab 04: Tesselations and Point-in-Polygon

Kaitlyn Fu

Question 1

Question 2

Question 4