#Create conus counties and county centroid (1.1 and 1.2)
USAboundaries::us_counties() %>%
filter(!state_name %in% c("Alaska", "Hawaii", "Puerto Rico")) %>%
st_transform(5070) ->
counties
st_centroid(counties) %>%
st_union() ->
cent_counties
#Tessellation and coverage (1.3 and 1.4)
#voronoi tessellation
st_voronoi(cent_counties) %>%
st_cast() %>%
st_as_sf() %>%
mutate(id = 1:n()) ->
v_grid
st_intersection(v_grid, st_union(counties)) ->
v_grid
mapview::npts(v_grid)
## [1] 24858
rmapshaper::ms_simplify(v_grid, keep = .01) ->
v_grid_simp
mapview::npts(v_grid_simp)
## [1] 21567
#triangulated tessellation
st_triangulate(cent_counties) %>%
st_cast() %>%
st_as_sf() %>%
mutate(id = 1:n()) ->
t_grid
st_intersection(t_grid, st_union(counties)) ->
t_grid
mapview::npts(t_grid)
## [1] 28800
rmapshaper::ms_simplify(t_grid, keep = .01) ->
t_grid_simp
mapview::npts(t_grid_simp)
## [1] 26011
#grid coverage
st_make_grid(counties, n = c(70, 50)) %>%
st_as_sf() %>%
mutate(id = 1:n()) ->
sq_grid
#hex coverage
st_make_grid(counties, n = c(70, 50), square = FALSE) %>%
st_as_sf() %>%
mutate(id = 1:n()) ->
hex_grid
#Create a function (1.6)
#ggplot function
plot_tess = function(data, title){
ggplot() +
geom_sf(data = data, fill = "white", col = "navy", size = .2) +
theme_void() +
labs(title = title, caption = paste("This tesselation has:", nrow(data), "tiles" ))
}
#Plot (1.7)
#original county data
plot_tess(counties, "counties")
#grid coverage
plot_tess(sq_grid, "square grid coverage")
#hex coverage
plot_tess(hex_grid, "hexagonal grid coverage")
#voronoi
plot_tess(v_grid_simp, "voronoi tessellation")
#triangulated
plot_tess(t_grid_simp, "triangulated tessellation")
#create a function to summarize tessellations
tess_features = function(data, title){
df = data.frame(row.names = title,
mean_area = units::set_units((st_area(data)), "km^2") %>%
units::drop_units() %>%
head(n = 1),
total_area = units::set_units((sum(st_area(data))), "km^2") %>%
units::drop_units() %>%
head(n = 1),
n_features = count(data, "id") %>%
st_drop_geometry() %>%
select(n),
standard_dev = sd(st_area(data)))
return(df)
}
#2.2
tess_features(counties, "original counties summary")
## mean_area total_area n standard_dev
## original counties summary 1157.884 7837583 3108 3404325220
tess_features(sq_grid, "square grid summary")
## mean_area total_area n standard_dev
## square grid summary 3819.376 8563041 2242 1.45713e-05
tess_features(hex_grid, "hexagonal grid summary")
## mean_area total_area n standard_dev
## hexagonal grid summary 3763.052 8545891 2271 1.413761e-05
tess_features(v_grid_simp, "voronoi tessellation summary")
## mean_area total_area n standard_dev
## voronoi tessellation summary 3946.585 7822549 3105 2891630506
tess_features(t_grid_simp, "triangulated tessellation summary")
## mean_area total_area n standard_dev
## triangulated tessellation summary 674.9631 7754447 6196 1578538715
#2.3
tess_summary = bind_rows(
tess_features(counties, "original counties summary"),
tess_features(sq_grid, "square grid summary"),
tess_features(hex_grid, "hexagonal grid summary"),
tess_features(v_grid_simp, "voronoi tessellation summary"),
tess_features(t_grid_simp, "triangulated tessellation summary"))
#table with the five summaries (2.4)
knitr::kable(tess_summary,
caption = "Tessellation Summary",
col.names = c("Mean Area in km^2", "Total Area in km^2", "Number of Features", "Standard Deviation")) %>%
kable_styling(bootstrap_options = "striped", font_size = 15)
Mean Area in km^2 | Total Area in km^2 | Number of Features | Standard Deviation | |
---|---|---|---|---|
original counties summary | 1157.8837 | 7837583 | 3108 | 3.404325e+09 |
square grid summary | 3819.3758 | 8563041 | 2242 | 1.460000e-05 |
hexagonal grid summary | 3763.0520 | 8545891 | 2271 | 1.410000e-05 |
voronoi tessellation summary | 3946.5852 | 7822549 | 3105 | 2.891631e+09 |
triangulated tessellation summary | 674.9631 | 7754447 | 6196 | 1.578539e+09 |
Comment on the traits of each tessellation (2.5)
Square grid and hexagonal grid has the least amount of features since they are both pretty simple, easy to assemble and equally spread out. Hexagonal grid would be better for mapping large areas since it fits curve surface better.
The voronoi tessellation and triangulated tessellation above are both simplified. The voronoi tessellation is the most similar to the original counties coverage. Triangulated tessellation has the most amount of features but is more accurate.
##Question 3
#loading dam data (3.1)
read_excel("~/github/geog-176A-labs/data/NID2019_U.xlsx") %>%
filter(!is.na(LATITUDE)) %>%
st_as_sf(coords = c("LONGITUDE", "LATITUDE"), crs = 4326) %>%
st_transform(5070) %>%
st_filter(counties) ->
NID
#point-in-polygon function (3.2)
point_in_polygon = function(points, polygon, group){
st_join(polygon, points) %>%
st_drop_geometry() %>%
count(get(group)) %>%
setNames(c(group, "n")) %>%
left_join(polygon, by = group) %>%
st_as_sf()
}
#apply PIP to five tessellations (3.3)
point_in_polygon(NID, counties, "name")
point_in_polygon(NID, v_grid_simp, "id")
point_in_polygon(NID, t_grid_simp, "id")
point_in_polygon(NID, sq_grid, "id")
point_in_polygon(NID, hex_grid, "id")
#function for plotting PIP (3.4)
plot_pip = function(data, title){
ggplot() +
geom_sf(data = data, aes(fill = log(n)), col = NA) +
scale_fill_viridis_b() +
theme_void() +
labs(title = title,
caption = paste("Dams:", sum(data$n)))
}
#plot the five tessellations (3.5)
point_in_polygon(NID, counties, "name") %>%
plot_pip(., "Dam Locations on Original Counties Coverage")
point_in_polygon(NID, v_grid_simp, "id") %>%
plot_pip(., "Dam Locations on Voronoi Tessellation")
point_in_polygon(NID, t_grid_simp, "id") %>%
plot_pip(., "Dam Locations on Triangulated Tessellation")
point_in_polygon(NID, sq_grid, "id") %>%
plot_pip(., "Dam Locations on Square Grid")
point_in_polygon(NID, hex_grid, "id") %>%
plot_pip(., "Dam Locations on Hexagonal Grid")
Comment on the influence of the tessellated surface in the visualization of point counts. How does this related to the MAUP problem. Moving forward you will only use one tessellation, which will you chose and why? (3.6)
Being able to visualize the point counts on different tessellated surfaces show how the object coverage can look significantly different based on how polygons are shown.
MAUP problem is of statistical bias, where data can be interpreted in different ways based on what spatial shape and layout is given.
I would use the hexagonal grid because it’s represented on equal area instead of relying on points for calculating the polygons. Unlike the square grid, it fits curved surfaces better and reduce edge effects.
#filtering dam purposes (4.1)
NID %>%
filter(grepl("S", PURPOSES)) ->
water_supply
NID %>%
filter(grepl("R", PURPOSES)) ->
recreation
NID %>%
filter(grepl("N", PURPOSES)) ->
navigation
NID %>%
filter(grepl("D", PURPOSES)) ->
debris_control
#apply dam purposes through PIP. My elected tessellation is hexagonal grid. (4.2)
point_in_polygon(water_supply, hex_grid, "id")
point_in_polygon(recreation, hex_grid, "id")
point_in_polygon(navigation, hex_grid, "id")
point_in_polygon(debris_control, hex_grid, "id")
#function for plotting (4.2)
plot_pip_2 = function(data, title){
ggplot() +
geom_sf(data = data, aes(fill = log(n)), col = NA) +
scale_fill_viridis_b() +
gghighlight::gghighlight(n > mean(n) + sd(n)) +
theme_void() +
labs(title = title,
caption = paste("Dams:", sum(data$n)))
}
#plotting dam purposes on hexagonal grid tessellation
point_in_polygon(water_supply, hex_grid, "id") %>%
plot_pip_2(., "Dam Locations with Water Supply Purpose")
point_in_polygon(recreation, hex_grid, "id") %>%
plot_pip_2(., "Dam Locations with Recreation Purpose")
point_in_polygon(navigation, hex_grid, "id") %>%
plot_pip_2(., "Dam Locations with Navigation Purpose")
point_in_polygon(debris_control, hex_grid, "id") %>%
plot_pip_2(., "Dam Locations with Debris Control Purpose")
Comment of geographic distribution of dams you found. Does it make sense? How might the tessellation you chose impact your findings? How does the distribution of dams coiencide with other geogaphic factors such as river systems, climate, ect?