The workflow outlined below uses the TreeMap raster and tree-level attributes, which the user must download (~4 GB) from this repository (“RDS-2021-0074_Data.zip” as of 2024-01-18). The user must then define the directory location `input_treemap_dir` where the extracted files `TreeMap2016.tif` and `TreeMap2016_tree_table.csv` reside. The user must also define the directory location `input_las_dir` of point cloud data in `las` or `laz` file format from Lidar or UAS structure from motion (SfM) data collection systems (this can be multiple files or a single file). These parameters (`input_treemap_dir` and `input_las_dir`) and other parameterization are entered by the user in this section.
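For convenience, a minimal sketch of acquiring and extracting the TreeMap archive is below; the download URL is a placeholder (substitute the actual RDS-2021-0074_Data.zip link from the repository referenced above) and the destination matches the default `input_treemap_dir` used later.
## minimal sketch: download and extract the TreeMap archive
## NOTE: treemap_zip_url is a placeholder -- substitute the actual
## "RDS-2021-0074_Data.zip" link from the repository referenced above
treemap_zip_url = "https://example.com/RDS-2021-0074_Data.zip"
dir.create("../data/treemap", showWarnings = FALSE, recursive = TRUE)
dest_zip = file.path("../data/treemap", "RDS-2021-0074_Data.zip")
## ~4 GB download; raise the timeout so download.file does not abort
options(timeout = max(3600, getOption("timeout")))
download.file(treemap_zip_url, destfile = dest_zip, mode = "wb")
unzip(dest_zip, exdir = "../data/treemap")
## confirm the two required files were extracted (the archive may nest
## them in a sub directory; move them up to input_treemap_dir if so)
file.exists(file.path("../data/treemap", "TreeMap2016.tif"))
file.exists(file.path("../data/treemap", "TreeMap2016_tree_table.csv"))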
This script processes point cloud data in `las` or `laz` file format from Lidar or UAS structure from motion (SfM) data collection systems. Products created include:
This script incorporates and builds upon the techniques outlined in research including:
Load all packages used in the program. Note that the `TreeLS` package was removed from CRAN and must be installed from the development version on GitHub by uncommenting the relevant code section below.
# bread-and-butter
library(tidyverse) # the tidyverse
library(viridis) # viridis colors
library(scales) # work with number and plot scales
library(latex2exp) # math formulas with latex
# spatial analysis
library(terra) # raster
library(sf) # simple features
library(sfarrow) # sf to Apache-Parquet files for working with large files
# point cloud processing
library(lidR)
library(ForestTools) # for crown delineation but relies on deprecated `raster`
library(raster) # !! this was deprecated in 2023; retained only for ForestTools
library(rlas) # write las index files .lax
## lidR::watershed requires EBImage::watershed
# install.packages("BiocManager")
# BiocManager::install("EBImage")
# library(BiocManager) # required for lidR::watershed
library(EBImage) # required for lidR::watershed
## !! TreeLS package removed from CRAN...
## uncomment to install from github dev repo: https://github.com/tiagodc/TreeLS
# library(pak)
# pak::pkg_install("tiagodc/TreeLS")
library(TreeLS) # removed from CRAN
# FIA data
## devtools::install_github('hunter-stanke/rFIA')
# library(rFIA) ## removed from CRAN
# library(FIESTA) ## available from CRAN
# modeling
library(randomForest)
library(Metrics)
library(RCSF) # for the cloth simulation filter (csf) to classify points
library(brms) # bayesian modelling using STAN engine
# parallel computing
# library(ini) # Parse simple '.ini' configuration files to an structured list
library(parallel) # parallel
library(doParallel)
library(foreach) # facilitates parallelization by lapply'ing %dopar% on for loop
# visualization
library(mapview) # interactive html maps
library(stars) # convert rasters to stars objects for mapview (used below via stars::)
library(plot3D) # 3d visualization
library(rgl) # 3d visualization
# others
library(tools)
Parameters to be set by the user
# !!!!!!!!!!!!!!!!!!!!!!! USER-DEFINED PARAMETERS !!!!!!!!!!!!!!!!!!!!!!! #
###____________________###
### Set directory for outputs ###
###____________________###
# rootdir = "../data"
rootdir = "../data"
###_________________________###
### Set input las directory ###
###_________________________###
# !!!!!!!!!! ENSURE FILES ARE PROJECTED IN A CRS THAT USES METRE AS THE MEASUREMENT UNIT
input_las_dir = "../data/las_raw"
###_________________________###
### Set input TreeMap directory ###
###_________________________###
input_treemap_dir = "../data/treemap"
###__________________________________________________________###
### Set parameters for tiling, buffering, and tree detection ###
###__________________________________________________________###
### Set the resolution of the tile grid in metres
desired_tile_res = 50
### Set the desired buffer size in metres
desired_buffer_size = 10
###__________________________________________________________###
### Set parameters for the Canopy Height Model (CHM) ###
###__________________________________________________________###
### Set the maximum height for the canopy height model
max_height_threshold = 60
### Set the desired raster resolution in metres for the canopy height model
desired_chm_res = 0.25
### Set the maximum height for the stem-only canopy height model
max_stem_height_threshold = 60
### Set the window size for top down tree detection in metres
window_size = 3.5
### Set the minimum height (m) for individual tree detection in `lidR::locate_trees`
minimum_tree_height = 2
### Set the maximum dbh size (meters)
dbh_max_size_m = 1.5
# !!!!!!!!!!!!!!!!!!!!!!! USER-DEFINED PARAMETERS !!!!!!!!!!!!!!!!!!!!!!! #
Use the user-defined directories to create the output file structure.
### Function to generate nested project directories
create_project_structure = function(rootdir,input_las_dir){
###______________________________________________###
### Set output las directory and sub directories ###
###______________________________________________###
output_dir = file.path(rootdir, "02_processed_data")
### Set output directory for storing lasgrid object
las_grid_dir = file.path(output_dir, "00_grid_dir")
### Set output directory for las tiles
las_tile_dir = file.path(output_dir, "01_tiled_las")
### Set output directory for las ground tiles
las_ground_tile_dir = file.path(output_dir, "02_tiled_ground_las")
### Set output directory for normalized tiles
las_norm_tile_dir = file.path(output_dir, "03_tiled_normalized_las")
### Set output directory for the las stem files
las_stem_dir = file.path(output_dir, "04_las_stem_dir")
### Set output directory for stem polygon tiles
stem_poly_tile_dir = file.path(output_dir, "05_stem_polygon_dir")
### Set output directory for storing intermediate spatial files
working_spatial_dir = file.path(output_dir, "06_working_spatial_dir")
### Set output directory for cropped tree files
las_tree_dir = file.path(output_dir, "07_las_tree_dir")
###___________________________________________________###
### Set output delivery directory and sub directories ###
###___________________________________________________###
delivery_dir = file.path(rootdir, "03_delivery")
### Set output delivery directory for stats files
delivery_stats_dir = file.path(delivery_dir, "01_processing_stats")
### Set output delivery directory for spatial files
delivery_spatial_dir = file.path(delivery_dir, "02_spatial_files")
### Set output delivery directory for point cloud files
delivery_las_dir = file.path(delivery_dir, "03_las_files")
### Create the directories (parent output directory first so sub directories can be created)
dir.create(output_dir, showWarnings = FALSE)
dir.create(las_grid_dir, showWarnings = FALSE)
dir.create(las_tile_dir, showWarnings = FALSE)
dir.create(las_ground_tile_dir, showWarnings = FALSE)
dir.create(las_norm_tile_dir, showWarnings = FALSE)
dir.create(las_stem_dir, showWarnings = FALSE)
dir.create(stem_poly_tile_dir, showWarnings = FALSE)
dir.create(working_spatial_dir, showWarnings = FALSE)
dir.create(las_tree_dir, showWarnings = FALSE)
dir.create(delivery_dir, showWarnings = FALSE)
dir.create(delivery_stats_dir, showWarnings = FALSE)
dir.create(delivery_spatial_dir, showWarnings = FALSE)
dir.create(delivery_las_dir, showWarnings = FALSE)
###______________________________###
### Set names of the directories ###
###______________________________###
names(rootdir) = "rootdir"
names(input_las_dir) = "input_las_dir"
names(output_dir) = "output_dir"
names(las_grid_dir) = "las_grid_dir"
names(las_tile_dir) = "las_tile_dir"
names(las_ground_tile_dir) = "las_ground_tile_dir"
names(las_norm_tile_dir) = "las_norm_tile_dir"
names(las_stem_dir) = "las_stem_dir"
names(stem_poly_tile_dir) = "stem_poly_tile_dir"
names(working_spatial_dir) = "working_spatial_dir"
names(las_tree_dir) = "las_tree_dir"
names(delivery_dir) = "delivery_dir"
names(delivery_stats_dir) = "delivery_stats_dir"
names(delivery_spatial_dir) = "delivery_spatial_dir"
names(delivery_las_dir) = "delivery_las_dir"
###______________________________###
### Append to output config list ###
###______________________________###
config = cbind(rootdir, input_las_dir, output_dir, las_grid_dir, las_tile_dir, las_ground_tile_dir,
las_norm_tile_dir, las_stem_dir, stem_poly_tile_dir, working_spatial_dir,
las_tree_dir,delivery_dir, delivery_stats_dir, delivery_spatial_dir, delivery_las_dir)
config = as.data.frame(config)
#config
### Return config
return(config)
}
config = create_project_structure(rootdir, input_las_dir)
Function to read in available `las` or `laz` data.
### Function to read in available las data within input directory
read_las_as_ctg = function(config){
### Get the input directory read in
ctg = lidR::readLAScatalog(config$input_las_dir)
# print(ctg)
# print(st_crs(ctg))
return(ctg)
}
### Read in the las files, check the number of points and CRS
las_ctg = read_las_as_ctg(config)
las_ctg
class       : LAScatalog (v1.2 format 2)
extent      : 610551.7, 610751.7, 4888774, 4888974 (xmin, xmax, ymin, ymax)
coord. ref. : NAD83 / UTM zone 13N
area        : 39999.6 m²
points      : 20.48 million points
density     : 512 points/m²
density     : 512 pulses/m²
num. files  : 1
if(
lidR::st_crs(las_ctg, parameters = TRUE)$units_gdal != "metre"
){ stop("las file not projected in coordinate system that uses metre as measurement unit")}
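If the check above fails, one option is to reproject the input files to a metre-based CRS before re-running the workflow. A minimal sketch, assuming lidR's `st_transform` method for LAS objects (lidR >= 4.0) and a hypothetical target EPSG code:
## hedged sketch: reproject each input file to a metre-based CRS
target_epsg = 26913 # NAD83 / UTM zone 13N -- substitute your own metre-based CRS
for(f in list.files(config$input_las_dir, pattern = "\\.(las|laz)$", full.names = TRUE)){
las_tmp = lidR::readLAS(f)
las_tmp = sf::st_transform(las_tmp, sf::st_crs(target_epsg))
lidR::writeLAS(las_tmp, f) # overwrites in place; keep a backup copy first
}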
Map point cloud extent
# map of point cloud extent
# mapview
mapview::mapviewOptions(
homebutton = FALSE
, basemaps = c("Esri.WorldImagery","OpenStreetMap")
)
mapview::mapview(las_ctg@data$geometry, color = "black", col.regions = "blue", lwd = 3, alpha.regions = 0.3, legend = F, label = F)
Make a raster grid covering the point cloud extent with the desired tile resolution in metres. This grid is used to split the point cloud into smaller `laz` tiles that subsequent processing loops over (see the `new_tile_las_to_grid_foreach` function).
### Function to generate a tile grid over the extent of the input las at the desired resolution
generate_grid_over_extent = function(las_ctg, desired_tile_res){
### Pull the las extent geometry
las_ctg_geom = sf::st_as_sf(las_ctg@data$geometry)
### Make a grid with the desired tile size
# cellsize units determined by projection
grid = sf::st_make_grid(las_ctg_geom, cellsize = desired_tile_res)
grid = sf::st_as_sf(grid)
### Create grid ID
grid_id = 1:nrow(grid)
grid = cbind(grid, grid_id)
message("Output grid has ", nrow(grid), " tiles ...")
return(grid)
}
Call the function to make the grid and save it to disk as a `parquet` file, which allows for fast read/write. A key goal of the `sfarrow` package is to support interoperability of spatial data files between R and Python through the use of standardized metadata.
G. Woolsey notes: this buffer is buffering each tile instead of buffering the extent and then creating the grid… is that what it is supposed to do for processing?
### Generate the grid
las_grid = generate_grid_over_extent(las_ctg, desired_tile_res)
# this buffer is buffering each cell instead of buffering the extent and then creating the grid
las_grid_buff = sf::st_buffer(las_grid, desired_buffer_size)
###____________________________###
### Write the grid to the disk ###
###____________________________###
sfarrow::st_write_parquet(
las_grid
, dsn = paste0(
config$las_grid_dir
, "/"
, "project_grid.parquet"
)
)
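As a quick sketch of the fast round-trip noted above, the grid just written can be read back from disk (e.g. to resume a later session without regenerating it); the same file is readable in Python via geopandas.read_parquet().
## sketch: read the project grid back from parquet
las_grid_check = sfarrow::st_read_parquet(
file.path(config$las_grid_dir, "project_grid.parquet")
)
nrow(las_grid_check) == nrow(las_grid) # expect TRUE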
Map grid and point cloud extent
mapview::mapview(
las_ctg@data$geometry
, color = "black"
, col.regions = "blue"
, lwd = 3, alpha.regions = 0.3, legend = F, label = F
) +
mapview::mapview(las_grid_buff, color = "gray55", alpha.regions = 0, lwd = 2, legend = F, label = F, hide = F) +
mapview::mapview(las_grid, color = "gray22", alpha.regions = 0, lwd = 1, legend = F, label = F, hide = F)
Spatially index the point cloud to enable more efficient point cloud querying. From The `lidR` Package Book:
Spatial indexing is a key feature for performing spatial queries over a large point cloud. Any search for points of interest in the absence of indexing would require a “sequential scan” of every point - this could take a lot of time. In brief, spatial indexing organizes data into a search structure that can be quickly traversed to find specific records. Some algorithms would take unreasonable amounts of time to complete without spatial indexing.
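As a small illustration of the benefit, a spatial query such as clipping a disc from the catalog only scans the indexed blocks it overlaps once `.lax` files exist. A minimal sketch (the query location below is hypothetical, taken at the catalog centroid):
## hedged sketch: a point-of-interest query that benefits from .lax indexing
ctr = sf::st_coordinates(sf::st_centroid(sf::st_union(las_ctg@data$geometry)))
roi = lidR::clip_circle(las_ctg, xcenter = ctr[1], ycenter = ctr[2], radius = 10)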
### Function to generate .lax index files for input directory path
create_lax_for_tiles = function(desired_las_dir){
## desired_las_dir = config$input_las_dir
###__________________________________________###
### Create a lax index file for the las file ###
###__________________________________________###
message(paste0("Initializing .lax indexing for ", desired_las_dir, " ... "))
las_list = list.files(desired_las_dir, pattern = "\\.las$")
laz_list = list.files(desired_las_dir, pattern = "\\.laz$")
lidar_list = append(las_list, laz_list)
# lidar_list
message("Indexing ", length(lidar_list), " las files ... ")
start_time = Sys.time()
# configure parallel
cores = parallel::detectCores()
cluster = parallel::makeCluster(cores)
# register the parallel backend with the `foreach` package
doParallel::registerDoParallel(cluster)
# pass to foreach to process each lidar file in parallel
foreach::foreach(i = 1:length(lidar_list)) %dopar% {
### Get the desired file
des_file = lidar_list[i]
# des_file
### Compile the .lax file name
des_file_lax = tools::file_path_sans_ext(des_file)
des_file_lax = paste0(des_file_lax, ".lax")
des_file_lax_path = paste0(desired_las_dir, "/", des_file_lax)
# des_file_lax_path
### See if the .lax version exists in the input directory
does_file_exist = file.exists(des_file_lax_path)
# does_file_exist
### If the file exists, do nothing
if(does_file_exist == TRUE){return(NULL)}
### If the file doesn't exist, create a .lax index
if(does_file_exist == FALSE){
### Append the directory path to the las file
path = paste0(desired_las_dir, "/", des_file)
### Write index
rlas::writelax(path)
}
}
parallel::stopCluster(cluster)
end_time = Sys.time()
total_time = difftime(end_time, start_time, units = c("mins"))
message("Total lax index time took ", total_time, " minutes ... ")
}
Create the spatial index `lax` file for the raw input point cloud.
# call the function
create_lax_for_tiles(config$input_las_dir)
This function writes an individual point cloud `laz` file for each grid tile to disk in the “../data/02_processed_data/01_tiled_las” directory. Each grid tile is buffered by the user-defined buffer and the raw point cloud is cropped to that extent. The purpose is to reduce the memory footprint and processing time by looping over small tiles rather than the full point cloud.
G. Woolsey notes: is there potential for individual trees to either be i) double-counted if a crown spans multiple grid tiles; or ii) excluded when recombining tiles and removing duplicates? Not sure what the recombine process looks like yet.
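One hedged guard against the double-counting raised in the note above: after recombining per-tile detections, collapse stem locations that fall within a small distance tolerance of one another. A rough sketch, assuming a hypothetical `all_trees` sf point layer of recombined stem locations:
## hedged sketch: merge near-duplicate stem detections across tile seams
## (approximate -- groups by smallest neighbor index, not true clustering)
dedupe_stems = function(all_trees, tol = 1){
hits = sf::st_is_within_distance(all_trees, dist = tol) # neighbors within tol metres
cluster_id = sapply(hits, min) # each point keyed to its lowest-index neighbor
all_trees[!duplicated(cluster_id), ] # keep one representative per group
}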
### New function to tile the raw point cloud to the desired grid - with buffers
new_tile_las_to_grid_foreach = function(
config
, desired_buffer_size
, my_las_grid = las_grid # generate_grid_over_extent(las_ctg, desired_tile_res)
, my_las_ctg = las_ctg # lidR::readLAScatalog(config$input_las_dir)
){
# define files
las_grid = my_las_grid
las_ctg = my_las_ctg
# start message
message(" --- Starting grid tiling algorithim --- ")
master_start = Sys.time()
# ###_____________________________###
# ### Read in the las grid object ###
# ###_____________________________###
# ## !!!!!!! the grid is already in memory, no need to re-read
# message("Reading in project grid ... ")
# ## read parquet grid created with generate_grid_over_extent function above
# grid_fpth_temp = paste0(
# config$las_grid_dir
# , "/"
# , "project_grid.parquet"
# )
# las_grid = sfarrow::st_read_parquet(grid_fpth_temp)
# ###_____________________________###
# ### Read in the lascatalog ###
# ###_____________________________###
# ## !!!!!!! the lascatalog is already in memory, no need to re-read
# message("Reading in the las as a lascatalog for indexing ... ")
# las_ctg = lidR::readLAScatalog(config$input_las_dir)
###____________________________________________###
### Loop through grid tiles and tile the input las object ###
###____________________________________________###
start_time = Sys.time()
## !!!!!! TURNED OFF PARALLEL B/C IT KILLS STUFF
# # set up parallel processing
# cores = parallel::detectCores()
# message("Registering parallel session on ", cores-2, " cores ... ")
# cluster = parallel::makeCluster(cores-2)
# doParallel::registerDoParallel(cluster)
## !!!!!! TURNED OFF PARALLEL B/C IT KILLS STUFF
# message("Starting NON-parallel ;D grid tiling at ", start_time)
message("Creating tiles for ", nrow(las_grid), " grids ... ")
## loop over grid tiles but NOT in parallel ftw
for(i in 1:nrow(las_grid)) {
# for(i in 1:4) {
# message("Creating laz for grid number ", i, " ... ")
## !!!!!! TURNED OFF PARALLEL B/C IT KILLS STUFF
# loop over grid tiles but in parallel ftw
# foreach::foreach(
# i=1:nrow(las_grid)
# , .packages = c("lidR", "sf")
# , .inorder = FALSE
# , .errorhandling = "remove"
# ) %dopar% {
## !!!!!! TURNED OFF PARALLEL B/C IT KILLS STUFF
### Get the desired grid
des_grid_cell = las_grid[i,]
# des_grid_cell
### Does file exist
file_to_generate = paste0(config$las_tile_dir, "/", i, ".laz")
does_file_exist = file.exists(file_to_generate)
# does_file_exist
### If file has been generated already, skip
if(does_file_exist == TRUE){
message("laz for grid number ", i, " already exists guy ... ")
}
### If file has not been generated already, try to generate
if(does_file_exist == FALSE){
### Buffer the grid cell to the desired buffer size
des_grid_cell_buff = sf::st_buffer(des_grid_cell, desired_buffer_size)
### Crop the las file from the catalog for the grid cell extent
las_clipped = suppressWarnings(lidR::clip_roi(las_ctg, des_grid_cell_buff))
#st_crs(las) = st_crs(las_grid)
### Check if the point cloud is empty
is_cloud_empty = suppressWarnings(lidR::is.empty(las_clipped))
# is_cloud_empty
### If is_cloud_empty == TRUE, return NULL
if(is_cloud_empty == TRUE){
message("laz for grid number ", i, " is empty so i skipped it ... ")
}
### If is_cloud_empty = FALSE, write to disk
if(is_cloud_empty == FALSE){
### Write the cropped las to the disk
out_name = paste0(
config$las_tile_dir
, "/"
, des_grid_cell$grid_id
, ".laz"
)
suppressWarnings(lidR::writeLAS(las_clipped, out_name))
## return(NULL) # this kills it or something
# message("Hey i wrote a laz for grid number ", i, " ... ")
} # is_cloud_empty
} # does_file_exist
} # end loop over grid tiles
## !!!!!! TURNED OFF PARALLEL B/C IT KILLS STUFF
# parallel::stopCluster(cluster)
## !!!!!! TURNED OFF PARALLEL B/C IT KILLS STUFF
end_time = Sys.time()
total_time = difftime(end_time, start_time, units = c("mins"))
message("Total grid tiling time took ", total_time, " minutes ... ")
### Get some stats for the tiling run
tiled_ctg = lidR::readLAScatalog(config$las_tile_dir)
tiled_ctg_geom = tiled_ctg$geometry
tiled_ctg_geom = sf::st_as_sf(tiled_ctg_geom)
las_area_m2 = sum(sf::st_area(tiled_ctg_geom))
las_area_acres = las_area_m2/4047
processing_time_mins = total_time
number_starting_grids = nrow(las_grid)
number_ending_grids = nrow(tiled_ctg_geom)
### Combine the output stats dataframe and write to the disk
stats_output = data.frame(
number_starting_grids, number_ending_grids, las_area_m2
, las_area_acres, processing_time_mins
)
write.csv(
stats_output
, file = paste0(config$delivery_stats_dir,"/","01_grid_tiling_stats.csv")
)
### Get a list of tiles that processed
processed_tile_list = list.files(config$las_tile_dir)
message(length(processed_tile_list), " files were generated ... ")
message(length(processed_tile_list)/nrow(las_grid) *100, " percent of tile grid occupied with spatial points ... ")
return(tiled_ctg_geom)
}
Call the function to make the tiled `laz` files and write them to disk. Also, call the function to create the `lax` spatial index file for each tiled grid file.
tiled_grid = new_tile_las_to_grid_foreach(
config = config
, desired_buffer_size = desired_buffer_size
, my_las_grid = las_grid
, my_las_ctg = las_ctg
)
create_lax_for_tiles(config$las_tile_dir)
Map the tiled grid extent and the raw point cloud extent
mapview::mapview(
las_ctg@data$geometry
, color = "black"
, col.regions = "blue"
, lwd = 3, alpha.regions = 0.3, legend = F, label = F
) +
mapview::mapview(tiled_grid, color = "gray55", alpha.regions = 0, lwd = 2, legend = F, label = F, hide = F)
Classify the point cloud points as ground or non-ground, then use this classification to height normalize the tiled `laz` files.
Points are classified using the `lidR::classify_ground` function with the option `algorithm = csf(rigidness = 1, sloop_smooth = TRUE)`. The `csf` ground classification algorithm implements segmentation of ground points based on a “Cloth Simulation Filter” (CSF). This method is a strict implementation of the CSF algorithm by Zhang et al. (2016) that relies on the authors’ original source code, written and exposed to R via the `RCSF` package. The `rigidness` of the cloth set to the default 1 stands for very soft (to fit rugged terrain), 2 stands for medium, and 3 stands for hard cloth (for flat terrain). When steep slopes exist, setting the `sloop_smooth` parameter to `TRUE` reduces errors during post-processing.
The points are normalized using the `lidR::normalize_height` function with the option `algorithm = knnidw()`. By default the terrain is computed using ground points (class 2) and water points (class 9), which requires the points to be classified before normalizing the height. The `knnidw` algorithm implements spatial interpolation using a k-nearest neighbour (KNN) approach with inverse-distance weighting (IDW). An alternative height normalization process uses a separate digital terrain model (DTM) raster via the `dtm` option. When a DTM raster is provided, the DTM is used in place of ground points. This is different than providing a DTM in `algorithm`. If `algorithm = dtm` the DTM is subtracted naively. If `algorithm = tin()` and `dtm = raster`, the ground points are not used and the DTM is interpolated as if it were made of regularly-spaced ground points.
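A minimal sketch of the normalization routes described above, assuming `las` is a ground-classified LAS object in memory and `dtm` is an existing terrain SpatRaster:
## normalize from classified ground points (the approach used in this workflow)
las_norm = lidR::normalize_height(las, algorithm = knnidw())
## hybrid: interpolate a provided DTM as if it were regularly-spaced ground points
las_norm_hybrid = lidR::normalize_height(las, algorithm = tin(), dtm = dtm)
## naive subtraction of the DTM raster alone
las_norm_naive = lidR::normalize_height(las, algorithm = dtm)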
After height normalizing the points, non-ground points that are below the ground surface are removed using `lidR::filter_poi(las, Z > -0.05)` and high-elevation outlier points are removed using `lidR::filter_poi(las, Z < quantile(las@data$Z, 0.99999))`.
An alternative method to detect and remove outliers is available via the `lidR::classify_noise` function. For example:
# Using IVF
las <- lidR::classify_noise(las, ivf(res=5, n=2))
#plot(las, color = "Classification")
# Remove outliers using filter_poi()
las_denoise <- lidR::filter_poi(las, Classification != LASNOISE)
Here, `ivf` implements an algorithm for outlier (noise) segmentation based on an isolated voxels filter (IVF). The algorithm finds points that have only a few other points in their surrounding 3 × 3 × 3 = 27 voxels.
G. Woolsey notes: the point classification and height normalization procedures use only data within the bounds of each individual tile (+ buffer). How does this impact: i) classification of ground (e.g. in low point density tiles or tiles with limited canopy penetration) and the resulting normalization; and ii) the detection of outlier points based on normalized height within the tile versus across the entire point cloud extent? Also, should outliers (noise) be detected and removed prior to applying the ground classification (currently noise removal is applied afterward, at the individual grid tile level)?
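A minimal sketch of the alternative ordering raised in the note (flag and drop noise before ground classification), assuming a `las_tile` LAS object in memory:
## hedged sketch: denoise first, then classify ground
las_tile = lidR::classify_noise(las_tile, algorithm = ivf(res = 5, n = 2))
las_tile = lidR::filter_poi(las_tile, Classification != lidR::LASNOISE)
las_tile = lidR::classify_ground(las_tile, algorithm = csf(rigidness = 1, sloop_smooth = TRUE))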
### Function to classify ground and height normalize las tiles
classify_ground_normalize = function(
config = config
, my_las_grid = las_grid # generate_grid_over_extent(las_ctg, desired_tile_res)
, my_las_ctg = las_ctg # lidR::readLAScatalog(config$input_las_dir)
, want_to_classify_ground = T
){
# define files passed to function
las_grid = my_las_grid
las_ctg = my_las_ctg
###_______________________###
### Configure directories ###
###_______________________###
message(" --- Initializing ground classification and normalization algorithm --- ")
master_start = Sys.time()
###_____________________________###
### Read in the las grid object ###
###_____________________________###
## !!!! THIS IS ALREADY IN MEMORY, NO NEED TO RE-READ
# message("Reading in project grid ... ")
# las_grid = sfarrow::st_read_parquet(paste0(
# config$las_grid_dir
# , "/"
# , "project_grid.parquet"
# ))
#
# message("Reading in the las as a lascatalog for indexing ... ")
# las_ctg = lidR::readALSLAScatalog(config$input_las_dir)
###________________________________________###
### Next, classify ground across the tiles ###
###________________________________________###
### Get a list of tiled files to ground classify
las_list = list.files(config$las_tile_dir, pattern = "\\.las$")
laz_list = list.files(config$las_tile_dir, pattern = "\\.laz$")
lidar_list = append(las_list, laz_list)
message("Classifying ground and height normalizing ", length(lidar_list), " tiles ... ")
# lidar_list
###______________________________________________________________________________________###
### In parallel, classify ground, and height normalize across the tiles and rewrite them ###
###______________________________________________________________________________________###
start_time = Sys.time()
## !!!!!! TURNED OFF PARALLEL B/C IT KILLS STUFF
# set up parallel processing
# cores = parallel::detectCores()
# message("Registering parallel session on ", cores-1, " cores ... ")
# cluster = parallel::makeCluster(cores-1)
# doParallel::registerDoParallel(cluster)
## !!!!!! TURNED OFF PARALLEL B/C IT KILLS STUFF
message("Starting NON-parallel classification/normalization ... ")
# loop over each tiled las file to classify and normalize
for(i in 1:length(lidar_list)) {
# for(i in 1:4) {
## !!!!!! TURNED OFF PARALLEL B/C IT KILLS STUFF
# foreach(
# # i = 1:length(lidar_list)
# , .packages = c("lidR", "sf")
# , .inorder = FALSE
# , .errorhandling = "remove"
# ) %dopar% {
## !!!!!! TURNED OFF PARALLEL B/C IT KILLS STUFF
### Get the desired lidar tile
des_tile_name = lidar_list[i]
# des_tile_name
### Has the file been generated already?
des_tile_to_check = paste0(config$las_norm_tile_dir, "/", des_tile_name)
does_file_exist = file.exists(des_tile_to_check)
# does_file_exist
### If file exists, skip
if(does_file_exist == TRUE){
message("normalized tile ", des_tile_to_check, " exists so skipped it ... ")
}
### If the file does not exist, classify and height normalize
if(does_file_exist == FALSE){
### Get the matching grid polygon
las_grid_id = tools::file_path_sans_ext(des_tile_name)
matching_grid_cell = las_grid[las_grid$grid_id == las_grid_id,]
# matching_grid_cell
### Read in the lidar tile
las_tile = lidR::readALSLAS(paste0(
config$las_tile_dir
, "/"
, des_tile_name
))
#st_crs(las_tile) = st_crs(las_grid)
### Drop duplicated points
las_tile = lidR::filter_duplicates(las_tile)
### Use a single lidR thread per tile for the height normalization
lidR::set_lidr_threads(1)
### Classify ground points
if(want_to_classify_ground == TRUE){
las_tile = lidR::classify_ground(
las_tile
, algorithm = csf(rigidness = 1, sloop_smooth = TRUE)
)
}
### Pull out the ground points
ground = lidR::filter_poi(las_tile, Classification == 2)
### Check if ground is empty, if not, write to disk
ground_is_empty = lidR::is.empty(ground)
# ground_is_empty
if(ground_is_empty == FALSE){
lidR::writeLAS(
ground
, file = paste0(
config$las_ground_tile_dir
, "/"
, des_tile_name
)
)
}
### Height normalize the file
las_tile = lidR::normalize_height(las_tile, algorithm = knnidw())
### Remove points below 0.05
las_tile = lidR::filter_poi(las_tile, Z > -0.05)
### Remove high outlier points
height_filter = quantile(las_tile@data$Z, 0.99999)
las_tile = lidR::filter_poi(las_tile, Z < height_filter)
### Set crs
#desired_crs = sf::st_crs(las_grid)
#lidR::st_crs(las_tile) = desired_crs
#las_tile
### Crop the las to the original grid polygon
las_tile = lidR::clip_roi(las_tile, matching_grid_cell)
### Is las null?
check = is.null(las_tile)
### If las is null, return NULL
if(check == TRUE){
message("normalized laz for grid number ", i, " is null ... ")
}
### Is the las file empty
is_las_empty = lidR::is.empty(las_tile)
# is_las_empty
### If las is empty, return Null
if(is_las_empty == TRUE){
message("normalized laz for grid number ", i, " is empty (void of pts) ... ")
}
### If las isn't empty, write the las to the disk
if(is_las_empty == FALSE){
### Overwrite the existing file
lidR::writeLAS(
las_tile
, file = paste0(
config$las_norm_tile_dir
, "/"
, des_tile_name
)
)
message("normalized laz for grid number ", i, " completed successfully ... ")
}
}
}
## !!!!!! TURNED OFF PARALLEL B/C IT KILLS STUFF
# parallel::stopCluster(cluster)
## !!!!!! TURNED OFF PARALLEL B/C IT KILLS STUFF
end_time = Sys.time()
total_time = difftime(end_time, start_time, units = c("mins"))
message("Total ground classification and height normalization complete in ", total_time, " minutes ... ")
###________________________________________###
### Get a catalog of the normalized points ###
###________________________________________###
message("Reading in the normalized las file as a catalog ... ")
ctg = lidR::readALSLAScatalog(config$las_norm_tile_dir)
ctg_geom = ctg$geometry
num_points_list = ctg$Number.of.point.records
num_points_list = unlist(num_points_list)
num_points_list = as.data.frame(num_points_list)
number_points = sum(num_points_list$num_points_list)
# number_points
### Get the area of the dataset in acres
catalog_area = sf::st_union(ctg@data$geometry)
catalog_area = sf::st_area(catalog_area)
catalog_area = as.numeric(catalog_area)
las_area_m2 = catalog_area
las_area_acres = catalog_area/4047
# las_area_acres
### Get a list of processed files
normalized_file_list = list.files(config$las_norm_tile_dir, pattern = ".laz")
message(length(normalized_file_list), " lidar files height normalized ... ")
difference = length(lidar_list) - length(normalized_file_list)
message(difference, " files failed height normalization ...")
###_______________________________###
### Get the total processing time ###
###_______________________________###
master_end = Sys.time()
total_master_time = difftime(master_end, master_start, units = c("mins"))
###____________________________________________###
### Write some stats to the delivery directory ###
###____________________________________________###
processing_time_mins = total_master_time
stats_output = data.frame(number_points, las_area_m2, las_area_acres, processing_time_mins)
write.csv(stats_output, file = paste0(
config$delivery_stats_dir
, "/"
, "02_point_cloud_classification_normalization_stats.csv"
))
message(" --- Total ground classification and height normalization took ", total_master_time, " minutes --- ")
return(ctg_geom)
}
Call the function to classify ground/non-ground points, height normalize the tiled point clouds, and write to disk. Also, call the function to create the `lax` spatial index file for each normalized tile grid.
height_norm_grid = classify_ground_normalize(
config = config
, my_las_grid = las_grid # generate_grid_over_extent(las_ctg, desired_tile_res)
, my_las_ctg = las_ctg # lidR::readLAScatalog(config$input_las_dir)
, want_to_classify_ground = T
)
create_lax_for_tiles(config$las_ground_tile_dir)
create_lax_for_tiles(config$las_norm_tile_dir)
Map the classified and normalized grid tiles
mapview::mapview(
las_ctg@data$geometry
, color = "black"
, col.regions = "blue"
, lwd = 3, alpha.regions = 0.3, legend = F, label = F
) +
mapview::mapview(height_norm_grid, color = "gray55", alpha.regions = 0, lwd = 2, legend = F, label = F, hide = F)
This section uses the point cloud grid tiles with only points classified as “ground” (created in this section) to create a DTM raster. The process loops over each ground point cloud grid tile and creates a DTM raster of user-specified resolution. The specific function used for each tile is `lidR::rasterize_canopy(las_ground_tile, res = 3, algorithm = p2r(), pkg = "terra")`, where the `algorithm = p2r()` option implements digital surface model computation based on a points-to-raster method: for each pixel of the output raster the function attributes the height of the highest point found. If specified, the `subcircle` option of the `p2r` function replaces each point with 8 points around the original one. This allows for virtual ‘emulation’ of the fact that a lidar point is not a point as such, but more realistically a disc. This tweak densifies the point cloud and the resulting canopy model is smoother and contains fewer “pits” and empty pixels. The `pkg = "terra"` option results in a raster that is compatible with the `terra` package.
It is worth noting that the `lidR` package includes a `rasterize_terrain` function which interpolates the ground points (in a classified point cloud that has not been normalized) and creates a rasterized digital terrain model. The algorithm uses the points classified as “ground” and “water” (Classification = 2 and 9, respectively) to compute the interpolation. How well the edges of the dataset are interpolated depends on the interpolation method used. A buffer around the region of interest is always recommended to avoid edge effects. This function can be implemented via:
dtm1 = lidR::rasterize_terrain(las, algorithm = knnidw(k = 6L, p = 2))
dtm2 = lidR::rasterize_terrain(las, algorithm = tin())
dtm3 = lidR::rasterize_terrain(las, algorithm = kriging(k = 10L))
# plot results
plot(dtm1, col = gray(0:25/25))
plot(dtm2, col = gray(0:25/25))
plot(dtm3, col = gray(0:25/25))
plot_dtm3d(dtm1)
plot_dtm3d(dtm2)
plot_dtm3d(dtm3)
G. Woolsey notes: none of the interpolation algorithms available in `lidR::rasterize_terrain` use single-point methods to create the DTM. As written, this program uses `lidR::rasterize_canopy` with the `p2r` algorithm set to attribute the height of the highest point found to the value of the raster tile. This could result in i) empty cells; and/or ii) a less smooth DTM which is influenced by artifact points (especially since the current outlier removal is applied only to the non-ground points after the classification algorithm is applied). Furthermore, as a DTM is created for each tile separately, there is potential for edge effects to be amplified across the study extent. Calculating the DTM for each tile separately excludes valid data which would otherwise be available to the algorithm if processed altogether. A potential solution is to continue processing by grid tiles with a buffer that overlaps neighboring tiles, use a multi-point interpolation algorithm on each buffered tile, and then combine the grid tile rasters via `terra::mosaic` where values in overlapping cells are averaged. Or, process as a single point cloud over the entire study extent.
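A minimal sketch of that mosaic approach, under the assumption that the ground tiles on disk retain their buffers (they are written before each tile is cropped back to its grid cell above):
## hedged sketch: interpolate a DTM per buffered ground tile, then average
## overlapping cells across tile seams with terra::mosaic
tile_paths = list.files(config$las_ground_tile_dir, pattern = "\\.(laz|las)$", full.names = TRUE)
dtm_list = lapply(tile_paths, function(p){
g = lidR::readLAS(p, select = "xyzc")
lidR::rasterize_terrain(g, res = 3, algorithm = knnidw(k = 6L, p = 2))
})
dtm_mosaic = terra::mosaic(terra::sprc(dtm_list), fun = "mean")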
# GW added
# create function to read a ground las tile, rasterize it, and return a data frame
las2dtm2df <- function(las_path_name, des_dtm_res = 3) {
### Read in the file
las_ground_tile = lidR::readLAS(las_path_name, select = "xyzc")
### Create a DTM from the ground-point tile
dtm_tile = lidR::rasterize_canopy(
las_ground_tile
, res = des_dtm_res
, algorithm = p2r()
, pkg = "terra"
)
### Get the DTM points as a data frame
dtm_points = as.data.frame(dtm_tile, xy = T) %>%
dplyr::rename_with(toupper)
### Return the points to the function
return(dtm_points)
}
## map files over the function
# list.files(config$las_ground_tile_dir, pattern = ".*\\.(laz|las)$", full.names = T) %>%
# .[2:3] %>%
# purrr::map(las2dtm2df) %>%
# dplyr::bind_rows() %>%
# str()
### Function to get DTM from ground classified tiles
rasterize_tiles_to_dtm = function(
config
, my_des_dtm_res = 3 #in meters
, calculate_extent = TRUE
, my_las_grid = las_grid # generate_grid_over_extent(las_ctg, desired_tile_res)
, my_las_ctg = las_ctg # lidR::readLAScatalog(config$input_las_dir)
){
# define files
las_grid = my_las_grid
las_ctg = my_las_ctg
message(" --- Initializing digitial elevation model rasterization --- ")
master_start = Sys.time()
# ###_____________________________###
# ### Read in the las grid object ###
# ###_____________________________###
# ## !!!!!!! the grid is already in memory, no need to re-read
# message("Reading in project grid ... ")
# las_grid = sfarrow::st_read_parquet(paste0(
# config$las_grid_dir
# , "/"
# , "project_grid.parquet")
# )
#
# message("Reading in the las as a lascatalog for indexing ... ")
# las_ctg = lidR::readLAScatalog(config$input_las_dir)
# las_ctg
###_______________________________________________________________________________###
### Get a list of classified ground tiles to generate DTM raster files ###
###_______________________________________________________________________________###
### Loop through and generate DTM data.frame for each tile and then bind_rows
start_time = Sys.time()
# file list
tile_list = list.files(config$las_ground_tile_dir, pattern = ".*\\.(laz|las)$", full.names = T)
# map las2dtm2df function
merged_dtm_points = tile_list %>%
purrr::map(las2dtm2df, des_dtm_res = my_des_dtm_res) %>%
# .[2:3] %>%
dplyr::bind_rows()
###_____________________________________###
### Make a las file from the DTM points ###
###_____________________________________###
message("Generating the master lidar file ... ")
las = lidR::LAS(merged_dtm_points)
sf::st_crs(las) = sf::st_crs(las_grid)
# las
### Write cleaned las to the disk
message("Writing the master lidar file to the disk ... ")
lidR::writeLAS(las, file = paste0(
config$delivery_las_dir
, "/"
, "master_dem_point_cloud.laz"
))
las_area_m2 = lidR::area(las)
las_area_acres = las_area_m2/4047
message("Total merged DEM area of ", las_area_acres, " acres .... ")
###____________________________________###
### Generate a DEM from the DTM points ###
###____________________________________###
message("Generating the master DEM file ... ")
master_dem = lidR::rasterize_canopy(las, res = my_des_dtm_res, algorithm = p2r())
# master_dem
###_____________________________________###
### Write the master raster to the disk ###
###_____________________________________###
message("Writing the master DEM file to the disk ... ")
outname = paste0(
config$delivery_spatial_dir
, "/"
, "master_dem_raster_", my_des_dtm_res, "m.tif"
)
terra::writeRaster(master_dem, outname, overwrite = TRUE)
###_______________________________________###
### Get a polygon of the full file extent ###
###_______________________________________###
if(calculate_extent == TRUE){
message("Getting the extent of the DEM file ... ")
extent_raster = terra::clamp(master_dem, 1, 1, 1)
extent_poly = terra::as.polygons(extent_raster)
extent_poly = terra::simplifyGeom(extent_poly)
extent_poly = terra::fillHoles(extent_poly)
extent_poly = sf::st_as_sf(extent_poly)
extent_poly = st_union(extent_poly)
extent_poly = st_as_sf(extent_poly)
# extent_poly
### Write the poly to the disk
#sf::st_write(extent_poly, dsn = "chm_extent_kml.kml", delete_dsn = TRUE, quiet = TRUE)
sf::st_write(
extent_poly
, dsn = paste0(
config$working_spatial_dir
, "/dem_extent_geopackage.gpkg"
)
, delete_dsn = TRUE
, quiet = TRUE
)
}
###____________________###
### Get the total time ###
###____________________###
end_time = Sys.time()
total_time = difftime(end_time, start_time, units = c("mins"))
message(" --- Total DEM rasterization took ", total_time, " minutes --- ")
}
Call the function to create the DTM
rasterize_tiles_to_dtm(
config = config
, my_des_dtm_res = 3 #in meters
, calculate_extent = F
, my_las_grid = las_grid # generate_grid_over_extent(las_ctg, desired_tile_res)
, my_las_ctg = las_ctg # lidR::readLAScatalog(config$input_las_dir)
)
Map the DTM
terra::rast(paste0(
config$delivery_spatial_dir
, "/master_dem_raster_3m.tif"
)) %>%
stars::st_as_stars() %>%
mapview::mapview(
layer.name = "elev.(m)"
, alpha.regions = 0.6
)
# # uncomment for ggplot
# as.data.frame(xy=T) %>%
# dplyr::rename_with(tolower) %>%
# dplyr::rename(f=3) %>%
# ggplot(mapping = aes(x=x,y=y,fill=f)) +
# geom_tile() +
# scale_fill_viridis_c(na.value = "black") +
# labs(fill = "elevation (m)") +
# theme_void()
This section uses the height normalized point cloud grid tiles (created in this section) to create a CHM raster. The process loops over each normalized point cloud grid tile and creates a Canopy Height Model (CHM) raster of user-specified resolution. The specific function used for each tile is `lidR::rasterize_canopy(las_norm_tile, res = 1, algorithm = p2r(), pkg = "terra")`, where the `algorithm = p2r()` option implements digital surface model computation based on a points-to-raster method: for each pixel of the output raster the function attributes the height of the highest point found. The `subcircle` tweak replaces each point with 8 points around the original one. This allows for virtual ‘emulation’ of the fact that a lidar point is not a point as such, but more realistically a disc. This tweak densifies the point cloud and the resulting canopy model is smoother and contains fewer “pits” and empty pixels. The `pkg = "terra"` option results in a raster that is compatible with the `terra` package.
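A minimal sketch of the `subcircle` tweak, plus lidR's `pitfree()` algorithm as another way to reduce pits, assuming a normalized LAS object `las` in memory:
## hedged sketch: pit-reducing CHM variants
chm_subcircle = lidR::rasterize_canopy(las, res = desired_chm_res, algorithm = p2r(subcircle = 0.15), pkg = "terra")
chm_pitfree = lidR::rasterize_canopy(las, res = desired_chm_res, algorithm = pitfree(thresholds = c(0, 2, 5, 10, 15), subcircle = 0.15))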
G. Woolsey notes: The DTM process and CHM process both use `lidR::rasterize_canopy` with the `p2r` algorithm to attribute the height of the highest point found to the value of the raster cell. The current program uses `lidR::rasterize_canopy` with the `p2r` algorithm to: i) create a raster separately for each tiled `las`; and ii) select the highest point in each raster cell. This could result in i) empty cells; and/or ii) a less smooth DTM which is influenced by artifact points (especially since the current outlier removal is applied only to the non-ground points after the classification algorithm is applied). Furthermore, as a DTM is created for each tile separately, there is potential for edge effects to be amplified across the study extent. Calculating the DTM for each tile separately excludes valid data which would otherwise be available to the algorithm if processed altogether. A potential solution is to continue processing by grid tiles with a buffer that overlaps neighboring tiles, use a multi-point interpolation algorithm (e.g. `knnidw`) on each buffered tile, and then combine the grid tile rasters via `terra::mosaic` where values in overlapping cells are averaged. Or, process as a single point cloud over the entire study extent.
# GW added
# create function to read a normalized las tile, rasterize a CHM, and return a data frame
las2chm2df <- function(las_path_name, des_chm_res = 3, min_z = -Inf, max_z = Inf) {
### Read in the file
las_norm_tile = lidR::readLAS(las_path_name, select = "xyzc")
### Filter the points to within the min and max Z range
las_norm_tile = lidR::filter_poi(las_norm_tile, Z > as.numeric(min_z))
las_norm_tile = lidR::filter_poi(las_norm_tile, Z < as.numeric(max_z))
### Get a logic check
is_las_empty = lidR::is.empty(las_norm_tile)
### If the las file is empty, return NULL
if(is_las_empty == TRUE){return(
data.frame(
X=as.numeric(NULL)
, Y=as.numeric(NULL)
, Z=as.numeric(NULL)
)
)}else{
### Create a CHM from the file
chm_tile = lidR::rasterize_canopy(
las_norm_tile
, res = des_chm_res
, algorithm = p2r()
, pkg = "terra"
)
### Get the CHM points as a data frame
chm_points = as.data.frame(chm_tile, xy = T) %>%
dplyr::rename_with(toupper)
### Return the points to the function
return(chm_points)
}
}
## map files over the function
# list.files(config$las_norm_tile_dir, pattern = ".*\\.(laz|las)$", full.names = T) %>%
# .[2:3] %>%
# purrr::map(las2chm2df, des_chm_res = 3) %>%
# dplyr::bind_rows() %>%
# str()
# # dplyr::pull(Z) %>%
# # max(na.rm=T)
### Function to get CHM from height normalized tiles
rasterize_tiles_to_chm = function(
config
, my_des_chm_res = 3 #in meters
, calculate_extent = TRUE
, my_las_grid = las_grid # generate_grid_over_extent(las_ctg, desired_tile_res)
, my_las_ctg = las_ctg # lidR::readLAScatalog(config$input_las_dir)
, my_min_z = -Inf
, my_max_z= Inf
){
# define files
las_grid = my_las_grid
las_ctg = my_las_ctg
message(" --- Initializing canopy height model rasterization --- ")
master_start = Sys.time()
# ###_____________________________###
# ### Read in the las grid object ###
# ###_____________________________###
# ## !!!!!!! the grid is already in memory, no need to re-read
# message("Reading in project grid ... ")
# las_grid = sfarrow::st_read_parquet(paste0(
# config$las_grid_dir
# , "/"
# , "project_grid.parquet")
# )
#
# message("Reading in the las as a lascatalog for indexing ... ")
# las_ctg = lidR::readLAScatalog(config$input_las_dir)
# las_ctg
###_______________________________________________________________________________###
### Get a list of height normalized tiles to generate CHM raster files ###
###_______________________________________________________________________________###
### Loop through and generate a CHM data.frame for each tile and then bind_rows
start_time = Sys.time()
# file list
tile_list = list.files(config$las_norm_tile_dir, pattern = ".*\\.(laz|las)$", full.names = T)
# map las2chm2df function
merged_chm_points = tile_list %>%
purrr::map(las2chm2df, des_chm_res = my_des_chm_res, min_z = my_min_z, max_z = my_max_z) %>%
# .[2:3] %>%
dplyr::bind_rows()
###_____________________________________###
### Make a las file from the CHM points ###
###_____________________________________###
message("Generating the CHM las file ... ")
des_crs = sf::st_crs(las_grid)
las = lidR::LAS(merged_chm_points)
sf::st_crs(las) = des_crs
# las
### Write cleaned las to the disk
message("Writing the CHM las file to the disk ... ")
lidR::writeLAS(
las
, file = paste0(
config$delivery_las_dir
, "/master_chm_point_cloud_z"
, as.character(my_min_z)
, "to"
, as.character(my_max_z)
, ".laz"
)
)
las_area_m2 = lidR::area(las)
las_area_acres = las_area_m2/4047
message("Total merged CHM area of ", las_area_acres, " acres .... ")
###____________________________________###
### Generate a CHM from the CHM points ###
###____________________________________###
message("Generating the CHM raster file ... ")
master_chm = lidR::rasterize_canopy(las, res = my_des_chm_res, algorithm = p2r(), pkg="terra")
# master_chm
###_____________________________________###
### Write the master raster to the disk ###
###_____________________________________###
message("Writing the CHM raster file to the disk ... ")
outname = paste0(
config$delivery_spatial_dir
,"/chm_raster_z"
, as.character(my_min_z)
, "to"
, as.character(my_max_z)
, "m_res"
, my_des_chm_res
, "m.tif"
)
terra::writeRaster(master_chm, outname, overwrite = TRUE)
###_______________________________________###
### Get a polygon of the full file extent ###
###_______________________________________###
if(calculate_extent == TRUE){
message("Getting the extent of the CHM file ... ")
extent_raster = terra::clamp(master_chm, 1, 1, 1)
extent_poly = terra::as.polygons(extent_raster)
extent_poly = terra::simplifyGeom(extent_poly)
extent_poly = terra::fillHoles(extent_poly)
extent_poly = sf::st_as_sf(extent_poly)
extent_poly = st_union(extent_poly)
extent_poly = st_as_sf(extent_poly)
# extent_poly
### Write the poly to the disk
#sf::st_write(extent_poly, dsn = "chm_extent_kml.kml", delete_dsn = TRUE, quiet = TRUE)
sf::st_write(
extent_poly
, dsn = paste0(
config$working_spatial_dir
, "/chm_extent_geopackage.gpkg"
)
, delete_dsn = TRUE, quiet = TRUE
)
}
###____________________###
### Get the total time ###
###____________________###
end_time = Sys.time()
total_time = difftime(end_time, start_time, units = c("mins"))
message(" --- Total lidar rasterization took ", total_time, " minutes --- ")
return(master_chm)
}
Call the function to create the CHM
master_chm = rasterize_tiles_to_chm(
config = config
, my_des_chm_res = desired_chm_res #0.25 #in meters
, calculate_extent = F
, my_las_grid = las_grid # generate_grid_over_extent(las_ctg, desired_tile_res)
, my_las_ctg = las_ctg # lidR::readLAScatalog(config$input_las_dir)
, my_min_z = -Inf
, my_max_z= max_height_threshold
)
Map the CHM
terra::rast(paste0(
config$delivery_spatial_dir
,"/chm_raster_z"
, as.character(-Inf)
, "to"
, as.character(max_height_threshold)
, "m_res"
, desired_chm_res
, "m.tif"
)) %>%
stars::st_as_stars() %>%
mapview::mapview(
layer.name = "CHM ht (m)"
, alpha.regions = 0.6
)
# # uncomment for ggplot
# as.data.frame(xy=T) %>%
# dplyr::rename_with(tolower) %>%
# dplyr::rename(f=3) %>%
# ggplot(mapping = aes(x=x,y=y,fill=f)) +
# geom_tile() +
# scale_fill_viridis_c(na.value = "black") +
# labs(fill = "elevation (m)") +
# theme_void()
This process uses the normalized grid tiled `laz` files created in this section to detect tree stems using the `TreeLS` package based on methods described in de Conto et al. (2017). This section (and the coded functions included within), which relies on the `TreeLS` package, requires that data be stored within the R environment until all steps reliant on `TreeLS` functions are completed. Writing out `laz` files (i.e. for later `TreeLS` processing) strips all information that is required for further processing using `TreeLS`. It may be possible to save a `csv` file with the required `TreeLS` attributes alongside the `laz` file and then combine these data during later processing with the `TreeLS` package, but the authors have not tested this.
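An untested rough sketch of that csv side-car idea, persisting the `TreeLS` point attributes (named as they appear later in this section) keyed by point coordinates for a later re-join; the output file name is hypothetical:
## hedged sketch: save TreeLS point attributes alongside the laz file
treels_attrs = las_norm_tile@data %>%
dplyr::select(X, Y, Z, dplyr::any_of(c("TreeID", "Stem", "Segment", "Radius", "Votes")))
write.csv(treels_attrs, file.path(config$las_stem_dir, "treels_attributes_1.csv"), row.names = FALSE)
## later: read the laz and the csv, then dplyr::left_join() on X, Y, Z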
Each normalized grid tile point cloud is individually passed to the `TreeLS::treeMap` function using the `map.hough` algorithm to estimate tree occurrence regions from a normalized point cloud using the Hough Transform for circle search. Good methods for noise filtering are paramount for a robust stem isolation algorithm. There are a handful of methods to remove noise from stem point cloud data; one technique is the Hough transformation, in which every data point “votes” for a cylinder center (de Conto et al. (2017)). The specific tree detection function is implemented using the following code:
TreeLS::treeMap(
las = las_norm_tile
, method = map.hough(
# height thresholds applied to filter a point cloud before processing
min_h = 1
, max_h = 5
# height interval to perform point filtering/assignment/classification
, h_step = 0.5
# pixel side length to discretize the point cloud layers
# while performing the Hough Transform circle search
, pixel_size = 0.025
# largest tree diameter expected in the point cloud
, max_d = 0.75 # 0.75m = 30in
# minimum point density (0 to 1) within a pixel evaluated
# on the Hough Transform - i.e. only dense point clusters will undergo circle search
, min_density = 0.0001
# minimum number of circle intersections over a pixel
# to assign it as a circle center candidate.
, min_votes = 3
)
# parameter passed down to treeMap.merge (if merge > 0)
, merge = 0
)
After tree detection, tree coordinates that overlap (or are too close) are merged into a single tree ID using the `TreeLS::treeMap.merge` function. The detected trees are then used to assign tree IDs to the original normalized point cloud based on coordinates extracted from the `TreeLS::treeMap` object via `TreeLS::treePoints(las = las_norm_tile, map = treemap_merge, method = trp.crop(l = 3))`, where the `trp.crop()` algorithm assigns points to a tree ID when they fall inside a circle of fixed size (here, `l = 3` m) around the `TreeLS::treeMap` object.
In the next step, points in the normalized point cloud are classified as stem points and assigned a tree ID using the `TreeLS::stemPoints` function with the `stm.hough` algorithm:
TreeLS::stemPoints(
las = las
, method = stm.hough(
# height interval to perform point filtering/assignment/classification.
h_step = 0.5
# largest tree diameter expected in the point cloud
, max_d = 0.75 # 0.75m = 30in
# tree base height interval to initiate circle search
, h_base = c(1, 2.5)
# pixel side length to discretize the point cloud layers
# while performing the Hough Transform circle search.
, pixel_size = 0.025
# minimum point density (0 to 1) within a pixel evaluated
# on the Hough Transform - i.e. only dense point clusters will undergo circle search
, min_density = 0.1
# minimum number of circle intersections over a pixel
# to assign it as a circle center candidate.
, min_votes = 3
)
)
Lastly, the normalized point cloud with points assigned to a tree ID (where a stem was detected) and points classified as stems is used to estimate the DBH. DBH estimation is done using the `TreeLS::tlsInventory` function with a 2D circle fitting algorithm (i.e. `ransac`). The specific function call used is:
TreeLS::tlsInventory(
las = las
# height layer (above ground) to estimate stem diameters, in point cloud units
, dh = 1.37
# height layer width, in point cloud units
, dw = 0.2
# parameterized shapeFit function, i.e. method to use for diameter estimation.
, d_method = shapeFit(
# either "circle" or "cylinder".
shape = "circle"
# optimization method for estimating the shape's parameters
, algorithm = "ransac"
# number of points selected on every RANSAC iteration.
, n = 20
)
)
###_____________________________________________________###
### Define function to map for potential tree locations ###
### Using TreeLS::treeMap ###
###_____________________________________________________###
### Function to map for potential tree locations with error handling
tree_map_function <- function(las){
result <- tryCatch(
expr = {
map = TreeLS::treeMap(
las = las
, method = map.hough(
# height thresholds applied to filter a point cloud before processing
min_h = 1
, max_h = 5
# height interval to perform point filtering/assignment/classification
, h_step = 0.5
# pixel side length to discretize the point cloud layers
# while performing the Hough Transform circle search
, pixel_size = 0.025
# largest tree diameter expected in the point cloud
, max_d = 0.75 # 0.75m = 30in
# minimum point density (0 to 1) within a pixel evaluated
# on the Hough Transform - i.e. only dense point clusters will undergo circle search
, min_density = 0.0001
# minimum number of circle intersections over a pixel
# to assign it as a circle center candidate.
, min_votes = 3
)
# parameter passed down to treeMap.merge (if merge > 0)
, merge = 0
)
},
error = function(e) {
# on error, return the error text so callers can detect failure
# by checking for a "character" return class
paste("Error:", e$message)
}
)
return(result)
}
This function applies `TreeLS` processing to the normalized point cloud to:
1. Apply the `TreeLS::treeMap` stem detection function
2. Merge overlapping tree coordinates using `TreeLS::treeMap.merge`
3. Assign tree IDs to the original points using `TreeLS::treePoints`
4. Flag only the stem points using `TreeLS::stemPoints`
5. Estimate DBH using `TreeLS::tlsInventory`
The result writes i) a `laz` to the ../data/02_processed_data/04_las_stem_dir directory with the `Classification` data updated to: ground points (class 2); water points (class 9); stem points (class 4); non-stem (class 5). Also written is a `parquet` file with the tree identification stem locations, heights, and DBH estimates.
# pass this function a file path of the normalized las you wish to detect stems and classify
write_stem_las_fn <- function(las_path_name) {
### Get the desired las file
las_name = basename(las_path_name)
# las_name
### See if the las file has been generated
path_to_check = paste0(config$las_stem_dir, "/", las_name)
does_file_exist = file.exists(path_to_check)
# does_file_exist
### See if the vector file has been generated
path_to_check = paste0(config$stem_poly_tile_dir, "/", tools::file_path_sans_ext(las_name), ".parquet")
does_file_exist2 = file.exists(path_to_check)
# does_file_exist2
if(does_file_exist == TRUE & does_file_exist2 == TRUE){
message("stem detect for grid number ", las_name, " already exists guy ... ")
return(FALSE)
}
### Read in the desired las file
las_norm_tile = lidR::readLAS(las_path_name)
las_norm_tile = lidR::filter_duplicates(las_norm_tile)
# plot(las_norm_tile)
# get the maximum point height
max_point_height = max(las_norm_tile@data$Z)
###________________________________________________________________###
### If the max point height is below 2 m, return classified tile ###
###________________________________________________________________###
if(max_point_height < 2){
message("No points >2m for grid number ", las_name, " so skipped it ... ")
# ### !!!! UNCOMMENT TO WRITE CLASSIFIED LAS INSTEAD WITH NO STEM POINTS
# ### Pull out the ground points
# ground = filter_poi(las_norm_tile, Classification == 2)
#
# ### Pull out the remaining points that arent ground
# remaining_points = filter_poi(las_norm_tile, Classification != 2)
# remaining_points@data$Classification = 5
#
# ### Combine the newly classified data
# las_reclassified = rbind(ground, remaining_points)
# las_reclassified@data$TreeID = as.numeric(0)
# las_reclassified@data$Stem = as.logical(F)
# las_reclassified@data$Segment = as.numeric(0)
# las_reclassified@data$Radius = as.numeric(0)
# las_reclassified@data$Votes = as.numeric(0)
#
# ### Write the stem points to the disk
# lidR::writeLAS(las_reclassified, paste0(config$las_stem_dir, "/", las_name))
return(FALSE)
}
###______________________________________________________________###
### If the max point height is at least 2 m, try to detect stems ###
###______________________________________________________________###
if(max_point_height >= 2){
###______________________________________________________________###
### 1) Apply the `TreeLS::treeMap` stem detection function
###______________________________________________________________###
### Run the function to search for candidate locations
treemap_temp = tree_map_function(las_norm_tile)
### Get a logic check
check = class(treemap_temp)
# check
###_______________________________________________________________###
### If the result is of class character, then no stems were found ###
###_______________________________________________________________###
if(check == "character"){
message("No stems detected for grid number ", las_name, " so skipped it ... ")
return(FALSE)
}
### If the class of the result == "LAS"
if(check == "LAS"){
###___________________________________###
### Classify the tree and stem points ###
###___________________________________###
###______________________________________________________________###
### 2) Merge overlapping tree coordinates using `TreeLS::treeMap.merge`
###______________________________________________________________###
treemap_temp = TreeLS::treeMap.merge(treemap_temp)
###______________________________________________________________###
### 3) Assign tree IDs to the original points using `TreeLS::treePoints`
###______________________________________________________________###
### Classify tree regions
## Assigns TreeIDs to a LAS object based on coordinates extracted from a treeMap object.
las_norm_tile = TreeLS::treePoints(
las = las_norm_tile
, map = treemap_temp
, method = trp.crop(l = 3)
)
# plot(las_norm_tile, color = "TreeID")
###______________________________________________________________###
### 4) Flag only the stem points using `TreeLS::stemPoints`
###______________________________________________________________###
### Classify stem points
las_norm_tile = TreeLS::stemPoints(
las = las_norm_tile
, method = stm.hough(
# height interval to perform point filtering/assignment/classification.
h_step = 0.5
# largest tree diameter expected in the point cloud
, max_d = 0.75 # 0.75m = 30in
# tree base height interval to initiate circle search
, h_base = c(1, 2.5)
# pixel side length to discretize the point cloud layers
# while performing the Hough Transform circle search.
, pixel_size = 0.025
# minimum point density (0 to 1) within a pixel evaluated
# on the Hough Transform - i.e. only dense point clusters will undergo circle search
, min_density = 0.1
# minimum number of circle intersections over a pixel
# to assign it as a circle center candidate.
, min_votes = 3
)
)
###______________________________________________________________###
### 5) DBH estimation is done using `TreeLS::tlsInventory`
###______________________________________________________________###
### Search through tree points and estimate DBH to return a data frame of results
tree_inv_df = TreeLS::tlsInventory(
las = las_norm_tile
# height layer (above ground) to estimate stem diameters, in point cloud units
, dh = 1.37
# height layer width, in point cloud units
, dw = 0.2
# parameterized shapeFit function, i.e. method to use for diameter estimation.
, d_method = shapeFit(
# either "circle" or "cylinder".
shape = "circle"
# optimization method for estimating the shape's parameters
, algorithm = "ransac"
# number of points selected on every RANSAC iteration.
, n = 20
)
)
# class(tree_inv_df)
# tree_inv_df %>% dplyr::glimpse()
###_______________________________________________________###
### clean up the DBH stem data frame ###
###_______________________________________________________###
# add details to table and convert to sf data
tree_inv_df = tree_inv_df %>%
dplyr::mutate(
Radius = as.numeric(Radius)
, dbh_m = Radius*2
, dbh_cm = dbh_m*100
, basal_area_m2 = pi * (Radius)^2
, basal_area_ft2 = basal_area_m2 * 10.764
, treeID = paste0(X, "_", Y)
, stem_x = X
, stem_y = Y
) %>%
sf::st_as_sf(coords = c("X", "Y"), crs = sf::st_crs(las_norm_tile)) %>%
dplyr::select(
treeID, H, stem_x, stem_y, Radius, Error
, dbh_m, dbh_cm, basal_area_m2, basal_area_ft2
) %>%
dplyr::rename(
tree_height_m = H
, radius_m = Radius
, radius_error_m = Error
)
# tree_inv_df %>% dplyr::glimpse()
### Remove points outside the bounding box of the laz tile + 1m buffer
tree_inv_df = tree_inv_df %>%
sf::st_crop(
sf::st_bbox(las_norm_tile) %>%
sf::st_as_sfc() %>%
sf::st_buffer(1)
)
###_______________________________________________________###
### Set the classification codes of different point types ###
###_______________________________________________________###
### Pull out the stem files
stem_points = lidR::filter_poi(las_norm_tile, Stem == TRUE)
stem_points@data$Classification = 4
### Pull out the ground and water points
ground = lidR::filter_poi(las_norm_tile, Classification %in% c(2,9))
### Pull out the remaining non-stem points that aren't ground or water
remaining_points = lidR::filter_poi(las_norm_tile, Stem == FALSE & !(Classification %in% c(2,9)))
remaining_points@data$Classification = 5
### Combine the newly classified data
las_reclassified = rbind(stem_points, ground, remaining_points)
# str(las_reclassified)
# class(las_reclassified)
# plot(las_reclassified, color = "Classification")
###_______________________________________________________###
### Write output to disk ###
###_______________________________________________________###
### Write the stem points to the disk
lidR::writeLAS(las_reclassified, paste0(config$las_stem_dir, "/", las_name))
message("Wrote stem detect laz for grid number ", las_name, " successfully ... ")
### Write stem polygons to the disk
out_name = tools::file_path_sans_ext(las_name)
out_name = paste0(config$stem_poly_tile_dir, "/", out_name, ".parquet")
sfarrow::st_write_parquet(tree_inv_df, out_name)
message("Wrote stem detect vector data for grid number ", las_name, " successfully ... ")
# return(las_norm_tile)
return(TRUE)
}
}
}
# ### test the function
# list.files(config$las_norm_tile_dir, pattern = ".*\\.(laz|las)$", full.names = T) %>%
# .[1:2] %>%
# purrr::map(write_stem_las_fn)
Call the function to write the stem-classified point clouds to the disk
# map over the normalized point cloud tiles
list.files(config$las_norm_tile_dir, pattern = ".*\\.(laz|las)$", full.names = T) %>%
purrr::map(write_stem_las_fn)
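Because each tile is processed independently, this map is embarrassingly parallel. A minimal sketch (commented out and not run here; an aside, not the original workflow) using the foreach and doParallel packages loaded at the top of this script:

# a minimal sketch (not run here): parallelize the per-tile stem detection
# cl = parallel::makeCluster(parallel::detectCores() - 1)
# doParallel::registerDoParallel(cl)
# foreach::foreach(
#   f = list.files(config$las_norm_tile_dir, pattern = ".*\\.(laz|las)$", full.names = T)
#   , .packages = c("lidR", "TreeLS", "dplyr", "sf", "sfarrow", "tools")
# ) %dopar% {
#   write_stem_las_fn(f)
# }
# parallel::stopCluster(cl)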
This is an example of the full process:
# read single data tile
las_norm_tile_temp = list.files(
config$las_norm_tile_dir
, pattern = ".*\\.(laz|las)$", full.names = T
) %>%
.[1] %>%
lidR::readLAS() %>%
lidR::filter_duplicates()
# plot(las_norm_tile_temp)
plot3D::scatter3D(
x = las_norm_tile_temp@data$X
, y = las_norm_tile_temp@data$Y
, z = las_norm_tile_temp@data$Z
, colvar = las_norm_tile_temp@data$Z
, pch = 19, cex = 0.2
)
# implement the tree detection function
# this function is essentially TreeLS::treeMap(las, method = map.hough(...))
treemap_temp = tree_map_function(las_norm_tile_temp)
# inspect the result
# class(treemap_temp)
# str(treemap_temp)
## merge TreeIDs which are too close in a treeMap's object.
treemap_temp = TreeLS::treeMap.merge(treemap_temp)
# str(treemap_temp)
# treemap_temp@data$TreeID %>% unique() %>% length()
# plot(treemap_temp, color = "TreeID")
plot3D::scatter3D(
x = treemap_temp@data$X
, y = treemap_temp@data$Y
, z = treemap_temp@data$Z
, colvar = treemap_temp@data$TreeID
, col = viridis::turbo(treemap_temp@data$TreeID %>% unique() %>% length())
, pch = 19, cex = 0.5
, colkey = F
)
### Classify tree regions
## Assigns TreeIDs to a LAS object based on coordinates extracted from a treeMap object.
las_norm_tile_temp = las_norm_tile_temp %>%
TreeLS::treePoints(map = treemap_temp, method = trp.crop(l = 3))
# las_norm_tile_temp@data %>% dplyr::glimpse()
# las_norm_tile_temp@data %>% dplyr::count(TreeID) %>% nrow()
# plot(las_norm_tile_temp, color = "TreeID")
plot3D::scatter3D(
x = las_norm_tile_temp@data$X
, y = las_norm_tile_temp@data$Y
, z = las_norm_tile_temp@data$Z
, colvar = las_norm_tile_temp@data$TreeID
, col = viridis::turbo(las_norm_tile_temp@data$TreeID %>% unique() %>% length())
, pch = 19, cex = 0.2
, colkey = F
)
### Classify stem points
las_norm_tile_temp = TreeLS::stemPoints(
las = las_norm_tile_temp
, method = stm.hough(
# height interval to perform point filtering/assignment/classification.
h_step = 0.5
# largest tree diameter expected in the point cloud
, max_d = 0.75 # 0.75m = 30in
# tree base height interval to initiate circle search
, h_base = c(1, 2.5)
# pixel side length to discretize the point cloud layers
# while performing the Hough Transform circle search.
, pixel_size = 0.025
# minimum point density (0 to 1) within a pixel evaluated
# on the Hough Transform - i.e. only dense point clusters will undergo circle search
, min_density = 0.1
# minimum number of circle intersections over a pixel
# to assign it as a circle center candidate.
, min_votes = 3
)
)
# las_norm_tile_temp@data %>% dplyr::glimpse()
# las_norm_tile_temp@data %>% dplyr::count(Classification)
# las_norm_tile_temp@data %>% dplyr::count(Stem)
# las_norm_tile_temp@data %>% dplyr::count(TreeID) %>% nrow()
# plot only ground points and stem points
# lidR::filter_poi(las_norm_tile_temp, (Stem==TRUE | Classification==2)) %>%
# plot(color = "Stem")
# plot only ground points and stem points
plt_las_norm_tile_temp = lidR::filter_poi(las_norm_tile_temp, (Stem==TRUE | Classification==2))
plot3D::scatter3D(
x = plt_las_norm_tile_temp@data$X
, y = plt_las_norm_tile_temp@data$Y
, z = plt_las_norm_tile_temp@data$Z
, colvar = plt_las_norm_tile_temp@data$Stem
, col = viridis::viridis(plt_las_norm_tile_temp@data$Stem %>% unique() %>% length())
, pch = 19, cex = 0.4
, colkey = F
)
### Search through tree points and estimate DBH to return a data frame of results
tree_inv_temp = TreeLS::tlsInventory(
las = las_norm_tile_temp
# height layer (above ground) to estimate stem diameters, in point cloud units
, dh = 1.37
# height layer width, in point cloud units
, dw = 0.2
# parameterized shapeFit function, i.e. method to use for diameter estimation.
, d_method = shapeFit(
# either "circle" or "cylinder".
shape = "circle"
# optimization method for estimating the shape's parameters
, algorithm = "ransac"
# number of points selected on every RANSAC iteration.
, n = 20
)
)
# class(tree_inv_temp)
# tree_inv_temp %>% dplyr::glimpse()
# add details to table and convert to sf data
tree_inv_temp = tree_inv_temp %>%
dplyr::mutate(
Radius = as.numeric(Radius)
, dbh_m = Radius*2
, dbh_cm = dbh_m*100
, basal_area_m2 = pi * (Radius)^2
, basal_area_ft2 = basal_area_m2 * 10.764
, treeID = paste0(X, "_", Y)
, stem_x = X
, stem_y = Y
) %>%
sf::st_as_sf(coords = c("X", "Y"), crs = sf::st_crs(las_norm_tile_temp)) %>%
dplyr::select(
treeID, H, stem_x, stem_y, Radius, Error
, dbh_m, dbh_cm, basal_area_m2, basal_area_ft2
) %>%
dplyr::rename(
tree_height_m = H
, radius_m = Radius
, radius_error_m = Error
)
# tree_inv_temp %>% dplyr::glimpse()
### Remove points outside the bounding box of the laz tile + 1m buffer
tree_inv_temp = tree_inv_temp %>%
sf::st_crop(
sf::st_bbox(las_norm_tile_temp) %>%
sf::st_as_sfc() %>%
sf::st_buffer(1)
)
### plot
ggplot() +
geom_sf(
data = sf::st_bbox(las_norm_tile_temp) %>%
sf::st_as_sfc() %>%
sf::st_buffer(1)
, lwd = 1.5
, alpha = 0
) +
geom_sf(
data = tree_inv_temp
, mapping = aes(size=dbh_m, color=dbh_m)
) +
scale_color_viridis_c(option="mako",direction = -1) +
theme_light()
In the prior section, each normalized grid tile point cloud was individually analyzed with a TreeLS package workflow to: i) write a laz to the ../data/02_processed_data/04_las_stem_dir (config$las_stem_dir) directory with the Classification data updated to ground points (class 2), water points (class 9), stem points (class 4), and non-stem points (class 5); and ii) write a parquet file with the tree identifications, stem locations, heights, and DBH estimates.
This section combines these classified grid tile point clouds into a single point cloud that includes stem points only (Classification == 4) and a single vector data layer with the stem locations and identifying information.

Combine the grid tile stem point clouds. This data does not contain tree ID, tree height, or tree DBH estimates. That data resides in the vector data written to the ../data/02_processed_data/05_stem_polygon_dir (config$stem_poly_tile_dir) directory as parquet tile files created in this section (see the example vector data here). This vector data is combined in the next section.
###___________________________________________________###
### create function to read las and return data frame
###___________________________________________________###
las2df <- function(las_path_name, class_filter_list = as.numeric(NA)) {
### Read in the desired las file
las_stem_tile = lidR::readLAS(las_path_name) %>%
lidR::filter_duplicates()
# if the class filter is NA, return the whole data set
if(any(is.na(as.numeric(class_filter_list)))){
stem_point_df = las_stem_tile@data %>%
# track tile file name
dplyr::mutate(las_stem_dir_file = basename(las_path_name))
}else{
### Grab the filtered dataframe
stem_point_df = las_stem_tile@data %>%
### Pull the stem only points
dplyr::filter(Classification %in% as.numeric(class_filter_list)) %>%
# track tile file name
dplyr::mutate(las_stem_dir_file = basename(las_path_name))
}
# str(stem_point_df)
### Return the dataframe
return(stem_point_df)
}
# ## test function
# list.files(config$las_stem_dir, pattern = ".*\\.(laz|las)$", full.names = T) %>%
# .[1:2] %>%
# purrr::map(las2df, class_filter_list=c(4)) %>%
# dplyr::bind_rows() %>%
# dplyr::count(Classification)
###___________________________________________________###
### Call the function to merge the stem las point files into a single file ###
###___________________________________________________###
merged_stem_points = list.files(config$las_stem_dir, pattern = ".*\\.(laz|las)$", full.names = T) %>%
purrr::map(las2df, class_filter_list=c(4)) %>%
dplyr::bind_rows()
# str(merged_stem_points)
###___________________________________________________###
### Points to LAS
###___________________________________________________###
stem_points_las = lidR::LAS(merged_stem_points)
st_crs(stem_points_las) = st_crs(las_grid)
stem_points_las = lidR::filter_duplicates(stem_points_las)
# plot(stem_points_las)
### Write the las file to the disk
lidR::writeLAS(stem_points_las, paste0(config$delivery_las_dir,"/detected_stem_points.laz"))
Plot the combined stem point cloud
# structure of the merged_stem_points data
dplyr::glimpse(merged_stem_points)
## Rows: 148,510
## Columns: 19
## $ X <dbl> 610558.6, 610558.9, 610558.6, 610558.6, 610558.5, 61…
## $ Y <dbl> 4888822, 4888822, 4888822, 4888822, 4888822, 4888822…
## $ Z <dbl> 2.265, 2.576, 1.009, 1.867, 2.456, 1.619, 2.337, 0.7…
## $ Intensity <int> 12079, 21331, 28527, 23130, 12079, 2827, 9509, 3598,…
## $ ReturnNumber <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ NumberOfReturns <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ ScanDirectionFlag <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ EdgeOfFlightline <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Classification <int> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4…
## $ Synthetic_flag <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
## $ Keypoint_flag <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
## $ Withheld_flag <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
## $ ScanAngleRank <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ UserData <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ PointSourceID <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ R <int> 10537, 16962, 25443, 21074, 11565, 2313, 8224, 3341,…
## $ G <int> 13621, 24672, 31611, 24929, 13107, 3341, 10537, 4112…
## $ B <int> 9509, 17476, 21845, 19789, 9509, 1799, 8481, 3084, 7…
## $ las_stem_dir_file <chr> "1.laz", "1.laz", "1.laz", "1.laz", "1.laz", "1.laz"…
# plot
plot3D::scatter3D(
x = stem_points_las@data$X
, y = stem_points_las@data$Y
, z = stem_points_las@data$Z
, colvar = stem_points_las@data$Z
, col = viridis::viridis(50)
, pch = 19, cex = 0.3
)
Combine the vector data written to the ../data/02_processed_data/05_stem_polygon_dir (config$stem_poly_tile_dir) directory as parquet tile files created in this section. An example of this data is shown here. This data contains tree ID, tree height, and tree DBH estimates as well as xy coordinates of the tree stems.
If stem vector points spatially overlap … ???
###__________________________________________________________###
### Merge the stem vector location tiles into a single object ###
###__________________________________________________________###
dbh_locations_sf = list.files(config$stem_poly_tile_dir, pattern = ".*\\.parquet$", full.names = T) %>%
purrr::map(sfarrow::st_read_parquet) %>%
dplyr::bind_rows() %>%
sf::st_as_sf() %>%
sf::st_make_valid() %>%
sf::st_set_crs(sf::st_crs(las_grid))
What is the structure of this vector data?
str(dbh_locations_sf)
## Classes 'sf' and 'data.frame': 294 obs. of 11 variables:
## $ treeID : chr "610596.540376442_4888792.2245489" "610568.642130108_4888778.00420689" "610577.747388244_4888780.16525558" "610555.320981336_4888809.46613208" ...
## $ tree_height_m : num 13.72 11.05 10.38 7.63 11.11 ...
## $ stem_x : num 610597 610569 610578 610555 610578 ...
## $ stem_y : num 4888792 4888778 4888780 4888809 4888823 ...
## $ radius_m : num 0.225 0.319 0.272 0.306 0.265 ...
## $ radius_error_m: num 0.012 0.0406 0.066 0.0171 0.0234 ...
## $ dbh_m : num 0.451 0.637 0.545 0.612 0.53 ...
## $ dbh_cm : num 45.1 63.7 54.5 61.2 53 ...
## $ basal_area_m2 : num 0.16 0.319 0.233 0.295 0.221 ...
## $ basal_area_ft2: num 1.72 3.43 2.51 3.17 2.38 ...
## $ geometry :sfc_POINT of length 294; first list element: 'XY' num 610597 4888792
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA
## ..- attr(*, "names")= chr [1:10] "treeID" "tree_height_m" "stem_x" "stem_y" ...
Is a row unique by treeID?
dbh_locations_sf %>%
sf::st_drop_geometry() %>%
dplyr::ungroup() %>%
dplyr::summarise(
n_rows = dplyr::n()
, n_unique_treeID = dplyr::n_distinct(treeID)
) %>%
kableExtra::kbl() %>%
kableExtra::kable_styling()
n_rows | n_unique_treeID |
---|---|
294 | 294 |
Do stem points overlap spatially?
### plot
ggplot(data = dbh_locations_sf %>% dplyr::mutate(random=runif(dplyr::n()))) +
geom_sf(
mapping = aes(size=dbh_m, fill=as.factor(random))
, shape = 21
, alpha = 0.6
) +
scale_fill_viridis_d(option="turbo") +
theme_light() +
theme(legend.position = "none")
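A quantitative check can complement the plot. A minimal sketch (not in the original script) counting stem points that have another detected stem within 0.5 m:

# a minimal sketch (not in the original script): count stem points that have
# another stem point within 0.5 m (each point always matches itself, hence > 1)
overlap_idx = sf::st_is_within_distance(dbh_locations_sf, dist = 0.5)
sum(lengths(overlap_idx) > 1)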
How do the stem points look on satellite imagery?
Note: in the map layer, the las_grid_buff box can be checked to view the buffered grid used for processing the grid tiles.
mapview::mapview(
las_ctg@data$geometry
, color = "black"
, lwd = 3, alpha.regions = 0.0, legend = F, label = F
) +
mapview::mapview(
las_grid_buff
, zcol = "grid_id"
, col.regions = viridis::cividis(nrow(las_grid_buff))
, color = "gray55"
, alpha.regions = 0.2, lwd = 2, legend = F, label = F, hide = T
) +
mapview::mapview(
dbh_locations_sf
, zcol = "treeID"
, cex = "dbh_m"
, legend = F
, label = F
)
Note!: this clean-up process uses code from the original author Neal Swayze with no edits. It does not include any filtering of stems (and associated DBHs) that spatially overlap, nor does it have any selection criteria for individual trees that were "cut in half" in one tile but wholly contained in another tile when the grid tiles were combined (i.e. these trees will be included twice: once with a "good" DBH value and once with a "bad" DBH value).
The cleaning process uses the following steps:

1. drop stems with radius values of NA returned by the TreeLS::tlsInventory DBH estimation step
2. drop stems with estimated DBH above the threshold dbh_max_size_m (1.5m in this example)
3. remove stem locations with empty geometry

This step also creates a vector file of the stems with points sized by the radius (m) estimate of DBH and writes the vector files to the config$delivery_spatial_dir directory.
###________________________________________________###
### CLEAN UP THE STEM POLYGONS WITH SOME FILTERING ###
###________________________________________________###
### Get the NA condition
is_na = is.na(dbh_locations_sf$radius_m)
dbh_locations_sf = cbind(dbh_locations_sf, is_na)
### Drop stems with NA values
dbh_locations_sf = dbh_locations_sf[dbh_locations_sf$is_na == "FALSE",]
dbh_locations_sf = subset(dbh_locations_sf, select = -c(is_na))
### Drop DBHs above set threshold
dbh_locations_sf = dbh_locations_sf[dbh_locations_sf$dbh_m < dbh_max_size_m,]
### Remove empty geometry stem locations
condition = sf::st_is_empty(dbh_locations_sf)
dbh_locations_sf = cbind(dbh_locations_sf, condition)
dbh_locations_sf = dbh_locations_sf[dbh_locations_sf$condition == FALSE,]
###___________________________________________________________###
### create a variable and buffer the points with the radius ###
###___________________________________________________________###
### Reset the condition
dbh_locations_sf$condition = rep("detected_stem", nrow(dbh_locations_sf))
###___________________________________________________________###
### Write the DBHs and the merged canopy polygons to the disk ###
###___________________________________________________________###
sf::st_write(
dbh_locations_sf
, dsn = paste0(config$delivery_spatial_dir, "/bottom_up_detected_stem_locations.gpkg")
, append = FALSE
, delete_dsn = TRUE
, quiet = TRUE
)
sf::st_write(
### Buffer the points by the radius of the DBH
sf::st_buffer(dbh_locations_sf, dist = dbh_locations_sf$radius_m)
, dsn = paste0(config$delivery_spatial_dir, "/bottom_up_detected_stem_locations_buffered.gpkg")
, append = FALSE
, delete_dsn = TRUE
, quiet = TRUE
)
Result of the cleaning process (compare to same table above)
dbh_locations_sf %>%
sf::st_drop_geometry() %>%
dplyr::ungroup() %>%
dplyr::summarise(
n_rows = dplyr::n()
, n_unique_treeID = dplyr::n_distinct(treeID)
) %>%
kableExtra::kbl() %>%
kableExtra::kable_styling()
n_rows | n_unique_treeID |
---|---|
285 | 285 |
This section utilizes the Canopy Height Model (CHM) raster created in this section, which has a grid resolution of 0.25m, to perform individual tree detection from the top down (i.e. using raster cell height values). From The lidR Package Book:

Individual tree detection (ITD) is the process of spatially locating trees and extracting height information. Individual tree segmentation (ITS) is the process of individually delineating detected trees. In lidR, detecting and segmenting functions are decoupled to maximize flexibility. Tree tops are first detected using the locate_trees() function, followed by crown delineation using segment_trees().
The CHM is used to perform individual tree detection using the lidR::locate_trees function with the lmf algorithm, which implements a local maximum filter based on the window size (ws) parameter and a minimum tree height (hmin) parameter of 2 m. The window size can be fixed or variable and its shape can be square or circular.
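For illustration, a minimal sketch (not run in this workflow) of a fixed 3 m circular window passed to the same function:

# a minimal sketch (for comparison only): a fixed 3 m circular search window
lidR::locate_trees(chm, algorithm = lidR::lmf(ws = 3, hmin = 2, shape = "circular"))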
The individual tree detection algorithm recommended in Swayze and Tinkham (2022) is a local maximum variable window function based on research in ponderosa pine (Pinus ponderosa) forests by Creasy et al. (2021) and Tinkham and Swayze (2021). This recommended variable window function has the form:
\[ \textrm{variable window radius} = \textrm{pixel height} \times 0.1 \]
This particular variable window radius function (with a minimum window size of 2m and a maximum of 5m) has the shape:
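(The original figure is not reproduced here; a minimal sketch, assuming the bounded linear form described above, to recreate the window shape:)

# a minimal sketch (not in the original document): visualize the variable window,
# radius = 2 + 0.1 * pixel height, bounded below at 2 m and above at 5 m
dplyr::tibble(pixel_height_m = seq(0, 40, by = 0.1)) %>%
  dplyr::mutate(
    window_size_m = ifelse(pixel_height_m < 2, 2, pmin(5, 2 + 0.1 * pixel_height_m))
  ) %>%
  ggplot(mapping = aes(x = pixel_height_m, y = window_size_m)) +
  geom_line(color = "navy", lwd = 1) +
  labs(x = "Pixel Ht. (m)", y = "Window Size (m)") +
  theme_light()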
A variable window size can be implemented in the lidR::locate_trees function via the following code:
# define the variable window function
ws_fn = function(x) {
y = dplyr::case_when(
is.na(x) ~ 1e-3 # requires non-null
, x < 0 ~ 1e-3 # requires positive
, x < 2 ~ 2 # set lower bound
, x > 30 ~ 5 # set upper bound
, TRUE ~ 2 + (x * 0.1)
)
return(y)
}
# call the locate trees function and pass the variable window
lidR::locate_trees(
chm
, algorithm = lmf(
ws = ws_fn
, hmin = 2
)
)
This function uses the CHM raster created in this section and applies the individual tree detection algorithm using a variable window function of linear form (with a minimum window size of 2m for pixels 2m in height and below, and a maximum window size of 5m for pixels 30m in height and above). The result is an sf data frame with point locations of tree tops.
###___________________________________________________###
### define the variable window function
###___________________________________________________###
# define the variable window function
ws_fn = function(x) {
y = dplyr::case_when(
is.na(x) ~ 1e-3 # requires non-null
, x < 0 ~ 1e-3 # requires positive
, x < 2 ~ 2 # set lower bound
, x > 30 ~ 5 # set upper bound
, TRUE ~ 2 + (x * 0.1)
)
return(y)
}
###___________________________________________________###
### Individual tree detection using CHM (top down)
###___________________________________________________###
# master_chm = rasterize_tiles_to_chm()
# terra::crs(master_chm)
# plot(master_chm)
### ITD on CHM
# call the locate_trees function and pass the variable window
tree_tops = lidR::locate_trees(
master_chm
, algorithm = lmf(
ws = ws_fn
, hmin = minimum_tree_height
)
)
# str(tree_tops)
View the results of the individual tree detection overlaid on the CHM raster
ggplot() +
geom_tile(
data = master_chm %>% as.data.frame(xy=T) %>% dplyr::rename(f=3)
, mapping = aes(x=x,y=y,fill=f)
) +
geom_sf(
data = tree_tops
, shape = 20
, color = "black"
) +
coord_sf(
expand = FALSE
) +
scale_fill_viridis_c(option = "plasma") +
labs(fill = "Hgt. (m)", x = "", y = "") +
theme_light()
This step uses the individual tree detection based on the CHM to delineate tree crown vector (i.e. polygon) shapes.

Creasy et al. (2021) evaluated four different crown delineation algorithms in ponderosa pine (Pinus ponderosa) forests: the Dalponte (lidR::dalponte2016), Silva (lidR::silva2016), and watershed (lidR::watershed) algorithms inside the lidR::segment_trees function, and the marker-controlled watershed function from the ForestTools package. Overall, the lidR::watershed algorithm provided the smallest crown area bias, though small crown areas were overestimated and larger crowns were underestimated.
The lidR::segment_trees function works on an input point cloud (i.e. las) without the requirement to create a canopy height model (CHM) beforehand. However, the crown delineation algorithms available within the lidR package can be run outside of the lidR::segment_trees function using only an input CHM raster. From The lidR Package Book:
While point cloud segmentation is standard in lidR, users may only have access to a CHM. There are many reasons for only using a CHM, and this is why raster-based methods can be run standalone outside segment_trees(). Whether using the point cloud or a raster, segmentation results will be exactly the same. The difference will be the data format of the segmentation result. In lidR, a LAS object will gain a treeID attribute, while for rasters, delineated crowns are returned in a raster format. To work outside segment_trees() it suffices to call the function standalone like this:
crowns = lidR::watershed(
# input raster layer !! works with terra or stars !!
chm = master_chm
# threshold below which a pixel cannot be a tree. Default is 2
, th_tree = 2
)
plot(crowns, col = (viridis::turbo(2000) %>% sample()))
G. Woolsey notes: the lidR::watershed call above fails because the algorithm requires an input CHM with no empty raster cells, and the CHM created in this section contains empty cells. Obtaining a gap-free CHM raster can be accomplished using the interpolation and mosaic methods discussed in that section; one possible fill is sketched below. Alternatively, the lidR::segment_trees function can be utilized with a point cloud covering the full study extent, which requires a complete (non-tiled) point cloud (perhaps using the laz file created based off of the CHM raster and written to the ../data/03_delivery/03_las_files directory).
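One possible approach (a minimal sketch under the assumption that a simple neighborhood fill is acceptable for this data) to obtain a gap-free CHM before retrying the standalone call above:

# a minimal sketch (an assumption, not the original workflow): fill empty CHM
# cells with a 3x3 focal mean so the standalone lidR::watershed call can be retried
chm_filled = terra::focal(
  master_chm
  , w = 3 # 3x3 moving window
  , fun = "mean"
  , na.policy = "only" # compute values for NA cells only
  , na.rm = TRUE
)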
Instead, the ForestTools::mcws marker-controlled watershed segmentation algorithm is used below, which requires input of individual tree top points and a CHM raster. However, this package relies on the raster package, which was deprecated in 2023. Tinkham et al. (2022) used this same ForestTools::mcws algorithm to delineate tree crowns.
# this requires the `raster` package which is deprecated
crowns = ForestTools::mcws(
treetops = sf::st_zm(tree_tops, drop = T) # drops z values
, CHM = raster::raster(master_chm) # converts to raster data
, minHeight = minimum_tree_height
) %>% terra::rast()
# str(crowns)
# plot(crowns, col = (viridis::turbo(2000) %>% sample()))
### Write the crown raster to the disk
terra::writeRaster(
crowns
, paste0(config$delivery_spatial_dir, "/top_down_detected_tree_crowns.tif")
, overwrite = TRUE
)
Plot the tree crowns with the tree tops
ggplot() +
geom_tile(
data = crowns %>% as.data.frame(xy=T) %>% dplyr::rename(f=3)
, mapping = aes(x=x,y=y,fill=as.factor(f))
) +
geom_sf(
data = tree_tops
, shape = 20
, color = "black"
) +
coord_sf(
expand = FALSE
) +
scale_fill_manual(
values = viridis::turbo(n = terra::freq(crowns) %>% nrow()) %>% sample()
) +
labs(x = "", y = "") +
theme_light() +
theme(legend.position = "none")
From Tinkham et al. (2022):
Local competition metrics, including the distance to the nearest neighbor, trees ha−1 within a 5 m radius, and the relative tree height within a 5 m radius, were calculated for each tree and used in the subsequent DBH modeling. A 5 m radius was selected as it represents a distance slightly less than the point at which two mature ponderosa pine trees in the region would begin to have interlocking crowns, representing a proxy for direct competition. The relative tree height within a 5 m radius was estimated using Equation (2):
\[ \textrm{Relative Height} = \frac{\textrm{Height}}{\textrm{Height}_\textrm{max}} \times 100 \]
, where \(\textrm{Height}\) is the height of the subject tree and \(\textrm{Height}_\textrm{max}\) is the height of the tallest tree within a 5 m radius of the subject tree.
### add tree data to tree_tops
tree_tops = tree_tops %>%
# pull out the coordinates and update treeID
dplyr::mutate(
tree_x = sf::st_coordinates(.)[,1]
, tree_y = sf::st_coordinates(.)[,2]
, tree_height_m = sf::st_coordinates(.)[,3]
, treeID = paste(treeID,round(tree_x, 1),round(tree_y, 1), sep = "_")
)
# str(tree_tops)
### set buffer around tree to calculate competition metrics
competition_buffer_temp = 5
### how much of the buffered tree area is within the study boundary?
# use this to scale the TPA estimates below
tree_tops_pct_buffer_temp = tree_tops %>%
# buffer point
sf::st_buffer(competition_buffer_temp) %>%
dplyr::mutate(
point_buffer_area_m2 = as.numeric(sf::st_area(.))
) %>%
# intersect with study bounds
sf::st_intersection(
las_ctg$geometry
) %>%
# calculate area of buffer within study
dplyr::mutate(
buffer_area_in_study_m2 = as.numeric(sf::st_area(.))
) %>%
sf::st_drop_geometry() %>%
dplyr::select(treeID, buffer_area_in_study_m2)
### use the tree top location points to get competition metrics
comp_tree_tops_temp = tree_tops %>%
# buffer point
sf::st_buffer(competition_buffer_temp) %>%
dplyr::select(treeID, tree_height_m) %>%
# spatial join with all tree points
sf::st_join(
tree_tops %>%
dplyr::select(treeID, tree_height_m) %>%
dplyr::rename_with(
.fn = ~ paste0("comp_",.x)
, .cols = tidyselect::everything()[
-dplyr::any_of(c("geometry"))
]
)
) %>%
sf::st_drop_geometry() %>%
# calculate metrics by treeID
dplyr::group_by(treeID,tree_height_m) %>%
dplyr::summarise(
n_trees = dplyr::n()
, max_tree_height_m = max(comp_tree_height_m)
) %>%
dplyr::ungroup() %>%
dplyr::inner_join(
tree_tops_pct_buffer_temp
, by = dplyr::join_by("treeID")
) %>%
dplyr::mutate(
comp_trees_per_ha = (n_trees/buffer_area_in_study_m2)*10000
, comp_relative_tree_height = tree_height_m/max_tree_height_m*100
) %>%
dplyr::select(
treeID, comp_trees_per_ha, comp_relative_tree_height
)
### calculate distance to nearest neighbor
### cap distance to nearest tree at the 50 m buffer distance
dist_buffer_temp = 50
# get trees within radius
dist_tree_tops_temp = tree_tops %>%
dplyr::select(treeID) %>%
# buffer point
sf::st_buffer(dist_buffer_temp) %>%
# spatial join with all tree points
sf::st_join(
tree_tops %>%
dplyr::select(treeID, tree_x, tree_y) %>%
dplyr::rename(treeID2=treeID)
) %>%
dplyr::filter(treeID != treeID2)
# calculate row by row distances
dist_tree_tops_temp = dist_tree_tops_temp %>%
sf::st_centroid() %>%
st_set_geometry("geom1") %>%
dplyr::bind_cols(
dist_tree_tops_temp %>%
sf::st_drop_geometry() %>%
dplyr::select("tree_x", "tree_y") %>%
sf::st_as_sf(coords = c("tree_x", "tree_y"), crs = sf::st_crs(tree_tops)) %>%
st_set_geometry("geom2")
) %>%
dplyr::mutate(
comp_dist_to_nearest_m = sf::st_distance(geom1, geom2, by_element = T) %>% as.numeric()
) %>%
sf::st_drop_geometry() %>%
dplyr::select(treeID,comp_dist_to_nearest_m) %>%
dplyr::group_by(treeID) %>%
dplyr::summarise(comp_dist_to_nearest_m = min(comp_dist_to_nearest_m, na.rm = T)) %>%
dplyr::ungroup()
### join with original tree tops data
tree_tops = tree_tops %>%
dplyr::left_join(
comp_tree_tops_temp
, by = dplyr::join_by("treeID")
) %>%
# add distance
dplyr::left_join(
dist_tree_tops_temp
, by = dplyr::join_by("treeID")
) %>%
dplyr::mutate(comp_dist_to_nearest_m = dplyr::coalesce(comp_dist_to_nearest_m,dist_buffer_temp))
### Write the trees to the disk
sf::st_write(
tree_tops
, paste0(config$delivery_spatial_dir, "/top_down_detected_tree_tops.gpkg")
, quiet = TRUE, append = FALSE
)
Map tree competition metrics
# trees per ha
tree_tops %>%
ggplot() +
geom_sf(mapping = aes(fill = comp_trees_per_ha), shape = 21, alpha = 0.8, size = 2, color = "gray77") +
scale_fill_viridis_c() +
labs(fill = "trees\nper ha") +
theme_light()
# distance to nearest neighbor
tree_tops %>%
ggplot() +
geom_sf(mapping = aes(fill = comp_dist_to_nearest_m), shape = 21, alpha = 0.8, size = 2, color = "gray77") +
scale_fill_viridis_c(option="rocket", direction = -1) +
labs(fill = "meters to\nnearest\nneighbor") +
theme_light()
The delineated crown raster (terra SpatRaster) is converted to vector data (i.e. polygons) and crowns with an area smaller than 0.1 m2 are removed. The tree tops are then spatially joined with the tree crowns.
### Convert crown raster to terra rast, then polygons, then to Sf
crowns_sf = crowns %>%
# convert raster to polygons for each individual crown
terra::as.polygons() %>%
# fix polygon validity
terra::makeValid() %>%
# reduce the number of nodes in geometries
terra::simplifyGeom() %>%
# remove holes in polygons
terra::fillHoles() %>%
# convert to sf
sf::st_as_sf() %>%
# get the crown area
dplyr::mutate(
crown_area_m2 = as.numeric(sf::st_area(.))
) %>%
#remove super small crowns
dplyr::filter(
crown_area_m2 > 0.1
)
# str(crowns_sf)
# ggplot(crowns_sf) + geom_sf(aes(fill=as.factor(layer))) +
# scale_fill_manual(values = viridis::turbo(nrow(crowns_sf)) %>% sample()) +
# theme_void() + theme(legend.position = "none")
### Join the crowns with the seeds to append data, remove Nulls
crowns_sf = crowns_sf %>%
sf::st_join(tree_tops) %>%
dplyr::select(c(
"treeID", "tree_height_m"
, "tree_x", "tree_y"
, "crown_area_m2"
, tidyselect::starts_with("comp_")
))
# str(crowns_sf)
# crowns_sf %>% dplyr::count(treeID) %>% nrow()
# crowns_sf %>%
# sf::st_drop_geometry() %>%
# dplyr::summarise(
# n = n()
# , n_distinct = dplyr::n_distinct(treeID)
# )
### Add crown data summaries
crown_sum_temp = data.frame(
mean_crown_ht_m = terra::extract(x = master_chm, y = terra::vect(crowns_sf), fun = "mean", na.rm = T)
, median_crown_ht_m = terra::extract(x = master_chm, y = terra::vect(crowns_sf), fun = "median", na.rm = T)
, min_crown_ht_m = terra::extract(x = master_chm, y = terra::vect(crowns_sf), fun = "min", na.rm = T)
) %>%
dplyr::select(-c(tidyselect::ends_with(".ID"))) %>%
dplyr::rename_with(~ stringr::str_remove_all(.x,".Z"))
### join crown data summary
crowns_sf = crowns_sf %>%
dplyr::bind_cols(crown_sum_temp)
# str(crowns_sf)
### Write the crowns to the disk
sf::st_write(
crowns_sf
, paste0(config$delivery_spatial_dir, "/top_down_detected_crowns.gpkg")
, quiet = TRUE, append = FALSE
)
Plot the tree crowns with the tree tops
ggplot() +
geom_sf(
data = crowns_sf
, mapping = aes(fill = mean_crown_ht_m)
) +
geom_sf(
data = tree_tops
, shape = 20
, color = "black"
) +
coord_sf(
expand = FALSE
) +
scale_fill_viridis_c(option = "plasma") +
labs(fill = "Mean Crown\nHgt. (m)", x = "", y = "") +
theme_light()
Note that not all tree tops (black dots) have an associated crown. Plot only matched tree tops and crowns.
ggplot() +
geom_sf(
data = crowns_sf
, mapping = aes(fill = mean_crown_ht_m)
) +
geom_sf(
data = tree_tops %>%
dplyr::inner_join(
crowns_sf %>% sf::st_drop_geometry() %>%
dplyr::select(treeID)
, dplyr::join_by(treeID)
)
, shape = 20
, color = "black"
) +
coord_sf(
expand = FALSE
) +
scale_fill_viridis_c(option = "plasma") +
labs(fill = "Mean Crown\nHgt. (m)", x = "", y = "") +
theme_light()
Tinkham et al. (2022) evaluated five models in the form of simple linear, multiple linear, and three non-linear model structures to estimate missing tree DBH values based on allometric relationships to independent variables including tree height using the sample of UAS SfM point cloud derived DBH values (i.e. from trees where DBH could be detected using aerial point cloud data alone). For the ponderosa pine forest studied, the simple linear form (\(DBH_i = \beta_0 + \beta_1 \cdot Height_i\), where \(i\) is the individual tree estimate from UAS SfM data) resulted in the best predictive model for both tree and stand level summarizations after first filtering SfM DBH estimates using regionally derived height to DBH allometries from US Forest Inventory and Analysis (FIA) data.
This section uses the sample of DBH values extracted from the point cloud (example here) in the Detect Stems section to create a model using the SfM extracted height, crown area, and competition metrics as independent variables, and the SfM-derived DBH as the dependent variable.
Importantly, before building a local DBH-to-Height model the SfM-derived DBH estimates should be filtered using height to DBH allometries specific to the region to improve the local model performance (Tinkham et al. (2022)). This filtering is implemented in the Filter the SfM DBHs section below with full details on the data and modelling described in a supplementary section.
Spatially join the tree crown polygons created based off of the CHM in this section with the tree stems (created in this section) where DBH was detected.
###________________________________________________________###
### Join the Top down crowns with the stem location points ###
###________________________________________________________###
### Join the top down crowns with the stem location points
## !! Note that one crown can have multiple stems within its bounds
crowns_sf_joined_stems_temp = crowns_sf %>%
sf::st_join(
dbh_locations_sf %>%
# rename all columns to have "stem" prefix
dplyr::rename_with(
.fn = ~ paste0("stem_",.x,recycle0 = T)
, .cols = tidyselect::everything()[
-dplyr::any_of(
c(tidyselect::starts_with("stem_x"),"geometry")
)
]
)
)
# str(crowns_sf_joined_stems_temp)
###________________________________________________________###
### Check for row uniqueness ###
###________________________________________________________###
crowns_sf_joined_stems_temp %>%
sf::st_drop_geometry() %>%
dplyr::summarise(
n = dplyr::n()
, n_unique_crowns = dplyr::n_distinct(treeID)
) %>%
kableExtra::kbl() %>%
kableExtra::kable_styling()
n | n_unique_crowns |
---|---|
1550 | 1545 |
G. Woolsey added this section. There was no DBH filtering done in the original script handed off by N. Swayze. Instead, the model to estimate missing DBHs was built off of all stems with DBH values created in this section. There are a few potential pitfalls with this methodology. First, the stems and DBH estimates were made separately for each grid tile and then combined with no filtering for stems (and associated DBH estimates) that were "cut in half" at the grid tile boundary. There was also no filtering for stems that overlap when the grid tiles were combined.
A regionally derived height to DBH relationship was modeled using the data and methods discussed in detail in this supplementary section. The SfM-derived DBH filtering steps implemented here are:

1. predict a regionally expected DBH (with 90% prediction bounds) for each tree height using the regional allometric model
2. remove SfM-derived DBH estimates that fall outside the 90% prediction bounds
3. for crowns with multiple SfM-derived DBH values, retain the DBH with the smallest percent difference from the regionally expected DBH

See full details in this supplementary section
###__________________________________________________###
### read in FIA data ###
###__________________________________________________###
# read in treemap data
# downloaded from: https://www.fs.usda.gov/rds/archive/Catalog/RDS-2021-0074
# read in treemap raster (terra reads lazily, so no memory is used yet)
treemap_rast = terra::rast("../data/treemap/TreeMap2016.tif")
# crop and mask treemap to the las catalog extent (values are now read into memory)
treemap_rast = treemap_rast %>%
terra::crop(
las_ctg@data$geometry %>%
sf::st_union() %>%
terra::vect() %>%
terra::project(terra::crs(treemap_rast))
) %>%
terra::mask(
las_ctg@data$geometry %>%
sf::st_union() %>%
terra::vect() %>%
terra::project(terra::crs(treemap_rast))
)
### get weights for weighting each tree in the population models
# treemap id = tm_id for linking to tabular data
tm_id_weight_temp = terra::freq(treemap_rast) %>%
dplyr::select(-layer) %>%
dplyr::rename(tm_id = value, tree_weight = count)
### get the TreeMap FIA tree list for only the plots included
treemap_trees_df = readr::read_csv(
"../data/treemap/TreeMap2016_tree_table.csv"
, col_select = c(
tm_id
, TREE
, STATUSCD
, DIA
, HT
)
) %>%
dplyr::rename_with(tolower) %>%
dplyr::inner_join(
tm_id_weight_temp
, by = dplyr::join_by("tm_id")
) %>%
dplyr::filter(
# keep live trees only: 1=live;2=dead
statuscd == 1
& !is.na(dia)
& !is.na(ht)
) %>%
dplyr::mutate(
dbh_cm = dia*2.54
, tree_height_m = ht/3.28084
) %>%
dplyr::select(-c(statuscd,dia,ht)) %>%
dplyr::rename(tree_id=tree)
# dplyr::glimpse(treemap_trees_df)
# treemap_trees_df %>% dplyr::count(tm_id)
See full details in this supplementary section
Note: this step takes ~5 minutes for this example area
###__________________________________________________________###
### population model, height non-linear
###__________________________________________________________###
# population model with no random effects (i.e. no group-level variation)
# quadratic model form with Gamma distribution for strictly positive response variable dbh
# set up prior
p_temp <- prior(normal(1, 2), nlpar = "b1") +
prior(normal(0, 2), nlpar = "b2")
mod_nl_pop = brms::brm(
formula = brms::bf(
formula = dbh_cm|weights(tree_weight) ~ (b1 * tree_height_m) + tree_height_m^b2
, b1 + b2 ~ 1
, nl = TRUE # !! specify non-linear
)
, data = treemap_trees_df
, prior = p_temp
, family = brms::brmsfamily("Gamma")
, iter = 4000
)
# plot(mod_nl_pop)
# summary(mod_nl_pop)
Summary of model results
#### extract posterior draws to a df
brms::as_draws_df(
mod_nl_pop
, variable = c("^b_", "shape")
, regex = TRUE
) %>%
# quick way to get a table of summary statistics and diagnostics
posterior::summarize_draws(
"mean", "median", "sd"
, ~quantile(.x, probs = c(
0.05, 0.95
# , 0.025, 0.975
))
, "rhat"
) %>%
dplyr::mutate(
variable = stringr::str_remove_all(variable, "_Intercept")
) %>%
kableExtra::kbl(
caption = paste(
"non-linear model:"
, mod_nl_pop$formula %>% as.character() %>% purrr::pluck(1)
)
, digits = 3
) %>%
kableExtra::kable_styling()
variable | mean | median | sd | 5% | 95% | rhat |
---|---|---|---|---|---|---|
b_b1 | -0.159 | -0.159 | 0.013 | -0.180 | -0.138 | 1.001 |
b_b2 | 0.639 | 0.640 | 0.013 | 0.618 | 0.660 | 1.001 |
shape | 20.127 | 20.125 | 0.593 | 19.152 | 21.110 | 1.001 |
Plot the modeled allometric relationship with 90% prediction bounds. In Bayesian statistics, the 90% credible interval is defined such that the true value is contained within it with a probability of 90%.
By comparison, in frequentist statistics the 90% confidence interval is constructed in such a way that, if the same experiment were repeated an infinite number of times, the true value would be contained in the interval in 90% of these repetitions.
See McElreath 2018.
### obtain model predictions over range
# range of x var to predict
height_range = dplyr::tibble(
tree_height_m = seq(
from = 0
, to = round(max(crowns_sf$tree_height_m)*1.05,0)
, by = 0.1 # by 0.1 m increments
)
)
# predict and put estimates in a data frame
pred_mod_nl_pop_temp = predict(
mod_nl_pop
, newdata = height_range
, probs = c(.05, .95)
) %>%
dplyr::as_tibble() %>%
dplyr::rename(
lower_b = 3, upper_b = 4
) %>%
dplyr::rename_with(tolower) %>%
dplyr::select(-c(est.error)) %>%
dplyr::bind_cols(height_range)
# str(pred_mod_nl_pop_temp)
# plot predictions with data
ggplot(
data = pred_mod_nl_pop_temp
, mapping = aes(x = tree_height_m)
) +
geom_ribbon(
mapping = aes(ymin = lower_b, ymax = upper_b)
, fill = "grey88"
) +
geom_line(
aes(y = estimate)
, color = "navy"
, lwd = 1
) +
labs(
y = "DBH (cm)"
, x = "Tree Ht. (m)"
, title = "Regional height to DBH allometry from US Forest Inventory and Analysis (FIA) data"
) +
theme_light() +
theme(legend.position = "none", plot.title = element_text(size = 9))
# attach allometric data to CHM derived trees and canopy data
crowns_sf_joined_stems_temp = crowns_sf_joined_stems_temp %>%
# join with model predictions at 0.1 m height intervals
dplyr::mutate(
tree_height_m_tnth = round(tree_height_m,1) %>% as.character()
) %>%
dplyr::inner_join(
pred_mod_nl_pop_temp %>%
dplyr::rename(
tree_height_m_tnth=tree_height_m
, est_dbh_cm = estimate
, est_dbh_cm_lower = lower_b
, est_dbh_cm_upper = upper_b
) %>%
dplyr::mutate(tree_height_m_tnth=as.character(tree_height_m_tnth))
, by = dplyr::join_by(tree_height_m_tnth)
) %>%
dplyr::select(-tree_height_m_tnth) %>%
dplyr::mutate(
est_dbh_pct_diff = abs(stem_dbh_cm-est_dbh_cm)/est_dbh_cm
)
# what is the estimated difference
# summary(crowns_sf_joined_stems_temp$est_dbh_pct_diff)
# crowns_sf_joined_stems_temp %>% dplyr::glimpse()
### build training data set
dbh_training_data_temp = crowns_sf_joined_stems_temp %>%
sf::st_drop_geometry() %>%
dplyr::filter(
!is.na(stem_dbh_cm)
& stem_dbh_cm >= est_dbh_cm_lower
& stem_dbh_cm <= est_dbh_cm_upper
) %>%
dplyr::group_by(treeID) %>%
# select the minimum difference to regional dbh estimate
dplyr::filter(
est_dbh_pct_diff==min(est_dbh_pct_diff)
) %>%
# just take one if same dbh
dplyr::filter(
dplyr::row_number()==1
) %>%
dplyr::ungroup() %>%
dplyr::select(c(
treeID # id
, stem_dbh_cm # y
# x vars
, crown_area_m2, tree_height_m
, median_crown_ht_m, mean_crown_ht_m, min_crown_ht_m
, tidyselect::starts_with("comp_")
))
# dbh_training_data_temp %>% dplyr::glimpse()
View correlation matrix of potential predictor variables
# correlation matrix of potential predictors
dbh_training_data_temp %>%
dplyr::select(-c(treeID,stem_dbh_cm)) %>%
cor() %>%
corrplot::corrplot(type = "lower", diag = F)
# remove highly correlated predictors
dbh_training_data_temp = dbh_training_data_temp %>%
dplyr::select(c(
treeID # id
, stem_dbh_cm # y
# x vars
, crown_area_m2, tree_height_m
, min_crown_ht_m
, tidyselect::starts_with("comp_")
))
Use the SfM-detected stems remaining after the filtering workflow using regional allometries as the training data in the local DBH to height allometric relationship model.
Tinkham et al. (2022) found the simple linear form (\(DBH_i = \beta_0 + \beta_1 \cdot Height_i\), where \(i\) is the individual tree estimate from UAS SfM data) resulted in the best estimates for both tree and stand level summarizations. However, this workflow uses a random forest model with the randomForest package to estimate DBH based on the sample of DBH values extracted from the point cloud (example here) in the Detect Stems section.
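For reference, a minimal sketch (an aside, not part of this workflow) of that simple linear form fit to the same training data with lm():

# a minimal sketch (not part of this workflow): the simple linear form
# DBH = b0 + b1 * Height from Tinkham et al. (2022)
lm_dbh_temp = lm(stem_dbh_cm ~ tree_height_m, data = dbh_training_data_temp)
# summary(lm_dbh_temp)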
The random forest model is "tuned" using the randomForest::tuneRF function to determine the optimal mtry hyperparameter. The mtry parameter represents the number of variables randomly sampled as candidates at each split (view more detail here). The randomForest::tuneRF function starts at a supplied value of mtry (mtryStart) and changes it by a step factor until the out-of-bag (OOB) error stops improving by a specified amount. For example, the code below uses a step factor of 0.5 and stops once the OOB error improves by less than 1%.
set.seed(21)
# dbh_training_data_temp %>% dplyr::glimpse()
### tuning RF model
# If we are interested in just starting out and tuning the mtry parameter
# we can use randomForest::tuneRF for a quick and easy tuning assessment.
# tuneRF will start at a value of mtry that you supply and change it by a
# certain step factor until the OOB error stops improving by a specified amount.
rf_tune_temp = randomForest::tuneRF(
y = dbh_training_data_temp$stem_dbh_cm
, x = dbh_training_data_temp %>% dplyr::select(-c(treeID,stem_dbh_cm))
, stepFactor = 0.5
, ntreeTry = 500
, mtryStart = 0.5
, improve = 0.01
, plot = F
, trace = F
)
# rf_tune_temp
### Run a randomForest model to predict DBH using various crown predictors
stem_prediction_model = randomForest::randomForest(
y = dbh_training_data_temp$stem_dbh_cm
, x = dbh_training_data_temp %>% dplyr::select(-c(treeID,stem_dbh_cm))
, mtry = rf_tune_temp %>%
dplyr::as_tibble() %>%
dplyr::filter(OOBError==min(OOBError)) %>%
dplyr::pull(mtry)
, na.action = na.omit
)
# stem_prediction_model
# str(stem_prediction_model)
Random forest variable importance plot
#variable importance plot
randomForest::varImpPlot(stem_prediction_model, main = "RF variable importance plot for DBH estimate")
Estimated versus observed DBH
data.frame(
dbh_training_data_temp
, predicted = stem_prediction_model$predicted
) %>%
ggplot() +
geom_abline() +
geom_point(mapping = aes(x = stem_dbh_cm, y = predicted)) +
scale_x_continuous(limits = c(0,max(dbh_training_data_temp$stem_dbh_cm)*1.05)) +
scale_y_continuous(limits = c(0,max(dbh_training_data_temp$stem_dbh_cm)*1.05)) +
labs(
x = "SfM DBH (cm)"
, y = "Predicted DBH (cm) by RF"
) +
theme_light()
Table of model performance
data.frame(
mae = Metrics::mae(dbh_training_data_temp$stem_dbh_cm, stem_prediction_model$predicted)
, mape = Metrics::mape(dbh_training_data_temp$stem_dbh_cm, stem_prediction_model$predicted) %>%
scales::percent(accuracy = .01)
, rmse = Metrics::rmse(dbh_training_data_temp$stem_dbh_cm, stem_prediction_model$predicted)
) %>%
kableExtra::kbl(
caption = "Random forest model prediction performance for DBH (cm)"
, col.names = c(
"Mean Abs. Error (cm)"
, "Mean Abs. Percent Error"
, "Root Mean Squared Error (cm)"
)
, digits = 2
) %>%
kableExtra::kable_styling()
Mean Abs. Error (cm) | Mean Abs. Percent Error | Root Mean Squared Error (cm) |
---|---|---|
4.01 | 17.32% | 4.97 |
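A quick complementary fit statistic (a sketch, not in the original script) is a pseudo R-squared based on the out-of-bag predictions:

# a minimal sketch (not in the original script): pseudo R-squared from the
# out-of-bag predictions of the random forest model
cor(dbh_training_data_temp$stem_dbh_cm, stem_prediction_model$predicted)^2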
Use the model built in the prior section to estimate DBH for all trees with missing values.
The final result is a vector dataset of tree crowns created in this section with DBH values for: i) the training data based on the DBH values extracted from the point cloud in the Detect Stems section that passed the DBH filtering process; and ii) DBH values for all other trees using predicted DBH values based on SfM-derived DBH and height allometries.
###___________________________________________________________________###
### Predict missing radius values for the top down crowns with no DBH ###
###___________________________________________________________________###
crowns_sf_predict_only_temp = crowns_sf %>%
sf::st_drop_geometry() %>%
dplyr::anti_join(
dbh_training_data_temp
, by = dplyr::join_by("treeID")
) %>%
dplyr::select(
dbh_training_data_temp %>% dplyr::select(-c(stem_dbh_cm)) %>% names()
)
# get predicted dbh
predicted_dbh_cm_temp = predict(
stem_prediction_model
, crowns_sf_predict_only_temp %>% dplyr::select(-treeID)
)
# summary(predicted_dbh_cm_temp)
## combine predicted data with training data for full data set for all tree crowns with a matched tree top
crowns_sf_with_dbh = crowns_sf %>%
# join training data
dplyr::left_join(
dbh_training_data_temp %>%
dplyr::mutate(is_training_data = T) %>%
dplyr::select(treeID, is_training_data, stem_dbh_cm)
, by = dplyr::join_by("treeID")
) %>%
# join with predicted data estimates
dplyr::left_join(
crowns_sf_predict_only_temp %>%
dplyr::mutate(
predicted_dbh_cm = predicted_dbh_cm_temp
) %>%
dplyr::select(treeID, predicted_dbh_cm)
, by = dplyr::join_by("treeID")
) %>%
# clean up data and calculate metrics from dbh
dplyr::mutate(
is_training_data = dplyr::coalesce(is_training_data,F)
, dbh_cm = dplyr::coalesce(stem_dbh_cm, predicted_dbh_cm)
, dbh_m = dbh_cm/100
, radius_m = dbh_m/2
, basal_area_m2 = pi * (radius_m)^2
, basal_area_ft2 = basal_area_m2 * 10.764
) %>%
dplyr::select(
!tidyselect::ends_with("_dbh_cm")
)
# write the data to the disk
sf::st_write(
crowns_sf_with_dbh
, paste0(config$delivery_spatial_dir, "/final_detected_crowns.gpkg")
, append = FALSE
, quiet = TRUE
)
### plot
ggplot(data = crowns_sf_with_dbh) +
geom_density(
mapping = aes(x=dbh_cm)
, fill = "navy"
, color = "gray"
, alpha = 0.9
) +
labs(
x = "DBH (cm)"
, y = "Density"
, title = "Distribution of tree DBH"
) +
theme_light()
### plot
ggplot(
data = crowns_sf_with_dbh
, mapping = aes(y=tree_height_m, x = dbh_cm)
) +
geom_smooth(
method = "loess"
, span = 1
, color = "gray44"
) +
geom_point(
mapping = aes(color = is_training_data)
, alpha = 0.6
) +
labs(
x = "DBH (cm)"
, y = "Tree Ht. (m)"
, color = "Training Data"
, title = "SfM derived tree height and DBH relationship"
) +
scale_color_manual(values = c("gray", "firebrick")) +
theme_light() +
theme(
legend.position = "bottom"
, legend.direction = "horizontal"
)
# get tree points
map_temp = crowns_sf_with_dbh %>%
sf::st_drop_geometry() %>%
sf::st_as_sf(coords = c("tree_x", "tree_y"), crs = sf::st_crs(crowns_sf_with_dbh))
# map
mapview::mapview(
las_ctg@data$geometry
, color = "black"
, lwd = 3, alpha.regions = 0.0, legend = F, label = F
, layer.name = "las boundary"
) +
mapview::mapview(
map_temp
, zcol = "tree_height_m"
, col.regions = viridis::viridis(20)
, cex = 3
, legend = T
, label = F
, layer.name = "height_m"
) +
mapview::mapview(
map_temp
, zcol = "dbh_cm"
, col.regions = viridis::inferno(10)
, cex = 3
, legend = T
, label = F
, layer.name = "dbh_cm"
, hide = T
)
Utilize the final data produced in the prior section, which includes unique trees and tree locations with tree height based on identification from the CHM, tree crown information, and estimated DBH values for the entire study area extent based on the original laz input folder (../data/las_raw).

Common silvicultural metrics are calculated for the entire extent. Note that stand-level summaries can be computed if stand vector data is provided; a sketch of this join follows.
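A minimal sketch (commented out; assumes a hypothetical stands.gpkg polygon layer with a standID attribute) of how a stand ID could be attached to each tree crown before the grouping below:

# a minimal sketch (hypothetical stands.gpkg layer; not part of this workflow):
# stands_sf = sf::st_read("../data/stands.gpkg", quiet = TRUE)
# crowns_sf_with_dbh = crowns_sf_with_dbh %>%
#   sf::st_join(stands_sf %>% dplyr::select(standID))
# # then replace plotID = "1" below with the joined standID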
### stand-level summaries
silv_metrics_temp = crowns_sf_with_dbh %>%
sf::st_drop_geometry() %>%
dplyr::ungroup() %>%
dplyr::mutate(
plotID = "1" # can spatially join to plot vectors if available
, plot_area_m2 = las_ctg@data$geometry %>%
sf::st_union() %>%
sf::st_area() %>% # result is m2
as.numeric()
, plot_area_ha = plot_area_m2/10000 # convert to ha
) %>%
dplyr::group_by(plotID,plot_area_ha) %>%
dplyr::summarise(
n_trees = dplyr::n_distinct(treeID)
, mean_dbh_cm = mean(dbh_cm, na.rm = T)
, mean_tree_height_m = mean(tree_height_m, na.rm = T)
, loreys_height_m = sum(basal_area_m2*tree_height_m, na.rm = T) / sum(basal_area_m2, na.rm = T)
, basal_area_m2 = sum(basal_area_m2, na.rm = T)
, sum_dbh_cm_sq = sum(dbh_cm^2, na.rm = T)
) %>%
dplyr::ungroup() %>%
dplyr::mutate(
trees_per_ha = (n_trees/plot_area_ha)
, basal_area_m2_per_ha = (basal_area_m2/plot_area_ha)
, qmd_cm = sqrt(sum_dbh_cm_sq/n_trees)
) %>%
dplyr::select(-c(sum_dbh_cm_sq)) %>%
# convert to imperial units
dplyr::mutate(
dplyr::across(
.cols = tidyselect::ends_with("_cm")
, ~ .x * 0.394
, .names = "{.col}_in"
)
, dplyr::across(
.cols = tidyselect::ends_with("_m")
, ~ .x * 3.28
, .names = "{.col}_ft"
)
, dplyr::across(
.cols = tidyselect::ends_with("_m2_per_ha")
, ~ .x * 4.359
, .names = "{.col}_ftac"
)
, dplyr::across(
.cols = tidyselect::ends_with("_per_ha") & !tidyselect::ends_with("_m2_per_ha")
, ~ .x * 0.405
, .names = "{.col}_ac"
)
, dplyr::across(
.cols = tidyselect::ends_with("_area_ha")
, ~ .x * 2.471
, .names = "{.col}_ac"
)
, dplyr::across(
.cols = tidyselect::ends_with("_m2")
, ~ .x * 10.764
, .names = "{.col}_ft2"
)
) %>%
dplyr::rename_with(
.fn = function(x){dplyr::case_when(
stringr::str_ends(x,"_cm_in") ~ stringr::str_replace(x,"_cm_in","_in")
, stringr::str_ends(x,"_m_ft") ~ stringr::str_replace(x,"_m_ft","_ft")
, stringr::str_ends(x,"_m2_per_ha_ftac") ~ stringr::str_replace(x,"_m2_per_ha_ftac","_ft2_per_ac")
, stringr::str_ends(x,"_per_ha_ac") ~ stringr::str_replace(x,"_per_ha_ac","_per_ac")
, stringr::str_ends(x,"_area_ha_ac") ~ stringr::str_replace(x,"_area_ha_ac","_area_ac")
, stringr::str_ends(x,"_m2_ft2") ~ stringr::str_replace(x,"_m2_ft2","_ft2")
, TRUE ~ x
)}
) %>%
dplyr::select(
"plotID"
, "n_trees"
, "plot_area_ha"
, "trees_per_ha"
, "mean_dbh_cm"
, "qmd_cm"
, "mean_tree_height_m"
, "loreys_height_m"
, "basal_area_m2"
, "basal_area_m2_per_ha"
# imperial
, "plot_area_ac"
, "trees_per_ac"
, "mean_dbh_in"
, "qmd_in"
, "mean_tree_height_ft"
, "loreys_height_ft"
, "basal_area_ft2"
, "basal_area_ft2_per_ac"
)
### export tabular
write.csv(
silv_metrics_temp
, paste0(config$delivery_spatial_dir, "/final_plot_silv_metrics.csv")
, row.names = F
)
### Join back to spatial data
# this would just be a vector file if available
silv_metrics_temp = las_ctg@data %>%
dplyr::select(
1, geometry
) %>%
dplyr::mutate(
plotID = "1" # can spatially join to plot vectors if available
) %>%
dplyr::group_by(plotID) %>%
dplyr::summarise(
geometry = sf::st_union(geometry)
) %>%
dplyr::ungroup() %>%
# join with plot data
dplyr::inner_join(
silv_metrics_temp
, by = dplyr::join_by("plotID")
)
# str(silv_metrics_temp)
### export spatial
sf::st_write(
silv_metrics_temp
, paste0(config$delivery_spatial_dir, "/final_plot_silv_metrics.gpkg")
, append = FALSE
, quiet = TRUE
)
Table of silvicultural measurements in both metric and imperial units
### summary table
silv_metrics_temp %>%
sf::st_drop_geometry() %>%
dplyr::filter(plotID==silv_metrics_temp$plotID[1]) %>%
tidyr::pivot_longer(
cols = -c(plotID)
, names_to = "measure"
, values_to = "value"
) %>%
kableExtra::kbl(
caption = "Silvicultural metrics in both metric and imperial units"
, digits = 2
) %>%
kableExtra::kable_styling()
plotID | measure | value |
---|---|---|
1 | n_trees | 1545.00 |
1 | plot_area_ha | 4.00 |
1 | trees_per_ha | 386.25 |
1 | mean_dbh_cm | 22.01 |
1 | qmd_cm | 22.44 |
1 | mean_tree_height_m | 7.89 |
1 | loreys_height_m | 9.26 |
1 | basal_area_m2 | 61.10 |
1 | basal_area_m2_per_ha | 15.27 |
1 | plot_area_ac | 9.88 |
1 | trees_per_ac | 156.43 |
1 | mean_dbh_in | 8.67 |
1 | qmd_in | 8.84 |
1 | mean_tree_height_ft | 25.87 |
1 | loreys_height_ft | 30.37 |
1 | basal_area_ft2 | 657.63 |
1 | basal_area_ft2_per_ac | 66.58 |
Before building a local DBH model using the SfM-extracted height, crown area, and competition metrics as independent variables and the SfM-derived DBH values extracted from the point cloud as the dependent variable, the SfM-derived DBH estimates should first be filtered using regionally derived height-to-DBH allometries.
The DBH filtering methodology described by Tinkham et al. (2022), quoted below, is not applied here directly; a generalized version is developed in this section:
This filtering used local Forest Inventory and Analysis (FIA) data from the study region to develop…a model representing the regional height to DBH relationship. This model used FIA data from Colorado with greater than 70% ponderosa pine by basal area, densities exceeding 10 m2 ha−1 basal area represented untreated conditions, and site indices within 3 m of the study site’s estimated site index of 22 m at a base age of 100 years were used to maintain a similar height to DBH relationship. Based on the results from Swayze et al. (2021), the model was fit as a second order polynomial using the stats R package. Subsequently, Equation (3) was applied to predict a regionally expected DBH value for each tree height. For trees with multiple SfM-derived DBH values, we compared each estimate to the predictions from Equation (3) and retained the SfM-derived DBH with the smallest error.
Filtering was done against the 90th percentile prediction bounds of Equation (3). If a SfM-derived DBH was outside these bounds, we removed it from the dataset used for further analysis. This process resulted in a dataset with the height for all identified trees and a subset of DBH values meeting regional observations of tree growth form.
Equation 3: \[ \textrm{DBH (cm)} = 1.6121 \times \textrm{Height (m)} + \textrm{Height (m)}^{0.0276} \]
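As a minimal illustration only (not the authors' code), Equation (3) as printed can be evaluated directly in R to obtain the regionally expected DBH for a given height; the 90th percentile prediction bounds used for filtering would come from the fitted regional model itself:
### illustrative only: regionally expected DBH (cm) from height (m) per Equation (3) as printed
predict_dbh_eq3 = function(height_m){
  1.6121 * height_m + height_m^0.0276
}
predict_dbh_eq3(10) # ~17.2 cm expected DBH for a 10 m tall tree
### for a tree with multiple SfM-derived DBH candidates, retain the candidate
### with the smallest absolute error versus the regional expectation
# best_dbh = dbh_candidates[which.min(abs(dbh_candidates - predict_dbh_eq3(height_m)))]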
Expanding this filtering methodology to other areas where Equation (3) may or may not be appropriate requires the use of Forest Inventory and Analysis (FIA) data to develop a model representing the height-to-DBH relationship in the particular study region. While multiple R packages exist to access FIA data (e.g. FIESTA and rFIA), the tree-level data required for building such a model is either too sparse or too coarse (Tinkham et al. 2018): FIA sample plots are widely separated and do not provide wall-to-wall coverage of forest information, while state-level tree lists span diverse forest types that may not accurately represent a particular area of interest. Furthermore, expecting a spatial analyst to select the most appropriate FIA data to represent the point cloud study area may be unreasonable, as these analysts are often not part of the data acquisition.
TreeMap is a 30 × 30 m resolution gridded dataset of forest plot identifiers developed by Riley et al. (2021) that imputes representative FIA plots across the forests of the conterminous United States (CONUS). This raster map of representative plot identifiers can be linked to a number of tree- and plot-level attributes stored in the accompanying tables, or accessed via one of the previously mentioned R packages or the [FIA Datamart](https://apps.fs.usda.gov/fia/datamart/datamart.html).
The workflow outlined below re-uses the TreeMap raster (TreeMap2016.tif) and tree-level table (TreeMap2016_tree_table.csv) in input_treemap_dir, along with the point cloud data in input_las_dir, all defined at the top of this document. The workflow then:
- reads the point cloud files in input_las_dir as a las catalog
- crops the TreeMap raster to the extent of the las catalog
- weights each TreeMap plot by its cell frequency within that extent
- extracts the matching FIA tree list, keeping live trees with recorded height and DBH
- fits and compares regional height-to-DBH models using this weighted tree list
###__________________________________________________###
### BEGIN USER-DEFINED PARAMETERS ###
###__________________________________________________###
###_________________________###
### Set input las directory ###
###_________________________###
input_las_dir = "../data/las_raw"
###_________________________###
### Set input treemap directory ###
###_________________________###
input_treemap_dir = "../data/treemap"
###__________________________________________________###
### END USER-DEFINED PARAMETERS ###
###__________________________________________________###
###__________________________________________________###
### load libraries so this section can be run in stand-alone ###
###__________________________________________________###
library(tidyverse)
library(terra)
library(lidR)
library(brms)
library(tidybayes)
library(ggdist)
library(patchwork) #combine plots
library(kableExtra)
###__________________________________________________###
### read in data ###
###__________________________________________________###
# read in the point cloud files as a las catalog
las_ctg = lidR::readALSLAScatalog("../data/las_raw/")
# read in the treemap raster (values are not loaded into memory until needed)
# downloaded from: https://www.fs.usda.gov/rds/archive/Catalog/RDS-2021-0074
treemap_rast = terra::rast("../data/treemap/TreeMap2016.tif")
# filter treemap based on las
treemap_rast = treemap_rast %>%
terra::crop(
las_ctg@data$geometry %>%
sf::st_union() %>%
terra::vect() %>%
terra::project(terra::crs(treemap_rast))
)
### plot treemap raster
# plot(treemap_rast)
treemap_rast %>%
as.data.frame(xy=T) %>%
dplyr::rename(tm_id=3) %>%
ggplot(mapping = aes(x=x,y=y,fill=as.factor(tm_id))) +
geom_tile(color="gray") +
geom_text(aes(label=tm_id), size=3, color = "gray11") +
scale_fill_viridis_d(option = "turbo", alpha = 0.8) +
coord_sf(expand = FALSE) +
theme_light() +
theme(legend.position = "none", panel.grid = element_blank())
### get weights for weighting each tree in the population models
# treemap id = tm_id for linking to tabular data
tm_id_weight_temp = terra::freq(treemap_rast) %>%
dplyr::select(-layer) %>%
dplyr::rename(tm_id = value, tree_weight = count)
tm_id_weight_temp %>%
dplyr::slice_sample(n=6) %>%
kableExtra::kbl(caption="TreeMap ID plot weights") %>%
kableExtra::kable_styling()
tm_id | tree_weight |
---|---|
81860 | 26 |
60651 | 5 |
81849 | 1 |
60720 | 1 |
60657 | 17 |
37971 | 1 |
### get the TreeMap FIA tree list for only the plots included
treemap_trees_df = readr::read_csv(
"../data/treemap/TreeMap2016_tree_table.csv"
, col_select = c(
tm_id
, TREE
, STATUSCD
, DIA
, HT
)
) %>%
dplyr::rename_with(tolower) %>%
dplyr::inner_join(
tm_id_weight_temp
, by = dplyr::join_by("tm_id")
) %>%
dplyr::filter(
# keep live trees only: 1=live;2=dead
statuscd == 1
& !is.na(dia)
& !is.na(ht)
) %>%
dplyr::mutate(
dbh_cm = dia*2.54
, tree_height_m = ht/3.28084
) %>%
dplyr::select(-c(statuscd,dia,ht)) %>%
dplyr::rename(tree_id=tree)
dplyr::glimpse(treemap_trees_df)
## Rows: 211
## Columns: 5
## $ tm_id <dbl> 37971, 37971, 37971, 37971, 37971, 37971, 37971, 37971, …
## $ tree_id <dbl> 1, 2, 4, 5, 7, 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 2, 11, …
## $ tree_weight <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ dbh_cm <dbl> 41.402, 48.768, 23.622, 33.528, 35.052, 3.810, 19.558, 1…
## $ tree_height_m <dbl> 22.5552, 21.6408, 16.4592, 24.0792, 18.2880, 3.9624, 9.4…
# treemap_trees_df %>% dplyr::count(tm_id)
Modelling was done using the brms package to implement Bayesian regression in \(\textbf{R}\) via the probabilistic programming language \(\textbf{Stan}\).
In the two models tested, we ignore site/plot effects, so that all observations are treated as independent from one another. This is known as complete pooling (McElreath 2018), or simply a pooled model.
The first model tested is a quadratic linear model:
\[\begin{align*} DBH_{i} &\sim {\sf Gamma} \bigl(g(\beta_0, \beta_1, \beta_2, x_i) \bigr) \\ g(\beta_0, \beta_1, \beta_2, x_i) &= {\exp \bigl(\beta_0 + \beta_1 x_{1i} + \beta_2 x_{1i}^{2}\bigr)} \\ \; \textrm{where } x_1 &= \textrm{tree height} \end{align*}\]
The \(\sf Gamma\) response distribution constrains \(DBH_{i}\) to be strictly positive, and the \(\exp\) link keeps the conditional mean positive. Including the \(x_{1i}^{2}\) term allows the model to account for a nonlinear (e.g. parabolic) relationship between DBH and tree height.
###__________________________________________________________###
### model 1: population model, height quadratic
###__________________________________________________________###
# population model with no random effects (i.e. no group-level variation)
# quadratic model form with Gamma distribution for strictly positive response variable dbh
# set non-informative priors
mod_quad_pop = brms::brm(
formula = dbh_cm|weights(tree_weight) ~ 1 + tree_height_m + I(tree_height_m^2)
, data = treemap_trees_df
, family = brms::brmsfamily("Gamma")
, iter = 4000
)
# brms::add_criterion(mod_quad_pop, "loo")
# plot(mod_quad_pop)
# summary(mod_quad_pop)
# brms::prior_summary(mod_quad_pop)
Summary of model results
#### extract posterior draws to a df
brms::as_draws_df(
mod_quad_pop
, variable = c("^b_", "shape")
, regex = TRUE
) %>%
# quick way to get a table of summary statistics and diagnostics
posterior::summarize_draws(
"mean", "median", "sd"
, ~quantile(.x, probs = c(
0.05, 0.95
# , 0.025, 0.975
))
, "rhat"
) %>%
kableExtra::kbl(
caption = paste(
"quadratic model:"
, mod_quad_pop$formula %>% as.character() %>% purrr::pluck(1)
)
, digits = 3
) %>%
kableExtra::kable_styling()
variable | mean | median | sd | 5% | 95% | rhat |
---|---|---|---|---|---|---|
b_Intercept | 0.975 | 1.068 | 0.590 | 0.994 | 1.125 | 1.030 |
b_tree_height_m | 0.219 | 0.215 | 0.027 | 0.206 | 0.225 | 1.031 |
b_Itree_height_mE2 | -0.005 | -0.004 | 0.001 | -0.005 | -0.004 | 1.031 |
shape | 17.749 | 18.458 | 3.435 | 16.736 | 19.409 | 1.046 |
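As a quick check of the log link (not part of the original workflow), the posterior medians in the table above imply the following conditional mean curve:
### illustrative only: implied conditional mean of DBH using the posterior medians above
mu_quad = function(height_m){
  exp(1.068 + 0.215 * height_m - 0.004 * height_m^2)
}
mu_quad(10) # ~16.7 cm expected DBH at a height of 10 m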
Parameter estimates and credible intervals
### use tidybayes to summarize draws and plot
mod_quad_pop %>%
tidybayes::gather_draws(
`b_.*`
# , `shape`
, regex = TRUE
) %>%
dplyr::rename_with(~stringr::str_remove_all(.x, "\\.")) %>%
ggplot(aes(y = variable, x = value)) +
geom_vline(xintercept = 0, color = "gray44", linetype = "dashed") +
ggdist::stat_interval(.width = c(0.5, 0.9, 0.95), alpha = 0.8) +
# ggdist::stat_halfeye() +
stat_summary(
fun = "median"
, color = "black"
, size = 2
, geom = "point"
) +
scale_color_viridis_d(option = "mako") +
labs(y = "", x = "parameter estimate") +
theme_bw() +
theme(
legend.position = "bottom"
, legend.direction = "horizontal"
, panel.grid = element_blank()
)
# plot another way
# mod_quad_pop %>%
# tidybayes::gather_draws(
# `b_.*`
# # , `shape`
# , regex = TRUE
# ) %>%
# tidybayes::median_qi(
# .width = c(0.95, 0.5) #,.66)
# ) %>%
# ggplot(aes(y = .variable, x = .value, xmin = .lower, xmax = .upper)) +
# geom_vline(xintercept = 0) +
# ggdist::geom_pointinterval() +
# theme_bw()
Model predictions versus FIA tree data
### obtain model predictions over range
# range of x var to predict
height_range = dplyr::tibble(
tree_height_m = seq(
from = 0
, to = round(max(treemap_trees_df$tree_height_m)*1.05,0)
, by = 1
)
)
# predict and put estimates in a data frame
pred_mod_quad_pop_temp = predict(
mod_quad_pop
, newdata = height_range
, probs = c(.05, .95) # c(.025, .975)
) %>%
dplyr::as_tibble() %>%
dplyr::rename(
lower_b = 3, upper_b = 4
) %>%
dplyr::rename_with(tolower) %>%
dplyr::bind_cols(height_range)
# str(pred_mod_quad_pop_temp)
# plot predictions with data
plt_mod_quad_pop = ggplot(
data = treemap_trees_df
, mapping = aes(x = tree_height_m)
) +
geom_ribbon(
data = pred_mod_quad_pop_temp
, mapping = aes(ymin = lower_b, ymax = upper_b)
, fill = "grey88"
) +
geom_line(
data = pred_mod_quad_pop_temp
, aes(y = estimate)
, color = "navy"
, lwd = 1
) +
geom_point(
mapping = aes(y = dbh_cm)
, color = "gray33"
, alpha = 0.7
) +
scale_y_continuous(limits = c(0, max(treemap_trees_df$dbh_cm)*1.45 )) +
labs(
y = "DBH (cm)"
, x = "Tree Ht. (m)"
, title = paste(
"quadratic model:"
, mod_quad_pop$formula %>% as.character() %>% purrr::pluck(1)
)
) +
theme_light() +
theme(legend.position = "none", plot.title = element_text(size = 9))
plt_mod_quad_pop
The second model tested is a non-linear model:
\[\begin{align*} DBH_{i} &\sim {\sf Gamma} \bigl(g(\beta_1, \beta_2, x_i) \bigr) \\ g(\beta_1, \beta_2, x_i) &= {\exp \bigl(\beta_1 x_{1i} + x_{1i}^{\beta_2}\bigr)} \\ \; \textrm{where } x_1 &= \textrm{tree height} \end{align*}\]
The \(\sf Gamma\) response distribution constrains \(DBH_{i}\) to be strictly positive.
###__________________________________________________________###
### model 2: population model, height non-linear
###__________________________________________________________###
# population model with no random effects (i.e. no group-level variation)
# non-linear model form with Gamma distribution for strictly positive response variable dbh
# set up priors
p_temp <- prior(normal(1, 2), nlpar = "b1") +
prior(normal(0, 2), nlpar = "b2")
mod_nl_pop = brms::brm(
formula = brms::bf(
formula = dbh_cm|weights(tree_weight) ~ (b1 * tree_height_m) + tree_height_m^b2
, b1 + b2 ~ 1
, nl = TRUE # !! specify non-linear
)
, data = treemap_trees_df
, prior = p_temp
, family = brms::brmsfamily("Gamma")
, iter = 4000
)
# brms::add_criterion(mod_nl_pop, "loo")
# plot(mod_nl_pop)
# summary(mod_nl_pop)
Summary of model results
#### extract posterior draws to a df
brms::as_draws_df(
mod_nl_pop
, variable = c("^b_", "shape")
, regex = TRUE
) %>%
# quick way to get a table of summary statistics and diagnostics
posterior::summarize_draws(
"mean", "median", "sd"
, ~quantile(.x, probs = c(
0.05, 0.95
# , 0.025, 0.975
))
, "rhat"
) %>%
dplyr::mutate(
variable = stringr::str_remove_all(variable, "_Intercept")
) %>%
kableExtra::kbl(
caption = paste(
"non-linear model:"
, mod_nl_pop$formula %>% as.character() %>% purrr::pluck(1)
)
, digits = 3
) %>%
kableExtra::kable_styling()
variable | mean | median | sd | 5% | 95% | rhat |
---|---|---|---|---|---|---|
b_b1 | -0.159 | -0.159 | 0.013 | -0.181 | -0.137 | 1.002 |
b_b2 | 0.640 | 0.640 | 0.013 | 0.617 | 0.660 | 1.002 |
shape | 20.124 | 20.113 | 0.600 | 19.143 | 21.124 | 1.001 |
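The same quick check for the non-linear model (again, not part of the original workflow), using the posterior medians above:
### illustrative only: implied conditional mean of DBH using the posterior medians above
mu_nl = function(height_m){
  exp(-0.159 * height_m + height_m^0.640)
}
mu_nl(10) # ~16.1 cm expected DBH at a height of 10 m, similar to the quadratic model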
Parameter estimates and credible intervals
### use tidybayes to summarize draws and plot
mod_nl_pop %>%
tidybayes::gather_draws(
`b_.*`
# , `shape`
, regex = TRUE
) %>%
dplyr::rename_with(~stringr::str_remove_all(.x, "\\.")) %>%
dplyr::mutate(
variable = stringr::str_remove_all(variable, "_Intercept")
) %>%
ggplot(aes(y = variable, x = value)) +
geom_vline(xintercept = 0, color = "gray44", linetype = "dashed") +
ggdist::stat_interval(.width = c(0.5, 0.9, 0.95), alpha = 0.8) +
# ggdist::stat_halfeye() +
stat_summary(
fun = "median"
, color = "black"
, size = 2
, geom = "point"
) +
scale_color_viridis_d(option = "mako") +
labs(y = "", x = "parameter estimate") +
theme_bw() +
theme(
legend.position = "bottom"
, legend.direction = "horizontal"
, panel.grid = element_blank()
)
# plot another way
# mod_nl_pop %>%
# tidybayes::gather_draws(
# `b_.*`
# # , `shape`
# , regex = TRUE
# ) %>%
# tidybayes::median_qi(
# .width = c(0.95, 0.5) #,.66)
# ) %>%
# ggplot(aes(y = .variable, x = .value, xmin = .lower, xmax = .upper)) +
# geom_vline(xintercept = 0) +
# ggdist::geom_pointinterval() +
# theme_bw()
Model predictions versus FIA tree data
### obtain model predictions over range
# predict and put estimates in a data frame
pred_mod_nl_pop_temp = predict(
mod_nl_pop
, newdata = height_range
, probs = c(.05, .95) # c(.025, .975)
) %>%
dplyr::as_tibble() %>%
dplyr::rename(
lower_b = 3, upper_b = 4
) %>%
dplyr::rename_with(tolower) %>%
dplyr::bind_cols(height_range)
# str(pred_mod_nl_pop_temp)
# plot predictions with data
plt_mod_nl_pop = ggplot(
data = treemap_trees_df
, mapping = aes(x = tree_height_m)
) +
geom_ribbon(
data = pred_mod_nl_pop_temp
, mapping = aes(ymin = lower_b, ymax = upper_b)
, fill = "grey88"
) +
geom_line(
data = pred_mod_nl_pop_temp
, aes(y = estimate)
, color = "navy"
, lwd = 1
) +
geom_point(
mapping = aes(y = dbh_cm)
, color = "gray33"
, alpha = 0.7
) +
scale_y_continuous(limits = c(0, max(treemap_trees_df$dbh_cm)*1.45 )) +
labs(
y = "DBH (cm)"
, x = "Tree Ht. (m)"
, title = paste(
"non-linear model:"
, mod_nl_pop$formula %>% as.character() %>% purrr::pluck(1)
)
) +
theme_light() +
theme(legend.position = "none", plot.title = element_text(size = 9))
plt_mod_nl_pop
The pointwise log-likelihood can be used, among other things, to calculate leave-one-out cross-validation (LOO; Vehtari, Gelman, and Gabry 2015), allowing for comparison of different models applied to the same data (lower LOOIC values indicate better model fit).
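The code below assembles the LOO estimates into a comparison table manually; note that the loo_compare function re-exported by brms performs an equivalent comparison of expected log predictive density directly (a minimal alternative, shown commented):
### alternative: compare models directly via loo_compare
# brms::loo_compare(brms::loo(mod_quad_pop), brms::loo(mod_nl_pop))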
### compare models
# The pointwise log-likelihood can be used, among others, to calculate
# the Watanabe-Akaike information criterion (WAIC) proposed by Watanabe (2010)
# and leave-one-out cross-validation (LOO; Vehtari, Gelman, and Gabry 2015)
# both allowing to compare different models applied to the same data
# (lower WAICs and LOOs indicate better model fit).
dplyr::bind_rows(
brms::loo(mod_quad_pop) %>%
purrr::pluck("estimates") %>%
dplyr::as_tibble(rownames = NA) %>%
tibble::rownames_to_column("metric") %>%
dplyr::mutate(model = "mod_quad_pop")
, brms::loo(mod_nl_pop) %>%
purrr::pluck("estimates") %>%
dplyr::as_tibble(rownames = NA) %>%
tibble::rownames_to_column("metric") %>%
dplyr::mutate(model = "mod_nl_pop")
) %>%
dplyr::filter(metric == "looic") %>%
dplyr::arrange(`Estimate`) %>%
dplyr::mutate(model_rank = dplyr::row_number()) %>%
dplyr::relocate(model_rank, model) %>%
kableExtra::kbl(
caption = "model comparison based on leave-one-out cross-validation (LOO)"
, digits = 3
) %>%
kableExtra::kable_styling()
model_rank | model | metric | Estimate | SE |
---|---|---|---|---|
1 | mod_nl_pop | looic | 12629.21 | 798.133 |
2 | mod_quad_pop | looic | 422264.25 | 31683.446 |
Based on these data, the non-linear model of DBH on height provides the better fit.
plt_mod_quad_pop + plt_mod_nl_pop
This section uses the height normalized point cloud grid tiles (created in this section) to create a CHM raster using only points within a specified height range, based on the program defined in this section. The stem section at DBH includes points that fall between 1.2 and 1.4 meters above ground.
G. Woolsey notes: it is unclear what this
data is used for…the las
file may be used to extract DBH
measurements using the TreeLS
package. See page 3 of Swayze
and Tinkham (2022). However, it is likely that the point cloud would
need to be filtered based on point color to ensure extraction of stem
points and not understory (e.g. tree regeneration) points.
Call the function to create the stem-only CHM
###_____________________________________________________###
### Rasterize the stem section at DBH to stem CHM tiles ###
###_____________________________________________________###
rasterize_tiles_to_chm(
config = config
, my_des_chm_res = 0.5 #0.07 #in meters
, calculate_extent = F
, my_las_grid = las_grid # generate_grid_over_extent(las_ctg, desired_tile_res)
, my_las_ctg = las_ctg # lidR::readLAScatalog(config$input_las_dir)
, my_min_z = 1.2
, my_max_z= 1.4
)
Map the stem-only CHM
terra::rast(paste0(
config$delivery_spatial_dir
,"/chm_raster_z"
, as.character(1.2)
, "to"
, as.character(1.4)
, "m_res"
, as.character(0.5)
, "m.tif"
)) %>%
# terra::aggregate(fact = 20, fun = "mean") %>%
stars::st_as_stars() %>%
mapview::mapview(
layer.name = "stem-only ht (m)"
, alpha.regions = 0.6
)
# # uncomment for ggplot
# as.data.frame(xy=T) %>%
# dplyr::rename_with(tolower) %>%
# dplyr::rename(f=3) %>%
# ggplot(mapping = aes(x=x,y=y,fill=f)) +
# geom_tile() +
# scale_fill_viridis_c(na.value = "black") +
# labs(fill = "elevation (m)") +
# theme_void()
Delineate tree crowns using TreeLS identified trees
This step is distinct from the tree crown delineation in the prior section, which used the Individual Tree Detection based on the CHM to delineate tree crown vector (i.e. polygon) shapes. Instead, this section uses the stems detected using the TreeLS workflow in this section to delineate tree crown vector (i.e. polygon) shapes.
G. Woolsey notes: It is unclear if this step is necessary. It seems like the “master” tree list should be based on the overhead tree identification using the CHM data. This “master” tree list can be spatially joined to the stems detected in the TreeLS workflow to attach DBH estimates. If more than one stem location is attached to a single tree from the master list, the best DBH estimate can be selected based on the process outlined in Swayze and Tinkham (2022); a sketch of this join follows the next note.
G. Woolsey notes: Update - in the original script from N. Swayze, these “bottom-up” tree crown delineations were utilized in the random forest model to generate the local model of DBH, with crown metrics and tree height as independent variables. Furthermore, in the original script there was no filtering of DBH values based on regional allometric relationships, and no filtering of stem DBH estimates that potentially came from the same tree but were duplicated in the dataset due to the grid tile processing and later combining. G. Woolsey updated the local DBH model (crown metrics and tree height as independent variables) using the methods described in this section.
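A minimal sketch of the join described in these notes, using the existing crowns_sf_with_dbh master list and dbh_locations_sf stem locations; the stem DBH column name and the regional prediction function predict_dbh_regional() are hypothetical placeholders:
### hypothetical sketch: attach TreeLS stem DBH estimates to the CHM-based master tree list
# master_with_stems = crowns_sf_with_dbh %>%
#   sf::st_join(
#     dbh_locations_sf %>% dplyr::select(stem_dbh_cm = dbh_cm) # hypothetical column name
#   ) %>%
#   dplyr::group_by(treeID) %>%
#   # if several stems fall within one crown, keep the DBH closest to the regional expectation
#   dplyr::slice_min(abs(stem_dbh_cm - predict_dbh_regional(tree_height_m)), n = 1) %>%
#   dplyr::ungroup()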
### Use the stems detected with TreeLS::treeMap as tree tops to delineate crowns using MCWS from ForestTools
crowns_treels_temp = ForestTools::mcws(
treetops = dbh_locations_sf %>%
dplyr::mutate(treeID = dplyr::row_number()) %>%
dplyr::rename(Z=tree_height_m) %>%
dplyr::select(treeID, Z) %>%
sf::st_zm(drop = T) # drops z values
, CHM = raster::raster(master_chm) # converts to raster data
, minHeight = minimum_tree_height
) %>% terra::rast()
### Write the crown raster to the disk
terra::writeRaster(
crowns_treels_temp
, paste0(config$delivery_spatial_dir, "/bottom_up_detected_tree_crowns.tif")
, overwrite = TRUE
)
Plot the tree crowns with the TreeLS
identified tree
stems
ggplot() +
geom_tile(
data = crowns_treels_temp %>% as.data.frame(xy=T) %>% dplyr::rename(f=3)
, mapping = aes(x=x,y=y,fill=as.factor(f))
) +
geom_sf(
data = dbh_locations_sf
, shape = 20
, color = "black"
) +
coord_sf(
expand = FALSE
) +
scale_fill_manual(
values = viridis::turbo(n = terra::freq(crowns_treels_temp) %>% nrow()) %>% sample()
) +
labs(x = "", y = "") +
theme_light() +
theme(legend.position = "none")
Join the TreeLS stems with the TreeLS tree crowns
The delineated crown raster (a terra SpatRaster) is converted to vector data (i.e. polygons) and crowns with an area smaller than 0.1 m2 are removed. The tree tops are then spatially joined with the tree crowns.
### Convert crown raster to terra rast, then polygons, then to Sf
crowns_treels_sf_temp = crowns_treels_temp %>%
# convert raster to polygons for each individual crown
terra::as.polygons() %>%
# fix polygon validity
terra::makeValid() %>%
# reduce the number of nodes in geometries
terra::simplifyGeom() %>%
# remove holes in polygons
terra::fillHoles() %>%
# convert to sf
sf::st_as_sf() %>%
# get the crown area
dplyr::mutate(
crown_area_m2 = as.numeric(sf::st_area(.))
) %>%
#remove super small crowns
dplyr::filter(
crown_area_m2 > 0.1
)
# str(crowns_treels_sf_temp)
# ggplot(crowns_treels_sf_temp) + geom_sf(aes(fill=as.factor(layer))) +
# scale_fill_manual(values = viridis::turbo(nrow(crowns_treels_sf_temp)) %>% sample()) +
# theme_void() + theme(legend.position = "none")
### Join the crowns_treels_temp with the seeds to append data, remove Nulls
crowns_treels_sf_temp = crowns_treels_sf_temp %>%
sf::st_join(dbh_locations_sf) %>%
dplyr::select(-c(layer))
# str(crowns_treels_sf_temp)
# crowns_treels_sf_temp %>% dplyr::count(treeID) %>% nrow()
# crowns_treels_sf_temp %>%
# sf::st_drop_geometry() %>%
# dplyr::summarise(
# n = n()
# , n_distinct = dplyr::n_distinct(treeID)
# )
### Add crown data summaries
crown_sum_temp = data.frame(
mean_crown_ht_m = terra::extract(x = master_chm, y = terra::vect(crowns_treels_sf_temp), fun = "mean", na.rm = T)
, median_crown_ht_m = terra::extract(x = master_chm, y = terra::vect(crowns_treels_sf_temp), fun = "median", na.rm = T)
, min_crown_ht_m = terra::extract(x = master_chm, y = terra::vect(crowns_treels_sf_temp), fun = "min", na.rm = T)
) %>%
dplyr::select(-c(tidyselect::ends_with(".ID"))) %>%
dplyr::rename_with(~ stringr::str_remove_all(.x,".Z"))
### join crown data summary
crowns_treels_sf_temp = crowns_treels_sf_temp %>%
dplyr::bind_cols(crown_sum_temp)
# str(crowns_treels_sf_temp)
### Write the crowns_treels_temp to the disk
sf::st_write(
crowns_treels_sf_temp
, paste0(config$delivery_spatial_dir, "/bottom_up_detected_crowns.gpkg")
, quiet = TRUE, append = FALSE
)
Plot the tree crowns with the tree tops
ggplot() +
geom_sf(
data = crowns_treels_sf_temp
, mapping = aes(fill = crown_area_m2)
, color = "gray77"
) +
geom_sf(
data = dbh_locations_sf
, shape = 20
, color = "gray33"
) +
coord_sf(
expand = FALSE
) +
scale_fill_viridis_c(option = "mako", direction = -1) +
labs(fill = "Crown\nArea (m2)", x = "", y = "") +
theme_light()
G. Woolsey notes: this crown delineation methodology results in crown areas that are highly improbable. Creasy et al. (2021) measured crown areas of 163.8 (+/- 81.4) m2. Hopefully this crown delineation is not utilized in any following analysis.
This section includes an example workflow only and is not completed for the full data extent.
Additional information on colorizing point clouds can be found in The lidR Package Book.
This workflow uses point cloud data generated in this section which was exported as a
laz
file to the ../data/02_processed_data/04_las_stem_dir
directory with the Classification
data updated to: ground
points (class 2); water points (class 9); stem points (class 4);
non-stem (class 5). The color (RGB values) of the classified points are
compared to expected colors for each class to filter for “pure” stem and
canopy points.
In the literature, this process has been utilized to filter the point
cloud for stem-only points based on spectral (i.e. RGB) information to
estimate DBH. In the DBH estimation workflow
above, this filtering process was not applied. To implement this
filter, the point cloud passed to the TreeLS::tlsInventory
function would need to be filtered using points matching the desired
color.
This process is explained in Swayze et al. (2022):
a green:red ratio index (NGRR; Eq. (1)) was calculated on each point to determine the distribution (Fig. 4A) of stem and canopy NGRR values. Based on the distributions, each of the 30 point clouds were segmented to only include points with NGRR values below the 90th percentile of the sample stem points (− 0.0152), producing 30 point clouds only containing points with stem reflectance characteristics. For each of these stem point clouds, a 10 cm tall slice (1.32–1.42 m) was extracted and compressed to a height of 1.37 m to represent the DBH region on the trees…These DBH slices were later used to extract individual tree DBH values.
Equation 1:
\[ NGRR_i = \frac{Green_i - Red_i}{Green_i + Red_i} \]
, where \(NGRR_i\) is the green:red ratio index for the \(i^{th}\) point in the point cloud.
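A minimal sketch of applying NGRR with lidR, assuming a colorized las object that carries R and G attributes; the −0.0152 threshold is specific to the Swayze et al. (2022) study area and would need to be re-derived from sampled stem points for a new site:
### hypothetical sketch: filter a colorized point cloud to stem-like points via NGRR
# las_temp = lidR::readLAS("stem_slice.laz") # hypothetical colorized input with RGB attributes
# ngrr_temp = (las_temp@data$G - las_temp@data$R) / (las_temp@data$G + las_temp@data$R)
# las_temp = lidR::add_lasattribute(las_temp, ngrr_temp, name = "NGRR", desc = "green:red ratio index")
# stem_las_temp = lidR::filter_poi(las_temp, NGRR < -0.0152) # threshold from Swayze et al. (2022)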
This function assigns a user-defined color to the RGB values of an input point cloud
### this function adds a user-defined color to the RGB values of the point cloud
colorize_input_las = function(input_las, desired_color){
  # extract the RGB components (0-255) from the color name or hex code
  color_rgb = as.vector(col2rgb(desired_color))
  # one RGB value per point in the las data
  n_points = nrow(input_las@data)
  ### Use the lidR functions to colorize the point cloud
  # Adds 3 attributes named R, G, B and updates the point format
  # of the LAS object to a format that supports RGB attributes.
  # If the RGB values range from 0 to 255 they are automatically scaled to 16 bits
  input_las = lidR::add_lasrgb(
    input_las
    , R = rep(color_rgb[1], n_points)
    , G = rep(color_rgb[2], n_points)
    , B = rep(color_rgb[3], n_points)
  )
  return(input_las)
}
Filter the laz grid tile files created in this section and exported to the ../data/02_processed_data/04_las_stem_dir directory using an individual crown polygon. Points below 0.1 m in height are dropped. This example uses a tree with an SfM-derived DBH that was part of the training data for the local DBH estimation model.
### New function to crop each crown to each canopy polygon, colorize, write to disk
# crop_crowns_by_id_and_colorize_stems_and_veg = function(config, las_grid, colorize_input_las, stem_color_hex_temp, veg_color_hex_temp, ground_color_hex_temp, use_bottom_up_crowns){
###___________________________###
### Read in the las files ###
###___________________________###
### Read in the classified las files as a catalog
las_stem_ctg_temp = lidR::readTLSLAScatalog(config$las_stem_dir)
###___________________________###
### # for each individual tree crown {
###___________________________###
### Get the desired crown file
des_crown_temp = crowns_sf_with_dbh %>%
dplyr::filter(is_training_data) %>%
dplyr::slice_sample(n=1)
### Crop the crown file from the las catalog
cropped_crown_temp = lidR::clip_roi(las_stem_ctg_temp, des_crown_temp)
## Chunk 1 of 1 (100%): state ✓
# drop points close to the ground
cropped_crown_temp = lidR::filter_poi(cropped_crown_temp, Z > 0.1)
# plot(cropped_crown_temp)
Plot the extracted point cloud for the example crown
plot3D::scatter3D(
x = cropped_crown_temp@data$X
, y = cropped_crown_temp@data$Y
, z = cropped_crown_temp@data$Z
, colvar = cropped_crown_temp@data$Z
, pch = 19, cex = 0.2
, colkey = F
, phi = 0.5
)
Call the colorize_input_las
function defined above to
assign a color to the points classified as stem and other,
non-ground/non-stem based on the Classification
value:
ground points (class 2); water points (class 9); stem points (class 4);
non-stem (class 5).
### Pull the veg and stems from the cropped crown
veg_temp = lidR::filter_poi(cropped_crown_temp, Classification == 5)
stem_temp = lidR::filter_poi(cropped_crown_temp, Classification == 4)
###______________________________________________________________________###
### If stems and crowns are not empty, color them then write to the disk ###
###______________________________________________________________________###
# if(lidR::is.empty(veg_temp) == FALSE && lidR::is.empty(stem_temp) == FALSE){
### Get random color for the veg
des_veg_color_temp = "forestgreen"
# randomcoloR::randomColor(count = 1, hue = c("green"), luminosity = c("dark"))
### Colorize the veg points
veg_temp = colorize_input_las(veg_temp, desired_color = des_veg_color_temp)
### Get random colors for the stems
des_stem_color_temp = "tan4"
# randomcoloR::randomColor(count = 1, hue = c("orange"), luminosity = c("dark"))
### Colorize the stem points
stem_temp = colorize_input_las(stem_temp, desired_color = des_stem_color_temp)
### Combine the colored stems and veg
combined_veg_stem_temp = rbind(veg_temp, stem_temp)
Plot the colorized stems versus non-stem, non-ground points (assumed to be other vegetation)
plot3D::scatter3D(
x = cropped_crown_temp@data$X
, y = cropped_crown_temp@data$Y
, z = cropped_crown_temp@data$Z
, colvar = cropped_crown_temp@data$Classification
, col = c(des_stem_color_temp,des_veg_color_temp)
, pch = 19, cex = 0.2
, colkey = F
, phi = 0.5
)
# can also plot using lidR plot with color = "RGB"
# plot(combined_veg_stem_temp, color = "RGB")