Requirements

The workflow outlined below utilizes the TreeMap raster and tree-level attributes, which the user must download (~4 GB) from this repository (“RDS-2021-0074_Data.zip” as of 2024-01-18). The user must then define the directory location input_treemap_dir where the extracted files TreeMap2016.tif and TreeMap2016_tree_table.csv reside.

The user must also define the directory location input_las_dir of point cloud data in las or laz file format from Lidar or UAS structure from motion (SfM) data collection systems (can be multiple files or a single file).

These parameters (input_treemap_dir and input_las_dir) and other settings are entered by the user in this section.
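
As a quick sanity check before running the workflow, a minimal sketch along the following lines (using the default directory locations assumed throughout this document) can verify that the required inputs are in place:

# verify the TreeMap files and at least one las/laz file exist (paths are the defaults used below)
input_treemap_dir = "../data/treemap"
input_las_dir = "../data/las_raw"
stopifnot(
  file.exists(file.path(input_treemap_dir, "TreeMap2016.tif"))
  , file.exists(file.path(input_treemap_dir, "TreeMap2016_tree_table.csv"))
  , length(list.files(input_las_dir, pattern = "\\.(las|laz)$")) > 0
)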

Setup

Load packages

Load all packages used in the program.

Note that the TreeLS package was removed from CRAN and must be installed from the development version on GitHub by uncommenting the relevant code section below.

# bread-and-butter
library(tidyverse) # the tidyverse
library(viridis) # viridis colors
library(scales) # work with number and plot scales
library(latex2exp) # math formulas with latex

# spatial analysis
library(terra) # raster
library(sf) # simple features
library(sfarrow) # sf to Apache-Parquet files for working with large files

# point cloud processing
library(lidR)
library(ForestTools) # for crown delineation but relies on deprecated `raster`
library(raster) # !! this was deprecated in 2023, used for ForestTools
library(rlas) # write las index files .lax
## lidR::watershed requires EBImage::watershed 
  # install.packages("BiocManager")
  # BiocManager::install("EBImage")
  # library(BiocManager) # required for lidR::watershed
  library(EBImage) # required for lidR::watershed
## !! TreeLS package removed from CRAN...
  ## uncomment to install from github dev repo: https://github.com/tiagodc/TreeLS
  # library(pak)
  # pak::pkg_install("tiagodc/TreeLS")
  library(TreeLS) # removed from CRAN

# FIA data
## devtools::install_github('hunter-stanke/rFIA')
# library(rFIA) ## removed from CRAN
# library(FIESTA) ## available from CRAN

# modeling
library(randomForest)
library(Metrics)
library(RCSF) # for the cloth simulation filter (csf) to classify points
library(brms) # bayesian modelling using STAN engine

# parallel computing
# library(ini) # Parse simple '.ini' configuration files to a structured list
library(parallel) # parallel
library(doParallel)
library(foreach) # facilitates parallelization by lapply'ing %dopar% on for loop

# visualization
library(mapview) # interactive html maps
library(plot3D) # 3d visualization
library(rgl) # 3d visualization

# others
library(tools)

User-Defined Parameters

Parameters to be set by the user

# !!!!!!!!!!!!!!!!!!!!!!! USER-DEFINED PARAMETERS !!!!!!!!!!!!!!!!!!!!!!! # 
###____________________###
### Set directory for outputs ###
###____________________###
# rootdir = "../data"
rootdir = "../data"
###_________________________###
### Set input las directory ###
###_________________________###
# !!!!!!!!!! ENSURE FILES ARE PROJECTED IN CRS THAT USES METRE AS MEASUREMENT UNIT
input_las_dir = "../data/las_raw"
###_________________________###
### Set input TreeMap directory ###
###_________________________###
input_treemap_dir = "../data/treemap"
###__________________________________________________________###
### Set parameters for tiling, buffering, and tree detection ###
###__________________________________________________________###

### Set the resolution of the tile grid in metres
desired_tile_res = 50

### Set the desired buffer size in metres
desired_buffer_size = 10

###__________________________________________________________###
### Set parameters for the Canopy Height Model (CHM) ###
###__________________________________________________________###

### Set the maximum height for the canopy height model
max_height_threshold = 60
### Set the desired raster resolution in metres for the canopy height model
desired_chm_res = 0.25

### Set the maximum height for the stem-only canopy height model
max_stem_height_threshold = 60

### Set the window size for top down tree detection in metres
window_size = 3.5

### Set the minimum height (m) for individual tree detection in `lidR::locate_trees`
minimum_tree_height = 2

### Set the maximum dbh size (meters)
dbh_max_size_m = 1.5

# !!!!!!!!!!!!!!!!!!!!!!! USER-DEFINED PARAMETERS !!!!!!!!!!!!!!!!!!!!!!! #

Data Load and Setup

Configure File Structure

Use the user-defined directory to create output file structure.

### Function to generate nested project directories
create_project_structure = function(rootdir,input_las_dir){
  ###______________________________________________###
  ### Set output las directory and sub directories ###
  ###______________________________________________###
  output_dir = file.path(rootdir, "02_processed_data")
  
  ### Set output directory for storing lasgrid object
  las_grid_dir = file.path(output_dir, "00_grid_dir")
  
  ### Set output directory for las tiles
  las_tile_dir = file.path(output_dir, "01_tiled_las")
  
  ### Set output directory for las ground tiles
  las_ground_tile_dir = file.path(output_dir, "02_tiled_ground_las")
  
  ### Set output directory for normalized tiles
  las_norm_tile_dir = file.path(output_dir, "03_tiled_normalized_las")
  
  ### Set output directory for the las stem files
  las_stem_dir = file.path(output_dir, "04_las_stem_dir")
  
  ### Set output directory for stem polygon tiles
  stem_poly_tile_dir = file.path(output_dir, "05_stem_polygon_dir")
  
  ### Set output directory for storing intermediate spatial files
  working_spatial_dir = file.path(output_dir, "06_working_spatial_dir")
  
  ### Set output directory for cropped tree files 
  las_tree_dir = file.path(output_dir, "07_las_tree_dir")
  
  ###___________________________________________________###
  ### Set output delivery directory and sub directories ###
  ###___________________________________________________###
  delivery_dir = file.path(rootdir, "03_delivery")
  
  ### Set output delivery directory for stats files
  delivery_stats_dir = file.path(delivery_dir, "01_processing_stats")
  
  ### Set output delivery directory for spatial files
  delivery_spatial_dir = file.path(delivery_dir, "02_spatial_files")
  
  ### Set output delivery directory for point cloud files
  delivery_las_dir = file.path(delivery_dir, "03_las_files")
  
  ### Create the directories
  dir.create(output_dir, showWarnings = FALSE) # create the parent before its subdirectories
  dir.create(las_grid_dir, showWarnings = FALSE)
  dir.create(las_tile_dir, showWarnings = FALSE)
  dir.create(las_ground_tile_dir, showWarnings = FALSE)
  dir.create(las_norm_tile_dir, showWarnings = FALSE)
  dir.create(las_stem_dir, showWarnings = FALSE)
  dir.create(stem_poly_tile_dir, showWarnings = FALSE)
  dir.create(working_spatial_dir, showWarnings = FALSE)
  dir.create(las_tree_dir, showWarnings = FALSE)
  dir.create(delivery_dir, showWarnings = FALSE)
  dir.create(delivery_stats_dir, showWarnings = FALSE)
  dir.create(delivery_spatial_dir, showWarnings = FALSE)
  dir.create(delivery_las_dir, showWarnings = FALSE)
  
  ###______________________________###
  ### Set names of the directories ###
  ###______________________________###
  
  names(rootdir) = "rootdir"
  names(input_las_dir) = "input_las_dir"
  names(output_dir) = "output_dir"
  names(las_grid_dir) = "las_grid_dir"
  names(las_tile_dir) = "las_tile_dir"
  names(las_ground_tile_dir) = "las_ground_tile_dir"
  names(las_norm_tile_dir) = "las_norm_tile_dir"
  names(las_stem_dir) = "las_stem_dir"
  names(stem_poly_tile_dir) = "stem_poly_tile_dir"
  names(working_spatial_dir) = "working_spatial_dir"
  names(las_tree_dir) = "las_tree_dir"
  names(delivery_dir) = "delivery_dir"
  names(delivery_stats_dir) = "delivery_stats_dir"
  names(delivery_spatial_dir) = "delivery_spatial_dir"
  names(delivery_las_dir) = "delivery_las_dir"
  
  ###______________________________###
  ### Append to output config list ###
  ###______________________________###
  
  config = cbind(rootdir, input_las_dir, output_dir, las_grid_dir, las_tile_dir, las_ground_tile_dir,
                 las_norm_tile_dir, las_stem_dir, stem_poly_tile_dir, working_spatial_dir,
                 las_tree_dir, delivery_dir, delivery_stats_dir, delivery_spatial_dir, delivery_las_dir)
  
  config = as.data.frame(config)
  #config
  
  ### Return config 
  return(config)
  
}
config = create_project_structure(rootdir, input_las_dir)

Read Point Cloud Data

Function to read in available las or laz data.

### Function to read in available las data within input directory
read_las_as_ctg = function(config){
  ### Get the input directory read in 
  ctg = lidR::readLAScatalog(config$input_las_dir)
  # print(ctg)
  # print(st_crs(ctg))
  return(ctg)
}
### Read in the las files, check the number of points and CRS
las_ctg = read_las_as_ctg(config)
las_ctg

class       : LAScatalog (v1.2 format 2)
extent      : 610551.7, 610751.7, 4888774, 4888974 (xmin, xmax, ymin, ymax)
coord. ref. : NAD83 / UTM zone 13N
area        : 39999.6 m²
points      : 20.48 million points
density     : 512 points/m²
density     : 512 pulses/m²
num. files  : 1

if(
  lidR::st_crs(las_ctg, parameters = TRUE)$units_gdal != "metre"
){ stop("las file not projected in coordinate system that uses metre as measurement unit")}

Map point cloud extent

# map of point cloud extent
# mapview
mapview::mapviewOptions(
  homebutton = FALSE
  , basemaps = c("Esri.WorldImagery","OpenStreetMap")
)
mapview::mapview(las_ctg@data$geometry, color = "black", col.regions = "blue", lwd = 3, alpha.regions = 0.3, legend = F, label = F)

Make Spatial Grid Over Extent

Make a raster grid covering the point cloud extent with the desired tile resolution in metres.

This grid is used to split the point cloud into smaller laz tiles that subsequent processing steps loop over (see the new_tile_las_to_grid_foreach function).

### Function to check input las and extent of desired tile resolution
generate_grid_over_extent = function(las_ctg, desired_tile_res){
  
  ### Pull the las extent geometry
  las_ctg_geom = sf::st_as_sf(las_ctg@data$geometry)
  
  ### Make a grid with the desired tile size
    # cellsize units determined by projection
  grid = sf::st_make_grid(las_ctg_geom, cellsize = desired_tile_res)
  grid = sf::st_as_sf(grid)
  
  ### Create grid ID 
  grid_id = 1:nrow(grid)
  grid = cbind(grid, grid_id)
  
  message("Output grid has ", nrow(grid), " tiles ...")
  
  return(grid)
}

Call the function to make the grid and save it to disk as a parquet file, which allows for fast read/write. A key goal of the sfarrow package is to support interoperability of spatial data files between R and Python through the use of standardized metadata.
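
For reference, the saved grid can later be read back into R (or, given the standardized metadata, from Python) with a call along these lines; the file name matches the one written below:

# read the project grid back from the parquet file written below
las_grid = sfarrow::st_read_parquet(file.path(config$las_grid_dir, "project_grid.parquet"))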

G. Woolsey notes: this buffer is buffering each tile instead of buffering the extent and then creating the grid…which is it supposed to be for processing?

### Generate the grid
las_grid = generate_grid_over_extent(las_ctg, desired_tile_res)
# this buffer is buffering each cell instead of buffering the extent and then creating the grid
las_grid_buff = sf::st_buffer(las_grid, desired_buffer_size)
###____________________________###
### Write the grid to the disk ###
###____________________________###
sfarrow::st_write_parquet(
  las_grid
  , dsn = paste0(
      config$las_grid_dir
      , "/"
      , "project_grid.parquet"
    )
)

Map grid and point cloud extent

mapview::mapview(
    las_ctg@data$geometry
    , color = "black"
    , col.regions = "blue"
    , lwd = 3, alpha.regions = 0.3, legend = F, label = F
  ) +
  mapview::mapview(las_grid_buff, color = "gray55", alpha.regions = 0, lwd = 2, legend = F, label = F, hide = F) +
  mapview::mapview(las_grid, color = "gray22", alpha.regions = 0, lwd = 1, legend = F, label = F, hide = F)

Spatial Index Point Cloud Function

Spatial index the point cloud to enable more efficient point cloud querying. From The lidR Package Book:

Spatial indexing is a key feature for performing spatial queries over a large point cloud. Any search for points of interest in the absence of indexing would require a “sequential scan” of every point - this could take a lot of time. In brief, spatial indexing organizes data into a search structure that can be quickly traversed to find specific records. Some algorithms would take unreasonable amounts of time to complete without spatial indexing.
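
For example, once .lax files exist alongside the las/laz tiles, lidR detects and uses them automatically, so a spatial query against the catalog benefits with no further setup. A minimal sketch (the query coordinates are illustrative values inside the extent printed above):

# clip a 20 m circle from the catalog; the .lax index avoids a full sequential scan
roi = lidR::clip_circle(las_ctg, xcenter = 610650, ycenter = 4888870, radius = 20)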

### Function to generate .lax index files for input directory path
create_lax_for_tiles = function(desired_las_dir){
  ## desired_las_dir = config$input_las_dir
  ###__________________________________________###
  ### Create a lax index file for the las file ###
  ###__________________________________________###
  message(paste0("Initializing .lax indexing for ", desired_las_dir, " ... "))
  # anchor the pattern so only .las/.laz files match
  lidar_list = list.files(desired_las_dir, pattern = "\\.(las|laz)$")
  # lidar_list
  
  message("Indexing ", length(lidar_list), " las files ... ")
  
  start_time = Sys.time()
  # configure parallel
  cores = parallel::detectCores()
  cluster = parallel::makeCluster(cores)
  # register the parallel backend with the `foreach` package
  doParallel::registerDoParallel(cluster)
  # pass to foreach to process each lidar file in parallel
  foreach::foreach(i = 1:length(lidar_list)) %dopar% {
    
    ### Get the desired file
    des_file = lidar_list[i]
    # des_file
    
    ### Compile the .lax file name
    des_file_lax = tools::file_path_sans_ext(des_file)
    des_file_lax = paste0(des_file_lax, ".lax")
    des_file_lax_path = paste0(desired_las_dir, "/", des_file_lax)
    # des_file_lax_path
    
    ### See if the .lax version exists in the input directory
    does_file_exist = file.exists(des_file_lax_path)
    # does_file_exist
    
    ### If file exists, do nothing
    if(does_file_exist == TRUE){return(NULL)}
    
    ### If file doesn't exist, create a .lax index
    if(does_file_exist == FALSE){
      
      ### Append the directory path to the las file
      path = paste0(desired_las_dir, "/", des_file)
      
      ### Write index
      rlas::writelax(path)
      
    }
    
  }
  parallel::stopCluster(cluster)
  end_time = Sys.time()
  total_time = difftime(end_time, start_time, units = c("mins"))
  message("Total lax index time took ", total_time, " minutes ... ")
  
}

Apply Spatial Index to Raw Point Cloud

Create the spatial index lax file for the raw input point cloud.

# call the function
create_lax_for_tiles(config$input_las_dir)

Tile Raw Point Cloud to Grid

This function writes individual point cloud laz files for each grid tile to the disk in the “../data/02_processed_data/01_tiled_las” directory. Each grid tile is buffered by the user-defined buffer and the raw point cloud is cropped to that extent. The purpose is to reduce processing time by operating on smaller tiles.

G. Woolsey notes: is there potential for individual trees to either be i) double-counted if crown spans multiple grid tiles; or ii) excluded when recombining tiles and removing duplicates? Not sure what the recombine process looks like yet.

### New function to tile the raw point cloud to the desired grid - with buffers 
new_tile_las_to_grid_foreach = function(
  config
  , desired_buffer_size
  , my_las_grid = las_grid # generate_grid_over_extent(las_ctg, desired_tile_res)
  , my_las_ctg = las_ctg # lidR::readLAScatalog(config$input_las_dir)
){
  # define files
  las_grid = my_las_grid
  las_ctg = my_las_ctg
  # start message
  message(" --- Starting grid tiling algorithim --- ")
  master_start = Sys.time()
  # ###_____________________________###
  # ### Read in the las grid object ###
  # ###_____________________________###
  #   ## !!!!!!! the grid is already in memory, no need to re-read
  #   message("Reading in project grid ... ")
  #   ## read parquet grid created with generate_grid_over_extent function above
  #   grid_fpth_temp = paste0(
  #     config$las_grid_dir
  #     , "/"
  #     , "project_grid.parquet"
  #   )
  #   las_grid = sfarrow::st_read_parquet(grid_fpth_temp)
  # ###_____________________________###
  # ### Read in the lascatalog  ###
  # ###_____________________________###
  #   ## !!!!!!! the lascatalog is already in memory, no need to re-read
  #   message("Reading in the las as a lascatalog for indexing ... ")
  #   las_ctg = lidR::readLAScatalog(config$input_las_dir)

  ###____________________________________________###
  ### Loop through grid tiles and tile the input las object ###
  ###____________________________________________###
    start_time = Sys.time()
    ## !!!!!! TURNED OFF PARALLEL B/C IT KILLS STUFF
    # # set up parallel processing
    # cores = parallel::detectCores()
    # message("Registering parallel session on ",  cores-2, " cores ... ")
    # cluster = parallel::makeCluster(cores-2)
    # doParallel::registerDoParallel(cluster)
    ## !!!!!! TURNED OFF PARALLEL B/C IT KILLS STUFF
    # message("Starting NON-parallel ;D grid tiling at ", start_time)
    message("Creating tiles for ", nrow(las_grid), " grids ... ")
    ## loop over grid tiles but NOT in parallel ftw
    for(i in 1:nrow(las_grid)) {
    # for(i in 1:4) {
      # message("Creating laz for grid number ", i, " ... ")
    ## !!!!!! TURNED OFF PARALLEL B/C IT KILLS STUFF
    # loop over grid tiles but in parallel ftw
    # foreach::foreach(
    #   i=1:nrow(las_grid)
    #   , .packages = c("lidR", "sf")
    #   , .inorder = FALSE
    #   , .errorhandling = "remove"
    # ) %dopar% {
    ## !!!!!! TURNED OFF PARALLEL B/C IT KILLS STUFF
      
      ### Get the desired grid
      des_grid_cell = las_grid[i,]
      # des_grid_cell
      
      ### Does file exist
      file_to_generate = paste0(config$las_tile_dir, "/", i, ".laz")
      does_file_exist = file.exists(file_to_generate)
      # does_file_exist
      
      ### If file has been generated already, skip
      if(does_file_exist == TRUE){
        message("laz for grid number ", i, " already exists guy ... ")
      }
      
      ### If file has not been generated already, try to generate
      if(does_file_exist == FALSE){
        
        ### Buffer the grid cell to the desired buffer size
        des_grid_cell_buff = sf::st_buffer(des_grid_cell, desired_buffer_size)
        
        ### Crop the las file from the catalog for the grid cell extent  
        las_clipped = suppressWarnings(lidR::clip_roi(las_ctg, des_grid_cell_buff))
        #st_crs(las) = st_crs(las_grid)
        
        ### Check if the point cloud is empty
        is_cloud_empty = suppressWarnings(lidR::is.empty(las_clipped))
        # is_cloud_empty
        
        ### If is_cloud_empty == TRUE, return NULL
        if(is_cloud_empty == TRUE){
          message("laz for grid number ", i, " is empty so i skipped it ... ")
        }
        
        ### If is_cloud_empty = FALSE, write to disk
        if(is_cloud_empty == FALSE){
          
          ### Write the cropped las to the disk
          out_name = paste0(
            config$las_tile_dir
            , "/"
            , des_grid_cell$grid_id
            , ".laz"
          )
          suppressWarnings(lidR::writeLAS(las_clipped, out_name))
          ## return(NULL) # this kills it or something
          # message("Hey i wrote a laz for grid number ", i, " ... ")
        } # is_cloud_empty
      } # does_file_exist
    } # foreach grid
    ## !!!!!! TURNED OFF PARALLEL B/C IT KILLS STUFF
    # parallel::stopCluster(cluster)
    ## !!!!!! TURNED OFF PARALLEL B/C IT KILLS STUFF
    end_time = Sys.time()
    total_time = difftime(end_time, start_time, units = c("mins"))
    message("Total grid tiling time took ", total_time, " minutes ... ")
    
  ### Get some stats for the tiling run
  tiled_ctg = lidR::readLAScatalog(config$las_tile_dir)
  tiled_ctg_geom = tiled_ctg$geometry
  tiled_ctg_geom = sf::st_as_sf(tiled_ctg_geom)
  las_area_m2 = sum(sf::st_area(tiled_ctg_geom))
  las_area_acres = las_area_m2/4047
  processing_time_mins = total_time
  number_starting_grids = nrow(las_grid)
  number_ending_grids = nrow(tiled_ctg_geom)
  
  ### Combine the output stats dataframe and write to the disk
  stats_output = data.frame(
    number_starting_grids, number_ending_grids, las_area_m2
    , las_area_acres, processing_time_mins
  )
  write.csv(
    stats_output
    , file = paste0(config$delivery_stats_dir,"/","01_grid_tiling_stats.csv")
  )
  
  ### Get a list of tiles that processed
  processed_tile_list = list.files(config$las_tile_dir)
  message(length(processed_tile_list), " files were generated ... ")
  message(length(processed_tile_list)/nrow(las_grid) *100, " percent of tile grid occupied with spatial points ... ")
  
  return(tiled_ctg_geom)
  
}

Call the function to make the tiled laz files and write to disk. Also, call the function to create the lax spatial indexed file for each tiled grid file.

tiled_grid = new_tile_las_to_grid_foreach(
  config = config
  , desired_buffer_size = desired_buffer_size
  , my_las_grid = las_grid
  , my_las_ctg = las_ctg
)
create_lax_for_tiles(config$las_tile_dir)

Map the tiled grid extent and the raw point cloud extent

mapview::mapview(
    las_ctg@data$geometry
    , color = "black"
    , col.regions = "blue"
    , lwd = 3, alpha.regions = 0.3, legend = F, label = F
  ) +
  mapview::mapview(tiled_grid, color = "gray55", alpha.regions = 0, lwd = 2, legend = F, label = F, hide = F)

Classify Points

Classify the point cloud points as ground or non-ground. Use this classification to height normalize the tiled laz files.

Points are classified using the lidR::classify_ground function with the option algorithm = csf(rigidness = 1, sloop_smooth = TRUE). The csf ground classification algorithm implements segmentation of ground points based on a “Cloth Simulation Filter” (CSF). This method is a strict implementation of the CSF algorithm made by Zhang et al. (2016) that relies on the authors’ original source code, exposed to R via the RCSF package. The rigidness of the cloth is set to the default 1, which stands for very soft (to fit rugged terrain); 2 stands for medium, and 3 stands for hard cloth (for flat terrain). When steep slopes exist, setting the sloop_smooth parameter to TRUE reduces errors during post-processing.
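
A minimal standalone sketch of the classification call described above, assuming las is a lidR LAS object already in memory:

# classify points as ground (class 2) vs. non-ground using the cloth simulation filter
las = lidR::classify_ground(las, algorithm = csf(rigidness = 1, sloop_smooth = TRUE))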

The points are normalized using the lidR::normalize_height function with the option algorithm = knnidw(). By default the terrain is computed using ground points (class 2) and water points (class 9), which requires the points to be classified before normalizing the height. The knnidw algorithm implements spatial interpolation using a k-nearest neighbour (KNN) approach with inverse-distance weighting (IDW). An alternative height normalization process uses a separate digital terrain model (DTM) raster via the dtm option. When a DTM raster is provided, the DTM is used in place of ground points. This is different from providing a DTM in algorithm: if algorithm = dtm, the DTM is subtracted naively; if algorithm = tin() and dtm = raster, the ground points are not used and the DTM is interpolated as if it were made of regularly spaced ground points.
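
A hedged sketch of the two normalization routes discussed above, assuming las is a ground-classified LAS object and dtm is a terra SpatRaster:

# route 1: interpolate the classified ground points with k-nearest neighbour IDW
las_norm = lidR::normalize_height(las, algorithm = knnidw())
# route 2: hybrid normalization using a DTM raster plus tin() interpolation
las_norm = lidR::normalize_height(las, algorithm = tin(), dtm = dtm)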

After height normalizing the points, non-ground points that are below the ground surface are removed using lidR::filter_poi(las, Z > -0.05) and high-elevation outlier points are removed using lidR::filter_poi(las, Z < quantile(las@data$Z, 0.99999)) .

An alternative method to detect and remove outliers is available via the lidR::classify_noise function. For example:

# Using IVF
las <- lidR::classify_noise(las, ivf(res=5, n=2))
#plot(las, color = "Classification")

# Remove outliers using filter_poi()
las_denoise <- lidR::filter_poi(las, Classification != LASNOISE)

, where ivf implements an algorithm for outliers (noise) segmentation based on isolated voxels filter (IVF). The algorithm finds points that have only a few other points in their surrounding 3 x 3 x 3 = 27 voxels.

G. Woolsey notes: the point classification and height normalization procedures are using only data within the bounds of each individual tile (+buffer). How does this impact: i) classification of ground (e.g. in low point density tiles or tiles with limited canopy penetration) and the resulting normalization; ii) the detection of outlier points based on normalized height within the tile versus across the entire point cloud extent? Also, should outliers (noise) be detected and removed prior to applying the ground classification (currently is applied after at the individual grid tile level)?

### Function to classify ground and height normalize las tiles
classify_ground_normalize = function(
  config = config
  , my_las_grid = las_grid # generate_grid_over_extent(las_ctg, desired_tile_res)
  , my_las_ctg = las_ctg # lidR::readLAScatalog(config$input_las_dir)
  , want_to_classify_ground = T
){
  # define files passed to function
  las_grid = my_las_grid
  las_ctg = my_las_ctg
  
  ###_______________________###
  ### Configure directories ###
  ###_______________________###
  
  message(" --- Initializing ground classification and normalization algorithm --- ")
  master_start = Sys.time()
  
  ###_____________________________###
  ### Read in the las grid object ###
  ###_____________________________###
  ## !!!! THIS IS ALREADY IN MEMORY, NO NEED TO RE-READ
  # message("Reading in project grid ... ")
  # las_grid = sfarrow::st_read_parquet(paste0(
  #   config$las_grid_dir
  #   , "/"
  #   , "project_grid.parquet"
  # ))
  # 
  # message("Reading in the las as a lascatalog for indexing ... ")
  # las_ctg = lidR::readALSLAScatalog(config$input_las_dir)
  
  ###________________________________________###
  ### Next, classify ground across the tiles ###
  ###________________________________________###
  
  ### Get a list of tiled files to ground classify
  # anchor the pattern so only .las/.laz files match
  lidar_list = list.files(config$las_tile_dir, pattern = "\\.(las|laz)$")
  message("Ground classifying and height normalizing ", length(lidar_list), " tiles ... ")
  # lidar_list
  
  ###______________________________________________________________________________________###
  ### In parallel, classify ground, and height normalize across the tiles and rewrite them ###
  ###______________________________________________________________________________________###
  
  start_time = Sys.time()
  ## !!!!!! TURNED OFF PARALLEL B/C IT KILLS STUFF
  # set up parallel processing
  # cores = parallel::detectCores()
  # message("Registering parallel session on ", cores-1, " cores ... ")
  # cluster = parallel::makeCluster(cores-1)
  # doParallel::registerDoParallel(cluster)
  ## !!!!!! TURNED OFF PARALLEL B/C IT KILLS STUFF
  message("Starting NON-parallel classification/normalization ... ")
  # loop over each tiled las file to classify and normalize
  for(i in 1:length(lidar_list)) {
  # for(i in 1:4) {
  ## !!!!!! TURNED OFF PARALLEL B/C IT KILLS STUFF  
  # foreach(
  #   # i = 1:length(lidar_list)
  #   , .packages = c("lidR", "sf")
  #   , .inorder = FALSE
  #   , .errorhandling = "remove"
  # ) %dopar% {
  ## !!!!!! TURNED OFF PARALLEL B/C IT KILLS STUFF
    
    ### Get the desired lidar tile
    des_tile_name = lidar_list[i]
    # des_tile_name
    
    ### Has the file been generated already?
    des_tile_to_check = paste0(config$las_norm_tile_dir, "/", des_tile_name)
    does_file_exist = file.exists(des_tile_to_check)
    # does_file_exist
    
    ### If file exists, skip
    if(does_file_exist == TRUE){
      message("normalized tile ", des_tile_to_check, " exists so skipped it ... ")
    }
    
    ### If file does not exist, classify and height normalize
    if(does_file_exist == FALSE){
      
      ### Get the matching grid polygon 
      las_grid_id = tools::file_path_sans_ext(des_tile_name)
      matching_grid_cell = las_grid[las_grid$grid_id == las_grid_id,]
      # matching_grid_cell
      
      ### Read in the lidar tile
      las_tile = lidR::readALSLAS(paste0(
        config$las_tile_dir
        , "/"
        , des_tile_name
      ))
      #st_crs(las_tile) = st_crs(las_grid)
      
      ### Drop duplicated points
      las_tile = lidR::filter_duplicates(las_tile)
      
      ### Set threads to 1 during height normalization
      lidR::set_lidr_threads(1)
      
      ### Classify ground points
      if(want_to_classify_ground == TRUE){
        las_tile = lidR::classify_ground(
          las_tile
          , algorithm = csf(rigidness = 1, sloop_smooth = TRUE)
        )
      }
      
      ### Pull out the ground points
      ground = lidR::filter_poi(las_tile, Classification == 2)
      
      ### Check if ground is empty, if not, write to disk
      ground_is_empty = lidR::is.empty(ground)
      # ground_is_empty
      
      if(ground_is_empty == FALSE){
        lidR::writeLAS(
          ground
          , file = paste0(
            config$las_ground_tile_dir
            , "/"
            , des_tile_name
          )
        )
      }
      
      ### Height normalize the file
      las_tile = lidR::normalize_height(las_tile, algorithm = knnidw())
      
      ### Remove points below -0.05
      las_tile = lidR::filter_poi(las_tile, Z > -0.05)
      
      ### Remove high outlier points
      height_filter = quantile(las_tile@data$Z, 0.99999)
      las_tile = lidR::filter_poi(las_tile, Z < height_filter)
      
      ### Set crs
      #desired_crs = sf::st_crs(las_grid)
      #lidR::st_crs(las_tile) = desired_crs
      #las_tile
      
      ### Crop the las to the original grid polygon
      las_tile = lidR::clip_roi(las_tile, matching_grid_cell)
      
      ### Is las null?
      check = is.null(las_tile)
      
      ### If las is null, return NULL
      if(check == TRUE){
        message("normalized laz for grid number ", i, " is null ... ")
      }
      
      ### Is the las file empty
      is_las_empty = lidR::is.empty(las_tile)
      # is_las_empty
      
      ### If las is empty, return Null
      if(is_las_empty == TRUE){
        message("normalized laz for grid number ", i, " is empty (void of pts) ... ")
      }
      
      ### If las isn't empty, write the las to the disk
      if(is_las_empty == FALSE){
        
        ### Overwrite the existing file
        lidR::writeLAS(
          las_tile
          , file = paste0(
            config$las_norm_tile_dir
            , "/"
            , des_tile_name
          )
        )
        message("normalized laz for grid number ", i, " completed successfully ... ")
        
      }
      
    }
    
  }
  ## !!!!!! TURNED OFF PARALLEL B/C IT KILLS STUFF
  # parallel::stopCluster(cluster)
  ## !!!!!! TURNED OFF PARALLEL B/C IT KILLS STUFF
  end_time = Sys.time()
  total_time = difftime(end_time, start_time, units = c("mins"))
  message("Total ground classification and height normalization complete in ", total_time,  " minutes ... ")
  
  ###________________________________________###
  ### Get a catalog of the normalized points ###
  ###________________________________________###
  
  message("Reading in the normalized las file as a catalog ... ")
  ctg = lidR::readALSLAScatalog(config$las_norm_tile_dir)
  ctg_geom = ctg$geometry
  num_points_list = ctg$Number.of.point.records
  num_points_list = unlist(num_points_list)
  num_points_list = as.data.frame(num_points_list)
  number_points = sum(num_points_list$num_points_list)
  # number_points
  
  ### Get the area of the dataset in acres
  catalog_area = sf::st_union(ctg@data$geometry)
  catalog_area = sf::st_area(catalog_area)
  catalog_area = as.numeric(catalog_area)
  las_area_m2 = catalog_area
  las_area_acres = catalog_area/4047
  # las_area_acres
  
  ### Get a list of processed files
  normalized_file_list = list.files(config$las_norm_tile_dir, pattern = ".laz")
  message(length(normalized_file_list), " lidar files height normalized ... ")
  difference = length(lidar_list) - length(normalized_file_list)
  message(difference, " files failed height normalization ...")
  
  ###_______________________________###
  ### Get the total processing time ###
  ###_______________________________###
  
  master_end = Sys.time()
  total_master_time = difftime(master_end, master_start, units = c("mins"))
  
  ###____________________________________________###
  ### Write some stats to the delivery directory ###
  ###____________________________________________###
  processing_time_mins = total_master_time
  stats_output = data.frame(number_points, las_area_m2, las_area_acres, processing_time_mins)
  
  write.csv(stats_output, file = paste0(
    config$delivery_stats_dir
    , "/" 
    , "02_point_cloud_classification_normalization_stats.csv"
  ))
  message(" --- Total ground classification and height normalization took ", total_master_time, " minutes --- ")
  
  return(ctg_geom)
  
}

Call the function to classify ground/non-ground points and height normalize the tiled point clouds and write to disk. Also, call the function to create the lax spatial indexed file for each normalized tile grid.

height_norm_grid = classify_ground_normalize(
  config = config
  , my_las_grid = las_grid # generate_grid_over_extent(las_ctg, desired_tile_res)
  , my_las_ctg = las_ctg # lidR::readLAScatalog(config$input_las_dir)
  , want_to_classify_ground = T
)
create_lax_for_tiles(config$las_ground_tile_dir)
create_lax_for_tiles(config$las_norm_tile_dir)

Map the classified and normalized grid tiles

mapview::mapview(
    las_ctg@data$geometry
    , color = "black"
    , col.regions = "blue"
    , lwd = 3, alpha.regions = 0.3, legend = F, label = F
  ) +
  mapview::mapview(height_norm_grid, color = "gray55", alpha.regions = 0, lwd = 2, legend = F, label = F, hide = F)

Create DTM

This section uses the point cloud grid tiles with only points classified as “ground” (created in this section) to create a DTM raster.

The process loops over each ground point cloud grid tile and creates a DTM raster of user-specified resolution. The specific function used for each tile is:

lidR::rasterize_canopy(las_ground_tile, res = 3, algorithm = p2r(), pkg = "terra")

, where the algorithm = p2r() option implements an algorithm for digital surface model computation based on a points-to-raster method: for each pixel of the output raster the function attributes the height of the highest point found. If specified, the subcircle option of the p2r function replaces each point with 8 points around the original one. This allows for virtual ’emulation’ of the fact that a lidar point is not a point as such, but more realistically a disc. This tweak densifies the point cloud and the resulting canopy model is smoother and contains fewer “pits” and empty pixels. The pkg = "terra" results in a raster that is compatible with the terra package.
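
An illustrative sketch of the subcircle tweak described above; the 0.5 m resolution and 0.15 m subcircle radius are assumptions for illustration, not values used elsewhere in this workflow:

# replace each point with 8 points on a 0.15 m disc before taking the per-pixel maximum
chm = lidR::rasterize_canopy(las, res = 0.5, algorithm = p2r(subcircle = 0.15), pkg = "terra")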

It is worth noting that the lidR package includes a rasterize_terrain function which interpolates the ground points (in a classified point cloud that has not been normalized) and creates a rasterized digital terrain model. The algorithm uses the points classified as “ground” and “water” (Classification = 2 and 9, respectively) to compute the interpolation. How well the edges of the dataset are interpolated depends on the interpolation method used. A buffer around the region of interest is always recommended to avoid edge effects. This function can be implemented via:

dtm1 = lidR::rasterize_terrain(las, algorithm = knnidw(k = 6L, p = 2))
dtm2 = lidR::rasterize_terrain(las, algorithm = tin())
dtm3 = lidR::rasterize_terrain(las, algorithm = kriging(k = 10L))
# plot results
plot(dtm1, col = gray(0:25/25))
plot(dtm2, col = gray(0:25/25))
plot(dtm3, col = gray(0:25/25))
plot_dtm3d(dtm1)
plot_dtm3d(dtm2)
plot_dtm3d(dtm3)

G. Woolsey notes: none of the interpolation algorithms available in lidR::rasterize_terrain use single-point methods to create the DTM. As written, this program uses lidR::rasterize_canopy with the p2r algorithm set to attribute the height of the highest point found to the value of each raster cell. This could result in i) empty cells; and/or ii) a less smooth DTM which is influenced by artifact points (especially since the current outlier removal is applied only to the non-ground points after the classification algorithm is applied). Furthermore, as a DTM is created for each tile separately, there is potential for edge effects to be amplified across the study extent. Calculating the DTM for each tile separately excludes valid data which would otherwise be available to the algorithm if processed altogether. A potential solution is to continue processing by grid tiles with a buffer that overlaps with neighboring tiles, use a multi-point interpolation algorithm on each buffered tile, and then combine the grid tile rasters via terra::mosaic where values in overlapping cells are averaged. Or, process as a single point cloud over the entire study extent.
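
A hedged sketch of the terra::mosaic approach suggested above, assuming each buffered tile has already been interpolated to its own DTM raster on disk (the dtm_tiles directory and file pattern are hypothetical):

# collect the per-tile DTM rasters and mosaic them, averaging values in overlapping cells
tile_rasters = terra::sprc(list.files("../data/dtm_tiles", pattern = "\\.tif$", full.names = TRUE))
dtm_mosaic = terra::mosaic(tile_rasters, fun = "mean")
terra::writeRaster(dtm_mosaic, "../data/dtm_mosaic.tif", overwrite = TRUE)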

# GW added
# create function to read las, rasterize, and return data frame
las2dtm2df <- function(las_path_name, des_dtm_res = 3) {
  ### Read in the file
  las_ground_tile = lidR::readLAS(las_path_name, select = "xyzc")
  
  ### Create a DEM from the file
  dtm_tile = lidR::rasterize_canopy(
    las_ground_tile
    , res = des_dtm_res
    , algorithm = p2r()
    , pkg = "terra"
  )
  
  ### Get the DEM points
  dtm_points = as.data.frame(dtm_tile, xy = T) %>% 
    dplyr::rename_with(toupper)
  
  ### Return the points to the function 
  return(dtm_points)
}
## map files over the function
# list.files(config$las_ground_tile_dir, pattern = ".*\\.(laz|las)$", full.names = T) %>%
#   .[2:3] %>%
#   purrr::map(las2dtm2df) %>%
#   dplyr::bind_rows() %>%
#   str()
  

### Function to get DTM from ground classified tiles
rasterize_tiles_to_dtm = function(
  config
  , my_des_dtm_res = 3 #in meters
  , calculate_extent = TRUE
  , my_las_grid = las_grid # generate_grid_over_extent(las_ctg, desired_tile_res)
  , my_las_ctg = las_ctg # lidR::readLAScatalog(config$input_las_dir)
){
  # define files
  las_grid = my_las_grid
  las_ctg = my_las_ctg
  
  message(" --- Initializing digitial elevation model rasterization --- ")
  master_start = Sys.time()
  
  # ###_____________________________###
  # ### Read in the las grid object ###
  # ###_____________________________###
  # ## !!!!!!! the grid is already in memory, no need to re-read 
  # message("Reading in project grid ... ")
  # las_grid = sfarrow::st_read_parquet(paste0(
  #   config$las_grid_dir
  #   , "/"
  #   , "project_grid.parquet")
  # )
  # 
  # message("Reading in the las as a lascatalog for indexing ... ")
  # las_ctg = lidR::readLAScatalog(config$input_las_dir)
  # las_ctg
  
  ###_______________________________________________________________________________###
  ### Get a list of ground classified tiles to generate DTM raster files ###
  ###_______________________________________________________________________________###
  ### Loop through and generate DTM data.frame for each tile and then bind_rows
  start_time = Sys.time()
  # file list
  tile_list = list.files(config$las_ground_tile_dir, pattern = ".*\\.(laz|las)$", full.names = T)
  # map las2dtm2df function
  merged_dtm_points = tile_list %>%
    purrr::map(las2dtm2df, des_dtm_res = my_des_dtm_res) %>%
    # .[2:3] %>%
    dplyr::bind_rows()
  
  ###_____________________________________###
  ### Make a las file from the DTM points ###
  ###_____________________________________###
  
  message("Generating the master LiDAR file ... ")
  las = lidR::LAS(merged_dtm_points)
  sf::st_crs(las) = sf::st_crs(las_grid)
  # las
  
  ### Write cleaned las to the disk
  message("Writing the master lidar file to the disk ... ")
  lidR::writeLAS(las, file = paste0(
    config$delivery_las_dir
    , "/"
    , "master_dem_point_cloud.laz"
  ))
  
  las_area_m2 = lidR::area(las)
  las_area_acres = las_area_m2/4047
  message("Total merged DEM area of ", las_area_acres, " acres .... ")
  
  ###____________________________________###
  ### Generate a DEM from the DTM points ###
  ###____________________________________###
  
  message("Generating the master DEM file ... ")
  master_dem = lidR::rasterize_canopy(las, res = my_des_dtm_res, algorithm = p2r())
  # master_dem
  
  ###_____________________________________###
  ### Write the master raster to the disk ###
  ###_____________________________________###
  
  message("Writing the master DEM file to the disk ... ")
  outname = paste0(
    config$delivery_spatial_dir
    , "/"
    , "master_dem_raster_", my_des_dtm_res, "m.tif"
  )
  terra::writeRaster(master_dem, outname, overwrite = TRUE)
  
  ###_______________________________________###
  ### Get a polygon of the full file extent ###
  ###_______________________________________###
  
  if(calculate_extent == TRUE){
    
    message("Getting the extent of the DEM file ... ")
    extent_raster = terra::clamp(master_dem, 1, 1, 1)
    extent_poly = terra::as.polygons(extent_raster)
    extent_poly = terra::simplifyGeom(extent_poly)
    extent_poly = terra::fillHoles(extent_poly)
    extent_poly = sf::st_as_sf(extent_poly)
    extent_poly = st_union(extent_poly)
    extent_poly = st_as_sf(extent_poly)
    # extent_poly
    
    ### Write the poly to the disk
    #sf::st_write(extent_poly, dsn = "chm_extent_kml.kml", delete_dsn = TRUE, quiet = TRUE)
    sf::st_write(
      extent_poly
      , dsn = paste0(
        config$working_spatial_dir
        , "/dem_extent_geopackage.gpkg"
      )
      , delete_dsn = TRUE
      , quiet = TRUE
    )
    
  }
  
  ###____________________###
  ### Get the total time ###
  ###____________________###
  
  end_time = Sys.time()
  total_time = difftime(end_time, start_time, units = c("mins"))
  message(" --- Total DEM rasterization took ", total_time, " minutes --- ")
  
}

Call the function to create the DTM

rasterize_tiles_to_dtm(
  config = config
  , my_des_dtm_res = 3 #in meters
  , calculate_extent = F
  , my_las_grid = las_grid # generate_grid_over_extent(las_ctg, desired_tile_res)
  , my_las_ctg = las_ctg # lidR::readLAScatalog(config$input_las_dir)
)

Map the DTM

terra::rast(paste0(
  config$delivery_spatial_dir
  , "/master_dem_raster_3m.tif"
)) %>%
  stars::st_as_stars() %>% 
  mapview::mapview(
    layer.name = "elev.(m)"
    , alpha.regions = 0.6
  )
# # uncomment for ggplot
#   as.data.frame(xy=T) %>% 
#   dplyr::rename_with(tolower) %>% 
#   dplyr::rename(f=3) %>% 
#   ggplot(mapping = aes(x=x,y=y,fill=f)) +
#     geom_tile() +
#     scale_fill_viridis_c(na.value = "black") +
#     labs(fill = "elevation (m)") +
#     theme_void()

Create Canopy Height Model

This section uses the height normalized point cloud grid tiles (created in this section) to create a CHM raster.

The process loops over each normalized point cloud grid tile and creates a Canopy Height Model (CHM) raster of user-specified resolution. The specific function used for each tile is:

lidR::rasterize_canopy(las_norm_tile, res = 1, algorithm = p2r(), pkg = "terra")

, where the algorithm = p2r() option implements an algorithm for digital surface model computation based on a points-to-raster method: for each pixel of the output raster the function attributes the height of the highest point found. The subcircle tweak replaces each point with 8 points around the original one. This allows for virtual ’emulation’ of the fact that a lidar point is not a point as such, but more realistically a disc. This tweak densifies the point cloud and the resulting canopy model is smoother and contains fewer “pits” and empty pixels. The pkg = "terra" results in a raster that is compatible with the terra package.

G. Woolsey notes: The DTM process and CHM process both use lidR::rasterize_canopy with the p2r algorithm to attribute the highest point found to the value of each raster cell. The current program:

  1. Uses lidR::rasterize_canopy with the p2r algorithm to create a raster separately for each tile
  2. Converts the tile raster to a data frame with xyz values
  3. Combines the data frames for all tiles,
  4. Converts this all-tiles data frame back to a las,
  5. Rasterizes again using lidR::rasterize_canopy with the p2r algorithm to select the highest point in each raster cell.

This could result in i) empty cells; and/or ii) a less smooth CHM which is influenced by artifact points (especially since the current outlier removal is applied only to the non-ground points after the classification algorithm is applied). Furthermore, as a raster is created for each tile separately, there is potential for edge effects to be amplified across the study extent. Calculating the raster for each tile separately excludes valid data which would otherwise be available to the algorithm if processed altogether. A potential solution is to continue processing by grid tiles with a buffer that overlaps with neighboring tiles, use a multi-point interpolation algorithm (e.g. knnidw) on each buffered tile, and then combine the grid tile rasters via terra::mosaic where values in overlapping cells are averaged. Or, process as a single point cloud over the entire study extent.

# GW added
# create function to read las, rasterize, and return data frame
las2chm2df <- function(las_path_name, des_chm_res = 3, min_z = -Inf, max_z = Inf) {
  ### Read in the file
  las_norm_tile = lidR::readLAS(las_path_name, select = "xyzc")
  
  ### Filter the points to within the min and max Z range 
    las_norm_tile = lidR::filter_poi(las_norm_tile, Z > as.numeric(min_z))
    las_norm_tile = lidR::filter_poi(las_norm_tile, Z < as.numeric(max_z))
    
  ### Get a logic check 
  is_las_empty = lidR::is.empty(las_norm_tile)
  
  ### If the las file is empty, return NULL
  if(is_las_empty == TRUE){return(
    data.frame(
      X=as.numeric(NULL)
      , Y=as.numeric(NULL)
      , Z=as.numeric(NULL)
    )
  )}else{
    ### Create a CHM from the file
    chm_tile = lidR::rasterize_canopy(
      las_norm_tile
      , res = des_chm_res
      , algorithm = p2r()
      , pkg = "terra"
    )
    
    ### Get the CHM points
    chm_points = as.data.frame(chm_tile, xy = T) %>% 
      dplyr::rename_with(toupper)
    
    ### Return the points to the function 
    return(chm_points)
  }
}
## map files over the function
# list.files(config$las_norm_tile_dir, pattern = ".*\\.(laz|las)$", full.names = T) %>%
#   .[2:3] %>%
#   purrr::map(las2chm2df, des_chm_res = 3) %>%
#   dplyr::bind_rows() %>%
#   str()
#   # dplyr::pull(Z) %>% 
#   # max(na.rm=T)
  

### Function to get CHM from height normalized tiles
rasterize_tiles_to_chm = function(
  config
  , my_des_chm_res = 3 #in meters
  , calculate_extent = TRUE
  , my_las_grid = las_grid # generate_grid_over_extent(las_ctg, desired_tile_res)
  , my_las_ctg = las_ctg # lidR::readLAScatalog(config$input_las_dir)
  , my_min_z = -Inf
  , my_max_z= Inf
){
  # define files
  las_grid = my_las_grid
  las_ctg = my_las_ctg
  
  message(" --- Initializing canopy height model rasterization --- ")
  master_start = Sys.time()
  
  # ###_____________________________###
  # ### Read in the las grid object ###
  # ###_____________________________###
  # ## !!!!!!! the grid is already in memory, no need to re-read 
  # message("Reading in project grid ... ")
  # las_grid = sfarrow::st_read_parquet(paste0(
  #   config$las_grid_dir
  #   , "/"
  #   , "project_grid.parquet")
  # )
  # 
  # message("Reading in the las as a lascatalog for indexing ... ")
  # las_ctg = lidR::readLAScatalog(config$input_las_dir)
  # las_ctg
  
  ###_______________________________________________________________________________###
  ### Get a list of height normalized tiles to generate CHM raster files           ###
  ###_______________________________________________________________________________###
  ### Loop through and generate a CHM data.frame for each tile and then bind_rows
  start_time = Sys.time()
  # file list
  tile_list = list.files(config$las_norm_tile_dir, pattern = ".*\\.(laz|las)$", full.names = T)
  # map las2chm2df function
  merged_chm_points = tile_list %>%
    purrr::map(las2chm2df, des_chm_res = my_des_chm_res, min_z = my_min_z, max_z = my_max_z) %>%
    # .[2:3] %>%
    dplyr::bind_rows()
  ###_____________________________________###
  ### Make a las file from the CHM points ###
  ###_____________________________________###
  
  message("Generating the CHM las file ... ")
  des_crs = sf::st_crs(las_grid)
  las = lidR::LAS(merged_chm_points)
  sf::st_crs(las) = des_crs
  # las
  
  ### Write cleaned las to the disk
  message("Writing the CHM las file to the disk ... ")
  lidR::writeLAS(
    las
    , file = paste0(
      config$delivery_las_dir
      , "/master_chm_point_cloud_z"
      , as.character(my_min_z)
      , "to"
      , as.character(my_max_z)
      , ".laz"
    )
  )
  
  las_area_m2 = lidR::area(las)
  las_area_acres = las_area_m2/4047
  message("Total merged CHM area of ", las_area_acres, " acres .... ")
  
  ###____________________________________###
  ### Generate a CHM from the CHM points ###
  ###____________________________________###
  
  message("Generating the CHM raster file ... ")
  master_chm = lidR::rasterize_canopy(las, res = my_des_chm_res, algorithm = p2r(), pkg="terra")
  # master_chm
  
  ###_____________________________________###
  ### Write the master raster to the disk ###
  ###_____________________________________###
  
  message("Writing the CHM raster file to the disk ... ")
  outname = paste0(
    config$delivery_spatial_dir
    ,"/chm_raster_z"
    , as.character(my_min_z)
    , "to"
    , as.character(my_max_z)
    , "m_res"
    , my_des_chm_res
    , "m.tif"
    
  )
  terra::writeRaster(master_chm, outname, overwrite = TRUE)
  
  ###_______________________________________###
  ### Get a polygon of the full file extent ###
  ###_______________________________________###
  
  if(calculate_extent == TRUE){
    
    message("Getting the extent of the CHM file ... ")
    extent_raster = terra::clamp(master_chm, 1, 1, 1)
    extent_poly = terra::as.polygons(extent_raster)
    extent_poly = terra::simplifyGeom(extent_poly)
    extent_poly = terra::fillHoles(extent_poly)
    extent_poly = sf::st_as_sf(extent_poly)
    extent_poly = st_union(extent_poly)
    extent_poly = st_as_sf(extent_poly)
    # extent_poly
    
    ### Write the poly to the disk
    #sf::st_write(extent_poly, dsn = "chm_extent_kml.kml", delete_dsn = TRUE, quiet = TRUE)
    sf::st_write(
      extent_poly
      , dsn = paste0(
        config$working_spatial_dir
        , "/chm_extent_geopackage.gpkg"
      )
      , delete_dsn = TRUE, quiet = TRUE
    )
    
  }
  
  ###____________________###
  ### Get the total time ###
  ###____________________###
  
  end_time = Sys.time()
  total_time = difftime(end_time, start_time, units = c("mins"))
  message(" --- Total lidar rasterization took ", total_time, " minutes --- ")
  return(master_chm)
}

Call the function to create the CHM

master_chm = rasterize_tiles_to_chm(
  config = config
  , my_des_chm_res = desired_chm_res #0.25 #in meters
  , calculate_extent = F
  , my_las_grid = las_grid # generate_grid_over_extent(las_ctg, desired_tile_res)
  , my_las_ctg = las_ctg # lidR::readLAScatalog(config$input_las_dir)
  , my_min_z = -Inf
  , my_max_z= max_height_threshold
)

Map the CHM

terra::rast(paste0(
    config$delivery_spatial_dir
    ,"/chm_raster_z"
    , as.character(-Inf)
    , "to"
    , as.character(max_height_threshold)
    , "m_res"
    , desired_chm_res
    , "m.tif"
  )) %>%
  stars::st_as_stars() %>% 
  mapview::mapview(
    layer.name = "CHM ht (m)"
    , alpha.regions = 0.6
  )
# # uncomment for ggplot
#   as.data.frame(xy=T) %>% 
#   dplyr::rename_with(tolower) %>% 
#   dplyr::rename(f=3) %>% 
#   ggplot(mapping = aes(x=x,y=y,fill=f)) +
#     geom_tile() +
#     scale_fill_viridis_c(na.value = "black") +
#     labs(fill = "elevation (m)") +
#     theme_void()

Detect Stems

This process uses the normalized grid tiled laz files created in this section to detect tree stems using the TreeLS package, based on methods described in de Conto et al. (2017). This section (and the functions it contains) relies on the TreeLS package and requires that data be stored within the R environment until all steps reliant on TreeLS functions are completed. Writing out laz files (i.e. for later TreeLS processing) strips all information that is required for further processing using TreeLS. It may be possible to save a csv file with the required TreeLS attributes alongside the laz file and then combine these data during later processing with the TreeLS package, but the authors have not tested this.
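
An untested sketch of the side-car csv idea floated above; las_stem is a hypothetical TreeLS-processed LAS object, and the attribute names (TreeID, Stem) follow TreeLS conventions, but whether a later rejoin round-trips cleanly is exactly the open question:

# untested: persist TreeLS-added attributes next to the laz so a later session
# could attempt to re-attach them by matching on XYZ (file name is hypothetical)
treels_attrs = las_stem@data %>% dplyr::select(X, Y, Z, TreeID, Stem)
readr::write_csv(treels_attrs, file.path(config$las_stem_dir, "1_treels_attributes.csv"))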

Each normalized grid tile point cloud is individually passed to the TreeLS::treeMap function using the map.hough algorithm, which estimates tree occurrence regions from a normalized point cloud using the Hough Transform for circle search. Good noise filtering is paramount for a robust stem isolation algorithm. There are a handful of methods to remove noise from stem point cloud data; one is the Hough transformation, in which every data point “votes” for a cylinder center (de Conto et al. (2017)). The specific tree detection function is implemented using the following code:

TreeLS::treeMap(
  las = las_norm_tile
  , method = map.hough(
    # height thresholds applied to filter a point cloud before processing
    min_h = 1
    , max_h = 5
    # height interval to perform point filtering/assignment/classification
    , h_step = 0.5
    # pixel side length to discretize the point cloud layers 
      # while performing the Hough Transform circle search
    , pixel_size = 0.025
    # largest tree diameter expected in the point cloud
    , max_d = 0.75 # 0.75m = 30in
    # minimum point density (0 to 1) within a pixel evaluated 
      # on the Hough Transform - i.e. only dense point clousters will undergo circle search
      # hey google, define "clouster" ?
    , min_density = 0.0001
    # minimum number of circle intersections over a pixel 
      # to assign it as a circle center candidate.
    , min_votes = 3
  )
  # parameter passed down to treeMap.merge (if merge > 0)
  , merge = 0
)

After tree detection, tree coordinates that overlap (or are too close) are merged into a single tree ID using the TreeLS::treeMap.merge function. The detected trees are then used to assign tree IDs to the original normalized point cloud based on coordinates extracted from the TreeLS::treeMap object via TreeLS::treePoints(las=las_norm_tile, map=treemap_merge, method = trp.crop(l = 3)), where the trp.crop() algorithm assigns points to a tree ID if they fall inside a circle of fixed size (l = 3) around each TreeLS::treeMap coordinate.

In the next step, points in the normalized point cloud are classified as stem points and assigned a tree ID using the TreeLS::stemPoints function with the algorithm stm.hough:

TreeLS::stemPoints(
  las = las
  , method = stm.hough(
    # height interval to perform point filtering/assignment/classification.
    h_step = 0.5
    # largest tree diameter expected in the point cloud
    , max_d = 0.75 # 0.75m = 30in
    # tree base height interval to initiate circle search
    , h_base = c(1, 2.5)
    #  pixel side length to discretize the point cloud layers 
      # while performing the Hough Transform circle search.
    , pixel_size = 0.025
    # minimum point density (0 to 1) within a pixel evaluated 
      # on the Hough Transform - i.e. only dense point clousters will undergo circle search
      # hey google, define "clouster" ?
    , min_density = 0.1
    # minimum number of circle intersections over a pixel 
      # to assign it as a circle center candidate.
    , min_votes = 3
  )
)

Lastly, the normalized point cloud with points assigned to a tree ID (where a stem was detected) and points classified as stems is used to estimate the DBH.

DBH estimation is done using the TreeLS::tlsInventory function with a 2D circle-fitting algorithm (RANSAC). The specific function call used is:

TreeLS::tlsInventory(
  las = las
  # height layer (above ground) to estimate stem diameters, in point cloud units
  , dh = 1.37
  # height layer width, in point cloud units
  , dw = 0.2
  # parameterized shapeFit function, i.e. method to use for diameter estimation.
  , d_method = shapeFit(
    # either "circle" or "cylinder".
    shape = "circle"
    # optimization method for estimating the shape's parameters
    , algorithm = "ransac"
    # number of points selected on every RANSAC iteration.
    , n = 20
  )
)

Define the stem detection function

###_____________________________________________________###
### Define function to map for potential tree locations ###
### Using TreeLS::treeMap                               ###
###_____________________________________________________###
### Function to map for potential tree locations with error handling
  tree_map_function <- function(las){
    result <- tryCatch(
      expr = {
        map = TreeLS::treeMap(
          las = las
          , method = map.hough(
            # height thresholds applied to filter a point cloud before processing
            min_h = 1
            , max_h = 5
            # height interval to perform point filtering/assignment/classification
            , h_step = 0.5
            # pixel side length to discretize the point cloud layers 
              # while performing the Hough Transform circle search
            , pixel_size = 0.025
            # largest tree diameter expected in the point cloud
            , max_d = 0.75 # 0.75m = 30in
            # minimum point density (0 to 1) within a pixel evaluated 
              # on the Hough Transform - i.e. only dense point clusters will undergo circle search
            , min_density = 0.0001
            # minimum number of circle intersections over a pixel 
              # to assign it as a circle center candidate.
            , min_votes = 3
          )
          # parameter passed down to treeMap.merge (if merge > 0)
          , merge = 0
        )
      },
      error = function(e) {
        # return the error message as a character string so callers can
        # detect failure by checking class(result) == "character"
        return(paste("Error:", e$message))
      }
    )
    return(result)
  }

Define the stem processing function

This function applies TreeLS processing to the normalized point cloud to:

  1. Apply the TreeLS::treeMap stem detection function
  2. Merge overlapping tree coordinates using TreeLS::treeMap.merge
  3. Assign tree IDs to the original points using TreeLS::treePoints
  4. Flag only the stem points using TreeLS::stemPoints
  5. Estimate DBH using TreeLS::tlsInventory

The function writes: i) a laz file to the ../data/02_processed_data/04_las_stem_dir directory with the Classification data updated to ground points (class 2), water points (class 9), stem points (class 4), and non-stem points (class 5); and ii) a parquet file with the tree IDs, stem locations, heights, and DBH estimates.

# pass this function a file path of the normalized las you wish to detect stems and classify
write_stem_las_fn <- function(las_path_name) {
    ### Get the desired las file
    las_name = basename(las_path_name)
    # las_name
    
    ### See if the las file has been generated
    path_to_check = paste0(config$las_stem_dir, "/", las_name)
    does_file_exist = file.exists(path_to_check)
    # does_file_exist
    ### See if the vector file has been generated
    path_to_check = paste0(config$stem_poly_tile_dir, "/", tools::file_path_sans_ext(las_name), ".parquet")
    does_file_exist2 = file.exists(path_to_check)
    # does_file_exist2
    
    if(does_file_exist == TRUE & does_file_exist2 == TRUE){
      message("stem detect for grid number ", las_name, " already exists guy ... ")
      return(FALSE)
    }
    
    ### Read in the desired las file
    las_norm_tile = lidR::readLAS(las_path_name)
    las_norm_tile = lidR::filter_duplicates(las_norm_tile)
    # plot(las_norm_tile)
    
    # get the maximum point height
    max_point_height = max(las_norm_tile@data$Z)
    
    ###____________________________________________________________###
    ### If the max point height is below 2 m, skip the tile ###
    ###____________________________________________________________###
    
    if(max_point_height < 2){
        message("No points >2m for grid number ", las_name, " so skipped it ... ")
        # ### !!!! UNCOMMENT TO WRITE CLASSIFIED LAS INSTEAD WITH NO STEM POINTS
        # ### Pull out the ground points
        # ground = filter_poi(las_norm_tile, Classification == 2)
        # 
        # ### Pull out the remaining points that aren't ground
        # remaining_points = filter_poi(las_norm_tile, Classification != 2)
        # remaining_points@data$Classification = 5
        # 
        # ### Combine the newly classified data
        # las_reclassified = rbind(ground, remaining_points)
        # las_reclassified@data$TreeID = as.numeric(0)
        # las_reclassified@data$Stem = as.logical(F)
        # las_reclassified@data$Segment = as.numeric(0)
        # las_reclassified@data$Radius = as.numeric(0)
        # las_reclassified@data$Votes = as.numeric(0)
        # 
        # ### Write the stem points to the disk
        # lidR::writeLAS(las_reclassified, paste0(config$las_stem_dir, "/", las_name))
      return(FALSE)
    }
    
    ###______________________________________________________________###
    ### If the max point height is at least 2 m, try to detect stems ###
    ###______________________________________________________________###
    
    if(max_point_height >= 2){
      ###______________________________________________________________###
      ### 1) Apply the `TreeLS::treeMap` [stem detection function](#detect_stem_fn)
      ###______________________________________________________________###
      ### Run the function to search for candidate locations
      treemap_temp = tree_map_function(las_norm_tile)
      
      ### Get a logic check
      check = class(treemap_temp)
      # check
      
      ###_______________________________________________________________###
      ### If the class of the result == "character", then no stems found ###
      ###_______________________________________________________________###
      
      if(check == "character"){
        message("No stems detected for grid number ", las_name, " so skipped it ... ")
        return(FALSE)
      }
      
      ### If the class of the result == "LAS"
      if(check == "LAS"){
        
        ###___________________________________###
        ### Classify the tree and stem points ###
        ###___________________________________###
        
        ###______________________________________________________________###
        ### 2) Merge overlapping tree coordinates using `TreeLS::treeMap.merge`
        ###______________________________________________________________###
        treemap_temp = TreeLS::treeMap.merge(treemap_temp)
        
        ###______________________________________________________________###
        ### 3) Assign tree IDs to the original points using `TreeLS::treePoints`
        ###______________________________________________________________###
        ### Classify tree regions
        ## Assigns TreeIDs to a LAS object based on coordinates extracted from a treeMap object.
        las_norm_tile = TreeLS::treePoints(
          las = las_norm_tile
          , map = treemap_temp
          , method = trp.crop(l = 3)
        )
        # plot(las_norm_tile, color = "TreeID")
        
        ###______________________________________________________________###
        ### 4) Flag only the stem points using `TreeLS::stemPoints`
        ###______________________________________________________________###
        ### Classify stem points
        las_norm_tile = TreeLS::stemPoints(
          las = las_norm_tile
          , method = stm.hough(
            # height interval to perform point filtering/assignment/classification.
            h_step = 0.5
            # largest tree diameter expected in the point cloud
            , max_d = 0.75 # 0.75m = 30in
            # tree base height interval to initiate circle search
            , h_base = c(1, 2.5)
            #  pixel side length to discretize the point cloud layers 
              # while performing the Hough Transform circle search.
            , pixel_size = 0.025
            # minimum point density (0 to 1) within a pixel evaluated 
              # on the Hough Transform - i.e. only dense point clusters will undergo circle search
            , min_density = 0.1
            # minimum number of circle intersections over a pixel 
              # to assign it as a circle center candidate.
            , min_votes = 3
          )
        )
        
        ###______________________________________________________________###
        ### 5) DBH estimation is done using `TreeLS::tlsInventory`
        ###______________________________________________________________###
        ### Search through tree points and estimate DBH to return a data frame of results
          tree_inv_df = TreeLS::tlsInventory(
            las = las_norm_tile
            # height layer (above ground) to estimate stem diameters, in point cloud units
            , dh = 1.37
            # height layer width, in point cloud units
            , dw = 0.2
            # parameterized shapeFit function, i.e. method to use for diameter estimation.
            , d_method = shapeFit(
              # either "circle" or "cylinder".
              shape = "circle"
              # optimization method for estimating the shape's parameters
              , algorithm = "ransac"
              # number of points selected on every RANSAC iteration.
              , n = 20
            )
          )
          # class(tree_inv_df)
          # tree_inv_df %>% dplyr::glimpse()
        ###_______________________________________________________###
        ### clean up the DBH stem data frame ###
        ###_______________________________________________________###
          # add details to table and convert to sf data
          tree_inv_df = tree_inv_df %>% 
            dplyr::mutate(
              Radius = as.numeric(Radius)
              , dbh_m = Radius*2
              , dbh_cm = dbh_m*100
              , basal_area_m2 = pi * (Radius)^2
              , basal_area_ft2 = basal_area_m2 * 10.764
              , treeID = paste0(X, "_", Y)
              , stem_x = X
              , stem_y = Y
            ) %>% 
            sf::st_as_sf(coords = c("X", "Y"), crs = sf::st_crs(las_norm_tile)) %>% 
            dplyr::select(
              treeID, H, stem_x, stem_y, Radius, Error
              , dbh_m, dbh_cm, basal_area_m2, basal_area_ft2
            ) %>% 
            dplyr::rename(
              tree_height_m = H
              , radius_m = Radius
              , radius_error_m = Error
            )
          # tree_inv_df %>% dplyr::glimpse()
          
          ### Remove points outside the bounding box of the laz tile + 1m buffer
          tree_inv_df = tree_inv_df %>% 
            sf::st_crop(
              sf::st_bbox(las_norm_tile) %>% 
                sf::st_as_sfc() %>% 
                sf::st_buffer(1)
            )
        
        ###_______________________________________________________###
        ### Set the classification codes of different point types ###
        ###_______________________________________________________###
        
        ### Pull out the stem files
        stem_points = lidR::filter_poi(las_norm_tile, Stem == TRUE)
        stem_points@data$Classification = 4
        
        ### Pull out the ground and water points
        ground = filter_poi(las_norm_tile, Classification %in% c(2,9))
        
        ### Pull out the remaining points that aren't stem, ground, or water
        remaining_points = filter_poi(las_norm_tile, Stem == FALSE & !(Classification %in% c(2,9)))
        remaining_points@data$Classification = 5
        
        ### Combine the newly classified data
        las_reclassified = rbind(stem_points, ground, remaining_points)
        # str(las_reclassified)
        # class(las_reclassified)
        # plot(las_reclassified, color = "Classification")
        
        ###_______________________________________________________###
        ### Write output to disk ###
        ###_______________________________________________________###
        ### Write the stem points to the disk
        lidR::writeLAS(las_reclassified, paste0(config$las_stem_dir, "/", las_name))
        message("Wrote stem detect laz for grid number ", las_name, " successfully ... ")
        ### Write stem polygons to the disk
        out_name = tools::file_path_sans_ext(las_name)
        out_name = paste0(config$stem_poly_tile_dir, "/", out_name, ".parquet")
        sfarrow::st_write_parquet(tree_inv_df, out_name)
        message("Wrote stem detect vector data for grid number ", las_name, " successfully ... ")
        
        # return(las_norm_tile)
        return(TRUE)
      }
    }
}
# ### test the function
# list.files(config$las_norm_tile_dir, pattern = ".*\\.(laz|las)$", full.names = T) %>%
#   .[1:2] %>%
#   purrr::map(write_stem_las_fn)

Call the function to write the stem classified point clouds to the disk

# map over the normalized point cloud tiles
  list.files(config$las_norm_tile_dir, pattern = ".*\\.(laz|las)$", full.names = T) %>%
    purrr::map(write_stem_las_fn)
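
Because each tile is processed independently (and the function skips tiles already written to disk), this map can also be parallelized. A sketch using the parallel package loaded in the setup; the mc.cores value is an assumption to adjust for your machine, and mclapply forks processes, so on Windows use the sequential map above:

# optional: process tiles in parallel via forking (not available on Windows)
las_files = list.files(config$las_norm_tile_dir, pattern = ".*\\.(laz|las)$", full.names = T)
parallel::mclapply(las_files, write_stem_las_fn, mc.cores = 4)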

Example of Detect Stems Steps

This is an example of the full process:

1. Read Normalized Point Cloud

# read single data tile
  las_norm_tile_temp = list.files(
      config$las_norm_tile_dir
      , pattern = ".*\\.(laz|las)$", full.names = T
    ) %>% 
    .[1] %>% 
    lidR::readLAS() %>% 
    lidR::filter_duplicates()

# plot(las_norm_tile_temp)
plot3D::scatter3D(
  x = las_norm_tile_temp@data$X
  , y = las_norm_tile_temp@data$Y
  , z = las_norm_tile_temp@data$Z
  , colvar = las_norm_tile_temp@data$Z
  , pch = 19, cex = 0.2
)

2. Detect Stems

# implement the tree detection function
# this function is essentially TreeLS::treeMap(las, method = map.hough(...))
  treemap_temp = tree_map_function(las_norm_tile_temp)
  # what is this?
  # class(treemap_temp)
  # str(treemap_temp)
  
  ## merge TreeIDs which are too close in a treeMap's object.
  treemap_temp = TreeLS::treeMap.merge(treemap_temp)
  # str(treemap_temp)
  # treemap_temp@data$TreeID %>% unique() %>% length()

# plot(treemap_temp, color = "TreeID")
plot3D::scatter3D(
  x = treemap_temp@data$X
  , y = treemap_temp@data$Y
  , z = treemap_temp@data$Z
  , colvar = treemap_temp@data$TreeID
  , col = viridis::turbo(treemap_temp@data$TreeID %>% unique() %>% length())
  , pch = 19, cex = 0.5
  , colkey = F
)

3. Assign TreeID to Normalized Points

### Classify tree regions
  ## Assigns TreeIDs to a LAS object based on coordinates extracted from a treeMap object.
  las_norm_tile_temp = las_norm_tile_temp %>% 
    TreeLS::treePoints(map = treemap_temp, method = trp.crop(l = 3))
  # las_cls_temp@data %>% dplyr::glimpse()
  # las_cls_temp@data %>% dplyr::count(TreeID) %>% nrow()

# plot(las_norm_tile_temp, color = "TreeID")
plot3D::scatter3D(
  x = las_norm_tile_temp@data$X
  , y = las_norm_tile_temp@data$Y
  , z = las_norm_tile_temp@data$Z
  , colvar = las_norm_tile_temp@data$TreeID
  , col = viridis::turbo(las_norm_tile_temp@data$TreeID %>% unique() %>% length())
  , pch = 19, cex = 0.2
  , colkey = F
)

4. Classify Normalized Points as Stem Points

### Classify stem points
  las_norm_tile_temp = TreeLS::stemPoints(
    las = las_norm_tile_temp
    , method = stm.hough(
      # height interval to perform point filtering/assignment/classification.
      h_step = 0.5
      # largest tree diameter expected in the point cloud
      , max_d = 0.75 # 0.75m = 30in
      # tree base height interval to initiate circle search
      , h_base = c(1, 2.5)
      #  pixel side length to discretize the point cloud layers 
        # while performing the Hough Transform circle search.
      , pixel_size = 0.025
      # minimum point density (0 to 1) within a pixel evaluated 
        # on the Hough Transform - i.e. only dense point clusters will undergo circle search
      , min_density = 0.1
      # minimum number of circle intersections over a pixel 
        # to assign it as a circle center candidate.
      , min_votes = 3
    )
  )
  # las_norm_tile_temp@data %>% dplyr::glimpse()
  # las_norm_tile_temp@data %>% dplyr::count(Classification)
  # las_norm_tile_temp@data %>% dplyr::count(Stem)
  # las_norm_tile_temp@data %>% dplyr::count(TreeID) %>% nrow()
# plot only ground points and stem points
  # lidR::filter_poi(las_norm_tile_temp, (Stem==TRUE | Classification==2)) %>%
  #   plot(color = "Stem")
# plot only ground points and stem points
plt_las_norm_tile_temp = lidR::filter_poi(las_norm_tile_temp, (Stem==TRUE | Classification==2))
plot3D::scatter3D(
  x = plt_las_norm_tile_temp@data$X
  , y = plt_las_norm_tile_temp@data$Y
  , z = plt_las_norm_tile_temp@data$Z
  , colvar = plt_las_norm_tile_temp@data$Stem
  , col = viridis::viridis(plt_las_norm_tile_temp@data$Stem %>% unique() %>% length())
  , pch = 19, cex = 0.4
  , colkey = F
)

5. Estimate DBH from the Stem Points

### Search through tree points and estimate DBH to return a data frame of results
  tree_inv_temp = TreeLS::tlsInventory(
    las = las_norm_tile_temp
    # height layer (above ground) to estimate stem diameters, in point cloud units
    , dh = 1.37
    # height layer width, in point cloud units
    , dw = 0.2
    # parameterized shapeFit function, i.e. method to use for diameter estimation.
    , d_method = shapeFit(
      # either "circle" or "cylinder".
      shape = "circle"
      # optimization method for estimating the shape's parameters
      , algorithm = "ransac"
      # number of points selected on every RANSAC iteration.
      , n = 20
    )
  )
  # class(tree_inv_temp)
  # tree_inv_temp %>% dplyr::glimpse()

  # add details to table and convert to sf data
  tree_inv_temp = tree_inv_temp %>% 
    dplyr::mutate(
      Radius = as.numeric(Radius)
      , dbh_m = Radius*2
      , dbh_cm = dbh_m*100
      , basal_area_m2 = pi * (Radius)^2
      , basal_area_ft2 = basal_area_m2 * 10.764
      , treeID = paste0(X, "_", Y)
      , stem_x = X
      , stem_y = Y
    ) %>% 
    sf::st_as_sf(coords = c("X", "Y"), crs = sf::st_crs(las_norm_tile_temp)) %>% 
    dplyr::select(
      treeID, H, stem_x, stem_y, Radius, Error
      , dbh_m, dbh_cm, basal_area_m2, basal_area_ft2
    ) %>% 
    dplyr::rename(
      tree_height_m = H
      , radius_m = Radius
      , radius_error_m = Error
    )
  # tree_inv_temp %>% dplyr::glimpse()
  
  ### Remove points outside the bounding box of the laz tile + 1m buffer
  tree_inv_temp = tree_inv_temp %>% 
    sf::st_crop(
      sf::st_bbox(las_norm_tile_temp) %>% 
        sf::st_as_sfc() %>% 
        sf::st_buffer(1)
    )
  
  ### plot
  ggplot() + 
    geom_sf(
      data = sf::st_bbox(las_norm_tile_temp) %>% 
        sf::st_as_sfc() %>% 
        sf::st_buffer(1)
      , lwd = 1.5
      , alpha = 0
    ) +
    geom_sf(
      data = tree_inv_temp
      , mapping = aes(size=dbh_m, color=dbh_m)
    ) + 
    scale_color_viridis_c(option="mako",direction = -1) +
    theme_light()

Combine the Stem Grid Tiles

In the prior section, each normalized grid tile point cloud was individually analyzed with a TreeLS package workflow to: i) write a laz file to the ../data/02_processed_data/04_las_stem_dir (config$las_stem_dir) directory with the Classification data updated to ground points (class 2), water points (class 9), stem points (class 4), and non-stem points (class 5); and ii) write a parquet file with the tree IDs, stem locations, heights, and DBH estimates.

This section combines these classified grid tile point clouds into a single point cloud that includes stem points only (Classification == 4) and a single vector data layer with the stem locations and identifying information.

Combine Stem Point Clouds

Combine grid tile stem point clouds. This data does not contain tree ID, tree height, or tree DBH estimates. That data resides in the vector data written to the ../data/02_processed_data/05_stem_polygon_dir (config$stem_poly_tile_dir) directory as parquet tile files created in this section. See the example vector data here. This vector data is combined in the next section.

###___________________________________________________###
### create function to read las and return data frame
###___________________________________________________###
  las2df <- function(las_path_name, class_filter_list = as.numeric(NA)) {
    ### Read in the desired las file
    las_stem_tile = lidR::readLAS(las_path_name) %>% 
      lidR::filter_duplicates()
  
    # if the class filter is NA, return the whole data set
    if(any(is.na(as.numeric(class_filter_list)))){
      stem_point_df = las_stem_tile@data %>% 
        # track tile file name
        dplyr::mutate(las_stem_dir_file = basename(las_path_name))
    }else{
      ### Grab the filtered dataframe
      stem_point_df = las_stem_tile@data %>% 
        ### Pull the stem only points
        dplyr::filter(Classification %in% as.numeric(class_filter_list)) %>% 
        # track tile file name
        dplyr::mutate(las_stem_dir_file = basename(las_path_name))
    }
    # str(stem_point_df)
    
    ### Return the dataframe
    return(stem_point_df)
  }
  # ## test function
  # list.files(config$las_stem_dir, pattern = ".*\\.(laz|las)$", full.names = T) %>%
  #   .[1:2] %>% 
  #   purrr::map(las2df, class_filter_list=c(4)) %>% 
  #   dplyr::bind_rows() %>% 
  #   dplyr::count(Classification)

###___________________________________________________###
### Call the function to Merge the stem las point files into a single file ###
###___________________________________________________###
  merged_stem_points = list.files(config$las_stem_dir, pattern = ".*\\.(laz|las)$", full.names = T) %>%
    purrr::map(las2df, class_filter_list=c(4)) %>% 
    dplyr::bind_rows()

  # str(merged_stem_points)
###___________________________________________________###
### Points to LAS
###___________________________________________________###
  stem_points_las = lidR::LAS(merged_stem_points)
  st_crs(stem_points_las) = st_crs(las_grid)
  stem_points_las = lidR::filter_duplicates(stem_points_las)
  
  # plot(stem_points_las)
  
  ### Write the las file to the disk
  lidR::writeLAS(stem_points_las, paste0(config$delivery_las_dir,"/detected_stem_points.laz"))
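
Optionally, a .lax spatial index can be written alongside the laz to speed up subsequent spatial queries on the file; this is why rlas was loaded in the setup. A minimal sketch:

# optional: write a .lax index file next to the combined laz
rlas::writelax(paste0(config$delivery_las_dir, "/detected_stem_points.laz"))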

Plot the combined stems point cloud

# structure of the merged_stem_points data
dplyr::glimpse(merged_stem_points)
## Rows: 148,510
## Columns: 19
## $ X                 <dbl> 610558.6, 610558.9, 610558.6, 610558.6, 610558.5, 61…
## $ Y                 <dbl> 4888822, 4888822, 4888822, 4888822, 4888822, 4888822…
## $ Z                 <dbl> 2.265, 2.576, 1.009, 1.867, 2.456, 1.619, 2.337, 0.7…
## $ Intensity         <int> 12079, 21331, 28527, 23130, 12079, 2827, 9509, 3598,…
## $ ReturnNumber      <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ NumberOfReturns   <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ ScanDirectionFlag <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ EdgeOfFlightline  <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Classification    <int> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4…
## $ Synthetic_flag    <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
## $ Keypoint_flag     <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
## $ Withheld_flag     <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
## $ ScanAngleRank     <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ UserData          <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ PointSourceID     <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ R                 <int> 10537, 16962, 25443, 21074, 11565, 2313, 8224, 3341,…
## $ G                 <int> 13621, 24672, 31611, 24929, 13107, 3341, 10537, 4112…
## $ B                 <int> 9509, 17476, 21845, 19789, 9509, 1799, 8481, 3084, 7…
## $ las_stem_dir_file <chr> "1.laz", "1.laz", "1.laz", "1.laz", "1.laz", "1.laz"…
# plot
plot3D::scatter3D(
  x = stem_points_las@data$X
  , y = stem_points_las@data$Y
  , z = stem_points_las@data$Z
  , colvar = stem_points_las@data$Z
  , col = viridis::viridis(50)
  , pch = 19, cex = 0.3
)

Combine Stem Vector Data

Combine the vector data written to the ../data/02_processed_data/05_stem_polygon_dir (config$stem_poly_tile_dir) directory as parquet tile files created in this section. An example of this data is shown here. This data contains tree ID, tree height, and tree DBH estimates as well as xy coordinates of the tree stems.

Note that stem vector points which spatially overlap (e.g. the same stem detected in two adjacent, buffered tiles) are not filtered out at this stage; see the overlap check below and the note in the cleaning section.

###__________________________________________________________###
### Merge the stem vector location tiles into a single object ###
###__________________________________________________________###
dbh_locations_sf = list.files(config$stem_poly_tile_dir, pattern = ".*\\.parquet$", full.names = T) %>% 
    purrr::map(sfarrow::st_read_parquet) %>% 
    dplyr::bind_rows() %>% 
    sf::st_as_sf() %>% 
    sf::st_make_valid() %>% 
    sf::st_set_crs(sf::st_crs(las_grid))

What is the structure of this vector data?

str(dbh_locations_sf)
## Classes 'sf' and 'data.frame':   294 obs. of  11 variables:
##  $ treeID        : chr  "610596.540376442_4888792.2245489" "610568.642130108_4888778.00420689" "610577.747388244_4888780.16525558" "610555.320981336_4888809.46613208" ...
##  $ tree_height_m : num  13.72 11.05 10.38 7.63 11.11 ...
##  $ stem_x        : num  610597 610569 610578 610555 610578 ...
##  $ stem_y        : num  4888792 4888778 4888780 4888809 4888823 ...
##  $ radius_m      : num  0.225 0.319 0.272 0.306 0.265 ...
##  $ radius_error_m: num  0.012 0.0406 0.066 0.0171 0.0234 ...
##  $ dbh_m         : num  0.451 0.637 0.545 0.612 0.53 ...
##  $ dbh_cm        : num  45.1 63.7 54.5 61.2 53 ...
##  $ basal_area_m2 : num  0.16 0.319 0.233 0.295 0.221 ...
##  $ basal_area_ft2: num  1.72 3.43 2.51 3.17 2.38 ...
##  $ geometry      :sfc_POINT of length 294; first list element:  'XY' num  610597 4888792
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA
##   ..- attr(*, "names")= chr [1:10] "treeID" "tree_height_m" "stem_x" "stem_y" ...

Is a row unique by treeID?

dbh_locations_sf %>% 
  sf::st_drop_geometry() %>% 
  dplyr::ungroup() %>% 
  dplyr::summarise(
    n_rows = dplyr::n()
    , n_unique_treeID = dplyr::n_distinct(treeID)
  ) %>% 
  kableExtra::kbl() %>% 
  kableExtra::kable_styling()
n_rows n_unique_treeID
294 294

Do stem points overlap spatially?

### plot
  ggplot(data = dbh_locations_sf %>% dplyr::mutate(random=runif(dplyr::n()))) +
    geom_sf(
      mapping = aes(size=dbh_m, fill=as.factor(random))
      , shape = 21
      , alpha = 0.6
    ) + 
    scale_fill_viridis_d(option="turbo") +
    theme_light() + 
    theme(legend.position = "none")
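
A quick numeric check of overlap (a sketch: buffer each stem by its estimated radius and count buffers that intersect another stem's buffer):

# sketch: count stems whose DBH buffers intersect another stem's buffer
stem_buff = dbh_locations_sf %>% dplyr::filter(!is.na(radius_m))
stem_buff = sf::st_buffer(stem_buff, dist = stem_buff$radius_m)
overlap_counts = lengths(sf::st_intersects(stem_buff))
# each buffer intersects itself once, so counts > 1 flag true overlaps
sum(overlap_counts > 1)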

How do the stem points look on satellite imagery?

Note: in the map layer control, the las_grid_buff box can be checked to view the buffered grid used for processing the grid tiles.

mapview::mapview(
    las_ctg@data$geometry
    , color = "black"
    , lwd = 3, alpha.regions = 0.0, legend = F, label = F
  ) +
  mapview::mapview(
    las_grid_buff
    , zcol = "grid_id"
    , col.regions = viridis::cividis(nrow(las_grid_buff))
    , color = "gray55"
    , alpha.regions = 0.2, lwd = 2, legend = F, label = F, hide = T
  ) +
  mapview::mapview(
    dbh_locations_sf
    , zcol = "treeID"
    , cex = "dbh_m"
    , legend = F
    , label = F
  )

Clean the Stem Vector Data

Note!: this clean-up process uses code from the original author Neal Swayze with no edits. It does not include any filtering of stems (and associated DBHs) that spatially overlap, nor does it have any selection criteria for individual trees that were “cut in half” in one tile but wholly contained in another tile when the grid tiles were combined (i.e. these trees will be included twice: once with a “good” DBH value and once with a “bad” one).

The cleaning process uses the following steps:

  • remove stems with empty radius estimates from the TreeLS::tlsInventory DBH estimation step
  • remove stems with DBH greater than or equal to the threshold set by the user in the parameter dbh_max_size_m (1.5 m in this example)
  • remove stems with empty or invalid xy coordinates

This step also creates a vector file of the stems with points sized by the radius (m) estimate of DBH and writes the vector files to the ../data/02_processed_data/06_working_spatial_dir (config$working_spatial_dir) directory.

###________________________________________________###
### CLEAN UP THE STEM POLYGONS WITH SOME FILTERING ###
###________________________________________________###
  
  ### Get the NA condition
  is_na = is.na(dbh_locations_sf$radius_m)
  dbh_locations_sf = cbind(dbh_locations_sf, is_na)
  
  ### Drop stems with NA values
  dbh_locations_sf = dbh_locations_sf[dbh_locations_sf$is_na == "FALSE",]
  dbh_locations_sf = subset(dbh_locations_sf, select = -c(is_na))
  
  ### Drop DBHs above set threshold 
  dbh_locations_sf = dbh_locations_sf[dbh_locations_sf$dbh_m < dbh_max_size_m,]
  
  ### Remove empty geometry stem locations
  condition = sf::st_is_empty(dbh_locations_sf)
  dbh_locations_sf = cbind(dbh_locations_sf, condition)
  dbh_locations_sf = dbh_locations_sf[dbh_locations_sf$condition == FALSE,]

###___________________________________________________________###
### create a variable and buffer the points with the radius ###
###___________________________________________________________###
  
  ### Reset the condition
  dbh_locations_sf$condition = rep("detected_stem", nrow(dbh_locations_sf))

###___________________________________________________________###
### Write the DBHs and the merged canopy polygons to the disk ###
###___________________________________________________________###
  sf::st_write(
    dbh_locations_sf
    , dsn = paste0(config$delivery_spatial_dir, "/bottom_up_detected_stem_locations.gpkg")
    , append = FALSE
    , delete_dsn = TRUE
    , quiet = TRUE
  )
  sf::st_write(
    ### Buffer the points by the radius of the DBH
    sf::st_buffer(dbh_locations_sf, dist = dbh_locations_sf$radius_m)
    , dsn = paste0(config$delivery_spatial_dir, "/bottom_up_detected_stem_locations_buffered.gpkg")
    , append = FALSE
    , delete_dsn = TRUE
    , quiet = TRUE
  )

Result of the cleaning process (compare to same table above)

dbh_locations_sf %>% 
  sf::st_drop_geometry() %>% 
  dplyr::ungroup() %>% 
  dplyr::summarise(
    n_rows = dplyr::n()
    , n_unique_treeID = dplyr::n_distinct(treeID)
  ) %>% 
  kableExtra::kbl() %>% 
  kableExtra::kable_styling()
n_rows n_unique_treeID
285 285

CHM Individual Tree Detection

This section utilizes the Canopy Height Model (CHM) raster created in this section, which has a grid resolution of 0.25 m, to perform individual tree detection from the top down (i.e. using raster cell height values). From The lidR Package Book:

Individual tree detection (ITD) is the process of spatially locating trees and extracting height information. Individual tree segmentation (ITS) is the process of individually delineating detected trees. In lidR, detecting and segmenting functions are decoupled to maximize flexibility. Tree tops are first detected using the locate_trees() function, followed by crown delineation using segment_trees().

The CHM is used to perform individual tree detection using the lidR::locate_trees function with the lmf algorithm which implements a local maximum filter based on the window size (ws) parameter with a minimum tree height (hmin) parameter of 2 m. The window size can be fixed or variable and its shape can be square or circular.

The individual tree detection algorithm recommended in Swayze and Tinkham (2022) is a local maximum variable window function based on research in ponderosa pine (Pinus ponderosa) forests by Creasy et al. (2021) and Tinkham and Swayze (2021). This recommended variable window function has the form:

\[ \textrm{variable window radius} = \textrm{pixel height} \times 0.1 \]

This particular variable window radius function (with a minimum window size of 2m and a maximum of 5m) has the shape:

A variable window size can be implemented in the lidR::locate_trees function via the following code:

# define the variable window function
ws_fn = function(x) {
  y = dplyr::case_when(
    is.na(x) ~ 1e-3 # requires non-null
    , x < 0 ~ 1e-3 # requires positive
    , x < 2 ~ 2 # set lower bound
    , x > 30 ~ 5  # set upper bound
    , TRUE ~ 2 + (x * 0.1)
  )
  return(y)
}

# call the locate trees function and pass the variable window
lidR::locate_trees(
  chm
  , algorithm = lmf(
    ws = ws_fn
    , hmin = 2
  )
)
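
To see the shape referenced above, the window function can be plotted over a range of pixel heights (a minimal sketch):

# sketch: visualize the variable window size over pixel heights 0-40 m
dplyr::tibble(height_m = seq(0, 40, by = 0.1)) %>%
  dplyr::mutate(ws_m = ws_fn(height_m)) %>%
  ggplot(mapping = aes(x = height_m, y = ws_m)) +
  geom_line(color = "navy", lwd = 1) +
  labs(x = "Pixel Ht. (m)", y = "Window Size (m)") +
  theme_light()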

Implement CHM Individual Tree Detection

This function uses the CHM raster created in this section and applies the individual tree detection algorithm using a variable window function of linear form (with a minimum window size of 2 m for pixels 2 m and below, and a maximum window size of 5 m for pixels 30 m in height and above). The result is an sf data frame with point locations of tree tops.

###___________________________________________________###
### define the variable window function
###___________________________________________________###
# define the variable window function
ws_fn = function(x) {
  y = dplyr::case_when(
    is.na(x) ~ 1e-3 # requires non-null
    , x < 0 ~ 1e-3 # requires positive
    , x < 2 ~ 2 # set lower bound
    , x > 30 ~ 5  # set upper bound
    , TRUE ~ 2 + (x * 0.1)
  )
  return(y)
}

###___________________________________________________###
### Individual tree detection using CHM (top down)
###___________________________________________________###
  # master_chm = rasterize_tiles_to_chm()
  # terra::crs(master_chm)
  # plot(master_chm)
  
  ### ITD on CHM
  # call the locate_trees function and pass the variable window
  tree_tops = lidR::locate_trees(
      master_chm
      , algorithm = lmf(
        ws = ws_fn
        , hmin = minimum_tree_height
      )
    )
  # str(tree_tops)

View the results of the individual tree detection overlaid on the CHM raster

ggplot() + 
  geom_tile(
    data = master_chm %>% as.data.frame(xy=T) %>% dplyr::rename(f=3)
    , mapping = aes(x=x,y=y,fill=f)
  ) +
  geom_sf(
    data = tree_tops
    , shape = 20
    , color = "black"
  ) +
  coord_sf(
    expand = FALSE
  ) +
  scale_fill_viridis_c(option = "plasma") +
  labs(fill = "Hgt. (m)", x = "", y = "") +
  theme_light()

Delineate Tree Crowns

This step uses the Individual Tree Detection based on the CHM to delineate tree crown vector (i.e. polygon) shapes.

Creasy et al. (2021) evaluated four different crown delineation algorithms in ponderosa pine (Pinus ponderosa) forests: the Dalponte lidR::dalponte2016, Silva lidR::silva2016, and watershed lidR::watershed algorithms inside the lidR::segment_trees function and the marker-controlled watershed function from the ForestTools package. Overall, the lidR::watershed algorithm provided the smallest crown area bias though small crown areas were overestimated and larger crowns were underestimated.

The lidR::segment_trees function works on an input point cloud (i.e. las) without the requirement to create a canopy height model (CHM) beforehand. However, the crown delineation algorithms available within the lidR package can be run outside of the lidR::segment_trees function using only an input CHM raster. From The lidR Package Book:

While point cloud segmentation is standard in lidR, users may only have access to a CHM. There are many reasons for only using a CHM, and this is why raster-based methods can be run standalone outside segment_trees(). Whether using the point cloud or a raster, segmentation results will be exactly the same. The difference will be the data format of the segmentation result. In lidR, a LAS object will gain a treeID attribute, while for rasters, delineated crowns are returned in a raster format. To work outside segment_trees() it suffices to call the function standalone like this:

crowns = lidR::watershed(
  # input raster layer !! works with terra or stars !!
  chm = master_chm
  # threshold below which a pixel cannot be a tree. Default is 2
  , th_tree = 2
)

plot(crowns, col = (viridis::turbo(2000) %>% sample()))

G. Woolsey notes: the lidR::watershed call above fails because it requires a CHM with no empty raster cells, and the input CHM created in this section contains empty cells. Obtaining a CHM raster without empty cells can be accomplished using the interpolation and mosaic methods discussed in that section. Alternatively, the lidR::segment_trees function can be utilized with a point cloud covering the full study extent, which requires a complete (non-tiled) point cloud (perhaps using the laz file created based off of the CHM raster and written to the disk in the ../data/03_delivery/03_las_files directory).
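
One way to fill only the empty CHM cells is a focal mean restricted to NA cells (a sketch, not part of the original workflow; the 3x3 window size is an assumption):

# sketch: fill empty CHM cells with a 3x3 focal mean, leaving valid cells as-is
chm_filled = terra::focal(
  master_chm
  , w = 3               # 3x3 moving window
  , fun = "mean"
  , na.rm = TRUE
  , na.policy = "only"  # only fill cells that are currently NA
)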

Instead, the ForestTools::mcws marker-controlled watershed segmentation algorithm is used below and requires input of individual tree top points and a CHM raster. However, this package relies on the raster package, which was deprecated in 2023. Tinkham et al. (2022) used this same ForestTools::mcws algorithm to delineate tree crowns.

# this requires the `raster` package which is deprecated
crowns = ForestTools::mcws(
  treetops = sf::st_zm(tree_tops, drop = T) # drops z values
  , CHM = raster::raster(master_chm) # converts to raster data
  , minHeight = minimum_tree_height
) %>% terra::rast()

# str(crowns)
# plot(crowns, col = (viridis::turbo(2000) %>% sample()))

### Write the crown raster to the disk
terra::writeRaster(
  crowns
  , paste0(config$delivery_spatial_dir, "/top_down_detected_tree_crowns.tif")
  , overwrite = TRUE
)

Plot the tree crowns with the tree tops

ggplot() + 
  geom_tile(
    data = crowns %>% as.data.frame(xy=T) %>% dplyr::rename(f=3)
    , mapping = aes(x=x,y=y,fill=as.factor(f))
  ) +
  geom_sf(
    data = tree_tops
    , shape = 20
    , color = "black"
  ) +
  coord_sf(
    expand = FALSE
  ) +
  scale_fill_manual(
    values = viridis::turbo(n = terra::freq(crowns) %>% nrow()) %>% sample()
  ) +
  labs(x = "", y = "") +
  theme_light() +
  theme(legend.position = "none")

Calculate local tree competition metrics

From Tinkham et al. (2022):

Local competition metrics, including the distance to the nearest neighbor, trees ha⁻¹ within a 5 m radius, and the relative tree height within a 5 m radius, were calculated for each tree and used in the subsequent DBH modeling. A 5 m radius was selected as it represents a distance slightly less than the point at which two mature ponderosa pine trees in the region would begin to have interlocking crowns, representing a proxy for direct competition. The relative tree height within a 5 m radius was estimated using Equation (2):

\[ \textrm{Relative Height} = \frac{\textrm{Height}}{\textrm{Height}_\textrm{max}} \times 100 \]

, where \(\textrm{Height}\) is the height of the subject tree and \(\textrm{Height}_\textrm{max}\) is the height of the tallest tree within a 5 m radius of the subject tree.
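
For example, a 12 m subject tree whose tallest neighbor within a 5 m radius is 18 m has a relative height of \(12/18 \times 100 \approx 67\%\).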

### add tree data to tree_tops
  tree_tops = tree_tops %>% 
    # pull out the coordinates and update treeID
    dplyr::mutate(
      tree_x = sf::st_coordinates(.)[,1]
      , tree_y = sf::st_coordinates(.)[,2]
      , tree_height_m = sf::st_coordinates(.)[,3]
      , treeID = paste(treeID,round(tree_x, 1),round(tree_y, 1), sep = "_")
    )
  # str(tree_tops)
### set buffer around tree to calculate competition metrics
competition_buffer_temp = 5
### how much of the buffered tree area is within the study boundary?
  # use this to scale the TPA estimates below
tree_tops_pct_buffer_temp = tree_tops %>% 
  # buffer point
  sf::st_buffer(competition_buffer_temp) %>% 
  dplyr::mutate(
    point_buffer_area_m2 = as.numeric(sf::st_area(.))
  ) %>% 
  # intersect with study bounds
  sf::st_intersection(
    las_ctg$geometry
  ) %>% 
  # calculate area of buffer within study
  dplyr::mutate(
    buffer_area_in_study_m2 = as.numeric(sf::st_area(.))
  ) %>% 
  sf::st_drop_geometry() %>% 
  dplyr::select(treeID, buffer_area_in_study_m2)

### use the tree top location points to get competition metrics
comp_tree_tops_temp = tree_tops %>% 
  # buffer point
  sf::st_buffer(competition_buffer_temp) %>% 
  dplyr::select(treeID, tree_height_m) %>% 
  # spatial join with all tree points
  sf::st_join(
    tree_tops %>% 
      dplyr::select(treeID, tree_height_m) %>% 
      dplyr::rename_with(
        .fn = ~ paste0("comp_",.x)
        , .cols = tidyselect::everything()[
            -dplyr::any_of(c("geometry"))
          ]
      )
  ) %>%
  sf::st_drop_geometry() %>% 
  # calculate metrics by treeID
  dplyr::group_by(treeID,tree_height_m) %>% 
  dplyr::summarise(
    n_trees = dplyr::n()
    , max_tree_height_m = max(comp_tree_height_m)
  ) %>% 
  dplyr::ungroup() %>% 
  dplyr::inner_join(
    tree_tops_pct_buffer_temp
    , by = dplyr::join_by("treeID")
  ) %>% 
  dplyr::mutate(
    comp_trees_per_ha = (n_trees/buffer_area_in_study_m2)*10000
    , comp_relative_tree_height = tree_height_m/max_tree_height_m*100
  ) %>% 
  dplyr::select(
    treeID, comp_trees_per_ha, comp_relative_tree_height
  )

### calculate distance to nearest neighbor
  ### cap distance to nearest tree within xxm buffer
  dist_buffer_temp = 50
  # get trees within radius
  dist_tree_tops_temp = tree_tops %>% 
    dplyr::select(treeID) %>% 
    # buffer point
    sf::st_buffer(dist_buffer_temp) %>% 
    # spatial join with all tree points
    sf::st_join(
      tree_tops %>% 
        dplyr::select(treeID, tree_x, tree_y) %>% 
        dplyr::rename(treeID2=treeID)
    ) %>% 
    dplyr::filter(treeID != treeID2)
  
  # calculate row by row distances 
  dist_tree_tops_temp = dist_tree_tops_temp %>% 
    sf::st_centroid() %>% 
    st_set_geometry("geom1") %>% 
    dplyr::bind_cols(
      dist_tree_tops_temp %>% 
        sf::st_drop_geometry() %>% 
        dplyr::select("tree_x", "tree_y") %>% 
        sf::st_as_sf(coords = c("tree_x", "tree_y"), crs = sf::st_crs(tree_tops)) %>% 
        st_set_geometry("geom2")
    ) %>% 
    dplyr::mutate(
      comp_dist_to_nearest_m = sf::st_distance(geom1, geom2, by_element = T) %>% as.numeric()
    ) %>% 
    sf::st_drop_geometry() %>% 
    dplyr::select(treeID,comp_dist_to_nearest_m) %>% 
    dplyr::group_by(treeID) %>% 
    dplyr::summarise(comp_dist_to_nearest_m = min(comp_dist_to_nearest_m, na.rm = T)) %>% 
    dplyr::ungroup()

### join with original tree tops data
  tree_tops = tree_tops %>% 
    dplyr::left_join(
      comp_tree_tops_temp
      , by = dplyr::join_by("treeID")
    ) %>% 
    # add distance
    dplyr::left_join(
      dist_tree_tops_temp
      , by = dplyr::join_by("treeID")
    ) %>% 
    dplyr::mutate(comp_dist_to_nearest_m = dplyr::coalesce(comp_dist_to_nearest_m,dist_buffer_temp))

### Write the trees to the disk
  sf::st_write(
    tree_tops
    , paste0(config$delivery_spatial_dir, "/top_down_detected_tree_tops.gpkg")
    , quiet = TRUE, append = FALSE
  )
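
An equivalent cross-check of the nearest-neighbor distances computes the full pairwise distance matrix (a sketch assuming the units package, an sf dependency, is installed; fine for small tree counts, though the buffered join above avoids the n-by-n matrix and scales better):

# sketch: nearest-neighbor distance from a full pairwise distance matrix
d_mat = sf::st_distance(tree_tops) %>% units::drop_units()
diag(d_mat) = Inf # exclude self-distances
nn_dist_m = apply(d_mat, 1, min)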

Map tree competition metrics

# trees per ha
tree_tops %>% 
  ggplot() + 
    geom_sf(mapping = aes(fill = comp_trees_per_ha), shape = 21, alpha = 0.8, size = 2, color = "gray77") +
    scale_fill_viridis_c() +
    labs(fill = "trees\nper ha") +
    theme_light()

# distance to nearest neighbor
tree_tops %>% 
  ggplot() + 
    geom_sf(mapping = aes(fill = comp_dist_to_nearest_m), shape = 21, alpha = 0.8, size = 2, color = "gray77") +
    scale_fill_viridis_c(option="rocket", direction = -1) +
    labs(fill = "meters to\nnearest\nneighbor") +
    theme_light()

Spatial join the tree tops with the tree crowns

The delineated crown raster (terra SpatRaster) is converted to vector data (i.e. polygons) and crowns with an area smaller than 0.1 m2 are removed. The tree tops are then spatially joined with the tree crowns.

### Convert crown raster to terra rast, then polygons, then to Sf
crowns_sf = crowns %>% 
  # convert raster to polygons for each individual crown
  terra::as.polygons() %>% 
  # fix polygon validity
  terra::makeValid() %>% 
  # reduce the number of nodes in geometries
  terra::simplifyGeom() %>% 
  # remove holes in polygons
  terra::fillHoles() %>% 
  # convert to sf
  sf::st_as_sf() %>% 
  # get the crown area
  dplyr::mutate(
    crown_area_m2 = as.numeric(sf::st_area(.))
  ) %>% 
  #remove super small crowns
  dplyr::filter(
    crown_area_m2 > 0.1
  )
  
  # str(crowns_sf)
  # ggplot(crowns_sf) + geom_sf(aes(fill=as.factor(layer))) + 
  #   scale_fill_manual(values = viridis::turbo(nrow(crowns_sf)) %>% sample()) +
  #   theme_void() + theme(legend.position = "none")
  
### Join the crowns with the seeds to append data, remove Nulls
crowns_sf = crowns_sf %>%
  sf::st_join(tree_tops) %>% 
  dplyr::select(c(
    "treeID", "tree_height_m"
    , "tree_x", "tree_y"
    , "crown_area_m2"
    , tidyselect::starts_with("comp_")
  ))
  
  # str(crowns_sf)
  # crowns_sf %>% dplyr::count(treeID) %>% nrow()
  # crowns_sf %>%
  #   sf::st_drop_geometry() %>%
  #   dplyr::summarise(
  #     n = n()
  #     , n_distinct = dplyr::n_distinct(treeID)
  #   )

### Add crown data summaries
crown_sum_temp = data.frame(
    mean_crown_ht_m = terra::extract(x = master_chm, y = terra::vect(crowns_sf), fun = "mean", na.rm = T)
    , median_crown_ht_m = terra::extract(x = master_chm, y = terra::vect(crowns_sf), fun = "median", na.rm = T)
    , min_crown_ht_m = terra::extract(x = master_chm, y = terra::vect(crowns_sf), fun = "min", na.rm = T)
  ) %>% 
  dplyr::select(-c(tidyselect::ends_with(".ID"))) %>% 
  dplyr::rename_with(~ stringr::str_remove_all(.x,".Z"))


### join crown data summary
crowns_sf = crowns_sf %>% 
  dplyr::bind_cols(crown_sum_temp)

# str(crowns_sf)

### Write the crowns to the disk
  sf::st_write(
    crowns_sf
    , paste0(config$delivery_spatial_dir, "/top_down_detected_crowns.gpkg")
    , quiet = TRUE, append = FALSE
  )

Plot the tree crowns with the tree tops

ggplot() + 
  geom_sf(
    data = crowns_sf
    , mapping = aes(fill = mean_crown_ht_m)
  ) +
  geom_sf(
    data = tree_tops
    , shape = 20
    , color = "black"
  ) +
  coord_sf(
    expand = FALSE
  ) +
  scale_fill_viridis_c(option = "plasma") +
  labs(fill = "Mean Crown\nHgt. (m)", x = "", y = "") +
  theme_light() 

Note that not all tree tops (black dots) have an associated crown. Plot only matched tree tops and crowns.

ggplot() + 
  geom_sf(
    data = crowns_sf
    , mapping = aes(fill = mean_crown_ht_m)
  ) +
  geom_sf(
    data = tree_tops %>% 
      dplyr::inner_join(
        crowns_sf %>% sf::st_drop_geometry() %>% 
          dplyr::select(treeID)
        , dplyr::join_by(treeID)
      )
    , shape = 20
    , color = "black"
  ) +
  coord_sf(
    expand = FALSE
  ) +
  scale_fill_viridis_c(option = "plasma") +
  labs(fill = "Mean Crown\nHgt. (m)", x = "", y = "") +
  theme_light() 

Model Missing DBHs

Tinkham et al. (2022) evaluated five models in the form of simple linear, multiple linear, and three non-linear model structures to estimate missing tree DBH values based on allometric relationships to independent variables including tree height using the sample of UAS SfM point cloud derived DBH values (i.e. from trees where DBH could be detected using aerial point cloud data alone). For the ponderosa pine forest studied, the simple linear form (\(DBH_i = \beta_0 + \beta_1 \cdot Height_i\), where \(i\) is the individual tree estimate from UAS SfM data) resulted in the best predictive model for both tree and stand level summarizations after first filtering SfM DBH estimates using regionally derived height to DBH allometries from US Forest Inventory and Analysis (FIA) data.

This section uses the sample of DBH values extracted from the point cloud (example here) in the Detect Stems section to create a model using the SfM extracted height, crown area, and competition metrics as independent variables, and the SfM-derived DBH as the dependent variable.

Importantly, before building a local DBH-to-Height model the SfM-derived DBH estimates should be filtered using height to DBH allometries specific to the region to improve the local model performance (Tinkham et al. (2022)). This filtering is implemented in the Filter the SfM DBHs section below with full details on the data and modelling described in a supplementary section.

Join CHM derived Crowns with DBH stems

Spatially join the tree crown polygons created based off of the CHM in this section with the tree stems where DBH was detected created in this section.

###________________________________________________________###
### Join the Top down crowns with the stem location points ###
###________________________________________________________###
  ### Join the top down crowns with the stem location points
  ## !! Note that one crown can have multiple stems within its bounds
  crowns_sf_joined_stems_temp = crowns_sf %>%
    sf::st_join(
      dbh_locations_sf %>% 
        # rename all columns to have "stem" prefix
        dplyr::rename_with(
          .fn = ~ paste0("stem_",.x,recycle0 = T)
          , .cols = tidyselect::everything()[
            -dplyr::any_of(
              c(tidyselect::starts_with("stem_x"),"geometry")
            )
          ]
        )
    )
  # str(crowns_sf_joined_stems_temp)
###________________________________________________________###
### Check for row uniqueness ###
###________________________________________________________###
  crowns_sf_joined_stems_temp %>% 
    sf::st_drop_geometry() %>% 
    dplyr::summarise(
      n = dplyr::n()
      , n_unique_crowns = dplyr::n_distinct(treeID)
    ) %>% 
    kableExtra::kbl() %>% 
    kableExtra::kable_styling()
n n_unique_crowns
1550 1545

Filter the SfM DBHs

G. Woolsey added this section. There was no DBH filtering in the original script handed off by N. Swayze; instead, the model to estimate missing DBHs was built from all stems with DBH values created in this section. There are a few potential pitfalls with this methodology. First, the stems and DBH estimates were made separately for each grid tile and then combined with no filtering for stems (and associated DBH estimates) that were “cut in half” at the grid tile boundary. There was also no filtering for stems that overlap when the grid tiles were combined.

A regionally derived height to DBH relationship was modeled using the data and methods discussed in detail in this supplementary section. The SfM-derived DBH filtering steps implemented here are:

  1. Predict an expected DBH value for each CHM derived tree height based on the regional model
  2. Remove stem DBH estimates that are outside the 90% prediction bounds
  3. Select the stem DBH estimate that is closest to the predicted DBH value (from 1) if multiple stems are within the bounds of one crown
  4. Use the SfM-detected stems remaining after this filtering workflow as the training data in the local DBH to height allometric relationship model

Representative FIA Plots and Data

See full details in this supplementary section

###__________________________________________________###
### read in FIA data ###
###__________________________________________________###
# read in treemap data
# downloaded from: https://www.fs.usda.gov/rds/archive/Catalog/RDS-2021-0074
# read in treemap (no memory is taken)
treemap_rast = terra::rast("../data/treemap/TreeMap2016.tif")
# crop and mask treemap to the las catalog extent (memory is now used)
treemap_rast = treemap_rast %>% 
  terra::crop(
    las_ctg@data$geometry %>% 
      sf::st_union() %>% 
      terra::vect() %>% 
      terra::project(terra::crs(treemap_rast))
  ) %>% 
  terra::mask(
    las_ctg@data$geometry %>% 
      sf::st_union() %>% 
      terra::vect() %>% 
      terra::project(terra::crs(treemap_rast))
  )
### get weights for weighting each tree in the population models
# treemap id = tm_id for linking to tabular data 
tm_id_weight_temp = terra::freq(treemap_rast) %>%
  dplyr::select(-layer) %>% 
  dplyr::rename(tm_id = value, tree_weight = count)

### get the TreeMap FIA tree list for only the plots included
treemap_trees_df = readr::read_csv(
    "../data/treemap/TreeMap2016_tree_table.csv"
    , col_select = c(
      tm_id
      , TREE
      , STATUSCD
      , DIA
      , HT
    )
  ) %>% 
  dplyr::rename_with(tolower) %>% 
  dplyr::inner_join(
    tm_id_weight_temp
    , by = dplyr::join_by("tm_id")
  ) %>% 
  dplyr::filter(
    # keep live trees only: 1=live;2=dead
    statuscd == 1
    & !is.na(dia) 
    & !is.na(ht) 
  ) %>%
  dplyr::mutate(
    dbh_cm = dia*2.54
    , tree_height_m = ht/3.28084
  ) %>% 
  dplyr::select(-c(statuscd,dia,ht)) %>% 
  dplyr::rename(tree_id=tree) 

# dplyr::glimpse(treemap_trees_df)
# treemap_trees_df %>% dplyr::count(tm_id)

Regional model of DBH as predicted by height

See full details in this supplementary section

Note, this step takes ~5 mins for this example area

###__________________________________________________________###
### population model, height non-linear
###__________________________________________________________###
  # population model with no random effects (i.e. no group-level variation)
  # quadratic model form with Gamma distribution for strictly positive response variable dbh
  # set up prior
  p_temp <- prior(normal(1, 2), nlpar = "b1") +
    prior(normal(0, 2), nlpar = "b2")
  mod_nl_pop = brms::brm(
    formula = brms::bf(
      formula = dbh_cm|weights(tree_weight) ~ (b1 * tree_height_m) + tree_height_m^b2
      , b1 + b2 ~ 1
      , nl = TRUE # !! specify non-linear
    )
    , data = treemap_trees_df
    , prior = p_temp
    , family = brms::brmsfamily("Gamma")
    , iter = 4000
  )
  # plot(mod_nl_pop)
  # summary(mod_nl_pop)

Summary of model results

#### extract posterior draws to a df
    brms::as_draws_df(
        mod_nl_pop
        , variable = c("^b_", "shape")
        , regex = TRUE
      ) %>% 
      # quick way to get a table of summary statistics and diagnostics
      posterior::summarize_draws(
        "mean", "median", "sd"
        ,  ~quantile(.x, probs = c(
          0.05, 0.95
          # , 0.025, 0.975
        ))
        , "rhat"
      ) %>% 
      dplyr::mutate(
        variable = stringr::str_remove_all(variable, "_Intercept")
      ) %>% 
      kableExtra::kbl(
        caption = paste(
          "non-linear model:"
          , mod_nl_pop$formula %>% as.character() %>% purrr::pluck(1)
        )
        , digits = 3
      ) %>% 
        kableExtra::kable_styling()
non-linear model: dbh_cm | weights(tree_weight) ~ (b1 * tree_height_m) + tree_height_m^b2
variable mean median sd 5% 95% rhat
b_b1 -0.159 -0.159 0.013 -0.180 -0.138 1.001
b_b2 0.639 0.640 0.013 0.618 0.660 1.001
shape 20.127 20.125 0.593 19.152 21.110 1.001

Plot the modeled allometric relationship with 90% prediction bounds. In Bayesian statistics, the 90% credible interval is the range that contains the true value with a probability of 90%.

By comparison, in frequentist statistics the 90% confidence interval is constructed in such a way that, if the same experiment were repeated an infinite number of times, the true value would be contained in the interval in 90% of those repetitions.

See McElreath 2018.

### obtain model predictions over range
    # range of x var to predict
      height_range = dplyr::tibble(
        tree_height_m = seq(
          from = 0
          , to = round(max(crowns_sf$tree_height_m)*1.05,0)
          , by = 0.1 # by 0.1 m increments
        )
      )
    # predict and put estimates in a data frame
      pred_mod_nl_pop_temp = predict(
          mod_nl_pop
          , newdata = height_range
          , probs = c(.05, .95)
        ) %>%
        dplyr::as_tibble() %>%
        dplyr::rename(
          lower_b = 3, upper_b = 4
        ) %>% 
        dplyr::rename_with(tolower) %>% 
        dplyr::select(-c(est.error)) %>% 
        dplyr::bind_cols(height_range)
      # str(pred_mod_nl_pop_temp)
      
    # plot predictions with data
      ggplot(
        data = pred_mod_nl_pop_temp
        , mapping = aes(x = tree_height_m)
      ) +
        geom_ribbon(
          mapping = aes(ymin = lower_b, ymax = upper_b)
          , fill = "grey88"
        ) +
        geom_line(
          aes(y = estimate)
          , color = "navy"
          , lwd = 1
        ) +
        labs(
          y = "DBH (cm)"
          , x = "Tree Ht. (m)"
          , title = "Regional height to DBH allometry from US Forest Inventory and Analysis (FIA) data"
        ) +
        theme_light() +
        theme(legend.position = "none", plot.title = element_text(size = 9))

Predict and filter SfM-derived DBH

# attach allometric data to CHM derived trees and canopy data
crowns_sf_joined_stems_temp = crowns_sf_joined_stems_temp %>% 
  # join with model predictions at 0.1 m height intervals
  dplyr::mutate(
    tree_height_m_tnth = round(tree_height_m,1) %>% as.character()
  ) %>% 
  dplyr::inner_join(
    pred_mod_nl_pop_temp %>% 
      dplyr::rename(
        tree_height_m_tnth=tree_height_m
        , est_dbh_cm = estimate
        , est_dbh_cm_lower = lower_b
        , est_dbh_cm_upper = upper_b
      ) %>% 
      dplyr::mutate(tree_height_m_tnth=as.character(tree_height_m_tnth))
    , by = dplyr::join_by(tree_height_m_tnth)  
  ) %>% 
  dplyr::select(-tree_height_m_tnth) %>% 
  dplyr::mutate(
    est_dbh_pct_diff = abs(stem_dbh_cm-est_dbh_cm)/est_dbh_cm
  )
  # what is the estimated difference
  # summary(crowns_sf_joined_stems_temp$est_dbh_pct_diff)
  # crowns_sf_joined_stems_temp %>% dplyr::glimpse()

### build training data set
dbh_training_data_temp = crowns_sf_joined_stems_temp %>%
  sf::st_drop_geometry() %>% 
  dplyr::filter(
    !is.na(stem_dbh_cm)
    & stem_dbh_cm >= est_dbh_cm_lower
    & stem_dbh_cm <= est_dbh_cm_upper
  ) %>% 
  dplyr::group_by(treeID) %>% 
  # select the minimum difference to regional dbh estimate
  dplyr::filter(
    est_dbh_pct_diff==min(est_dbh_pct_diff)
  ) %>% 
  # just take one if same dbh
  dplyr::filter(
    dplyr::row_number()==1
  ) %>% 
  dplyr::ungroup() %>% 
  dplyr::select(c(
    treeID # id
    , stem_dbh_cm # y
    # x vars
    , crown_area_m2, tree_height_m
    , median_crown_ht_m, mean_crown_ht_m, min_crown_ht_m
    , tidyselect::starts_with("comp_")
  ))
# dbh_training_data_temp %>% dplyr::glimpse()

View correlation matrix of potential predictor variables

# correlation matrix of potential predictors
dbh_training_data_temp %>% 
  dplyr::select(-c(treeID,stem_dbh_cm)) %>% 
  cor() %>% 
  corrplot::corrplot(type = "lower", diag = F)

# remove highly correlated predictors
dbh_training_data_temp = dbh_training_data_temp %>% 
  dplyr::select(c(
    treeID # id
    , stem_dbh_cm # y
    # x vars
    , crown_area_m2, tree_height_m
    , min_crown_ht_m
    , tidyselect::starts_with("comp_")
  ))

Model to estimate missing DBHs

Use the SfM-detected stems that remain after the regional-allometry filtering workflow as the training data for the local DBH-to-height allometric model.

Tinkham et al. (2022) found that the simple linear form (\(DBH_i = \beta_0 + \beta_1 \cdot Height_i\), where \(i\) indexes individual tree estimates from UAS SfM data) produced the best estimates for both tree- and stand-level summarizations. However, this workflow uses a random forest model built with the randomForest package to estimate DBH from the sample of DBH values extracted from the point cloud (example here) in the Detect Stems section.
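
For reference, a minimal sketch of that simple linear form fit to the training data assembled above (illustrative only; this workflow instead uses the random forest model below):

# hedged sketch: simple linear height-to-DBH form from Tinkham et al. (2022)
lm_dbh_temp = lm(stem_dbh_cm ~ tree_height_m, data = dbh_training_data_temp)
# summary(lm_dbh_temp)
# predicted DBH (cm) at example heights:
# predict(lm_dbh_temp, newdata = data.frame(tree_height_m = c(5, 10, 15)))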

The random forest model is “tuned” using the randomForest::tuneRF function to determine the optimal mtry hyperparameter. The mtry parameter is the number of variables randomly sampled as candidates at each split (view more detail here). randomForest::tuneRF starts at a supplied (or default) value of mtry and inflates or deflates it by the step factor until the out-of-bag (OOB) error stops improving by a specified relative amount. The example below uses a step factor of 0.5 and stops once the OOB error improvement falls below 1%.

set.seed(21)
# dbh_training_data_temp %>% dplyr::glimpse()

### tuning RF model
  # To tune just the mtry hyperparameter we can use randomForest::tuneRF 
  # for a quick and easy tuning assessment. tuneRF starts at a value of mtry 
  # that you supply and inflates/deflates it by a step factor until the 
  # OOB error stops improving by a specified amount.
  rf_tune_temp = randomForest::tuneRF(
    y = dbh_training_data_temp$stem_dbh_cm
    , x = dbh_training_data_temp %>% dplyr::select(-c(treeID,stem_dbh_cm))
    , stepFactor = 0.5
    , ntreeTry = 500
    , mtryStart = 0.5
    , improve = 0.01
    , plot = F
    , trace = F
  )
  # rf_tune_temp

### Run a randomForest model to predict DBH using various crown predictors
  stem_prediction_model = randomForest::randomForest(
    y = dbh_training_data_temp$stem_dbh_cm
    , x = dbh_training_data_temp %>% dplyr::select(-c(treeID,stem_dbh_cm))
    , mtry = rf_tune_temp %>% 
        dplyr::as_tibble() %>% 
        dplyr::filter(OOBError==min(OOBError)) %>% 
        # if multiple mtry values tie at the minimum OOB error, take the first
        dplyr::slice_head(n = 1) %>% 
        dplyr::pull(mtry)
    , na.action = na.omit
  )
  # stem_prediction_model
  # str(stem_prediction_model)

Model performance

Random forest variable importance plot

#variable importance plot
randomForest::varImpPlot(stem_prediction_model, main = "RF variable importance plot for DBH estimate")

Estimated versus observed DBH

data.frame(
  dbh_training_data_temp
  , predicted = stem_prediction_model$predicted
) %>% 
ggplot() +
  geom_abline() +
  geom_point(mapping = aes(x = stem_dbh_cm, y = predicted)) +
  scale_x_continuous(limits = c(0,max(dbh_training_data_temp$stem_dbh_cm)*1.05)) +
  scale_y_continuous(limits = c(0,max(dbh_training_data_temp$stem_dbh_cm)*1.05)) +
  labs(
    x = "SfM DBH (cm)"
    , y = "Predicted DBH (cm) by RF"
  ) +
  theme_light()

Table of model performance

data.frame(
  mae = Metrics::mae(dbh_training_data_temp$stem_dbh_cm, stem_prediction_model$predicted)
  , mape = Metrics::mape(dbh_training_data_temp$stem_dbh_cm, stem_prediction_model$predicted) %>% 
    scales::percent(accuracy = .01)
  , rmse = Metrics::rmse(dbh_training_data_temp$stem_dbh_cm, stem_prediction_model$predicted)
) %>% 
  kableExtra::kbl(
    caption = "Random forest model prediction performance for DBH (cm)"
    , col.names = c(
      "Mean Abs. Error (cm)"
      , "Mean Abs. Percent Error"
      , "Root Mean Squared Error (cm)"
    )
    , digits = 2
  ) %>% 
  kableExtra::kable_styling()
Random forest model prediction performance for DBH (cm)
Mean Abs. Error (cm) Mean Abs. Percent Error Root Mean Squared Error (cm)
4.01 17.32% 4.97

Predict missing DBHs

Use the model built in the prior section to estimate DBH for all trees with missing values.

The final result is a vector dataset of tree crowns created in this section with DBH values for: i) the training data based on the DBH values extracted from the point cloud in the Detect Stems section that passed the DBH filtering process; and ii) DBH values for all other trees using predicted DBH values based on SfM-derived DBH and height allometries.

###___________________________________________________________________###
### Predict missing radius values for the top down crowns with no DBH ###
###___________________________________________________________________###
crowns_sf_predict_only_temp = crowns_sf %>%
  sf::st_drop_geometry() %>% 
  dplyr::anti_join(
    dbh_training_data_temp
    , by = dplyr::join_by("treeID")
  ) %>% 
  dplyr::select(
    dbh_training_data_temp %>% dplyr::select(-c(stem_dbh_cm)) %>% names()
  )
# get predicted dbh
predicted_dbh_cm_temp = predict(
  stem_prediction_model
  , crowns_sf_predict_only_temp %>% dplyr::select(-treeID)
)
# summary(predicted_dbh_cm_temp)
## combine predicted data with training data for full data set for all tree crowns with a matched tree top
crowns_sf_with_dbh = crowns_sf %>% 
  # join training data
  dplyr::left_join(
    dbh_training_data_temp %>% 
      dplyr::mutate(is_training_data = T) %>% 
      dplyr::select(treeID, is_training_data, stem_dbh_cm)
    , by = dplyr::join_by("treeID")
  ) %>% 
  # join with predicted data estimates
  dplyr::left_join(
    crowns_sf_predict_only_temp %>% 
      dplyr::mutate(
        predicted_dbh_cm = predicted_dbh_cm_temp
      ) %>% 
      dplyr::select(treeID, predicted_dbh_cm)
    , by = dplyr::join_by("treeID")
  ) %>% 
  # clean up data and calculate metrics from dbh
  dplyr::mutate(
    is_training_data = dplyr::coalesce(is_training_data,F)
    , dbh_cm = dplyr::coalesce(stem_dbh_cm, predicted_dbh_cm)
    , dbh_m = dbh_cm/100
    , radius_m = dbh_m/2
    , basal_area_m2 = pi * (radius_m)^2
    , basal_area_ft2 = basal_area_m2 * 10.764
  ) %>% 
  dplyr::select(
    !tidyselect::ends_with("_dbh_cm")
  )
# write the data to the disk
  sf::st_write(
    crowns_sf_with_dbh
    , paste0(config$delivery_spatial_dir, "/final_detected_crowns.gpkg")
    , append = FALSE
    , quiet = TRUE
  )

Distribution of estimated DBH values

### plot
  ggplot(data = crowns_sf_with_dbh) +
    geom_density(
      mapping = aes(x=dbh_cm)
      , fill = "navy"
      , color = "gray"
      , alpha = 0.9
    ) + 
    labs(
      x = "DBH (cm)"
      , y = "Density"
      , title = "Distribution of tree DBH"
    ) +
    theme_light()

Relationship between height and DBH

### plot
  ggplot(
    data = crowns_sf_with_dbh
    , mapping = aes(y=tree_height_m, x = dbh_cm)
  ) +
  geom_smooth(
    method = "loess"
    , span = 1
    , color = "gray44"
  ) +
  geom_point(
    mapping = aes(color = is_training_data)  
    , alpha = 0.6
  ) + 
  labs(
    x = "DBH (cm)"
    , y = "Tree Ht. (m)"
    , color = "Training Data"
    , title = "SfM derived tree height and DBH relationship"
  ) +
  scale_color_manual(values = c("gray", "firebrick")) +
  theme_light() +
  theme(
    legend.position = "bottom"
    , legend.direction = "horizontal"
  )

Map Final Tree Heights and Diameters

# get tree points
map_temp = crowns_sf_with_dbh %>% 
  sf::st_drop_geometry() %>% 
  sf::st_as_sf(coords = c("tree_x", "tree_y"), crs = sf::st_crs(crowns_sf_with_dbh))

# map
mapview::mapview(
    las_ctg@data$geometry
    , color = "black"
    , lwd = 3, alpha.regions = 0.0, legend = F, label = F
    , layer.name = "las boundary"
  ) +
  mapview::mapview(
    map_temp
    , zcol = "tree_height_m"
    , col.regions = viridis::viridis(20)
    , cex = 3
    , legend = T
    , label = F
    , layer.name = "height_m"
  ) +
  mapview::mapview(
    map_temp
    , zcol = "dbh_cm"
    , col.regions = viridis::inferno(10)
    , cex = 3
    , legend = T
    , label = F
    , layer.name = "dbh_cm"
    , hide = T
  )

Calculate Silviculture Metrics

Utilize the final data produced in the prior section, which includes unique trees and tree locations with CHM-derived tree heights, tree crown information, and estimated DBH values for the entire study area covered by the original laz input folder (../data/las_raw).

Common silvicultural metrics are calculated for the entire extent. Note that stand-level summaries can be computed if stand boundary vector data is provided.
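
Two of the metrics computed below merit explicit definition: quadratic mean diameter (QMD) and Lorey's mean height (the basal-area-weighted mean tree height):

\[\begin{align*} QMD &= \sqrt{\frac{\sum_{i=1}^{n} DBH_i^{2}}{n}} \\ h_{L} &= \frac{\sum_{i=1}^{n} BA_i \cdot h_i}{\sum_{i=1}^{n} BA_i} \end{align*}\]

, where \(DBH_i\), \(BA_i\), and \(h_i\) are the DBH, basal area, and height of tree \(i\), and \(n\) is the number of trees.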

### stand-level summaries
  silv_metrics_temp = crowns_sf_with_dbh %>%
    sf::st_drop_geometry() %>% 
    dplyr::ungroup() %>% 
    dplyr::mutate(
      plotID = "1" # can spatially join to plot vectors if available
      , plot_area_m2 = las_ctg@data$geometry %>% 
        sf::st_union() %>% 
        sf::st_area() %>% # result is m2
        as.numeric()
      , plot_area_ha = plot_area_m2/10000 # convert to ha
    ) %>% 
    dplyr::group_by(plotID,plot_area_ha) %>% 
    dplyr::summarise(
      n_trees = dplyr::n_distinct(treeID)
      , mean_dbh_cm = mean(dbh_cm, na.rm = T)
      , mean_tree_height_m = mean(tree_height_m, na.rm = T)
      , loreys_height_m = sum(basal_area_m2*tree_height_m, na.rm = T) / sum(basal_area_m2, na.rm = T)
      , basal_area_m2 = sum(basal_area_m2, na.rm = T)
      , sum_dbh_cm_sq = sum(dbh_cm^2, na.rm = T)
    ) %>% 
    dplyr::ungroup() %>% 
    dplyr::mutate(
      trees_per_ha = (n_trees/plot_area_ha)
      , basal_area_m2_per_ha = (basal_area_m2/plot_area_ha)
      , qmd_cm = sqrt(sum_dbh_cm_sq/n_trees)
    ) %>% 
    dplyr::select(-c(sum_dbh_cm_sq)) %>% 
    # convert to imperial units
    dplyr::mutate(
      dplyr::across(
        .cols = tidyselect::ends_with("_cm")
        , ~ .x * 0.394
        , .names = "{.col}_in"
      )
      , dplyr::across(
        .cols = tidyselect::ends_with("_m")
        , ~ .x * 3.28
        , .names = "{.col}_ft"
      )
      , dplyr::across(
        .cols = tidyselect::ends_with("_m2_per_ha")
        , ~ .x * 4.359
        , .names = "{.col}_ftac"
      )
      , dplyr::across(
        .cols = tidyselect::ends_with("_per_ha") & !tidyselect::ends_with("_m2_per_ha")
        , ~ .x * 0.405
        , .names = "{.col}_ac"
      )
      , dplyr::across(
        .cols = tidyselect::ends_with("_area_ha")
        , ~ .x * 2.471
        , .names = "{.col}_ac"
      )
      , dplyr::across(
        .cols = tidyselect::ends_with("_m2")
        , ~ .x * 10.764
        , .names = "{.col}_ft2"
      )
    ) %>% 
    dplyr::rename_with(
      .fn = function(x){dplyr::case_when(
        stringr::str_ends(x,"_cm_in") ~ stringr::str_replace(x,"_cm_in","_in")
        , stringr::str_ends(x,"_m_ft") ~ stringr::str_replace(x,"_m_ft","_ft")
        , stringr::str_ends(x,"_m2_per_ha_ftac") ~ stringr::str_replace(x,"_m2_per_ha_ftac","_ft2_per_ac")
        , stringr::str_ends(x,"_per_ha_ac") ~ stringr::str_replace(x,"_per_ha_ac","_per_ac")
        , stringr::str_ends(x,"_area_ha_ac") ~ stringr::str_replace(x,"_area_ha_ac","_area_ac")
        , stringr::str_ends(x,"_m2_ft2") ~ stringr::str_replace(x,"_m2_ft2","_ft2")
        , TRUE ~ x
      )}
    ) %>% 
    dplyr::select(
      "plotID"
      , "n_trees"
      , "plot_area_ha"
      , "trees_per_ha"
      , "mean_dbh_cm"
      , "qmd_cm"
      , "mean_tree_height_m"
      , "loreys_height_m"
      , "basal_area_m2"
      , "basal_area_m2_per_ha"
      # imperial
      , "plot_area_ac"
      , "trees_per_ac"
      , "mean_dbh_in"
      , "qmd_in"
      , "mean_tree_height_ft"
      , "loreys_height_ft"
      , "basal_area_ft2"
      , "basal_area_ft2_per_ac"
    )

### export tabular
  write.csv(
      silv_metrics_temp
      , paste0(config$delivery_spatial_dir, "/final_plot_silv_metrics.csv")
      , row.names = F
    )

### Join back to spatial data
  # this would just be a vector file if available
  silv_metrics_temp = las_ctg@data %>%
    dplyr::select(
      1, geometry  
    ) %>% 
    dplyr::mutate(
      plotID = "1" # can spatially join to plot vectors if available
    ) %>% 
    dplyr::group_by(plotID) %>% 
    dplyr::summarise(
      geometry = sf::st_union(geometry)
    ) %>% 
    dplyr::ungroup() %>% 
    # join with plot data
    dplyr::inner_join(
      silv_metrics_temp
      , by = dplyr::join_by("plotID")
    )
  # str(silv_metrics_temp)

### export spatial
  sf::st_write(
      silv_metrics_temp
      , paste0(config$delivery_spatial_dir, "/final_plot_silv_metrics.gpkg")
      , append = FALSE
      , quiet = TRUE
    )

Table of silvicultural measurements in both metric and imperial units

### summary table
silv_metrics_temp %>% 
  sf::st_drop_geometry() %>% 
  dplyr::filter(plotID==silv_metrics_temp$plotID[1]) %>% 
  tidyr::pivot_longer(
    cols = -c(plotID)
    , names_to = "measure"
    , values_to = "value"
  ) %>% 
  kableExtra::kbl(
    caption = "Silvicultural metrics in both metric and imperial units"
    , digits = 2
  ) %>% 
  kableExtra::kable_styling()
Silvicultural metrics in both metric and imperial units
plotID measure value
1 n_trees 1545.00
1 plot_area_ha 4.00
1 trees_per_ha 386.25
1 mean_dbh_cm 22.01
1 qmd_cm 22.44
1 mean_tree_height_m 7.89
1 loreys_height_m 9.26
1 basal_area_m2 61.10
1 basal_area_m2_per_ha 15.27
1 plot_area_ac 9.88
1 trees_per_ac 156.43
1 mean_dbh_in 8.67
1 qmd_in 8.84
1 mean_tree_height_ft 25.87
1 loreys_height_ft 30.37
1 basal_area_ft2 657.63
1 basal_area_ft2_per_ac 66.58

Model Regional DBH-to-Height Relationship

Before building the local DBH-to-height model, which uses SfM-extracted height, crown area, and competition metrics as independent variables and the SfM-derived DBH values extracted from the point cloud as the dependent variable, the SfM-derived DBH estimates should be filtered using regionally derived height to DBH allometries.

The DBH filtering methodology described by Tinkham et al. (2022) is not applied verbatim here, but it motivates the approach used below:

This filtering used local Forest Inventory and Analysis (FIA) data from the study region to develop…a model representing the regional height to DBH relationship. This model used FIA data from Colorado with greater than 70% ponderosa pine by basal area, densities exceeding 10 m2 ha−1 basal area represented untreated conditions, and site indices within 3 m of the study site’s estimated site index of 22 m at a base age of 100 years were used to maintain a similar height to DBH relationship. Based on the results from Swayze et al. (2021), the model was fit as a second order polynomial using the stats R package. Subsequently, Equation (3) was applied to predict a regionally expected DBH value for each tree height. For trees with multiple SfM-derived DBH values, we compared each estimate to the predictions from Equation (3) and retained the SfM-derived DBH with the smallest error.

Filtering was done against the 90th percentile prediction bounds of Equation (3). If a SfM-derived DBH was outside these bounds, we removed it from the dataset used for further analysis. This process resulted in a dataset with the height for all identified trees and a subset of DBH values meeting regional observations of tree growth form.

Equation 3: \[ \textrm{DBH (cm)} = 1.6121 \times \textrm{Height (m)} + \textrm{Height (m)}^{0.0276} \]
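
For reference, Equation (3) written as a small R function; this is illustrative only since, as discussed below, its coefficients are specific to the Colorado conditions described above:

# Tinkham et al. (2022) Equation (3): regionally expected DBH (cm) from height (m)
eq3_dbh_cm = function(height_m){ 1.6121 * height_m + height_m^0.0276 }
# e.g. eq3_dbh_cm(10) returns ~17.2 cm for a 10 m tall tree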

Expanding this filtering methodology to other areas, where Equation (3) may or may not be appropriate, requires the use of Forest Inventory and Analysis (FIA) data to develop a model representing the height to DBH relationship in the particular study region. While multiple R packages exist to access FIA data (e.g. FIESTA and rFIA), the tree-level data required for building such a model is either too sparse or too coarse (Tinkham et al. 2018). FIA sample plots are widely separated and do not provide wall-to-wall coverage of forest information, while state-level tree lists span diverse forest types that may not accurately represent a particular area of interest. Furthermore, expecting a spatial analyst to select the most appropriate FIA data to represent the point cloud study area may be an unreasonable expectation, as these analysts are often not part of the data acquisition.

TreeMap is a 30 m × 30 m resolution gridded dataset of forest plot identifiers developed by Riley et al. (2021) that imputes representative FIA plots across the forests of the conterminous United States (CONUS). This raster map of representative plot identifiers can be linked to a number of tree- and plot-level attributes stored in the accompanying tables or accessed via one of the previously mentioned R packages or the FIA DataMart (https://apps.fs.usda.gov/fia/datamart/datamart.html).

The workflow outlined below utilizes the TreeMap raster and tree-level attributes which the user must download (~4 GB) from this repository (“RDS-2021-0074_Data.zip” as of 2024-01-18). The user must then set input_treemap_dir to the directory where the extracted files TreeMap2016.tif and TreeMap2016_tree_table.csv reside. The workflow then:

  1. Identifies representative FIA sample plots within the bounds of the point cloud data specified by the user in input_las_dir
  2. Pulls FIA tree list data using the plot identifiers and filters for only live trees with complete DBH and height measurements
  3. Builds the model representing the regional height to DBH relationship used to filter SfM-derived DBHs
###__________________________________________________###
### BEGIN USER-DEFINED PARAMETERS ###
###__________________________________________________###
###_________________________###
### Set input las directory ###
###_________________________###
input_las_dir = "../data/las_raw"
###_________________________###
### Set input treemap directory ###
###_________________________###
input_treemap_dir = "../data/treemap"
###__________________________________________________###
### END USER-DEFINED PARAMETERS ###
###__________________________________________________###

###__________________________________________________###
### load libraries so this section can be run in stand-alone ###
###__________________________________________________###

library(tidyverse)
library(terra)
library(lidR)
library(brms)
library(tidybayes)
library(ggdist)
library(patchwork) #combine plots
library(kableExtra)

###__________________________________________________###
### read in data ###
###__________________________________________________###

# read in the las data as a catalog
las_ctg = lidR::readALSLAScatalog("../data/las_raw/")
# read in the treemap raster (lazy read; no memory is taken)
# downloaded from: https://www.fs.usda.gov/rds/archive/Catalog/RDS-2021-0074
treemap_rast = terra::rast("../data/treemap/TreeMap2016.tif")
# filter treemap based on las
treemap_rast = treemap_rast %>% 
  terra::crop(
    las_ctg@data$geometry %>% 
      sf::st_union() %>% 
      terra::vect() %>% 
      terra::project(terra::crs(treemap_rast))
  )
### plot treemap raster
# plot(treemap_rast)
treemap_rast %>% 
  as.data.frame(xy=T) %>% 
  dplyr::rename(tm_id=3) %>% 
  ggplot(mapping = aes(x=x,y=y,fill=as.factor(tm_id))) +
    geom_tile(color="gray") +
    geom_text(aes(label=tm_id), size=3, color = "gray11") +
    scale_fill_viridis_d(option = "turbo", alpha = 0.8) +
    coord_sf(expand = FALSE) +
    theme_light() +
    theme(legend.position = "none", panel.grid = element_blank())

### get weights for weighting each tree in the population models
# treemap id = tm_id for linking to tabular data 
tm_id_weight_temp = terra::freq(treemap_rast) %>%
  dplyr::select(-layer) %>% 
  dplyr::rename(tm_id = value, tree_weight = count)

tm_id_weight_temp %>% 
  dplyr::slice_sample(n=6) %>% 
  kableExtra::kbl(caption="TreeMap ID plot weights") %>% 
    kableExtra::kable_styling()
TreeMap ID plot weights
tm_id tree_weight
81860 26
60651 5
81849 1
60720 1
60657 17
37971 1
### get the TreeMap FIA tree list for only the plots included
treemap_trees_df = readr::read_csv(
    "../data/treemap/TreeMap2016_tree_table.csv"
    , col_select = c(
      tm_id
      , TREE
      , STATUSCD
      , DIA
      , HT
    )
  ) %>% 
  dplyr::rename_with(tolower) %>% 
  dplyr::inner_join(
    tm_id_weight_temp
    , by = dplyr::join_by("tm_id")
  ) %>% 
  dplyr::filter(
    # keep live trees only: 1=live;2=dead
    statuscd == 1
    & !is.na(dia) 
    & !is.na(ht) 
  ) %>%
  dplyr::mutate(
    dbh_cm = dia*2.54
    , tree_height_m = ht/3.28084
  ) %>% 
  dplyr::select(-c(statuscd,dia,ht)) %>% 
  dplyr::rename(tree_id=tree) 

dplyr::glimpse(treemap_trees_df)
## Rows: 211
## Columns: 5
## $ tm_id         <dbl> 37971, 37971, 37971, 37971, 37971, 37971, 37971, 37971, …
## $ tree_id       <dbl> 1, 2, 4, 5, 7, 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 2, 11, …
## $ tree_weight   <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ dbh_cm        <dbl> 41.402, 48.768, 23.622, 33.528, 35.052, 3.810, 19.558, 1…
## $ tree_height_m <dbl> 22.5552, 21.6408, 16.4592, 24.0792, 18.2880, 3.9624, 9.4…
# treemap_trees_df %>% dplyr::count(tm_id)

Modelling was done using the brms package to implement Bayesian regression in \(\textbf{R}\) using the probabilistic programming language \(\textbf{STAN}\).

In the two models tested, we will ignore site/plot effects, such that all observations are treated as independent from one another. This is what’s known as complete pooling (McElreath 2018) or just a pooled model.
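
For contrast, a partial-pooling (varying-intercept) counterpart would allow plot-level deviations from the population intercept; a hypothetical formula sketch using the TreeMap plot identifier tm_id as the grouping factor (this model is not fit in this workflow):

# hypothetical varying-intercept formula for contrast with the pooled models below;
# (1 | tm_id) adds a group-level intercept for each imputed FIA plot -- NOT fit here
brms::bf(
  dbh_cm | weights(tree_weight) ~ 1 + tree_height_m + I(tree_height_m^2) + (1 | tm_id)
)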

Quadratic Model of DBH on Height

The first model tested is a quadratic linear model:

\[\begin{align*} DBH_{i} &\sim {\sf Gamma} \bigl(g(\beta_0, \beta_1, \beta_2, x_i) \bigr) \\ g(\beta_0, \beta_1, \beta_2, x_i) &= {\exp \bigl(\beta_0 + \beta_1 x_{1i} + \beta_2 x_{1i}^{2}\bigr)} \\ \; \textrm{where } x_1 &= \textrm{tree height} \end{align*}\]

The \(\sf Gamma\) response distribution represents \(DBH_{i}\) as strictly non-negative. Utilizing a quadratic regression model with the inclusion of \(x_{1i}^{2}\) in the model allows us to account for a nonlinear (e.g. parabolic) relationship between DBH and tree height.

###__________________________________________________________###
### model 1: population model, height quadratic
###__________________________________________________________###
  # population model with no random effects (i.e. no group-level variation)
  # quadratic model form with Gamma distribution for strictly positive response variable dbh
  # no priors set explicitly; brms default (non-informative) priors are used
  mod_quad_pop = brms::brm(
    formula = dbh_cm|weights(tree_weight) ~ 1 + tree_height_m + I(tree_height_m^2)
    , data = treemap_trees_df
    , family = brms::brmsfamily("Gamma")
    , iter = 4000
  )
  # brms::add_criterion(mod_quad_pop, "loo")
  # plot(mod_quad_pop)
  # summary(mod_quad_pop)
  # brms::prior_summary(mod_quad_pop)

Summary of model results

#### extract posterior draws to a df
    brms::as_draws_df(
        mod_quad_pop
        , variable = c("^b_", "shape")
        , regex = TRUE
      ) %>% 
      # quick way to get a table of summary statistics and diagnostics
      posterior::summarize_draws(
        "mean", "median", "sd"
        ,  ~quantile(.x, probs = c(
          0.05, 0.95
          # , 0.025, 0.975
        ))
        , "rhat"
      ) %>% 
      kableExtra::kbl(
        caption = paste(
          "quadratic model:"
          , mod_quad_pop$formula %>% as.character() %>% purrr::pluck(1)
        )
        , digits = 3
      ) %>% 
        kableExtra::kable_styling()
quadratic model: dbh_cm | weights(tree_weight) ~ 1 + tree_height_m + I(tree_height_m^2)
variable mean median sd 5% 95% rhat
b_Intercept 0.975 1.068 0.590 0.994 1.125 1.030
b_tree_height_m 0.219 0.215 0.027 0.206 0.225 1.031
b_Itree_height_mE2 -0.005 -0.004 0.001 -0.005 -0.004 1.031
shape 17.749 18.458 3.435 16.736 19.409 1.046

Parameter estimates and credible intervals

### use tidybayes to summarize draws and plot
    mod_quad_pop %>%
      tidybayes::gather_draws(
        `b_.*`
        # , `shape`
        , regex = TRUE
      ) %>% 
      dplyr::rename_with(~stringr::str_remove_all(.x, "\\.")) %>% 
        ggplot(aes(y = variable, x = value)) +
          geom_vline(xintercept = 0, color = "gray44", linetype = "dashed") +
          ggdist::stat_interval(.width = c(0.5, 0.9, 0.95), alpha = 0.8) +
          # ggdist::stat_halfeye() +
          stat_summary(
            fun = "median"
            , color = "black"
            , size = 2
            , geom = "point"
          ) +
          scale_color_viridis_d(option = "mako") +
          labs(y = "", x = "parameter estimate") +
          theme_bw() +
          theme(
            legend.position = "bottom"
            , legend.direction = "horizontal"
            , panel.grid = element_blank()
          )

    # plot another way
    # mod_quad_pop %>%
    #   tidybayes::gather_draws(
    #     `b_.*`
    #     # , `shape`
    #     , regex = TRUE
    #   ) %>% 
    #   tidybayes::median_qi(
    #     .width = c(0.95, 0.5) #,.66)
    #   ) %>% 
    #     ggplot(aes(y = .variable, x = .value, xmin = .lower, xmax = .upper)) +
    #       geom_vline(xintercept = 0) +
    #       ggdist::geom_pointinterval() +
    #       theme_bw()

Model predictions versus FIA tree data

### obtain model predictions over range
    # range of x var to predict
      height_range = dplyr::tibble(
        tree_height_m = seq(
          from = 0
          , to = round(max(treemap_trees_df$tree_height_m)*1.05,0)
          , by = 1
        )
      )
      
    # predict and put estimates in a data frame
      pred_mod_quad_pop_temp = predict(
          mod_quad_pop
          , newdata = height_range
          , probs = c(.05, .95) # c(.025, .975)
        ) %>%
        dplyr::as_tibble() %>%
        dplyr::rename(
          lower_b = 3, upper_b = 4
        ) %>% 
        dplyr::rename_with(tolower) %>% 
        dplyr::bind_cols(height_range)
      # str(pred_mod_quad_pop_temp)
      
    # plot predictions with data
      plt_mod_quad_pop = ggplot(
        data = treemap_trees_df
        , mapping = aes(x = tree_height_m)
      ) +
        geom_ribbon(
          data = pred_mod_quad_pop_temp
          , mapping = aes(ymin = lower_b, ymax = upper_b)
          , fill = "grey88"
        ) +
        geom_line(
          data = pred_mod_quad_pop_temp
          , aes(y = estimate)
          , color = "navy"
          , lwd = 1
        ) +
        geom_point(
          mapping = aes(y = dbh_cm)
          , color = "gray33"
          , alpha = 0.7
        ) +
        scale_y_continuous(limits = c(0, max(treemap_trees_df$dbh_cm)*1.45 )) +
        labs(
          y = "DBH (cm)"
          , x = "Tree Ht. (m)"
          , title = paste(
              "quadratic model:"
              , mod_quad_pop$formula %>% as.character() %>% purrr::pluck(1)
            )
        ) +
        theme_light() +
        theme(legend.position = "none", plot.title = element_text(size = 9))
    plt_mod_quad_pop

Non-Linear Model of DBH on Height

The second model tested is a non-linear model:

\[\begin{align*} DBH_{i} &\sim {\sf Gamma} \bigl(g(\beta_1, \beta_2, x_i) \bigr) \\ g(\beta_1, \beta_2, x_i) &= {\exp \bigl(\beta_1 x_{1i} + x_{1i}^{\beta_2}\bigr)} \\ \; \textrm{where } x_1 &= \textrm{tree height} \end{align*}\]

The \(\sf Gamma\) response distribution represents \(DBH_{i}\) as strictly non-negative.

###__________________________________________________________###
### model 2: population model, height non-linear
###__________________________________________________________###
  # population model with no random effects (i.e. no group-level variation)
  # non-linear model form with Gamma distribution for strictly positive response variable dbh
  # set up prior
  p_temp <- prior(normal(1, 2), nlpar = "b1") +
    prior(normal(0, 2), nlpar = "b2")
  mod_nl_pop = brms::brm(
    formula = brms::bf(
      formula = dbh_cm|weights(tree_weight) ~ (b1 * tree_height_m) + tree_height_m^b2
      , b1 + b2 ~ 1
      , nl = TRUE # !! specify non-linear
    )
    , data = treemap_trees_df
    , prior = p_temp
    , family = brms::brmsfamily("Gamma")
    , iter = 4000
  )
  # brms::add_criterion(mod_nl_pop, "loo")
  # plot(mod_nl_pop)
  # summary(mod_nl_pop)

Summary of model results

#### extract posterior draws to a df
    brms::as_draws_df(
        mod_nl_pop
        , variable = c("^b_", "shape")
        , regex = TRUE
      ) %>% 
      # quick way to get a table of summary statistics and diagnostics
      posterior::summarize_draws(
        "mean", "median", "sd"
        ,  ~quantile(.x, probs = c(
          0.05, 0.95
          # , 0.025, 0.975
        ))
        , "rhat"
      ) %>% 
      dplyr::mutate(
        variable = stringr::str_remove_all(variable, "_Intercept")
      ) %>% 
      kableExtra::kbl(
        caption = paste(
          "non-linear model:"
          , mod_nl_pop$formula %>% as.character() %>% purrr::pluck(1)
        )
        , digits = 3
      ) %>% 
        kableExtra::kable_styling()
non-linear model: dbh_cm | weights(tree_weight) ~ (b1 * tree_height_m) + tree_height_m^b2
variable mean median sd 5% 95% rhat
b_b1 -0.159 -0.159 0.013 -0.181 -0.137 1.002
b_b2 0.640 0.640 0.013 0.617 0.660 1.002
shape 20.124 20.113 0.600 19.143 21.124 1.001

Parameter estimates and credible intervals

### use tidybayes to summarize draws and plot
    mod_nl_pop %>%
      tidybayes::gather_draws(
        `b_.*`
        # , `shape`
        , regex = TRUE
      ) %>% 
      dplyr::rename_with(~stringr::str_remove_all(.x, "\\.")) %>% 
      dplyr::mutate(
        variable = stringr::str_remove_all(variable, "_Intercept")
      ) %>% 
        ggplot(aes(y = variable, x = value)) +
          geom_vline(xintercept = 0, color = "gray44", linetype = "dashed") +
          ggdist::stat_interval(.width = c(0.5, 0.9, 0.95), alpha = 0.8) +
          # ggdist::stat_halfeye() +
          stat_summary(
            fun = "median"
            , color = "black"
            , size = 2
            , geom = "point"
          ) +
          scale_color_viridis_d(option = "mako") +
          labs(y = "", x = "parameter estimate") +
          theme_bw() +
          theme(
            legend.position = "bottom"
            , legend.direction = "horizontal"
            , panel.grid = element_blank()
          )

    # plot another way
    # mod_nl_pop %>%
    #   tidybayes::gather_draws(
    #     `b_.*`
    #     # , `shape`
    #     , regex = TRUE
    #   ) %>% 
    #   tidybayes::median_qi(
    #     .width = c(0.95, 0.5) #,.66)
    #   ) %>% 
    #     ggplot(aes(y = .variable, x = .value, xmin = .lower, xmax = .upper)) +
    #       geom_vline(xintercept = 0) +
    #       ggdist::geom_pointinterval() +
    #       theme_bw()

Model predictions versus FIA tree data

### obtain model predictions over range
    # predict and put estimates in a data frame
      pred_mod_nl_pop_temp = predict(
          mod_nl_pop
          , newdata = height_range
          , probs = c(.05, .95) # c(.025, .975)
        ) %>%
        dplyr::as_tibble() %>%
        dplyr::rename(
          lower_b = 3, upper_b = 4
        ) %>% 
        dplyr::rename_with(tolower) %>% 
        dplyr::bind_cols(height_range)
      # str(pred_mod_nl_pop_temp)
      
    # plot predictions with data
      plt_mod_nl_pop = ggplot(
        data = treemap_trees_df
        , mapping = aes(x = tree_height_m)
      ) +
        geom_ribbon(
          data = pred_mod_nl_pop_temp
          , mapping = aes(ymin = lower_b, ymax = upper_b)
          , fill = "grey88"
        ) +
        geom_line(
          data = pred_mod_nl_pop_temp
          , aes(y = estimate)
          , color = "navy"
          , lwd = 1
        ) +
        geom_point(
          mapping = aes(y = dbh_cm)
          , color = "gray33"
          , alpha = 0.7
        ) +
        scale_y_continuous(limits = c(0, max(treemap_trees_df$dbh_cm)*1.45 )) +
        labs(
          y = "DBH (cm)"
          , x = "Tree Ht. (m)"
          , title = paste(
              "non-linear model:"
              , mod_nl_pop$formula %>% as.character() %>% purrr::pluck(1)
            )
        ) +
        theme_light() +
        theme(legend.position = "none", plot.title = element_text(size = 9))
      plt_mod_nl_pop

Model Comparison

The pointwise log-likelihood can be used, among other things, to calculate leave-one-out cross-validation (LOO; Vehtari, Gelman, and Gabry 2015), which allows comparison of different models applied to the same data (lower LOOIC values indicate better model fit).

### compare models
# The pointwise log-likelihood can be used, among others, to calculate 
    # the Watanabe-Akaike information criterion (WAIC) proposed by Watanabe (2010) 
    # and leave-one-out cross-validation (LOO; Vehtari, Gelman, and Gabry 2015) 
    # both allowing to compare different models applied to the same data 
    # (lower WAICs and LOOs indicate better model fit).
  dplyr::bind_rows(
    brms::loo(mod_quad_pop) %>% 
      purrr::pluck("estimates") %>% 
      dplyr::as_tibble(rownames = NA) %>% 
      tibble::rownames_to_column("metric") %>% 
      dplyr::mutate(model = "mod_quad_pop")
    , brms::loo(mod_nl_pop) %>% 
      purrr::pluck("estimates") %>% 
      dplyr::as_tibble(rownames = NA) %>% 
      tibble::rownames_to_column("metric") %>% 
      dplyr::mutate(model = "mod_nl_pop")
  ) %>% 
  dplyr::filter(metric == "looic") %>% 
  dplyr::arrange(`Estimate`) %>% 
  dplyr::mutate(model_rank = dplyr::row_number()) %>% 
    dplyr::relocate(model_rank, model) %>%
      kableExtra::kbl(
        caption = "model comparison based on leave-one-out cross-validation (LOO)"
        , digits = 3
      ) %>% 
        kableExtra::kable_styling()
model comparison based on leave-one-out cross-validation (LOO)
model_rank model metric Estimate SE
1 mod_nl_pop looic 12629.21 798.133
2 mod_quad_pop looic 422264.25 31683.446

Based on these data, the non-linear model of DBH on height has the better fit.
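
As an aside, brms also ships a comparison helper that reports each model's expected predictive accuracy relative to the best model; a minimal equivalent sketch:

# equivalent ranking via brms' built-in helper;
# the best-fitting model is listed first with elpd_diff = 0
brms::loo_compare(brms::loo(mod_quad_pop), brms::loo(mod_nl_pop))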

plt_mod_quad_pop + plt_mod_nl_pop

Miscellaneous Unused Processes

Create Stem-Only CHM

This section uses the height normalized point cloud grid tiles (created in this section) to create a CHM raster using only points within a specified height range, based on the program defined in this section. The stem section at DBH includes points that fall between 1.2 and 1.4 meters.

G. Woolsey notes: it is unclear what this data is used for…the las file may be used to extract DBH measurements using the TreeLS package. See page 3 of Swayze and Tinkham (2022). However, it is likely that the point cloud would need to be filtered based on point color to ensure extraction of stem points and not understory (e.g. tree regeneration) points.

Call the function to create the stem-only CHM

###_____________________________________________________###
### Rasterize the stem section at DBH to stem CHM tiles ###
###_____________________________________________________###
rasterize_tiles_to_chm(
  config = config
  , my_des_chm_res = 0.5 #0.07 #in meters
  , calculate_extent = F
  , my_las_grid = las_grid # generate_grid_over_extent(las_ctg, desired_tile_res)
  , my_las_ctg = las_ctg # lidR::readLAScatalog(config$input_las_dir)
  , my_min_z = 1.2
  , my_max_z= 1.4
)

Map the stem-only CHM

terra::rast(paste0(
    config$delivery_spatial_dir
    ,"/chm_raster_z"
    , as.character(1.2)
    , "to"
    , as.character(1.4)
    , "m_res"
    , as.character(0.5)
    , "m.tif"
  )) %>%
  # terra::aggregate(fact = 20, fun = "mean") %>%
  stars::st_as_stars() %>% 
  mapview::mapview(
    layer.name = "stem-only ht (m)"
    , alpha.regions = 0.6
  )
# # uncomment for ggplot
#   as.data.frame(xy=T) %>% 
#   dplyr::rename_with(tolower) %>% 
#   dplyr::rename(f=3) %>% 
#   ggplot(mapping = aes(x=x,y=y,fill=f)) +
#     geom_tile() +
#     scale_fill_viridis_c(na.value = "black") +
#     labs(fill = "elevation (m)") +
#     theme_void()

Delineate tree crowns for TreeLS identified trees

This step is distinct from the tree crown delineation in the prior section which used the Individual Tree Detection based on the CHM to delineate tree crown vector (i.e. polygon) shapes.

Instead, this section uses the stems detected using the TreeLS workflow in this section to delineate tree crown vector (i.e. polygon) shapes.

G. Woolsey notes: It is unclear if this step is necessary. It seems like the “master” tree list should be based on the overhead tree identification using the CHM data. This “master” tree list can be spatially joined to the stems detected in the TreeLS workflow to attach DBH estimates. If more than one stem location is attached to a single tree from the master list, then the best DBH estimate can be selected based on the process outlined in Swayze and Tinkham (2022).

G. Woolsey notes: Update - in the original script from N. Swayze these “bottom-up” tree crown delineations were utilized in the random forest model to generate the local model of DBH based on the independent variables with crown metrics and tree height. Furthermore, in the original script, there was no filtering of DBH values based on regional allometric relationships and no filtering for stem DBH estimates that were potentially from the same tree but duplicated in the dataset due to the grid tile processing and later combining. G. Woolsey updated the local model of DBH based on crown metrics and tree height using the methods described in this section.

### Use the stems detected using TreeLS::treeMap to delineate crowns using MCWS from ForestTools
  ### Use the tree tops to delineate crowns using MCWS from ForestTools
  crowns_treels_temp = ForestTools::mcws(
    treetops = dbh_locations_sf %>% 
      dplyr::mutate(treeID = dplyr::row_number()) %>% 
      dplyr::rename(Z=tree_height_m) %>% 
      dplyr::select(treeID, Z) %>% 
      sf::st_zm(drop = T) # drops z values
    , CHM = raster::raster(master_chm) # converts to raster data
    , minHeight = minimum_tree_height
  ) %>% terra::rast()


### Write the crown raster to the disk
terra::writeRaster(
  crowns_treels_temp
  , paste0(config$delivery_spatial_dir, "/bottom_up_detected_tree_crowns.tif")
  , overwrite = TRUE
)

Plot the tree crowns with the TreeLS identified tree stems

ggplot() + 
  geom_tile(
    data = crowns_treels_temp %>% as.data.frame(xy=T) %>% dplyr::rename(f=3)
    , mapping = aes(x=x,y=y,fill=as.factor(f))
  ) +
  geom_sf(
    data = dbh_locations_sf
    , shape = 20
    , color = "black"
  ) +
  coord_sf(
    expand = FALSE
  ) +
  scale_fill_manual(
    values = viridis::turbo(n = terra::freq(crowns_treels_temp) %>% nrow()) %>% sample()
  ) +
  labs(x = "", y = "") +
  theme_light() +
  theme(legend.position = "none")

Spatial join the TreeLS stems with the TreeLS tree crowns

The delineated crown raster (terra SpatRaster) is converted to vector data (i.e. polygons) and crowns with an area smaller than 0.1 m2 are removed. The tree tops are then spatially joined with the tree crowns.

### Convert crown raster to terra rast, then polygons, then to Sf
crowns_treels_sf_temp = crowns_treels_temp %>% 
  # convert raster to polygons for each individual crown
  terra::as.polygons() %>% 
  # fix polygon validity
  terra::makeValid() %>% 
  # reduce the number of nodes in geometries
  terra::simplifyGeom() %>% 
  # remove holes in polygons
  terra::fillHoles() %>% 
  # convert to sf
  sf::st_as_sf() %>% 
  # get the crown area
  dplyr::mutate(
    crown_area_m2 = as.numeric(sf::st_area(.))
  ) %>% 
  #remove super small crowns
  dplyr::filter(
    crown_area_m2 > 0.1
  )
  
  # str(crowns_treels_sf_temp)
  # ggplot(crowns_treels_sf_temp) + geom_sf(aes(fill=as.factor(layer))) + 
  #   scale_fill_manual(values = viridis::turbo(nrow(crowns_treels_sf_temp)) %>% sample()) +
  #   theme_void() + theme(legend.position = "none")
  
### Join the crowns_treels_temp with the seeds to append data, remove Nulls
crowns_treels_sf_temp = crowns_treels_sf_temp %>%
  sf::st_join(dbh_locations_sf) %>% 
  dplyr::select(-c(layer))
  
  # str(crowns_treels_sf_temp)
  # crowns_treels_sf_temp %>% dplyr::count(treeID) %>% nrow()
  # crowns_treels_sf_temp %>%
  #   sf::st_drop_geometry() %>%
  #   dplyr::summarise(
  #     n = n()
  #     , n_distinct = dplyr::n_distinct(treeID)
  #   )

### Add crown data summaries
crown_sum_temp = data.frame(
    mean_crown_ht_m = terra::extract(x = master_chm, y = terra::vect(crowns_treels_sf_temp), fun = "mean", na.rm = T)
    , median_crown_ht_m = terra::extract(x = master_chm, y = terra::vect(crowns_treels_sf_temp), fun = "median", na.rm = T)
    , min_crown_ht_m = terra::extract(x = master_chm, y = terra::vect(crowns_treels_sf_temp), fun = "min", na.rm = T)
  ) %>% 
  dplyr::select(-c(tidyselect::ends_with(".ID"))) %>% 
  dplyr::rename_with(~ stringr::str_remove_all(.x,".Z"))


### join crown data summary
crowns_treels_sf_temp = crowns_treels_sf_temp %>% 
  dplyr::bind_cols(crown_sum_temp)

# str(crowns_treels_sf_temp)

### Write the crowns_treels_temp to the disk
  sf::st_write(
    crowns_treels_sf_temp
    , paste0(config$delivery_spatial_dir, "/bottom_up_detected_crowns.gpkg")
    , quiet = TRUE, append = FALSE
  )

Plot the tree crowns with the tree tops

ggplot() + 
  geom_sf(
    data = crowns_treels_sf_temp
    , mapping = aes(fill = crown_area_m2)
    , color = "gray77"
  ) +
  geom_sf(
    data = dbh_locations_sf
    , shape = 20
    , color = "gray33"
  ) +
  coord_sf(
    expand = FALSE
  ) +
  scale_fill_viridis_c(option = "mako", direction = -1) +
  labs(fill = "Crown\nArea (m2)", x = "", y = "") +
  theme_light() 

G. Woolsey notes: this crown delineation methodology results in crown areas that are highly improbable. Creasy et al. (2021) measured crown areas of 163.8 (+/- 81.4) m2. Hopefully this crown delineation is not utilized in any following analysis.

Colorize the Point Cloud (example)

This section includes an example workflow only and is not completed for the full data extent.

Additional information on colorizing point cloud can be found in The lidR Package Book.

This workflow uses point cloud data generated in this section which was exported as a laz file to the ../data/02_processed_data/04_las_stem_dir directory with the Classification data updated to: ground points (class 2); water points (class 9); stem points (class 4); non-stem (class 5). The color (RGB values) of the classified points are compared to expected colors for each class to filter for “pure” stem and canopy points.

In the literature, this process has been utilized to filter the point cloud for stem-only points based on spectral (i.e. RGB) information to estimate DBH. In the DBH estimation workflow above, this filtering process was not applied. To implement this filter, the point cloud passed to the TreeLS::tlsInventory function would need to be filtered using points matching the desired color.

This process is explained in Swayze et al. (2022):

a green:red ratio index (NGRR; Eq. (1)) was calculated on each point to determine the distribution (Fig. 4A) of stem and canopy NGRR values. Based on the distributions, each of the 30 point clouds were segmented to only include points with NGRR values below the 90th percentile of the sample stem points (− 0.0152), producing 30 point clouds only containing points with stem reflectance characteristics. For each of these stem point clouds, a 10 cm tall slice (1.32–1.42 m) was extracted and compressed to a height of 1.37 m to represent the DBH region on the trees…These DBH slices were later used to extract individual tree DBH values.

Equation 1:

\[ NGRR_i = \frac{Green_i - Red_i}{Green_i + Red_i} \]

, where \(NGRR_i\) is the green:red ratio index for the \(i^{th}\) point in the point cloud.
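
A minimal sketch of how such an NGRR filter could be implemented with lidR, assuming a las object that carries R and G attributes; the function name ngrr_stem_filter and its default threshold (the −0.0152 value quoted above) are illustrative only:

### hypothetical sketch: filter a colorized point cloud to stem-like points by NGRR
ngrr_stem_filter = function(las, ngrr_threshold = -0.0152){
  # per-point green:red ratio index (Equation 1)
  ngrr = (las@data$G - las@data$R) / (las@data$G + las@data$R)
  las = lidR::add_attribute(las, ngrr, "ngrr")
  # keep only points with stem-like reflectance (NGRR at or below the threshold)
  lidR::filter_poi(las, ngrr <= ngrr_threshold)
}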

Function to add RGB to points

This function adds the user-defined color to the RGB values of the user-defined point cloud

### this function adds the user-defined color to the RGB values of the point cloud
colorize_input_las = function(input_las, desired_color){
  # col2rgb() returns a 3x1 matrix with rows "red", "green", "blue" (values 0-255)
  rgb_vals = grDevices::col2rgb(desired_color)
  # replicate the single color across all points in the point cloud
  n_points = nrow(input_las@data)
  R = rep(as.integer(rgb_vals["red", 1]), n_points)
  G = rep(as.integer(rgb_vals["green", 1]), n_points)
  B = rep(as.integer(rgb_vals["blue", 1]), n_points)
  
  ### Use the lidR functions to colorize the point cloud
    # Adds 3 columns named RGB and updates the point format 
    # of the LAS object to a format that supports RGB attributes. 
    # If the RGB values range from 0 to 255 they are automatically scaled to 16 bits
  input_las = lidR::add_lasrgb(input_las, R = R, G = G, B = B)
  return(input_las)
  
}

1. Crop the point cloud to a crown polygon

Filter the laz grid tile files created in this section and exported to the ../data/02_processed_data/04_las_stem_dir using an individual crown polygon. Points that are below 0.1 m in height are dropped. This example uses a tree with SfM-derived DBH that was used as part of the training data for the local DBH estimation model.

### New function to crop each crown to each canopy polygon, colorize, write to disk
# crop_crowns_by_id_and_colorize_stems_and_veg = function(config, las_grid, colorize_input_las, stem_color_hex_temp, veg_color_hex_temp, ground_color_hex_temp, use_bottom_up_crowns){
  
  ###___________________________###
  ### Read in the las files ###
  ###___________________________###
  
  ### Read in the classified las files as a catalog
  las_stem_ctg_temp = lidR::readTLSLAScatalog(config$las_stem_dir)
  
  ###___________________________###
  ### # for each individual tree crown {
  ###___________________________###

  ### Get the desired crown file 
    des_crown_temp = crowns_sf_with_dbh %>%
      dplyr::filter(is_training_data) %>% 
      dplyr::slice_sample(n=1)
    
  ### Crop the crown file from the las catalog
    cropped_crown_temp = lidR::clip_roi(las_stem_ctg_temp, des_crown_temp)
## Chunk 1 of 1 (100%): state ✓
    # drop points close to the ground
    cropped_crown_temp = lidR::filter_poi(cropped_crown_temp, Z > 0.1)
    # plot(cropped_crown_temp)

Plot the extracted point cloud for the example crown

plot3D::scatter3D(
      x = cropped_crown_temp@data$X
      , y = cropped_crown_temp@data$Y
      , z = cropped_crown_temp@data$Z
      , colvar = cropped_crown_temp@data$Z
      , pch = 19, cex = 0.2
      , colkey = F
      , phi = 0.5
    )

  2. Assign RGB values to point cloud

Call the colorize_input_las function defined above to assign a color to the points classified as stem and other, non-ground/non-stem based on the Classification value: ground points (class 2); water points (class 9); stem points (class 4); non-stem (class 5).

    ### Pull the veg and stems from the cropped crown
    veg_temp = lidR::filter_poi(cropped_crown_temp, Classification == 5)
    stem_temp = lidR::filter_poi(cropped_crown_temp, Classification == 4)
    
    ###______________________________________________________________________###
    ### If stems and crowns are not empty, color them then write to the disk ###
    ###______________________________________________________________________###
    # if(lidR::is.empty(veg_temp) == FALSE && lidR::is.empty(stem_temp) == FALSE){
      
      ### Get random color for the veg
      des_veg_color_temp = "forestgreen"
        # randomcoloR::randomColor(count = 1, hue = c("green"), luminosity = c("dark"))
      
      ### Colorize the veg points
      veg_temp = colorize_input_las(veg_temp, desired_color = des_veg_color_temp)
      
      ### Get random colors for the stems
      des_stem_color_temp = "tan4"
        # randomcoloR::randomColor(count = 1, hue = c("orange"), luminosity = c("dark"))
      
      ### Colorize the stem points
      stem_temp = colorize_input_las(stem_temp, desired_color = des_stem_color_temp)
      
      ### Combine the colored stems and veg
      combined_veg_stem_temp = rbind(veg_temp, stem_temp)

Plot the colorized stems versus non-stem, non-ground points (assumed to be other vegetation)

plot3D::scatter3D(
      x = cropped_crown_temp@data$X
      , y = cropped_crown_temp@data$Y
      , z = cropped_crown_temp@data$Z
      , colvar = cropped_crown_temp@data$Classification
      , col = c(des_stem_color_temp,des_veg_color_temp)
      , pch = 19, cex = 0.2
      , colkey = F
      , phi = 0.5
    )

# can also plot using lidR plot with color = "RGB"
    # plot(combined_veg_stem_temp, color = "RGB")