PhenoCam API and Data Fusion

Bijan Seyednasrollah

2019-01-04

This R package is an effort to simplify data wrangling steps for fusion of PhenoCam time-series and other datasets.

Each PhenoCam site has specific metadata, including but not limited to how the site is set up, where it is located, what vegetation type is visible from the camera, and its climate regime. Each PhenoCam may have zero to several ROIs (regions of interest) per vegetation type. phenocamapi is an interface for interacting with the PhenoCam server to extract those data and process them in an R environment.

I begin with several examples of extracting PhenoCam data directly from the server:

Exploring the PhenoCam metadata

We can obtain an up-to-date table of the metadata of the entire PhenoCam network using the get_phenos() function. The returned value is a data.table, which simplifies further data exploration.

library(data.table)
library(phenocamapi)

# download the metadata for every PhenoCam site (returned as a data.table)
phenos <- get_phenos()

Filtering the dataset using attributes

For example, we can list just the sites that have a flux tower:

# keep the sites flagged as having flux data and a non-missing flux site name,
# returning the PhenoCam site name next to its flux site name
phenofluxsites <- phenos[
  flux_data == TRUE & !is.na(flux_sitenames),
  .(PhenoCam = site, Flux = flux_sitenames)
]

# preview the top of the table
head(phenofluxsites)
#>                PhenoCam   Flux
#> 1:       alligatorriver US-NC4
#> 2:        arscolesnorth   LTAR
#> 3:        arscolessouth   LTAR
#> 4: arsgreatbasinltar098 US-Rws
#> 5:          arsope3ltar       
#> 6:           austincary US-SP1

# names of the deciduous broadleaf ('DB') sites that also have flux data
DB.flux <- phenos[primary_veg_type == 'DB' & flux_data == TRUE, site]

# preview the first few site names
head(DB.flux)
#> [1] "alligatorriver" "bartlett"       "bartlettir"     "bbc3"          
#> [5] "bbc4"           "bbc7"

Downloading PhenoCam time-series data

PhenoCam time-series are time-series data extracted from the ROIs of a given site.

To download a phenological time-series from the PhenoCam server, we need to know the site name, vegetation type and ROI ID. This information can be obtained from the PhenoCam website or by using the get_rois() function:

# fetch the table of every ROI available on the PhenoCam server
rois <- get_rois()

head(rois[['roi_name']])
#> [1] "alligatorriver_DB_1000"   "arbutuslake_DB_1000"     
#> [3] "arbutuslakeinlet_DB_1000" "arbutuslakeinlet_EN_1000"
#> [5] "arbutuslakeinlet_EN_2000" "archboldavir_AG_1000"

names(rois)
#>  [1] "roi_name"          "site"              "lat"              
#>  [4] "lon"               "roitype"           "active"           
#>  [7] "show_link"         "show_data_link"    "sequence_number"  
#> [10] "description"       "first_date"        "last_date"        
#> [13] "site_years"        "missing_data_pct"  "roi_page"         
#> [16] "roi_stats_file"    "one_day_summary"   "three_day_summary"
#> [19] "data_release"

# show every ROI defined for the dukehw site
rois[site == 'dukehw']
#>          roi_name   site      lat       lon roitype active show_link
#> 1: dukehw_DB_1000 dukehw 35.97358 -79.10037      DB   TRUE      TRUE
#>    show_data_link sequence_number
#> 1:           TRUE            1000
#>                                      description first_date  last_date
#> 1: canopy level DB forest at awesome Duke forest 2013-06-01 2019-01-02
#>    site_years missing_data_pct
#> 1:        5.4              4.0
#>                                                                   roi_page
#> 1: https://phenocam.sr.unh.edu/data/archive/dukehw/ROI/dukehw_DB_1000.html
#>                                                                     roi_stats_file
#> 1: https://phenocam.sr.unh.edu/data/archive/dukehw/ROI/dukehw_DB_1000_roistats.csv
#>                                                                one_day_summary
#> 1: https://phenocam.sr.unh.edu/data/archive/dukehw/ROI/dukehw_DB_1000_1day.csv
#>                                                              three_day_summary
#> 1: https://phenocam.sr.unh.edu/data/archive/dukehw/ROI/dukehw_DB_1000_3day.csv
#>    data_release
#> 1:          pre

The get_pheno_ts() function can download a time-series and return the result as a data.table. For example, to obtain the time-series for DB_1000 from the dukehw PhenoCam site, we can run the following code:

# download the 3-day time-series for ROI DB_1000 at the dukehw site
dukehw_DB_1000 <- get_pheno_ts(
  site = 'dukehw',
  vegType = 'DB',
  roiID = 1000,
  type = '3day'
)

names(dukehw_DB_1000)
#>  [1] "date"                 "year"                 "doy"                 
#>  [4] "image_count"          "midday_filename"      "midday_r"            
#>  [7] "midday_g"             "midday_b"             "midday_gcc"          
#> [10] "midday_rcc"           "r_mean"               "r_std"               
#> [13] "g_mean"               "g_std"                "b_mean"              
#> [16] "b_std"                "gcc_mean"             "gcc_std"             
#> [19] "gcc_50"               "gcc_75"               "gcc_90"              
#> [22] "rcc_mean"             "rcc_std"              "rcc_50"              
#> [25] "rcc_75"               "rcc_90"               "max_solar_elev"      
#> [28] "snow_flag"            "outlierflag_gcc_mean" "outlierflag_gcc_50"  
#> [31] "outlierflag_gcc_75"   "outlierflag_gcc_90"   "YEAR"                
#> [34] "DOY"                  "YYYYMMDD"

# convert the date column to Date class in place
dukehw_DB_1000[, date := as.Date(date)]
# plot gcc_90 against date; the value of plot() (NULL) is what gets printed
dukehw_DB_1000[, plot(date, gcc_90, type = 'b', col = 'green')]
#> NULL
mtext('Duke Forest, Hardwood', font = 2)

Merge with other time-series such as flux data

In a fully programmatic setting, you can load the PhenoCam dataset, find the related flux data, load the flux data, and merge everything together as follows:

# download the time-series for ROI EN_1000 at the oregonMP site
phenots <- get_pheno_ts(
  site = 'oregonMP',
  vegType = 'EN',
  roiID = 1000
)

names(phenots)
#>  [1] "date"                 "year"                 "doy"                 
#>  [4] "image_count"          "midday_filename"      "midday_r"            
#>  [7] "midday_g"             "midday_b"             "midday_gcc"          
#> [10] "midday_rcc"           "r_mean"               "r_std"               
#> [13] "g_mean"               "g_std"                "b_mean"              
#> [16] "b_std"                "gcc_mean"             "gcc_std"             
#> [19] "gcc_50"               "gcc_75"               "gcc_90"              
#> [22] "rcc_mean"             "rcc_std"              "rcc_50"              
#> [25] "rcc_75"               "rcc_90"               "max_solar_elev"      
#> [28] "snow_flag"            "outlierflag_gcc_mean" "outlierflag_gcc_50"  
#> [31] "outlierflag_gcc_75"   "outlierflag_gcc_90"   "YEAR"                
#> [34] "DOY"                  "YYYYMMDD"

# locate the US-Me2 flux CSV bundled with the phenocamapi package
fluxfile <- system.file(
  'fluxnetrepo/FLX_US-Me2/FLX_US-Me2_FULLSET_DD.csv',
  package = 'phenocamapi'
)

# read the flux table and replace every -9999 entry with NA
fluxts <- read.csv(fluxfile)
fluxts[fluxts == -9999] <- NA
fluxts <- as.data.table(fluxts)

# Parse the YYYYMMDD timestamps explicitly in UTC. Without `tz`, as.POSIXct()
# uses the session's local time zone, while as.Date() on a POSIXct converts
# via UTC by default; in a time zone east of UTC that combination shifts every
# YYYYMMDD value back by one day.
fluxts[, datetime := as.POSIXct(as.character(TIMESTAMP), format = '%Y%m%d', tz = 'UTC')]

# derive the date keys (character date, year, day of year) from datetime
fluxts[, YYYYMMDD := as.character(as.Date(datetime))]
fluxts[, YEAR := year(datetime)]
fluxts[, DOY := yday(datetime)]

head(fluxts[, .(TIMESTAMP, TA_F)])
#>    TIMESTAMP    TA_F
#> 1:  20141115 -10.105
#> 2:  20141116  -8.044
#> 3:  20141117  -4.550
#> 4:  20141118  -1.584
#> 5:  20141119  -1.805
#> 6:  20141120   4.019