NCDC vignette

About the package

rnoaa is an R wrapper for many NOAA data types, including data from the National Climatic Data Center (NCDC).

Load rnoaa

library('rnoaa')
library('plyr')

Get info on a station by specifying a datasetid, locationid, and stationid

ncdc_stations(datasetid='GHCND', locationid='FIPS:12017', stationid='GHCND:USC00084289')
#> $meta
#> NULL
#> 
#> $data
#>   elevation    mindate    maxdate latitude                  name
#> 1      12.2 1899-02-01 2018-11-25  28.8029 INVERNESS 3 SE, FL US
#>   datacoverage                id elevationUnit longitude
#> 1            1 GHCND:USC00084289        METERS  -82.3126
#> 
#> attr(,"class")
#> [1] "ncdc_stations"

Search for data and get a data.frame

out <- ncdc(datasetid='NORMAL_DLY', datatypeid='dly-tmax-normal', startdate = '2010-05-01', enddate = '2010-05-10')
out$data
#> # A tibble: 25 x 5
#>    date                datatype        station           value fl_c 
#>    <chr>               <chr>           <chr>             <int> <chr>
#>  1 2010-05-01T00:00:00 DLY-TMAX-NORMAL GHCND:AQW00061705   869 C    
#>  2 2010-05-01T00:00:00 DLY-TMAX-NORMAL GHCND:CAW00064757   607 Q    
#>  3 2010-05-01T00:00:00 DLY-TMAX-NORMAL GHCND:CQC00914080   840 R    
#>  4 2010-05-01T00:00:00 DLY-TMAX-NORMAL GHCND:CQC00914801   858 R    
#>  5 2010-05-01T00:00:00 DLY-TMAX-NORMAL GHCND:FMC00914395   876 P    
#>  6 2010-05-01T00:00:00 DLY-TMAX-NORMAL GHCND:FMC00914419   885 P    
#>  7 2010-05-01T00:00:00 DLY-TMAX-NORMAL GHCND:FMC00914446   885 P    
#>  8 2010-05-01T00:00:00 DLY-TMAX-NORMAL GHCND:FMC00914482   868 R    
#>  9 2010-05-01T00:00:00 DLY-TMAX-NORMAL GHCND:FMC00914720   899 R    
#> 10 2010-05-01T00:00:00 DLY-TMAX-NORMAL GHCND:FMC00914761   897 P    
#> # … with 15 more rows

Plot data, super simple, but it's a start

out <- ncdc(datasetid='NORMAL_DLY', stationid='GHCND:USW00014895', datatypeid='dly-tmax-normal', startdate = '2010-01-01', enddate = '2010-12-10', limit = 300)
ncdc_plot(out)

plot of chunk six

More on plotting

Example 1

Search for data first, then plot

out <- ncdc(datasetid='GHCND', stationid='GHCND:USW00014895', datatypeid='PRCP', startdate = '2010-05-01', enddate = '2010-10-31', limit=500)

Default plot

ncdc_plot(out)

plot of chunk unnamed-chunk-3

Create 14 day breaks

ncdc_plot(out, breaks="14 days")

plot of chunk unnamed-chunk-4

One month breaks

ncdc_plot(out, breaks="1 month", dateformat="%d/%m")

plot of chunk unnamed-chunk-5

Example 2

Search for data

out <- ncdc(datasetid='GHCND', stationid='GHCND:USW00014895', datatypeid='PRCP',
            startdate = '2010-05-01', enddate = '2010-10-31', limit=500)

Make a plot, with one month breaks, and date format with only day and month

ncdc_plot(out, breaks = "1 month", dateformat = "%d/%m")

plot of chunk unnamed-chunk-7

Combine the results of many calls to the ncdc function

Search for two sets of data

out1 <- ncdc(datasetid='GHCND', stationid='GHCND:USW00014895', datatypeid='PRCP', startdate = '2010-03-01', enddate = '2010-05-31', limit=500)

out2 <- ncdc(datasetid='GHCND', stationid='GHCND:USW00014895', datatypeid='PRCP', startdate = '2010-09-01', enddate = '2010-10-31', limit=500)

Then combine with a call to ncdc_combine

df <- ncdc_combine(out1, out2)
head(df[[1]]); tail(df[[1]])
#> # A tibble: 6 x 8
#>   date               datatype station         value fl_m  fl_q  fl_so fl_t 
#>   <chr>              <chr>    <chr>           <int> <chr> <chr> <chr> <chr>
#> 1 2010-03-01T00:00:… PRCP     GHCND:USW00014…     0 T     ""    0     2400 
#> 2 2010-03-02T00:00:… PRCP     GHCND:USW00014…     0 T     ""    0     2400 
#> 3 2010-03-03T00:00:… PRCP     GHCND:USW00014…     0 T     ""    0     2400 
#> 4 2010-03-04T00:00:… PRCP     GHCND:USW00014…     0 ""    ""    0     2400 
#> 5 2010-03-05T00:00:… PRCP     GHCND:USW00014…     0 ""    ""    0     2400 
#> 6 2010-03-06T00:00:… PRCP     GHCND:USW00014…     0 ""    ""    0     2400
#> # A tibble: 6 x 8
#>   date               datatype station         value fl_m  fl_q  fl_so fl_t 
#>   <chr>              <chr>    <chr>           <int> <chr> <chr> <chr> <chr>
#> 1 2010-10-26T00:00:… PRCP     GHCND:USW00014…   221 ""    ""    0     2400 
#> 2 2010-10-27T00:00:… PRCP     GHCND:USW00014…     0 ""    ""    0     2400 
#> 3 2010-10-28T00:00:… PRCP     GHCND:USW00014…     0 T     ""    0     2400 
#> 4 2010-10-29T00:00:… PRCP     GHCND:USW00014…     0 T     ""    0     2400 
#> 5 2010-10-30T00:00:… PRCP     GHCND:USW00014…     0 ""    ""    0     2400 
#> 6 2010-10-31T00:00:… PRCP     GHCND:USW00014…     0 ""    ""    0     2400

Then plot - by default, passing in the combined object plots the data together. In this case it looks kind of weird since a straight line connects two distant dates.

ncdc_plot(df)

plot of chunk unnamed-chunk-10

But we can pass in each separately, which uses facet_wrap in ggplot2 to plot each set of data in its own panel.

ncdc_plot(out1, out2, breaks="45 days")

plot of chunk unnamed-chunk-11