Fitting epicurves

Tim Taylor

Example

To illustrate the trend fitting functionality of i2extras we will use the simulated Ebola Virus Disease (EVD) outbreak data from the outbreaks package.

Loading relevant packages and data

library(outbreaks)
library(incidence2)
library(i2extras)

raw_dat <- ebola_sim_clean$linelist

For this example we will restrict ourselves to the first 20 weeks of data:

dat <- incidence(
    raw_dat, 
    date_index = date_of_onset,
    interval = "week"
)[1:20, ]
dat
#> An incidence2 object: 20 x 2
#> 783 cases from 2014-W15 to 2014-W34
#> interval: 1 monday week
#> cumulative: FALSE
#> 
#>    date_index count
#>    <yrwk>     <int>
#>  1 2014-W15       1
#>  2 2014-W16       1
#>  3 2014-W17       5
#>  4 2014-W18       4
#>  5 2014-W19      12
#>  6 2014-W20      17
#>  7 2014-W21      15
#>  8 2014-W22      19
#>  9 2014-W23      23
#> 10 2014-W24      21
#> 11 2014-W25      30
#> 12 2014-W26      22
#> 13 2014-W27      34
#> 14 2014-W28      38
#> 15 2014-W29      61
#> 16 2014-W30      59
#> 17 2014-W31      80
#> 18 2014-W32      86
#> 19 2014-W33     116
#> 20 2014-W34     139
plot(dat)

Modeling incidence

We can use fit_curve() to fit the data with either a poisson or negative binomial regression model. The output from this will be a nested object with class incidence2_fit which has methods available for both automatic plotting and the calculation of growth (decay) rates and doubling (halving) times.

out <- fit_curve(dat, model = "poisson", alpha = 0.05)
out
#> # A tibble: 1 x 8
#>   count_variable          data model  estimates    fitting_warning fitting_error
#>   <chr>          <list<tibble> <list> <list>       <list>          <list>       
#> 1 count               [20 × 2] <glm>  <df[,7] [20… <NULL>          <NULL>       
#> # … with 2 more variables: prediction_warning <list>, prediction_error <list>
plot(out)

growth_rate(out)
#> # A tibble: 1 x 9
#>   count_variable model      r r_lower r_upper growth_or_decay  time time_lower
#>   <chr>          <list> <dbl>   <dbl>   <dbl> <chr>           <dbl>      <dbl>
#> 1 count          <glm>  0.184   0.168   0.200 doubling         3.77       3.46
#> # … with 1 more variable: time_upper <dbl>

To unnest the data we can use unnest() (a function that has been reexported from the tidyr package.

unnest(out, estimates)
#> # A tibble: 20 x 14
#>    count_variable         data model count date_index estimate lower_ci upper_ci
#>    <chr>          <list<tibbl> <lis> <int> <yrwk>        <dbl>    <dbl>    <dbl>
#>  1 count              [20 × 2] <glm>     1 2014-W15       4.09     3.20     5.23
#>  2 count              [20 × 2] <glm>     1 2014-W16       4.92     3.91     6.19
#>  3 count              [20 × 2] <glm>     5 2014-W17       5.92     4.77     7.33
#>  4 count              [20 × 2] <glm>     4 2014-W18       7.11     5.82     8.68
#>  5 count              [20 × 2] <glm>    12 2014-W19       8.55     7.11    10.3 
#>  6 count              [20 × 2] <glm>    17 2014-W20      10.3      8.67    12.2 
#>  7 count              [20 × 2] <glm>    15 2014-W21      12.3     10.6     14.4 
#>  8 count              [20 × 2] <glm>    19 2014-W22      14.8     12.9     17.1 
#>  9 count              [20 × 2] <glm>    23 2014-W23      17.8     15.7     20.3 
#> 10 count              [20 × 2] <glm>    21 2014-W24      21.4     19.1     24.0 
#> 11 count              [20 × 2] <glm>    30 2014-W25      25.8     23.3     28.5 
#> 12 count              [20 × 2] <glm>    22 2014-W26      31.0     28.3     33.9 
#> 13 count              [20 × 2] <glm>    34 2014-W27      37.2     34.3     40.4 
#> 14 count              [20 × 2] <glm>    38 2014-W28      44.8     41.5     48.2 
#> 15 count              [20 × 2] <glm>    61 2014-W29      53.8     50.1     57.7 
#> 16 count              [20 × 2] <glm>    59 2014-W30      64.7     60.3     69.4 
#> 17 count              [20 × 2] <glm>    80 2014-W31      77.7     72.2     83.7 
#> 18 count              [20 × 2] <glm>    86 2014-W32      93.4     86.2    101.  
#> 19 count              [20 × 2] <glm>   116 2014-W33     112.     103.     123.  
#> 20 count              [20 × 2] <glm>   139 2014-W34     135.     122.     149.  
#> # … with 6 more variables: lower_pi <dbl>, upper_pi <dbl>,
#> #   fitting_warning <list>, fitting_error <list>, prediction_warning <list>,
#> #   prediction_error <list>

fit_curve() also works nicely with grouped incidence2 objects. In this situation, we return a nested tibble with some additional columns that indicate whether there has been a warning or error during the fitting or prediction stages.

grouped_dat <- incidence(
    raw_dat, 
    date_index = date_of_onset,
    interval = "week",
    groups = hospital
)[1:120, ]
grouped_dat
#> An incidence2 object: 120 x 3
#> 1621 cases from 2014-W15 to 2014-W38
#> interval: 1 monday week
#> cumulative: FALSE
#> 
#>    date_index hospital                                     count
#>    <yrwk>     <fct>                                        <int>
#>  1 2014-W15   Military Hospital                                1
#>  2 2014-W16   Connaught Hospital                               1
#>  3 2014-W17   <NA>                                             2
#>  4 2014-W17   other                                            3
#>  5 2014-W18   <NA>                                             1
#>  6 2014-W18   Connaught Hospital                               1
#>  7 2014-W18   Princess Christian Maternity Hospital (PCMH)     1
#>  8 2014-W18   Rokupa Hospital                                  1
#>  9 2014-W19   <NA>                                             1
#> 10 2014-W19   Connaught Hospital                               3
#> # … with 110 more rows

out <- fit_curve(grouped_dat, model = "poisson", alpha = 0.05)
out
#> # A tibble: 6 x 9
#>   count_variable hospital     data model estimates fitting_warning fitting_error
#>   <chr>          <fct>    <list<t> <lis> <list>    <list>          <list>       
#> 1 count          Connaug… [22 × 2] <glm> <df[,7] … <NULL>          <NULL>       
#> 2 count          Militar… [21 × 2] <glm> <df[,7] … <NULL>          <NULL>       
#> 3 count          other    [20 × 2] <glm> <df[,7] … <NULL>          <NULL>       
#> 4 count          Princes… [17 × 2] <glm> <df[,7] … <NULL>          <NULL>       
#> 5 count          Rokupa … [18 × 2] <glm> <df[,7] … <NULL>          <NULL>       
#> 6 count          <NA>     [22 × 2] <glm> <df[,7] … <NULL>          <NULL>       
#> # … with 2 more variables: prediction_warning <list>, prediction_error <list>

# plot with a prediction interval but not a confidence interval
plot(out, ci = FALSE, pi=TRUE)

growth_rate(out)
#> # A tibble: 6 x 10
#>   count_variable hospital      model     r r_lower r_upper growth_or_decay  time
#>   <chr>          <fct>         <lis> <dbl>   <dbl>   <dbl> <chr>           <dbl>
#> 1 count          Connaught Ho… <glm> 0.197   0.177   0.217 doubling         3.53
#> 2 count          Military Hos… <glm> 0.173   0.147   0.200 doubling         4.00
#> 3 count          other         <glm> 0.170   0.141   0.200 doubling         4.09
#> 4 count          Princess Chr… <glm> 0.142   0.101   0.188 doubling         4.87
#> 5 count          Rokupa Hospi… <glm> 0.178   0.133   0.228 doubling         3.89
#> 6 count          <NA>          <glm> 0.184   0.164   0.205 doubling         3.77
#> # … with 2 more variables: time_lower <dbl>, time_upper <dbl>

We provide helper functions, is_ok(), is_warning() and is_error() to help filter the output as necessary.

out <- fit_curve(grouped_dat, model = "negbin", alpha = 0.05)
is_warning(out)
#> # A tibble: 5 x 7
#>   count_variable hospital                 data model  estimates  fitting_warning
#>   <chr>          <fct>             <list<tibb> <list> <list>     <list>         
#> 1 count          Connaught Hospit…    [22 × 2] <negb… <df[,7] [… <chr [2]>      
#> 2 count          other                [20 × 2] <negb… <df[,7] [… <chr [2]>      
#> 3 count          Princess Christi…    [17 × 2] <negb… <df[,7] [… <chr [2]>      
#> 4 count          Rokupa Hospital      [18 × 2] <negb… <df[,7] [… <chr [2]>      
#> 5 count          <NA>                 [22 × 2] <negb… <df[,7] [… <chr [2]>      
#> # … with 1 more variable: prediction_warning <list>
unnest(is_warning(out), fitting_warning)
#> # A tibble: 10 x 7
#>    count_variable hospital               data model  estimates  fitting_warning 
#>    <chr>          <fct>            <list<tib> <list> <list>     <chr>           
#>  1 count          Connaught Hospi…   [22 × 2] <negb… <df[,7] [… iteration limit…
#>  2 count          Connaught Hospi…   [22 × 2] <negb… <df[,7] [… iteration limit…
#>  3 count          other              [20 × 2] <negb… <df[,7] [… iteration limit…
#>  4 count          other              [20 × 2] <negb… <df[,7] [… iteration limit…
#>  5 count          Princess Christ…   [17 × 2] <negb… <df[,7] [… iteration limit…
#>  6 count          Princess Christ…   [17 × 2] <negb… <df[,7] [… iteration limit…
#>  7 count          Rokupa Hospital    [18 × 2] <negb… <df[,7] [… iteration limit…
#>  8 count          Rokupa Hospital    [18 × 2] <negb… <df[,7] [… iteration limit…
#>  9 count          <NA>               [22 × 2] <negb… <df[,7] [… iteration limit…
#> 10 count          <NA>               [22 × 2] <negb… <df[,7] [… iteration limit…
#> # … with 1 more variable: prediction_warning <list>

Rolling average

We can add a rolling average, across current and previous intervals, to an incidence2 object with the add_rolling_average() function:

ra <- add_rolling_average(grouped_dat, before = 2) # group observations with the 2 prior
ra
#> # A tibble: 6 x 3
#>   count_variable hospital                                   rolling_average     
#>   <chr>          <fct>                                      <list>              
#> 1 count          Connaught Hospital                         <tibble[,3] [22 × 3…
#> 2 count          Military Hospital                          <tibble[,3] [21 × 3…
#> 3 count          other                                      <tibble[,3] [20 × 3…
#> 4 count          Princess Christian Maternity Hospital (PC… <tibble[,3] [17 × 3…
#> 5 count          Rokupa Hospital                            <tibble[,3] [18 × 3…
#> 6 count          <NA>                                       <tibble[,3] [22 × 3…
unnest(ra, rolling_average)
#> # A tibble: 120 x 5
#>    count_variable hospital           date_index count rolling_average
#>    <chr>          <fct>              <yrwk>     <int>           <dbl>
#>  1 count          Connaught Hospital 2014-W16       1           NA   
#>  2 count          Connaught Hospital 2014-W18       1           NA   
#>  3 count          Connaught Hospital 2014-W19       3            1.67
#>  4 count          Connaught Hospital 2014-W20       2            2.00
#>  5 count          Connaught Hospital 2014-W21       5            3.33
#>  6 count          Connaught Hospital 2014-W22       4            3.67
#>  7 count          Connaught Hospital 2014-W23       6            5   
#>  8 count          Connaught Hospital 2014-W24       6            5.33
#>  9 count          Connaught Hospital 2014-W25      12            8   
#> 10 count          Connaught Hospital 2014-W26       8            8.67
#> # … with 110 more rows

plot(ra, color = "white")
#> Warning: Removed 12 rows containing missing values (position_stack).

#> Warning: Removed 12 rows containing missing values (position_stack).