Skimr defaults

2019-06-20

This vignette simply displays the defaults for skimr functions. All of these may be overridden, as explained in the "Using skimr" vignette.

Skimmers

The show_skimmers() function displays the named list of skimmers by type.

library(skimr)
#> 
#> Attaching package: 'skimr'
#> The following object is masked from 'package:stats':
#> 
#>     filter
show_skimmers()
#> $numeric
#>  [1] "missing"  "complete" "n"        "mean"     "sd"       "p0"      
#>  [7] "p25"      "p50"      "p75"      "p100"     "hist"    
#> 
#> $integer
#>  [1] "missing"  "complete" "n"        "mean"     "sd"       "p0"      
#>  [7] "p25"      "p50"      "p75"      "p100"     "hist"    
#> 
#> $factor
#> [1] "missing"    "complete"   "n"          "n_unique"   "top_counts"
#> [6] "ordered"   
#> 
#> $character
#> [1] "missing"  "complete" "n"        "min"      "max"      "empty"   
#> [7] "n_unique"
#> 
#> $logical
#> [1] "missing"  "complete" "n"        "mean"     "count"   
#> 
#> $complex
#> [1] "missing"  "complete" "n"       
#> 
#> $date
#> [1] "missing"  "complete" "n"        "min"      "max"      "median"  
#> [7] "n_unique"
#> 
#> $Date
#> [1] "missing"  "complete" "n"        "min"      "max"      "median"  
#> [7] "n_unique"
#> 
#> $ts
#>  [1] "missing"    "complete"   "n"          "start"      "end"       
#>  [6] "frequency"  "deltat"     "mean"       "sd"         "min"       
#> [11] "max"        "median"     "line_graph"
#> 
#> $POSIXct
#> [1] "missing"  "complete" "n"        "min"      "max"      "median"  
#> [7] "n_unique"
#> 
#> $list
#> [1] "missing"       "complete"      "n"             "n_unique"     
#> [5] "min_length"    "median_length" "max_length"   
#> 
#> $AsIs
#> [1] "missing"    "complete"   "n"          "n_unique"   "min_length"
#> [6] "max_length"
#> 
#> $difftime
#> [1] "missing"  "complete" "n"        "min"      "max"      "median"  
#> [7] "n_unique"

Skimmer functions

The get_skimmers() function shows the actual functions used, including function defaults such as NA handling.

get_skimmers()
#> $numeric
#> $numeric$missing
#> function (x) 
#> {
#>     sum(is.na(x) | is.null(x))
#> }
#> <environment: namespace:skimr>
#> 
#> $numeric$complete
#> function (x) 
#> {
#>     length(x) - n_missing(x)
#> }
#> <environment: namespace:skimr>
#> 
#> $numeric$n
#> function (x)  .Primitive("length")
#> 
#> $numeric$mean
#> <partialised>
#> function (...) 
#> mean.default(na.rm = TRUE, ...)
#> 
#> $numeric$sd
#> <partialised>
#> function (...) 
#> stats::sd(na.rm = TRUE, ...)
#> 
#> $numeric$p0
#> <partialised>
#> function (...) 
#> stats::quantile(probs = 0, na.rm = TRUE, names = FALSE, ...)
#> 
#> $numeric$p25
#> <partialised>
#> function (...) 
#> stats::quantile(probs = 0.25, na.rm = TRUE, names = FALSE, ...)
#> 
#> $numeric$p50
#> <partialised>
#> function (...) 
#> stats::quantile(probs = 0.5, na.rm = TRUE, names = FALSE, ...)
#> 
#> $numeric$p75
#> <partialised>
#> function (...) 
#> stats::quantile(probs = 0.75, na.rm = TRUE, names = FALSE, ...)
#> 
#> $numeric$p100
#> <partialised>
#> function (...) 
#> stats::quantile(probs = 1, na.rm = TRUE, names = FALSE, ...)
#> 
#> $numeric$hist
#> function (x) 
#> {
#>     if (any(is.infinite(x))) {
#>         x[is.infinite(x)] <- NA
#>         warning("Variable contains Inf or -Inf value(s) that were converted to NA.")
#>     }
#>     if (length(x) < 1 || all(is.na(x))) {
#>         return(structure(" ", class = c("spark", "character")))
#>     }
#>     if (all(x == 0, na.rm = TRUE)) 
#>         x <- x + 1
#>     hist_dt <- table(cut(x, options$formats$character$width))
#>     hist_dt <- hist_dt/max(hist_dt)
#>     structure(spark_bar(hist_dt), class = c("spark", "character"))
#> }
#> <environment: namespace:skimr>
#> 
#> 
#> $integer
#> $integer$missing
#> function (x) 
#> {
#>     sum(is.na(x) | is.null(x))
#> }
#> <environment: namespace:skimr>
#> 
#> $integer$complete
#> function (x) 
#> {
#>     length(x) - n_missing(x)
#> }
#> <environment: namespace:skimr>
#> 
#> $integer$n
#> function (x)  .Primitive("length")
#> 
#> $integer$mean
#> <partialised>
#> function (...) 
#> mean.default(na.rm = TRUE, ...)
#> 
#> $integer$sd
#> <partialised>
#> function (...) 
#> stats::sd(na.rm = TRUE, ...)
#> 
#> $integer$p0
#> <partialised>
#> function (...) 
#> stats::quantile(probs = 0, na.rm = TRUE, names = FALSE, ...)
#> 
#> $integer$p25
#> <partialised>
#> function (...) 
#> stats::quantile(probs = 0.25, na.rm = TRUE, names = FALSE, ...)
#> 
#> $integer$p50
#> <partialised>
#> function (...) 
#> stats::quantile(probs = 0.5, na.rm = TRUE, names = FALSE, ...)
#> 
#> $integer$p75
#> <partialised>
#> function (...) 
#> stats::quantile(probs = 0.75, na.rm = TRUE, names = FALSE, ...)
#> 
#> $integer$p100
#> <partialised>
#> function (...) 
#> stats::quantile(probs = 1, na.rm = TRUE, names = FALSE, ...)
#> 
#> $integer$hist
#> function (x) 
#> {
#>     if (any(is.infinite(x))) {
#>         x[is.infinite(x)] <- NA
#>         warning("Variable contains Inf or -Inf value(s) that were converted to NA.")
#>     }
#>     if (length(x) < 1 || all(is.na(x))) {
#>         return(structure(" ", class = c("spark", "character")))
#>     }
#>     if (all(x == 0, na.rm = TRUE)) 
#>         x <- x + 1
#>     hist_dt <- table(cut(x, options$formats$character$width))
#>     hist_dt <- hist_dt/max(hist_dt)
#>     structure(spark_bar(hist_dt), class = c("spark", "character"))
#> }
#> <environment: namespace:skimr>
#> 
#> 
#> $factor
#> $factor$missing
#> function (x) 
#> {
#>     sum(is.na(x) | is.null(x))
#> }
#> <environment: namespace:skimr>
#> 
#> $factor$complete
#> function (x) 
#> {
#>     length(x) - n_missing(x)
#> }
#> <environment: namespace:skimr>
#> 
#> $factor$n
#> function (x)  .Primitive("length")
#> 
#> $factor$n_unique
#> function (x) 
#> {
#>     un <- x[!is.na(x)]
#>     un <- unique(un)
#>     length(un)
#> }
#> <environment: namespace:skimr>
#> 
#> $factor$top_counts
#> function (x) 
#> {
#>     tab <- table(x, useNA = "always")
#>     names_tab <- names(tab)
#>     if (is.element("", names_tab)) {
#>         names_tab[names_tab == ""] <- "empty"
#>         warning("Variable contains value(s) of \"\" that have been converted to \"empty\".")
#>     }
#>     out <- rlang::set_names(as.integer(tab), names_tab)
#>     sort(out, decreasing = TRUE)
#> }
#> <environment: namespace:skimr>
#> 
#> $factor$ordered
#> function (x) 
#> inherits(x, "ordered")
#> <bytecode: 0x7fc7348dc320>
#> <environment: namespace:base>
#> 
#> 
#> $character
#> $character$missing
#> function (x) 
#> {
#>     sum(is.na(x) | is.null(x))
#> }
#> <environment: namespace:skimr>
#> 
#> $character$complete
#> function (x) 
#> {
#>     length(x) - n_missing(x)
#> }
#> <environment: namespace:skimr>
#> 
#> $character$n
#> function (x)  .Primitive("length")
#> 
#> $character$min
#> function (x) 
#> {
#>     if (all(is.na(x))) 
#>         return(NA)
#>     characters <- nchar(x, allowNA = TRUE)
#>     min(characters, na.rm = TRUE)
#> }
#> <environment: namespace:skimr>
#> 
#> $character$max
#> function (x) 
#> {
#>     if (all(is.na(x))) 
#>         return(NA)
#>     characters <- nchar(x, allowNA = TRUE)
#>     max(characters, na.rm = TRUE)
#> }
#> <environment: namespace:skimr>
#> 
#> $character$empty
#> function (x) 
#> {
#>     empty.strings <- c("")
#>     sum(x %in% empty.strings)
#> }
#> <environment: namespace:skimr>
#> 
#> $character$n_unique
#> function (x) 
#> {
#>     un <- x[!is.na(x)]
#>     un <- unique(un)
#>     length(un)
#> }
#> <environment: namespace:skimr>
#> 
#> 
#> $logical
#> $logical$missing
#> function (x) 
#> {
#>     sum(is.na(x) | is.null(x))
#> }
#> <environment: namespace:skimr>
#> 
#> $logical$complete
#> function (x) 
#> {
#>     length(x) - n_missing(x)
#> }
#> <environment: namespace:skimr>
#> 
#> $logical$n
#> function (x)  .Primitive("length")
#> 
#> $logical$mean
#> <partialised>
#> function (...) 
#> mean.default(na.rm = TRUE, ...)
#> 
#> $logical$count
#> function (x) 
#> {
#>     tab <- table(x, useNA = "always")
#>     names_tab <- names(tab)
#>     if (is.element("", names_tab)) {
#>         names_tab[names_tab == ""] <- "empty"
#>         warning("Variable contains value(s) of \"\" that have been converted to \"empty\".")
#>     }
#>     out <- rlang::set_names(as.integer(tab), names_tab)
#>     sort(out, decreasing = TRUE)
#> }
#> <environment: namespace:skimr>
#> 
#> 
#> $complex
#> $complex$missing
#> function (x) 
#> {
#>     sum(is.na(x) | is.null(x))
#> }
#> <environment: namespace:skimr>
#> 
#> $complex$complete
#> function (x) 
#> {
#>     length(x) - n_missing(x)
#> }
#> <environment: namespace:skimr>
#> 
#> $complex$n
#> function (x)  .Primitive("length")
#> 
#> 
#> $date
#> $date$missing
#> function (x) 
#> {
#>     sum(is.na(x) | is.null(x))
#> }
#> <environment: namespace:skimr>
#> 
#> $date$complete
#> function (x) 
#> {
#>     length(x) - n_missing(x)
#> }
#> <environment: namespace:skimr>
#> 
#> $date$n
#> function (x)  .Primitive("length")
#> 
#> $date$min
#> <partialised>
#> function (...) 
#> min(na.rm = TRUE, ...)
#> 
#> $date$max
#> <partialised>
#> function (...) 
#> max(na.rm = TRUE, ...)
#> 
#> $date$median
#> <partialised>
#> function (...) 
#> stats::median(na.rm = TRUE, ...)
#> 
#> $date$n_unique
#> function (x) 
#> {
#>     un <- x[!is.na(x)]
#>     un <- unique(un)
#>     length(un)
#> }
#> <environment: namespace:skimr>
#> 
#> 
#> $Date
#> $Date$missing
#> function (x) 
#> {
#>     sum(is.na(x) | is.null(x))
#> }
#> <environment: namespace:skimr>
#> 
#> $Date$complete
#> function (x) 
#> {
#>     length(x) - n_missing(x)
#> }
#> <environment: namespace:skimr>
#> 
#> $Date$n
#> function (x)  .Primitive("length")
#> 
#> $Date$min
#> <partialised>
#> function (...) 
#> min(na.rm = TRUE, ...)
#> 
#> $Date$max
#> <partialised>
#> function (...) 
#> max(na.rm = TRUE, ...)
#> 
#> $Date$median
#> <partialised>
#> function (...) 
#> stats::median(na.rm = TRUE, ...)
#> 
#> $Date$n_unique
#> function (x) 
#> {
#>     un <- x[!is.na(x)]
#>     un <- unique(un)
#>     length(un)
#> }
#> <environment: namespace:skimr>
#> 
#> 
#> $ts
#> $ts$missing
#> function (x) 
#> {
#>     sum(is.na(x) | is.null(x))
#> }
#> <environment: namespace:skimr>
#> 
#> $ts$complete
#> function (x) 
#> {
#>     length(x) - n_missing(x)
#> }
#> <environment: namespace:skimr>
#> 
#> $ts$n
#> function (x)  .Primitive("length")
#> 
#> $ts$start
#> function (x) 
#> {
#>     stats::start(x)[1]
#> }
#> <environment: namespace:skimr>
#> 
#> $ts$end
#> function (x) 
#> {
#>     stats::end(x)[1]
#> }
#> <environment: namespace:skimr>
#> 
#> $ts$frequency
#> function (x, ...) 
#> UseMethod("frequency")
#> <bytecode: 0x7fc73640d028>
#> <environment: namespace:stats>
#> 
#> $ts$deltat
#> function (x, ...) 
#> UseMethod("deltat")
#> <bytecode: 0x7fc735656310>
#> <environment: namespace:stats>
#> 
#> $ts$mean
#> <partialised>
#> function (...) 
#> mean.default(na.rm = TRUE, ...)
#> 
#> $ts$sd
#> <partialised>
#> function (...) 
#> stats::sd(na.rm = TRUE, ...)
#> 
#> $ts$min
#> <partialised>
#> function (...) 
#> min(na.rm = TRUE, ...)
#> 
#> $ts$max
#> <partialised>
#> function (...) 
#> max(na.rm = TRUE, ...)
#> 
#> $ts$median
#> <partialised>
#> function (...) 
#> stats::median(na.rm = TRUE, ...)
#> 
#> $ts$line_graph
#> function (x) 
#> {
#>     t <- x[!is.na(x)]
#>     id <- seq(1, length(t), length.out = 2 * options$formats$character$width)
#>     normalized <- normalize01(t[floor(id)])
#>     structure(spark_line(normalized), class = c("spark", "character"))
#> }
#> <environment: namespace:skimr>
#> 
#> 
#> $POSIXct
#> $POSIXct$missing
#> function (x) 
#> {
#>     sum(is.na(x) | is.null(x))
#> }
#> <environment: namespace:skimr>
#> 
#> $POSIXct$complete
#> function (x) 
#> {
#>     length(x) - n_missing(x)
#> }
#> <environment: namespace:skimr>
#> 
#> $POSIXct$n
#> function (x)  .Primitive("length")
#> 
#> $POSIXct$min
#> <partialised>
#> function (...) 
#> min(na.rm = TRUE, ...)
#> 
#> $POSIXct$max
#> <partialised>
#> function (...) 
#> max(na.rm = TRUE, ...)
#> 
#> $POSIXct$median
#> <partialised>
#> function (...) 
#> stats::median(na.rm = TRUE, ...)
#> 
#> $POSIXct$n_unique
#> function (x) 
#> {
#>     un <- x[!is.na(x)]
#>     un <- unique(un)
#>     length(un)
#> }
#> <environment: namespace:skimr>
#> 
#> 
#> $list
#> $list$missing
#> function (x) 
#> {
#>     sum(is.na(x) | is.null(x))
#> }
#> <environment: namespace:skimr>
#> 
#> $list$complete
#> function (x) 
#> {
#>     length(x) - n_missing(x)
#> }
#> <environment: namespace:skimr>
#> 
#> $list$n
#> function (x)  .Primitive("length")
#> 
#> $list$n_unique
#> function (x) 
#> {
#>     un <- x[!is.na(x)]
#>     un <- unique(un)
#>     length(un)
#> }
#> <environment: namespace:skimr>
#> 
#> $list$min_length
#> function (x) 
#> {
#>     x <- x[!is.na(x)]
#>     l <- lengths(x)
#>     if (length(l) > 0) 
#>         min(l)
#>     else NA
#> }
#> <environment: namespace:skimr>
#> 
#> $list$median_length
#> function (x) 
#> {
#>     x <- x[!is.na(x)]
#>     l <- lengths(x)
#>     if (length(l) > 0) 
#>         stats::median(l)
#>     else NA
#> }
#> <environment: namespace:skimr>
#> 
#> $list$max_length
#> function (x) 
#> {
#>     x <- x[!is.na(x)]
#>     l <- lengths(x)
#>     if (length(l) > 0) 
#>         max(l)
#>     else NA
#> }
#> <environment: namespace:skimr>
#> 
#> 
#> $AsIs
#> $AsIs$missing
#> function (x) 
#> {
#>     sum(is.na(x) | is.null(x))
#> }
#> <environment: namespace:skimr>
#> 
#> $AsIs$complete
#> function (x) 
#> {
#>     length(x) - n_missing(x)
#> }
#> <environment: namespace:skimr>
#> 
#> $AsIs$n
#> function (x)  .Primitive("length")
#> 
#> $AsIs$n_unique
#> function (x) 
#> {
#>     un <- x[!is.na(x)]
#>     un <- unique(un)
#>     length(un)
#> }
#> <environment: namespace:skimr>
#> 
#> $AsIs$min_length
#> function (x) 
#> {
#>     l <- lengths(x)
#>     min(l)
#> }
#> <environment: namespace:skimr>
#> 
#> $AsIs$max_length
#> function (x) 
#> {
#>     l <- lengths(x)
#>     max(l)
#> }
#> <environment: namespace:skimr>
#> 
#> 
#> $difftime
#> $difftime$missing
#> function (x) 
#> {
#>     sum(is.na(x) | is.null(x))
#> }
#> <environment: namespace:skimr>
#> 
#> $difftime$complete
#> function (x) 
#> {
#>     length(x) - n_missing(x)
#> }
#> <environment: namespace:skimr>
#> 
#> $difftime$n
#> function (x)  .Primitive("length")
#> 
#> $difftime$min
#> <partialised>
#> function (...) 
#> min(na.rm = TRUE, ...)
#> 
#> $difftime$max
#> <partialised>
#> function (...) 
#> max(na.rm = TRUE, ...)
#> 
#> $difftime$median
#> <partialised>
#> function (...) 
#> stats::median(na.rm = TRUE, ...)
#> 
#> $difftime$n_unique
#> function (x) 
#> {
#>     un <- x[!is.na(x)]
#>     un <- unique(un)
#>     length(un)
#> }
#> <environment: namespace:skimr>

Formats

skimr creates formatted values for use in printing. The show_formats() function shows the default formats.

show_formats()
#> $.levels
#> $.levels$max_char
#> [1] 3
#> 
#> $.levels$max_levels
#> [1] 4
#> 
#> 
#> $.align_decimal
#> [1] TRUE
#> 
#> $numeric
#> $numeric$digits
#> [1] 2
#> 
#> $numeric$nsmall
#> [1] 2
#> 
#> $numeric$drop0trailing
#> [1] TRUE
#> 
#> 
#> $integer
#> $integer$drop0trailing
#> [1] TRUE
#> 
#> 
#> $character
#> $character$width
#> [1] 8
#> 
#> 
#> $date
#> $date$format
#> [1] "%Y-%m-%d"
#> 
#> 
#> $posixct
#> $posixct$format
#> [1] "%Y-%m-%d"
#> 
#> 
#> $logical
#> list()
#> 
#> $asis
#> list()
#> 
#> $difftime
#> list()
#> 
#> $spark
#> NULL