This document serves as an overview for measuring the performance of
RcppAlgos
against other tools for generating combinations,
permutations, and partitions. This stackoverflow post: How to generate
permutations or combinations of object in R? has some benchmarks.
You will note that the examples in that post are relatively small. The
benchmarks below will focus on larger examples where performance really
matters and for this reason we only consider the packages arrangements,
partitions,
and RcppAlgos.
For the benchmarks below, we used a
2022 Macbook Air Apple M2 24 GB
machine.
library(RcppAlgos)
library(partitions)
library(arrangements)
#>
#> Attaching package: 'arrangements'
#> The following object is masked from 'package:partitions':
#>
#> compositions
library(microbenchmark)
options(digits = 4)
options(width = 90)
pertinent_output <- capture.output(sessionInfo())
cat(paste(pertinent_output[1:3], collapse = "\n"))
#> R version 4.3.1 (2023-06-16)
#> Platform: aarch64-apple-darwin20 (64-bit)
#> Running under: macOS Ventura 13.4.1
pkgs <- c("RcppAlgos", "arrangements", "partitions", "microbenchmark")
sapply(pkgs, packageVersion, simplify = FALSE)
#> $RcppAlgos
#> [1] '2.8.0'
#>
#> $arrangements
#> [1] '1.1.9'
#>
#> $partitions
#> [1] '1.10.7'
#>
#> $microbenchmark
#> [1] '1.4.10'
numThreads <- min(as.integer(RcppAlgos::stdThreadMax() / 2), 6)
numThreads
#> [1] 4
set.seed(13)
v1 <- sort(sample(100, 30))
m <- 21
t1 <- comboGeneral(v1, m, Parallel = T)
t2 <- combinations(v1, m)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 14307150 21
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = comboGeneral(v1, m, nThreads = numThreads),
cbRcppAlgosSer = comboGeneral(v1, m),
cbArrangements = combinations(v1, m),
times = 15, unit = "relative")
#> Warning in microbenchmark(cbRcppAlgosPar = comboGeneral(v1, m, nThreads = numThreads), :
#> less accurate nanosecond times to avoid potential integer overflows
#> Unit: relative
#> expr min lq mean median uq max neval cld
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 15 a
#> cbRcppAlgosSer 3.440 3.091 3.099 3.089 3.077 2.982 15 b
#> cbArrangements 3.493 3.141 3.163 3.134 3.147 3.117 15 c
v2 <- v1[1:10]
m <- 20
t1 <- comboGeneral(v2, m, repetition = TRUE, nThreads = numThreads)
t2 <- combinations(v2, m, replace = TRUE)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 10015005 20
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = comboGeneral(v2, m, TRUE, nThreads = numThreads),
cbRcppAlgosSer = comboGeneral(v2, m, TRUE),
cbArrangements = combinations(v2, m, replace = TRUE),
times = 15, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval cld
#> cbRcppAlgosPar 1.000 1.000 1.000 1.00 1.000 1.000 15 a
#> cbRcppAlgosSer 2.925 3.018 3.059 3.01 3.000 3.873 15 b
#> cbArrangements 3.055 3.052 3.018 3.04 3.012 2.883 15 b
myFreqs <- c(2, 4, 4, 5, 3, 2, 2, 2, 3, 4, 1, 4, 2, 5)
v3 <- as.integer(c(1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610))
t1 <- comboGeneral(v3, 20, freqs = myFreqs, nThreads = numThreads)
t2 <- combinations(freq = myFreqs, k = 20, x = v3)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 14594082 20
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = comboGeneral(v3, 20, freqs = myFreqs, nThreads = numThreads),
cbRcppAlgosSer = comboGeneral(v3, 20, freqs = myFreqs),
cbArrangements = combinations(freq = myFreqs, k = 20, x = v3),
times = 10, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval cld
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 10 a
#> cbRcppAlgosSer 3.184 3.156 3.148 3.162 3.114 3.121 10 b
#> cbArrangements 6.014 6.059 6.030 6.044 5.947 6.173 10 c
v4 <- as.integer(c(2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59))
t1 <- permuteGeneral(v4, 6, nThreads = numThreads)
t2 <- permutations(v4, 6)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 8910720 6
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = permuteGeneral(v4, 6, nThreads = numThreads),
cbRcppAlgosSer = permuteGeneral(v4, 6),
cbArrangements = permutations(v4, 6),
times = 15, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval cld
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 15 a
#> cbRcppAlgosSer 1.491 1.494 1.428 1.500 1.283 1.350 15 b
#> cbArrangements 2.563 2.583 2.421 2.583 2.310 2.117 15 c
## Indexing permutation example with the partitions package
t1 <- permuteGeneral(11, nThreads = 4)
t2 <- permutations(11)
t3 <- perms(11)
dim(t1)
#> [1] 39916800 11
stopifnot(identical(t1, t2), identical(t1, t(as.matrix(t3))))
rm(t1, t2, t3)
invisible(gc())
microbenchmark(cbRcppAlgosPar = permuteGeneral(11, nThreads = 4),
cbRcppAlgosSer = permuteGeneral(11),
cbArrangements = permutations(11),
cbPartitions = perms(11),
times = 5, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval cld
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 5 a
#> cbRcppAlgosSer 2.502 2.493 2.173 2.498 1.756 1.921 5 b
#> cbArrangements 4.091 4.094 3.545 4.092 2.990 2.973 5 c
#> cbPartitions 7.997 8.175 7.188 8.288 6.004 6.380 5 d
v5 <- v3[1:5]
t1 <- permuteGeneral(v5, 10, repetition = TRUE, nThreads = numThreads)
t2 <- permutations(v5, 10, replace = TRUE)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 9765625 10
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = permuteGeneral(v5, 10, TRUE, nThreads = numThreads),
cbRcppAlgosSer = permuteGeneral(v5, 10, TRUE),
cbArrangements = permutations(x = v5, k = 10, replace = TRUE),
times = 10, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval cld
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 10 a
#> cbRcppAlgosSer 2.867 2.836 2.956 2.785 2.766 4.196 10 b
#> cbArrangements 3.567 3.508 3.629 3.464 3.448 4.836 10 c
v6 <- sort(runif(12))
t1 <- permuteGeneral(v6, 7, freqs = rep(1:3, 4), nThreads = numThreads)
t2 <- permutations(freq = rep(1:3, 4), k = 7, x = v6)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 19520760 7
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = permuteGeneral(v6, 7, freqs = rep(1:3, 4), nThreads = numThreads),
cbRcppAlgosSer = permuteGeneral(v6, 7, freqs = rep(1:3, 4)),
cbArrangements = permutations(freq = rep(1:3, 4), k = 7, x = v6),
times = 10, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval cld
#> cbRcppAlgosPar 1.000 1.00 1.000 1.000 1.000 1.000 10 a
#> cbRcppAlgosSer 3.157 3.22 3.000 3.214 3.210 1.906 10 b
#> cbArrangements 3.601 3.58 3.339 3.572 3.564 2.109 10 c
t1 <- comboGeneral(0:140, freqs=c(140, rep(1, 140)),
constraintFun = "sum", comparisonFun = "==",
limitConstraints = 140)
t2 <- partitions(140, distinct = TRUE)
t3 <- diffparts(140)
# Each package has different output formats... we only examine dimensions
# and that each result is a partition of 140
stopifnot(identical(dim(t1), dim(t2)), identical(dim(t1), dim(t(t3))),
all(rowSums(t1) == 140), all(rowSums(t2) == 140),
all(colSums(t3) == 140))
dim(t1)
#> [1] 9617150 16
rm(t1, t2, t3)
invisible(gc())
microbenchmark(cbRcppAlgosPar = partitionsGeneral(0:140, freqs=c(140, rep(1, 140)), nThreads = numThreads),
cbRcppAlgosSer = partitionsGeneral(0:140, freqs=c(140, rep(1, 140))),
cbArrangements = partitions(140, distinct = TRUE),
cbPartitions = diffparts(140),
times = 10, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval cld
#> cbRcppAlgosPar 1.000 1.00 1.000 1.000 1.000 1.000 10 a
#> cbRcppAlgosSer 3.283 3.30 3.017 3.551 2.602 2.492 10 b
#> cbArrangements 2.629 2.61 2.355 2.605 2.106 1.984 10 c
#> cbPartitions 17.812 18.44 16.317 19.205 14.092 13.282 10 d
t1 <- comboGeneral(160, 10,
constraintFun = "sum", comparisonFun = "==",
limitConstraints = 160)
t2 <- partitions(160, 10, distinct = TRUE)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 8942920 10
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = partitionsGeneral(160, 10, nThreads = numThreads),
cbRcppAlgosSer = partitionsGeneral(160, 10),
cbArrangements = partitions(160, 10, distinct = TRUE),
times = 10, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval cld
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 10 a
#> cbRcppAlgosSer 3.348 3.344 2.966 3.260 2.233 2.552 10 b
#> cbArrangements 4.384 4.412 3.785 4.284 2.928 2.927 10 c
t1 <- comboGeneral(0:65, repetition = TRUE, constraintFun = "sum",
comparisonFun = "==", limitConstraints = 65)
t2 <- partitions(65)
t3 <- parts(65)
# Each package has different output formats... we only examine dimensions
# and that each result is a partition of 65
stopifnot(identical(dim(t1), dim(t2)), identical(dim(t1), dim(t(t3))),
all(rowSums(t1) == 65), all(rowSums(t2) == 65),
all(colSums(t3) == 65))
dim(t1)
#> [1] 2012558 65
rm(t1, t2, t3)
invisible(gc())
microbenchmark(cbRcppAlgosPar = partitionsGeneral(0:65, repetition = TRUE,
nThreads = numThreads),
cbRcppAlgosSer = partitionsGeneral(0:65, repetition = TRUE),
cbArrangements = partitions(65),
cbPartitions = parts(65),
times = 20, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval cld
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.0000 20 a
#> cbRcppAlgosSer 2.890 2.635 2.317 2.584 2.441 0.8822 20 b
#> cbArrangements 2.330 2.046 1.897 2.008 1.912 1.2706 20 b
#> cbPartitions 9.132 9.167 9.273 11.193 10.536 3.7732 20 c
t1 <- comboGeneral(100, 15, TRUE, constraintFun = "sum",
comparisonFun = "==", limitConstraints = 100)
t2 <- partitions(100, 15)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 9921212 15
rm(t1, t2)
# This takes a really long time... not because of restrictedparts,
# but because apply is not that fast. This transformation is
# needed for proper comparisons. As a result, we will compare
# a smaller example
# t3 <- t(apply(as.matrix(restrictedparts(100, 15, include.zero = F)), 2, sort))
t3 <- t(apply(as.matrix(restrictedparts(50, 15, include.zero = F)), 2, sort))
stopifnot(identical(partitions(50, 15), t3))
rm(t3)
invisible(gc())
microbenchmark(cbRcppAlgosPar = partitionsGeneral(100, 15, TRUE,
nThreads = numThreads),
cbRcppAlgosSer = partitionsGeneral(100, 15, TRUE),
cbArrangements = partitions(100, 15),
cbPartitions = restrictedparts(100, 15,
include.zero = FALSE),
times = 10, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval cld
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 10 a
#> cbRcppAlgosSer 3.359 3.304 2.546 2.735 2.779 1.227 10 b
#> cbArrangements 4.201 4.138 3.153 3.405 3.343 1.504 10 c
#> cbPartitions 15.543 15.511 11.818 12.758 12.242 6.145 10 d
Currenlty, RcppAlgos
is the only package capable of
efficiently generating partitions of multisets. Therefore, we will only
time RcppAlgos
and use this as a reference for future
improvements.
t1 <- comboGeneral(120, 10, freqs=rep(1:8, 15),
constraintFun = "sum", comparisonFun = "==",
limitConstraints = 120)
dim(t1)
#> [1] 7340225 10
stopifnot(all(rowSums(t1) == 120))
microbenchmark(cbRcppAlgos = partitionsGeneral(120, 10, freqs=rep(1:8, 15)),
times = 10)
#> Unit: milliseconds
#> expr min lq mean median uq max neval
#> cbRcppAlgos 281.7 282.6 287.2 284.2 292.5 295.8 10
t1 <- compositionsGeneral(0:15, repetition = TRUE)
t2 <- arrangements::compositions(15)
t3 <- partitions::compositions(15)
# Each package has different output formats... we only examine dimensions
# and that each result is a partition of 15
stopifnot(identical(dim(t1), dim(t2)), identical(dim(t1), dim(t(t3))),
all(rowSums(t1) == 15), all(rowSums(t2) == 15),
all(colSums(t3) == 15))
dim(t1)
#> [1] 16384 15
rm(t1, t2, t3)
invisible(gc())
microbenchmark(cbRcppAlgosSer = compositionsGeneral(0:15, repetition = TRUE),
cbArrangements = arrangements::compositions(15),
cbPartitions = partitions::compositions(15),
times = 20, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval cld
#> cbRcppAlgosSer 1.00 1.000 1.000 1.000 1.000 1.000 20 a
#> cbArrangements 1.23 1.221 1.199 1.199 1.188 1.108 20 a
#> cbPartitions 133.80 148.531 172.109 175.967 192.882 222.542 20 b
For the next two examples, we will exclude the
partitions
package for efficiency reasons.
t1 <- compositionsGeneral(0:23, repetition = TRUE)
t2 <- arrangements::compositions(23)
# Each package has different output formats... we only examine dimensions
# and that each result is a partition of 23
stopifnot(identical(dim(t1), dim(t2)), all(rowSums(t1) == 23),
all(rowSums(t2) == 23))
dim(t1)
#> [1] 4194304 23
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = compositionsGeneral(0:23, repetition = TRUE,
nThreads = numThreads),
cbRcppAlgosSer = compositionsGeneral(0:23, repetition = TRUE),
cbArrangements = arrangements::compositions(23),
times = 20, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval cld
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 20 a
#> cbRcppAlgosSer 3.337 3.374 3.365 3.383 3.379 3.175 20 b
#> cbArrangements 3.730 3.760 3.779 3.776 3.775 4.014 20 c
t1 <- compositionsGeneral(30, 10, repetition = TRUE)
t2 <- arrangements::compositions(30, 10)
stopifnot(identical(t1, t2), all(rowSums(t1) == 30))
dim(t1)
#> [1] 10015005 10
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = compositionsGeneral(30, 10, repetition = TRUE,
nThreads = numThreads),
cbRcppAlgosSer = compositionsGeneral(30, 10, repetition = TRUE),
cbArrangements = arrangements::compositions(30, 10),
times = 20, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval cld
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 20 a
#> cbRcppAlgosSer 3.005 3.089 3.078 3.094 3.081 3.046 20 b
#> cbArrangements 3.153 3.129 3.127 3.132 3.119 3.128 20 c
We will show one example from each category to demonstrate the
efficiency of the iterators in RcppAlgos
. The results are
similar for the rest of the cases not shown.
pkg_arrangements <- function(n, total) {
a <- icombinations(n, as.integer(n / 2))
for (i in 1:total) a$getnext()
}
pkg_RcppAlgos <- function(n, total) {
a <- comboIter(n, as.integer(n / 2))
for (i in 1:total) a@nextIter()
}
total <- comboCount(18, 9)
total
#> [1] 48620
microbenchmark(cbRcppAlgos = pkg_RcppAlgos(18, total),
cbArrangements = pkg_arrangements(18, total),
times = 15, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval cld
#> cbRcppAlgos 1.00 1.00 1.00 1.00 1.00 1.00 15 a
#> cbArrangements 19.57 19.36 18.79 19.15 19.06 15.26 15 b
pkg_arrangements <- function(n, total) {
a <- ipermutations(n)
for (i in 1:total) a$getnext()
}
pkg_RcppAlgos <- function(n, total) {
a <- permuteIter(n)
for (i in 1:total) a@nextIter()
}
total <- permuteCount(8)
total
#> [1] 40320
microbenchmark(cbRcppAlgos = pkg_RcppAlgos(8, total),
cbArrangements = pkg_arrangements(8, total),
times = 15, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval cld
#> cbRcppAlgos 1.00 1.00 1.00 1.00 1.00 1.00 15 a
#> cbArrangements 19.56 19.44 19.18 19.41 19.13 18.18 15 b
pkg_partitions <- function(n, total) {
a <- firstpart(n)
for (i in 1:(total - 1)) a <- nextpart(a)
}
pkg_arrangements <- function(n, total) {
a <- ipartitions(n)
for (i in 1:total) a$getnext()
}
pkg_RcppAlgos <- function(n, total) {
a <- partitionsIter(0:n, repetition = TRUE)
for (i in 1:total) a@nextIter()
}
total <- partitionsCount(0:40, repetition = TRUE)
total
#> [1] 37338
microbenchmark(cbRcppAlgos = pkg_RcppAlgos(40, total),
cbArrangements = pkg_arrangements(40, total),
cbPartitions = pkg_partitions(40, total),
times = 15, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval cld
#> cbRcppAlgos 1.00 1.00 1.00 1.00 1.00 1.00 15 a
#> cbArrangements 15.55 15.56 14.85 15.64 14.13 14.34 15 b
#> cbPartitions 25.36 25.54 24.22 25.60 23.23 22.02 15 c
pkg_partitions <- function(n, total) {
a <- firstcomposition(n)
for (i in 1:(total - 1)) a <- nextcomposition(a, FALSE)
}
pkg_arrangements <- function(n, total) {
a <- icompositions(n)
for (i in 1:total) a$getnext()
}
pkg_RcppAlgos <- function(n, total) {
a <- compositionsIter(0:n, repetition = TRUE)
for (i in 1:total) a@nextIter()
}
total <- compositionsCount(0:15, repetition = TRUE)
total
#> [1] 16384
microbenchmark(cbRcppAlgos = pkg_RcppAlgos(15, total),
cbArrangements = pkg_arrangements(15, total),
cbPartitions = pkg_partitions(15, total),
times = 15, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval cld
#> cbRcppAlgos 1.00 1.00 1.00 1.00 1.00 1.00 15 a
#> cbArrangements 14.21 14.10 13.56 14.20 13.84 11.64 15 b
#> cbPartitions 46.54 46.16 44.64 46.03 44.68 44.44 15 c