emstreeR

Travis-CI Build Status CRAN_Status_Badge Downloads from the RStudio CRAN mirror License

Overview

emstreeR is a package for fast and easily computing Euclidean Minimum Spanning Trees (EMST). It heavily relies on ‘RcppMLPACK’ and ‘Rcpp’, working as a wrapper to the fast EMST Dual-Tree Boruvka algorithm (March, Ram, Gray, 2010) implemented in ‘mlpack’ - the C++ Machine Learning library (Curtin, 2013). Thus, do not have to deal with the R-‘Rcpp’-C++ integration. The package also provides functions and an S3 method for readily plotting Minimum Spanning Trees (MST) using either ‘base’ R, ‘scatterplot3d’ or ‘ggplot2’ style.

Installation

# CRAN version
install.packages("emstreeR")

# Dev version
if (!require('devtools')) install.packages('devtools')
devtools::install_github("allanvc/emstreeR")

Basic Usage

## artificial data:
set.seed(1984)
n <- 7
c1 <- data.frame(x = rnorm(n, -0.2, sd = 0.2), y = rnorm(n, -2, sd = 0.2))
c2 <- data.frame(x = rnorm(n, -1.1, sd = 0.15), y = rnorm(n, -2, sd = 0.3)) 
d <- rbind(c1, c2)
d <- as.data.frame(d)

## MST:
library(emstreeR)
out <- ComputeMST(d)
#> 8 edges found so far.
#> 182 cumulative base cases.
#> 0 cumulative node combinations scored.
#> 12 edges found so far.
#> 344 cumulative base cases.
#> 0 cumulative node combinations scored.
#> 13 edges found so far.
#> 442 cumulative base cases.
#> 0 cumulative node combinations scored.
#> Total spanning tree length: 8.81192
out
#>               x         y from to  distance
#> 1  -0.118159357 -2.166545   11 13 0.2039000
#> 2  -0.264604994 -2.105242    6  7 0.3429154
#> 3  -0.072829535 -1.716803   10 14 0.3540068
#> 4  -0.569225757 -1.943598    8 12 0.3541008
#> 5  -0.009270527 -1.942413    1  2 0.4825166
#> 6   0.037697969 -1.832590    3  7 0.5072094
#> 7  -0.091509110 -1.795213   12 13 0.6017045
#> 8  -1.097338236 -1.871078    5  6 0.7142135
#> 9  -0.841400898 -2.194585    8 14 0.8351710
#> 10 -1.081888729 -1.728982    4 12 0.9724635
#> 11 -1.366334073 -2.003965    4  5 1.0563892
#> 12 -1.081078171 -1.925745    2  5 1.1558933
#> 13 -1.357063682 -1.972485    2  9 1.2314331
#> 14 -0.913706515 -1.753315    1  1 0.0000000

Plotting

2D Plots

## artifical data for 2D plots:
set.seed(1984)
n <- 15
c1 <- data.frame(x = rnorm(n, -0.2, sd = 0.2), y = rnorm(n, -2, sd = 0.2))
c2 <- data.frame(x = rnorm(n, -1.1, sd = 0.15), y = rnorm(n, -2, sd = 0.3)) 
d <- rbind(c1, c2)
d <- as.data.frame(d)
  
## MST:
library(emstreeR)
out <- ComputeMST(d, verbose = FALSE)
## simple 2D plot:
plot(out, col.pts = "red", col.segts = "blue")

## 2D plot with ggplot2:
library(ggplot2)
ggplot(data = out, aes(x = x, y = y, from = from, to = to))+ 
  geom_point()+ 
  stat_MST(colour="red")

## 2D curved edges plot with ggplot2:
library(ggplot2)
ggplot(data = out, aes(x = x, y = y, from = from, to = to))+ 
  geom_point()+ 
  stat_MST(geom="curve")

3D Plot

## artificial data for 3D plots:
n = 99
set.seed(1984)
d1<-matrix(rnorm(n, mean = -2, sd = .5), n/3, 3) # 3d
d2<-matrix(rnorm(n, mean = 0, sd = .3), n/3, 3)
d3<-matrix(rnorm(n, mean = 3, sd = .4), n/3, 3)
d<-rbind(d1,d2,d3) # just to show a matrix input
  
## MST:
library(emstreeR)
out <- ComputeMST(d, verbose = FALSE)
## simple 3D plot:
plotMST3D(out, xlab = "xaxis", col.pts = "orange", col.segts = "red", main = "Just a MST 3D plot")

Extras

## plotting MST on maps:
# honeymoon cruise example

library(ggmap)
library(dplyr)  
    
## define ports:
df.port_locations <- data.frame(location = c("Civitavecchia, Italy", 
                                             "Genova, Italy",
                                             "Marseille, France",
                                             "Barcelona, Spain",
                                             "Tunis, Tunisia",
                                             "Palermo, Italy"), 
                                stringsAsFactors = FALSE)
    
## get latitude and longitude:
geo.port_locations <- geocode(df.port_locations$location, source = "dsk")
    
## combine data:
df.port_locations <- cbind(df.port_locations, geo.port_locations)
    
## MST:
library(emstreeR)
out <- ComputeMST(df.port_locations[,2:3], verbose = FALSE)
## Plot:
map_grid <- c(left = -8, bottom = 32, right = 20, top = 47)
    
get_stamenmap(map_grid, zoom = 5) %>% ggmap()+
  stat_MST(data = out,
           aes(x = lon, y = lat, from=from, to=to), 
           colour="red", linetype = 2)+
  geom_point(data = out, aes(x = lon, y = lat), size=3)

License

This package is licensed under the terms of the BSD 3-clause License.

References

March, W. B., and Ram, P., and Gray, A. G. (2010). Fast euclidian minimum spanning tree: algorithm analysis, and applications. 16th ACM SIGKDD International Conference on Knowledge Discovery and Data mining, July 25-28 2010. Washington, DC, USA. doi:10.1145/1835804.1835882.

Curtin, R. R. et al. (2013). Mlpack: A scalable C++ machine learning library. Journal of Machine Learning Research, v. 14, 2013.