Predictor Identifier: Nonparametric PREDiction

Ashish Sharma, Raj Mehrotra, Sanjeev Jha, Jingwan Li and Ze Jiang

19:25:50 12 August, 2021

# NPRED is the package this document demonstrates (knn is called below as
# NPRED::knn; the other helpers are presumably exported by it as well).
library(NPRED)

# Snapshot of the current graphical parameters, handed back to par() later in
# the document. A bare par() call also returns read-only entries (for example
# "cin", "cra" and "din"), which cannot be set again.
op <- par()
# zoo supplies plot.zoo(), used for the AR example figure.
require(zoo)
#> Loading required package: zoo
#> 
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#> 
#>     as.Date, as.Date.numeric
# NOTE(review): no ggplot2 function is called in the visible code.
require(ggplot2)
#> Loading required package: ggplot2

1 Synthetic data generation

# Fix the seed so the simulated series (and everything printed below) can be
# reproduced.
set.seed(2020)

# Simulate 500 observations from each synthetic model described in the paper.
# The AR1 -> AR4 -> AR9 order is kept as is: the generators presumably draw
# from the same seeded random number stream.
data.ar1 <- data.gen.ar1(500) # AR1 model
data.ar4 <- data.gen.ar4(500) # AR4 model
data.ar9 <- data.gen.ar9(500) # AR9 model

# Bind the three response series column-wise and plot them with one y-axis
# label per model.
ar_responses <- cbind(data.ar1$x, data.ar4$x, data.ar9$x)
plot.zoo(
  ar_responses,
  main = "Example of AR models",
  xlab = NA,
  ylab = c("AR1", "AR4", "AR9")
)
Example of AR models

Figure 1.1: Example of AR models

2 Partial informational correlation (PIC)

# R version of pic: called with the AR9 response and its candidate predictors.
# The printed list holds $pmi and $pic, with nine values each in this run.
pic.calc(data.ar9$x, data.ar9$dp)
#> $pmi
#> [1]  0.01667864 -0.01345925  0.03862148  0.33933764 -0.01281137  0.01703449
#> [7]  0.04359516  0.07521345  0.07754585
#> 
#> $pic
#> [1] 0.1811272 0.0000000 0.2726446 0.7019341 0.0000000 0.1830168 0.2889591
#> [8] 0.3737103 0.3790295


# fortran version of pic: same inputs as above. Its estimates differ slightly
# from the R version (e.g. the first $pic entry is 0 here versus 0.18 above),
# while the largest value is the fourth entry in both.
calc.PIC(data.ar9$x, data.ar9$dp)
#> $pmi
#> [1] -0.003200178 -0.030318462  0.030197035  0.343560072 -0.019053916
#> [6]  0.010659037  0.046381753  0.068540976  0.059103240
#> 
#> $pic
#> [1] 0.0000000 0.0000000 0.2420878 0.7049662 0.0000000 0.1452324 0.2976424
#> [8] 0.3579123 0.3338973

3 Predictor identifier: stepwise.PIC

# Stepwise predictor identification on the AR9 data; the AR1 and AR4 variants
# are kept commented out as alternatives.
# pic <- stepwise.PIC(data.ar1$x, data.ar1$dp)
# pic <- stepwise.PIC(data.ar4$x, data.ar4$dp)
pic <- stepwise.PIC(data.ar9$x, data.ar9$dp)
# The result is printed below. $cpy and $cpyPIC are reused later as inputs to
# pw.calc() / calc.PW(). Presumably $cpy holds the selected predictor columns,
# $cpyPIC their PIC values, $wt the matching weights and $icpy the number of
# selected predictors (3 here) -- TODO confirm against the NPRED docs.
pic
#> $cpy
#> [1] 4 9 1
#> 
#> $cpyPIC
#> [1] 0.7019341 0.5358898 0.3475098
#> 
#> $wt
#> [1] 0.4708906 0.3340259 0.1950835
#> 
#> $lstwet
#>  Intercept         X1         X2         X3 
#> 0.02092053 0.61892085 0.49902045 0.26565225 
#> 
#> $icpy
#> [1] 3

4 Partial weights (PW)

# R version of pw: takes the AR9 response and candidate predictors plus the
# $cpy and $cpyPIC results of stepwise.PIC. The weights printed here are the
# same values as pic$wt.
pw.calc(data.ar9$x, data.ar9$dp, pic$cpy, pic$cpyPIC)
#> $pw
#> [1] 0.4708906 0.3340259 0.1950835

# fortran version of pw: same arguments. In this run its weights are the R
# values multiplied by a common factor of about 1.353, so they do not sum to 1.
calc.PW(data.ar9$x, data.ar9$dp, pic$cpy, pic$cpyPIC)
#> $pw
#> [1] 0.6371889 0.4519895 0.2639787

5 Nonparametric prediction: knn

# Load the sample data set "data3": the first column is the response, the
# remaining columns are the candidate predictors.
data("data3")
x <- ts(data3[, 1]) # response
z <- ts(data3[, -1]) # possible predictors
# New predictor input for the whole-series knn calls that are commented out
# below. The line is kept although zout is otherwise unused here:
# data.gen.ar1() presumably consumes random numbers, so dropping it could
# change the values simulated later in the document.
zout <- ts(data.gen.ar1(500, ndim = 15)$dp) # new input

# Start both prediction series as copies of x so they share its length and
# time-series attributes; every element is overwritten in the loop.
xhat1 <- xhat2 <- x
# xhat1 <- NPRED::knn(x,z,zout,k=5,reg=T,extrap=F)
# xhat2 <- NPRED::knn(x,z,zout,k=5,reg=T,extrap=T)

# Leave-one-out prediction: drop observation i, then predict it from its own
# predictor row, once without and once with extrapolation. The loop bound is
# taken from x, so no element is left holding the observed value if the data
# length differs from 500.
for (i in seq_along(x)) {
  xhat1[i] <- NPRED::knn(x[-i], z[-i, ], z[i, ], extrap = FALSE)
  xhat2[i] <- NPRED::knn(x[-i], z[-i, ], z[i, ], extrap = TRUE)
}

if (TRUE) {
  # Two square plotting regions side by side.
  par(mfrow = c(1, 2), pty = "s")

  # Left panel: observed series with both leave-one-out predictions overlaid.
  ts.plot(x, xhat1, xhat2,
    col = c("black", "red", "blue"), ylim = c(-10, 10), lwd = c(1, 1, 1)
  )
  legend("topleft",
    bty = "n", lwd = 3, cex = 1, lty = 1,
    # inset = c(-0.5, 0),
    legend = c("OBS", "Pred", "Pred(extrap=T)"),
    x.intersp = 0, xjust = 0, yjust = 0, text.width = c(0, 50, 50),
    # Spelled-out logical constant: T is an ordinary variable that can be
    # reassigned, TRUE cannot.
    horiz = TRUE,
    col = c("black", "red", "blue")
  )

  # Right panel: the two prediction variants against each other, with a 1:1
  # reference line.
  plot(xhat1, xhat2, xlim = c(-10, 10), ylim = c(-10, 10))
  abline(coef = c(0, 1), lwd = 1, col = 2)
}
Example of KNN implemented in NPRED

Figure 5.1: Example of KNN implemented in NPRED

6 Illustration of the usage of partial weights

# Restore the graphical parameters saved in `op`. Only the entries that par()
# reports as settable are passed back: `op` was taken with a bare par() call
# and therefore also holds read-only parameters (cin, cra, csi, cxy, din,
# page), and handing those to par() raises a "cannot be set" warning for each.
par(op[names(par(no.readonly = TRUE))])
# Simulation settings: number of observations and the k value passed to knn().
sample <- 500
k <- 0

# Response: a sine wave of u (uniform on [0, 5 * pi]) plus Gaussian noise.
u <- runif(sample, 0, 5 * pi)
z <- sin(u) + rnorm(sample, sd = 0.2)

# Candidate predictors: the driver u followed by three further independent
# uniform columns.
u1 <- cbind(
  u,
  runif(sample, 0, 5 * pi),
  runif(sample, 0, 5 * pi),
  runif(sample, 0, 5 * pi)
)

# Leave-one-out knn estimate of z using all four candidate columns.
# zhat1 <- knnregl1cv(x=z, z=u1, k=k)
zhat1 <- sapply(
  seq_len(sample),
  function(i) {
    knn(x = z[-i], z = u1[-i, ], zout = u1[i, ], k = k)
  }
)

# Identify the relevant predictors and print the selection.
sel <- stepwise.PIC(x = z, py = u1)
sel
#> $cpy
#> [1] 1
#> 
#> $cpyPIC
#>         u 
#> 0.2559138 
#> 
#> $wt
#> u 
#> 1 
#> 
#> $lstwet
#>  Intercept          X 
#> 0.17620490 0.00959311 
#> 
#> $icpy
#> [1] 1
# Same leave-one-out estimate, restricted to the selected column(s).
# zhat2 <- knnregl1cv(x=z, z=u1[,sel$cpy], k=k)
zhat2 <- sapply(
  seq_len(sample),
  function(i) {
    knn(x = z[-i], z = u1[-i, sel$cpy], zout = u1[i, sel$cpy], k = k)
  }
)

if (TRUE) {
  # Draw both estimates as lines over the scatter, ordered by u.
  ord <- order(u)
  plot(u, z, pch = 16)
  lines(u[ord], zhat1[ord], col = "green")
  lines(u[ord], zhat2[ord], col = "red")
  # Horizontal reference line at zero.
  abline(h = 0)
}
Illustration of the usage of partial weights

Figure 6.1: Illustration of the usage of partial weights