How to use RSDA 3.0

RSDA Package version 3

Oldemar Rodríguez R.

Installing the package

CRAN

Github

How to read a Symbolic Table from a CSV file with RSDA?

# Read the symbolic table stored in 'tsym1.csv' (header row present,
# ';' as field separator, '.' as decimal mark, row.names = 1) and print it.
ex3 <- read.sym.table(
  "tsym1.csv",
  header = TRUE,
  sep = ";",
  dec = ".",
  row.names = 1
)
ex3
#> # A tibble: 7 x 7
#>      F1              F2     F3    F4        F5               F6
#> * <dbl>      <interval> <hist> <dbl>     <set>       <interval>
#> 1   2.8   [1.00 : 2.00] <hist>   6       {a,d}   [0.00 : 90.00]
#> 2   1.4   [3.00 : 9.00] <hist>   8     {b,c,d} [-90.00 : 98.00]
#> 3   3.2  [-1.00 : 4.00] <hist>  -7       {a,b}  [65.00 : 90.00]
#> 4  -2.1   [0.00 : 2.00] <hist>   0   {a,b,c,d}  [45.00 : 89.00]
#> 5  -3   [-4.00 : -2.00] <hist>  -9.5       {b}  [20.00 : 40.00]
#> 6   0.1 [10.00 : 21.00] <hist>  -1       {a,d}    [5.00 : 8.00]
#> 7   9    [4.00 : 21.00] <hist>   0.5       {a}    [3.14 : 6.76]
#> # … with 1 more variable: F7 <interval>

How to save a Symbolic Table in a CSV file with RSDA?

# Write the symbolic table `ex3` to 'tsymtemp.csv' using ';' as field
# separator and '.' as decimal mark, keeping both row and column names.
write.sym.table(
  ex3,
  file = "tsymtemp.csv",
  sep = ";",
  dec = ".",
  row.names = TRUE,
  col.names = TRUE
)

Symbolic Data Frame Example in RSDA

# Load the `example3` dataset with data() and print it; the printed table
# below has 7 rows and 7 columns mixing numeric, interval, modal and set
# variables.
data(example3)
example3
#> # A tibble: 7 x 7
#>      F1              F2                      F3    F4        F5
#> * <dbl>      <interval>                 <modal> <dbl>     <set>
#> 1   2.8   [1.00 : 2.00] M1:0.10 M2:0.70 M3:0.20   6   {e,g,i,k}
#> 2   1.4   [3.00 : 9.00] M1:0.60 M2:0.30 M3:0.10   8   {a,b,c,d}
#> 3   3.2  [-1.00 : 4.00] M1:0.20 M2:0.20 M3:0.60  -7   {2,b,1,c}
#> 4  -2.1   [0.00 : 2.00] M1:0.90 M2:0.00 M3:0.10   0   {a,3,4,c}
#> 5  -3   [-4.00 : -2.00] M1:0.60 M2:0.00 M3:0.40  -9.5 {e,g,i,k}
#> 6   0.1 [10.00 : 21.00] M1:0.00 M2:0.70 M3:0.30  -1     {e,1,i}
#> 7   9    [4.00 : 21.00] M1:0.20 M2:0.20 M3:0.60   0.5   {e,a,2}
#> # … with 2 more variables: F6 <interval>, F7 <interval>
# Select the second row of the symbolic table, keeping every column.
example3[2, ]
#> # A tibble: 1 x 7
#>      F1            F2                      F3    F4        F5
#> * <dbl>    <interval>                 <modal> <dbl>     <set>
#> 1   1.4 [3.00 : 9.00] M1:0.60 M2:0.30 M3:0.10     8 {a,b,c,d}
#> # … with 2 more variables: F6 <interval>, F7 <interval>
# Select the third column (the modal variable F3) for all rows.
example3[, 3]
#> # A tibble: 7 x 1
#>                        F3
#> *                 <modal>
#> 1 M1:0.10 M2:0.70 M3:0.20
#> 2 M1:0.60 M2:0.30 M3:0.10
#> 3 M1:0.20 M2:0.20 M3:0.60
#> 4 M1:0.90 M2:0.00 M3:0.10
#> 5 M1:0.60 M2:0.00 M3:0.40
#> 6 M1:0.00 M2:0.70 M3:0.30
#> 7 M1:0.20 M2:0.20 M3:0.60
# Select rows 2 and 3 of the fifth column (the set variable F5).
example3[2:3, 5]
#> # A tibble: 2 x 1
#>          F5
#> *     <set>
#> 1 {a,b,c,d}
#> 2 {2,b,1,c}
# Extract the column F1 by name; the result prints as a plain numeric vector.
example3$F1
#> [1]  2.8  1.4  3.2 -2.1 -3.0  0.1  9.0

How to generate a symbolic data table from a classic data table in RSDA?

# Load the classic (non-symbolic) example table `ex1_db2so` and print it;
# it has one row per individual with columns state, sex, county, group, age.
data(ex1_db2so)
ex1_db2so
#>         state sex county group age
#> 1     Florida   M      2     6   3
#> 2  California   F      4     3   4
#> 3       Texas   M     12     3   4
#> 4     Florida   F      2     3   4
#> 5       Texas   M      4     6   4
#> 6       Texas   F      2     3   3
#> 7     Florida   M      6     3   4
#> 8     Florida   F      2     6   4
#> 9  California   M      2     3   6
#> 10 California   F     21     3   4
#> 11 California   M      2     3   4
#> 12 California   M      2     6   7
#> 13      Texas   F     23     3   4
#> 14    Florida   M      2     3   4
#> 15    Florida   F     12     7   4
#> 16      Texas   M      2     3   8
#> 17 California   F      3     7   9
#> 18 California   M      2     3  11
#> 19 California   M      1     3  11

The classic.to.sym function converts a traditional (classic) table into a symbolic one. To do this, we must specify the following parameters.

Example 1

We can add new variables indicating the type we want them to be.

Example 2

# Load the `USCrime` dataset and show only its first rows with head(),
# since the table is very wide (the output wraps over many column groups).
data(USCrime)
head(USCrime)
#>   state fold population householdsize racepctblack racePctWhite
#> 1     8    1       0.19          0.33         0.02         0.90
#> 2    53    1       0.00          0.16         0.12         0.74
#> 3    24    1       0.00          0.42         0.49         0.56
#> 4    34    1       0.04          0.77         1.00         0.08
#> 5    42    1       0.01          0.55         0.02         0.95
#> 6     6    1       0.02          0.28         0.06         0.54
#>   racePctAsian racePctHisp agePct12t21 agePct12t29 agePct16t24 agePct65up
#> 1         0.12        0.17        0.34        0.47        0.29       0.32
#> 2         0.45        0.07        0.26        0.59        0.35       0.27
#> 3         0.17        0.04        0.39        0.47        0.28       0.32
#> 4         0.12        0.10        0.51        0.50        0.34       0.21
#> 5         0.09        0.05        0.38        0.38        0.23       0.36
#> 6         1.00        0.25        0.31        0.48        0.27       0.37
#>   numbUrban pctUrban medIncome pctWWage pctWFarmSelf pctWInvInc pctWSocSec
#> 1      0.20      1.0      0.37     0.72         0.34       0.60       0.29
#> 2      0.02      1.0      0.31     0.72         0.11       0.45       0.25
#> 3      0.00      0.0      0.30     0.58         0.19       0.39       0.38
#> 4      0.06      1.0      0.58     0.89         0.21       0.43       0.36
#> 5      0.02      0.9      0.50     0.72         0.16       0.68       0.44
#> 6      0.04      1.0      0.52     0.68         0.20       0.61       0.28
#>   pctWPubAsst pctWRetire medFamInc perCapInc whitePerCap blackPerCap
#> 1        0.15       0.43      0.39      0.40        0.39        0.32
#> 2        0.29       0.39      0.29      0.37        0.38        0.33
#> 3        0.40       0.84      0.28      0.27        0.29        0.27
#> 4        0.20       0.82      0.51      0.36        0.40        0.39
#> 5        0.11       0.71      0.46      0.43        0.41        0.28
#> 6        0.15       0.25      0.62      0.72        0.76        0.77
#>   indianPerCap AsianPerCap OtherPerCap HispPerCap NumUnderPov
#> 1         0.27        0.27        0.36       0.41        0.08
#> 2         0.16        0.30        0.22       0.35        0.01
#> 3         0.07        0.29        0.28       0.39        0.01
#> 4         0.16        0.25        0.36       0.44        0.01
#> 5         0.00        0.74        0.51       0.48        0.00
#> 6         0.28        0.52        0.48       0.60        0.01
#>   PctPopUnderPov PctLess9thGrade PctNotHSGrad PctBSorMore PctUnemployed
#> 1           0.19            0.10         0.18        0.48          0.27
#> 2           0.24            0.14         0.24        0.30          0.27
#> 3           0.27            0.27         0.43        0.19          0.36
#> 4           0.10            0.09         0.25        0.31          0.33
#> 5           0.06            0.25         0.30        0.33          0.12
#> 6           0.12            0.13         0.12        0.80          0.10
#>   PctEmploy PctEmplManu PctEmplProfServ PctOccupManu PctOccupMgmtProf
#> 1      0.68        0.23            0.41         0.25             0.52
#> 2      0.73        0.57            0.15         0.42             0.36
#> 3      0.58        0.32            0.29         0.49             0.32
#> 4      0.71        0.36            0.45         0.37             0.39
#> 5      0.65        0.67            0.38         0.42             0.46
#> 6      0.65        0.19            0.77         0.06             0.91
#>   MalePctDivorce MalePctNevMarr FemalePctDiv TotalPctDiv PersPerFam
#> 1           0.68           0.40         0.75        0.75       0.35
#> 2           1.00           0.63         0.91        1.00       0.29
#> 3           0.63           0.41         0.71        0.70       0.45
#> 4           0.34           0.45         0.49        0.44       0.75
#> 5           0.22           0.27         0.20        0.21       0.51
#> 6           0.49           0.57         0.61        0.58       0.44
#>   PctFam2Par PctKids2Par PctYoungKids2Par PctTeen2Par PctWorkMomYoungKids
#> 1       0.55        0.59             0.61        0.56                0.74
#> 2       0.43        0.47             0.60        0.39                0.46
#> 3       0.42        0.44             0.43        0.43                0.71
#> 4       0.65        0.54             0.83        0.65                0.85
#> 5       0.91        0.91             0.89        0.85                0.40
#> 6       0.62        0.69             0.87        0.53                0.30
#>   PctWorkMom NumIlleg PctIlleg NumImmig PctImmigRecent PctImmigRec5
#> 1       0.76     0.04     0.14     0.03           0.24         0.27
#> 2       0.53     0.00     0.24     0.01           0.52         0.62
#> 3       0.67     0.01     0.46     0.00           0.07         0.06
#> 4       0.86     0.03     0.33     0.02           0.11         0.20
#> 5       0.60     0.00     0.06     0.00           0.03         0.07
#> 6       0.43     0.00     0.11     0.04           0.30         0.35
#>   PctImmigRec8 PctImmigRec10 PctRecentImmig PctRecImmig5 PctRecImmig8
#> 1         0.37          0.39           0.07         0.07         0.08
#> 2         0.64          0.63           0.25         0.27         0.25
#> 3         0.15          0.19           0.02         0.02         0.04
#> 4         0.30          0.31           0.05         0.08         0.11
#> 5         0.20          0.27           0.01         0.02         0.04
#> 6         0.43          0.47           0.50         0.50         0.56
#>   PctRecImmig10 PctSpeakEnglOnly PctNotSpeakEnglWell PctLargHouseFam
#> 1          0.08             0.89                0.06            0.14
#> 2          0.23             0.84                0.10            0.16
#> 3          0.05             0.88                0.04            0.20
#> 4          0.11             0.81                0.08            0.56
#> 5          0.05             0.88                0.05            0.16
#> 6          0.57             0.45                0.28            0.25
#>   PctLargHouseOccup PersPerOccupHous PersPerOwnOccHous PersPerRentOccHous
#> 1              0.13             0.33              0.39               0.28
#> 2              0.10             0.17              0.29               0.17
#> 3              0.20             0.46              0.52               0.43
#> 4              0.62             0.85              0.77               1.00
#> 5              0.19             0.59              0.60               0.37
#> 6              0.19             0.29              0.53               0.18
#>   PctPersOwnOccup PctPersDenseHous PctHousLess3BR MedNumBR HousVacant
#> 1            0.55             0.09           0.51      0.5       0.21
#> 2            0.26             0.20           0.82      0.0       0.02
#> 3            0.42             0.15           0.51      0.5       0.01
#> 4            0.94             0.12           0.01      0.5       0.01
#> 5            0.89             0.02           0.19      0.5       0.01
#> 6            0.39             0.26           0.73      0.0       0.02
#>   PctHousOccup PctHousOwnOcc PctVacantBoarded PctVacMore6Mos
#> 1         0.71          0.52             0.05           0.26
#> 2         0.79          0.24             0.02           0.25
#> 3         0.86          0.41             0.29           0.30
#> 4         0.97          0.96             0.60           0.47
#> 5         0.89          0.87             0.04           0.55
#> 6         0.84          0.30             0.16           0.28
#>   MedYrHousBuilt PctHousNoPhone PctWOFullPlumb OwnOccLowQuart OwnOccMedVal
#> 1           0.65           0.14           0.06           0.22         0.19
#> 2           0.65           0.16           0.00           0.21         0.20
#> 3           0.52           0.47           0.45           0.18         0.17
#> 4           0.52           0.11           0.11           0.24         0.21
#> 5           0.73           0.05           0.14           0.31         0.31
#> 6           0.25           0.02           0.05           0.94         1.00
#>   OwnOccHiQuart RentLowQ RentMedian RentHighQ MedRent MedRentPctHousInc
#> 1          0.18     0.36       0.35      0.38    0.34              0.38
#> 2          0.21     0.42       0.38      0.40    0.37              0.29
#> 3          0.16     0.27       0.29      0.27    0.31              0.48
#> 4          0.19     0.75       0.70      0.77    0.89              0.63
#> 5          0.30     0.40       0.36      0.38    0.38              0.22
#> 6          1.00     0.67       0.63      0.68    0.62              0.47
#>   MedOwnCostPctInc MedOwnCostPctIncNoMtg NumInShelters NumStreet
#> 1             0.46                  0.25          0.04         0
#> 2             0.32                  0.18          0.00         0
#> 3             0.39                  0.28          0.00         0
#> 4             0.51                  0.47          0.00         0
#> 5             0.51                  0.21          0.00         0
#> 6             0.59                  0.11          0.00         0
#>   PctForeignBorn PctBornSameState PctSameHouse85 PctSameCity85
#> 1           0.12             0.42           0.50          0.51
#> 2           0.21             0.50           0.34          0.60
#> 3           0.14             0.49           0.54          0.67
#> 4           0.19             0.30           0.73          0.64
#> 5           0.11             0.72           0.64          0.61
#> 6           0.70             0.42           0.49          0.73
#>   PctSameState85 LandArea PopDens PctUsePubTrans LemasPctOfficDrugUn
#> 1           0.64     0.12    0.26           0.20                0.32
#> 2           0.52     0.02    0.12           0.45                0.00
#> 3           0.56     0.01    0.21           0.02                0.00
#> 4           0.65     0.02    0.39           0.28                0.00
#> 5           0.53     0.04    0.09           0.02                0.00
#> 6           0.64     0.01    0.58           0.10                0.00
#>   ViolentCrimesPerPop
#> 1                0.20
#> 2                0.67
#> 3                0.43
#> 4                0.12
#> 5                0.03
#> 6                0.14

Example 3

Example 4

We can modify the function that will be applied by default to the categorical variables

Converting SODAS 1.0 *.SDS files to RSDA files

Converting SODAS 2.0 *.XML files to RSDA files

Basic statistics

Symbolic correlation

Radar plot for intervals

Distances for intervals

Linear regression for intervals

LASSO regression for intervals

RIDGE regression for intervals

PCA for intervals

Symbolic Multiple Correspondence Analysis

Example 1

# Load the classic example table `ex_mcfa1` and print it; each row records
# a suspect id together with age, hair colour, eye colour and region.
# Note that suspect ids repeat across rows (e.g. rows 1 and 101 both have
# suspect 1).
data("ex_mcfa1") 
ex_mcfa1
#>     suspect age     hair    eyes    region
#> 1         1  42    h_red e_brown     Bronx
#> 2         2  20  h_black e_green     Bronx
#> 3         3  64  h_brown e_brown  Brooklyn
#> 4         4  55 h_blonde e_brown     Bronx
#> 5         5   4  h_brown e_green Manhattan
#> 6         6  61 h_blonde e_green     Bronx
#> 7         7  61  h_white e_black    Queens
#> 8         8  32 h_blonde e_brown Manhattan
#> 9         9  39 h_blonde e_black  Brooklyn
#> 10       10  50  h_brown e_brown Manhattan
#> 11       11  41    h_red  e_blue Manhattan
#> 12       12  35 h_blonde e_green  Brooklyn
#> 13       13  56 h_blonde e_brown     Bronx
#> 14       14  52    h_red e_brown    Queens
#> 15       15  55    h_red e_green  Brooklyn
#> 16       16  25  h_brown e_brown    Queens
#> 17       17  52 h_blonde e_brown  Brooklyn
#> 18       18  28    h_red e_brown Manhattan
#> 19       19  21  h_white  e_blue Manhattan
#> 20       20  66  h_black e_black  Brooklyn
#> 21       21  67 h_blonde e_brown    Queens
#> 22       22  13  h_white  e_blue  Brooklyn
#> 23       23  39  h_brown e_green Manhattan
#> 24       24  47  h_black e_green  Brooklyn
#> 25       25  54 h_blonde e_brown     Bronx
#> 26       26  75  h_brown  e_blue  Brooklyn
#> 27       27   3  h_white e_green Manhattan
#> 28       28  40  h_white e_green Manhattan
#> 29       29  58    h_red  e_blue    Queens
#> 30       30  41  h_brown e_green     Bronx
#> 31       31  25  h_white e_black  Brooklyn
#> 32       32  75 h_blonde  e_blue Manhattan
#> 33       33  58  h_white e_brown     Bronx
#> 34       34  61  h_white e_brown Manhattan
#> 35       35  52  h_white  e_blue     Bronx
#> 36       36  19    h_red e_black    Queens
#> 37       37  58    h_red e_black     Bronx
#> 38       38  46  h_black e_green Manhattan
#> 39       39  74  h_brown e_black Manhattan
#> 40       40  26 h_blonde e_brown  Brooklyn
#> 41       41  63 h_blonde  e_blue    Queens
#> 42       42  40  h_brown e_black    Queens
#> 43       43  65  h_black e_brown  Brooklyn
#> 44       44  51 h_blonde e_brown  Brooklyn
#> 45       45  15  h_white e_black  Brooklyn
#> 46       46  32 h_blonde e_brown     Bronx
#> 47       47  68  h_white e_black Manhattan
#> 48       48  51  h_white e_black    Queens
#> 49       49  14    h_red e_green    Queens
#> 50       50  72  h_white e_brown  Brooklyn
#> 51       51   7    h_red  e_blue  Brooklyn
#> 52       52  22    h_red e_brown     Bronx
#> 53       53  52    h_red e_brown  Brooklyn
#> 54       54  62  h_brown e_green     Bronx
#> 55       55  41  h_black e_brown    Queens
#> 56       56  32  h_black e_black Manhattan
#> 57       57  58  h_brown e_brown    Queens
#> 58       58  25  h_black e_brown    Queens
#> 59       59  70 h_blonde e_green  Brooklyn
#> 60       60  64  h_brown  e_blue    Queens
#> 61       61  25  h_white  e_blue     Bronx
#> 62       62  42  h_black e_black  Brooklyn
#> 63       63  56    h_red e_black  Brooklyn
#> 64       64  41 h_blonde e_black  Brooklyn
#> 65       65   8  h_white e_black Manhattan
#> 66       66   7  h_black e_green  Brooklyn
#> 67       67  42  h_white e_brown    Queens
#> 68       68  10  h_white  e_blue Manhattan
#> 69       69  60  h_brown e_black     Bronx
#> 70       70  52 h_blonde e_brown  Brooklyn
#> 71       71  39  h_brown  e_blue Manhattan
#> 72       72  69  h_brown e_green    Queens
#> 73       73  67 h_blonde e_green Manhattan
#> 74       74  46    h_red e_black  Brooklyn
#> 75       75  72  h_black e_black    Queens
#> 76       76  66    h_red  e_blue    Queens
#> 77       77   4  h_black  e_blue Manhattan
#> 78       78  62  h_black e_green  Brooklyn
#> 79       79  10 h_blonde  e_blue     Bronx
#> 80       80  16 h_blonde e_black Manhattan
#> 81       81  59 h_blonde e_brown     Bronx
#> 82       82  63 h_blonde  e_blue Manhattan
#> 83       83  54    h_red  e_blue    Queens
#> 84       84  14  h_brown  e_blue  Brooklyn
#> 85       85  48  h_black e_green Manhattan
#> 86       86  59 h_blonde e_black     Bronx
#> 87       87  73 h_blonde e_black     Bronx
#> 88       88  51  h_brown e_brown     Bronx
#> 89       89  14  h_white e_black     Bronx
#> 90       90  58 h_blonde e_black    Queens
#> 91       91  56    h_red e_green Manhattan
#> 92       92  26    h_red  e_blue  Brooklyn
#> 93       93  59  h_brown e_black Manhattan
#> 94       94  27  h_white e_green Manhattan
#> 95       95  38  h_black e_green Manhattan
#> 96       96   5 h_blonde e_green     Bronx
#> 97       97  14  h_black  e_blue    Queens
#> 98       98  13  h_black e_brown Manhattan
#> 99       99  54  h_white  e_blue  Brooklyn
#> 100     100  66  h_white e_green Manhattan
#> 101       1  22    h_red e_black     Bronx
#> 102       2  57 h_blonde e_black Manhattan
#> 103       3  29  h_white e_green    Queens
#> 104       4  14 h_blonde e_black Manhattan
#> 105       5  47    h_red e_green     Bronx
#> 106       6  32  h_white  e_blue    Queens
#> 107       7  49    h_red  e_blue     Bronx
#> 108       8   8  h_white e_black  Brooklyn
#> 109       9  67  h_white e_brown     Bronx
#> 110      10  68  h_black e_green     Bronx
#> 111      11  15  h_black e_brown Manhattan
#> 112      12  46  h_white e_brown     Bronx
#> 113      13  68  h_white e_black Manhattan
#> 114      14  55 h_blonde  e_blue Manhattan
#> 115      15   7  h_white e_green     Bronx
#> 116      16  10  h_black e_brown  Brooklyn
#> 117      17  49    h_red  e_blue Manhattan
#> 118      18  12  h_brown  e_blue  Brooklyn
#> 119      19  41  h_white  e_blue     Bronx
#> 120      20  10  h_brown  e_blue     Bronx
#> 121      21  12  h_white e_green Manhattan
#> 122      22  53  h_white  e_blue Manhattan
#> 123      23   5  h_black e_black Manhattan
#> 124      24  46  h_brown e_black    Queens
#> 125      25  14  h_brown e_black    Queens
#> 126      26  55  h_white e_green  Brooklyn
#> 127      27  53    h_red e_brown Manhattan
#> 128      28  31  h_black e_brown Manhattan
#> 129      29  31 h_blonde e_brown    Queens
#> 130      30  55  h_brown e_black  Brooklyn