Using a logistic regression model, we can forecast the future trend of the market.

The sample model reaches about 94 percent accuracy on the in-sample data.

The details are explained in the code comments.

# Clear the console. "\014" is the form-feed character, which RStudio treats
# as "clear screen". The previous spelling had a space after the backslash,
# which is not a valid escape sequence and stopped the script from parsing.
cat("\014")

# Load the sample stock

library(quantmod)
# Download the Dow Jones Industrial Average from Yahoo Finance. The data is
# read back below from the object named `DJI` (symbol without the caret).
getSymbols("^DJI", src = "yahoo")
# Keep only the closing-price column.
dji <- DJI[, "DJI.Close"]

# Generate technical indicators
# (argument lists previously had their commas garbled into dots, e.g.
# `5."SMA"`, which is not valid R.)

# 10- and 20-observation rolling mean and standard deviation of the close.
avg10 <- rollapply(dji, 10, mean)
avg20 <- rollapply(dji, 20, mean)
std10 <- rollapply(dji, 10, sd)
std20 <- rollapply(dji, 20, sd)

# Relative Strength Index over 5 and 14 periods, simple moving average.
rsi5 <- RSI(dji, n = 5, maType = "SMA")
rsi14 <- RSI(dji, n = 14, maType = "SMA")

# MACD with (fast, slow, signal) periods of (12, 26, 9) and (7, 20, 5).
macd12269 <- MACD(dji, nFast = 12, nSlow = 26, nSig = 9, maType = "SMA")
macd7205 <- MACD(dji, nFast = 7, nSlow = 20, nSig = 5, maType = "SMA")

# Bollinger Bands: 20-period simple moving average, 2 standard deviations.
bbands <- BBands(dji, n = 20, maType = "SMA", sd = 2)

# Generate market direction: 1 when the close is above the close 20
# observations earlier, 0 when it is below. Note that Lag(dji, 20) looks
# backward, so this labels the trailing 20-day move rather than a future one.
# Rows without 20 observations of history, and exact ties, stay NA.

# Preallocate so the vector always has one entry per row of `dji`.
direction <- rep(NA_real_, NROW(dji))
direction[dji > Lag(dji, 20)] <- 1
direction[dji < Lag(dji, 20)] <- 0

# merge result
# Bind the close, every indicator and the direction label into one object;
# the label is the last column.
dji <-
  cbind(dji,
        avg10,
        avg20,
        std10,
        std20,
        rsi5,
        rsi14,
        macd12269,
        macd7205,
        bbands,
        direction)

dm <- dim(dji)
dm
# Name the last column so the model formula can refer to it as `Direction`.
# (The assignment arrow had been garbled into `dm[2< -]]"Direction"`.)
colnames(dji)[dm[2]] <- "Direction"
colnames(dji)[dm[2]]

# In sample IS and out of sample OS

# Inclusive start/end dates of the in-sample and out-of-sample windows.
issd <- "2010-01-01"
ised <- "2014-12-31"
ossd <- "2015-01-01"
osed <- "2015-12-31"

# Row positions of `dji` whose index falls inside each window.
# (These statements had been fused onto a single line without separators.)
isrow <- which(index(dji) >= issd & index(dji) <= ised)
osrow <- which(index(dji) >= ossd & index(dji) <= osed)

isdji <- dji[isrow, ]
osdji <- dji[osrow, ]

# Data standardization and transformation

# Per-column location and spread, estimated on the in-sample rows only.
isstd <- apply(isdji, 2, sd, na.rm = TRUE)
isme <- apply(isdji, 2, mean, na.rm = TRUE)

dm <- dim(isdji)

# Repeat each column statistic down the rows so it lines up with `isdji`.
is_center <- matrix(isme, nrow = dm[1], ncol = dm[2], byrow = TRUE)
is_scale <- matrix(isstd, nrow = dm[1], ncol = dm[2], byrow = TRUE)

# z-score every column.
norm_isdji <- (isdji - is_center) / is_scale

# The last column is the 0/1 label, so put the raw values back.
norm_isdji[, dm[2]] <- direction[isrow]

# modeling

# Logistic regression of Direction on every other column of the
# standardized in-sample data.
formula <- as.formula("Direction ~ .")
model <- glm(formula, family = "binomial", data = norm_isdji)

summary(model)

# predict() is called without `type`, so `pred` is on the link (log-odds)
# scale.
pred <- predict(model, norm_isdji)

# Inverse logit: convert log-odds to probabilities.
prob <- 1 / (1 + exp(-pred))

# Fitting effect and probability value

# par(mfrow = c(2, 1))
# NOTE(review): the line above appears to have been disabled because it
# raised "Error in plot.new() : Figure Margins too large". That console
# message had been pasted into the script as code and broke parsing; it is
# kept here as a comment only.
plot(pred, type = "l")
plot(prob, type = "l")

# Classify at the 0.5 probability threshold. These were written as
# `< -1` / `< -0`, i.e. comparisons against negative numbers, so nothing was
# ever assigned and `pred_direction` stayed NULL.
pred_direction <- NULL
pred_direction[prob > 0.5] <- 1
pred_direction[prob <= 0.5] <- 0

# Accuracy rate of model prediction

library(caret)
# Pin both factors to the same level set (0, 1). With as.factor() the levels
# depend on which values happen to occur, so a model that predicts a single
# class would hand confusionMatrix() factors with different levels.
ismatrix <- confusionMatrix(
  data = factor(pred_direction, levels = c(0, 1)),
  reference = factor(as.numeric(norm_isdji$Direction), levels = c(0, 1))
)

ismatrix

# Test generalization performance for out-of-sample data

# Standardize the out-of-sample rows with the IN-SAMPLE mean and standard
# deviation (`isme`, `isstd`), so no out-of-sample information reaches the
# model inputs.
osidn <- matrix(1, dim(osdji)[1], dim(osdji)[2])
norm_osdji <- (osdji - t(isme * t(osidn))) / t(isstd * t(osidn))
# Restore the raw 0/1 label in the last column.
norm_osdji[, dm[2]] <- direction[osrow]

# Log-odds from the fitted model, then inverse logit to probabilities.
ospred <- predict(model, norm_osdji)
osprob <- 1 / (1 + exp(-ospred))

# Classify at the 0.5 threshold. These were written as `< -1` / `< -0`
# (comparisons), which left `ospred_direction` as NULL.
ospred_direction <- NULL
ospred_direction[osprob > 0.5] <- 1
ospred_direction[osprob <= 0.5] <- 0

# Pin both factors to levels (0, 1) so they always agree, even if only one
# class is predicted.
osmatrix <- confusionMatrix(
  data = factor(ospred_direction, levels = c(0, 1)),
  reference = factor(as.numeric(norm_osdji$Direction), levels = c(0, 1))
)
osmatrix

Copy the code

Results

Model overview

> summary(model)

Call:
glm(formula = formula, family = "binomial", data = norm_isdji)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-3.0080  -0.0107   0.0366   0.1533   3.1790  

Coefficients: (3 not defined because of singularities)
              Estimate Std. Error z value Pr(>|z|)    
(Intercept)   1.658760   0.222691   7.449 9.43e-14 ***
DJI.Close    44.051359   8.409499   5.238 1.62e-07 ***
DJI.Close.1 -44.561952  17.549358  -2.539   0.0111 *  
DJI.Close.2   0.577137  17.620013   0.033   0.9739    
DJI.Close.3  -0.003556   0.291865  -0.012   0.9903    
DJI.Close.4  -0.264309   0.312768  -0.845   0.3981    
rsi           0.046117   0.339620   0.136   0.8920    
rsi.1        -2.306590   0.565594  -4.078 4.54e-05 ***
macd          2.562233   1.300929   1.970   0.0489 *  
signal        1.476838   0.610356   2.420   0.0155 *  
macd.1       -1.032963   0.798086  -1.294   0.1956    
signal.1      3.871052   1.635221   2.367   0.0179 *  
dn                  NA         NA      NA       NA    
mavg                NA         NA      NA       NA    
up                  NA         NA      NA       NA    
pctB          1.269642   0.521006   2.437   0.0148 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 1579.37  on 1257  degrees of freedom
Residual deviance:  348.17  on 1245  degrees of freedom
AIC: 374.17

Number of Fisher Scoring iterations: 8
Copy the code

Model fitting effect

Predicted probability

The accuracy rate of in-sample data is 93.88%

> ismatrix
Confusion Matrix and Statistics

          Reference
Prediction   0   1
         0 362  35
         1  42 819
                                          
               Accuracy : 0.9388          
                 95% CI : (0.9241, 0.9514)
    No Information Rate : 0.6789          
    P-Value [Acc > NIR] : <2e-16          
                                          
                  Kappa : 0.859           
                                          
 Mcnemar's Test P-Value : 0.4941

            Sensitivity : 0.8960
            Specificity : 0.9590
         Pos Pred Value : 0.9118
         Neg Pred Value : 0.9512
             Prevalence : 0.3211
         Detection Rate : 0.2878
   Detection Prevalence : 0.3156
      Balanced Accuracy : 0.9275

       'Positive' Class : 0
Copy the code

The accuracy rate of out of sample data is 84.92%

> osmatrix
Confusion Matrix and Statistics

          Reference
Prediction   0   1
         0 115  26
         1  12  99
                                         
               Accuracy : 0.8492         
                 95% CI : (0.7989, 0.891)
    No Information Rate : 0.504          
    P-Value [Acc > NIR] : < 2e-16        
                                         
                  Kappa : 0.6981         
                                         
 Mcnemar's Test P-Value : 0.03496

            Sensitivity : 0.9055
            Specificity : 0.7920
         Pos Pred Value : 0.8156
         Neg Pred Value : 0.8919
             Prevalence : 0.5040
         Detection Rate : 0.4563
   Detection Prevalence : 0.5595
      Balanced Accuracy : 0.8488

       'Positive' Class : 0
Copy the code

Quantitative investment and futures foreign exchange scattered fairy, fund insurance level is also taken out