options(gsubfn.engine = "R")
library('sqldf')
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
## Loading required package: DBI
library('ggplot2')
# simulate playing a casino up to nStep times or stopping at first win-streak of length 3 (8=2^3)
nStep=10
pWin=1/8
vWin = 3500000
vLoss =-400000
d <- c()
pStage <- 1
cumulative <- 0
i <- 1
while(i<=nStep) {
pWi <- pStage*pWin
di <- data.frame(key=paste('s',i,'win'),
prob=pWi,
value=vWin+cumulative)
d <- rbind(d,di)
cumulative <- cumulative + vLoss
pStage <- (1-pWin)*pStage
i <- i+1
}
di <- data.frame(key=paste('s',i-1,'loss'),
prob=pStage,
value=cumulative)
d <- rbind(d,di)
d$expectedImpact <- d$prob*d$value
dens <- density(d$value,weights=d$prob,adjust=0.2)
# plot(dens)
# fails
ggplot(aes(x=value,y=density),data=data.frame(value=dens$x,density=dens$y)) + geom_line()
print(d)
## key prob value expectedImpact
## 1 s 1 win 0.12500000 3500000 437500.000
## 2 s 2 win 0.10937500 3100000 339062.500
## 3 s 3 win 0.09570312 2700000 258398.438
## 4 s 4 win 0.08374023 2300000 192602.539
## 5 s 5 win 0.07327271 1900000 139218.140
## 6 s 6 win 0.06411362 1500000 96170.425
## 7 s 7 win 0.05609941 1100000 61709.356
## 8 s 8 win 0.04908699 700000 34360.892
## 9 s 9 win 0.04295111 300000 12885.334
## 10 s 10 win 0.03758223 -100000 -3758.223
## 11 s 10 loss 0.26307558 -4000000 -1052302.305
print(sum(d$prob))
## [1] 1
print(sum(d$prob[d$value>=0]))
## [1] 0.6993422
expectedProfit <- sum(d$prob*d$value)
print(expectedProfit)
## [1] 515847.1
stake <- abs(min(d$value))
print(stake)
## [1] 4e+06
print(expectedProfit/stake)
## [1] 0.1289618
dPlus <- d[d$value>=0,]
print(sum(dPlus$prob*dPlus$value)/sum(dPlus$prob))
## [1] 2247695
xm <- sum(d$value*d$prob)/sum(d$prob)
var <- sum(d$prob*(d$value-xm)^2)/sum(d$prob)
sd <- sqrt(var)
SharpeRatio <- xm/sd
print(SharpeRatio)
## [1] 0.1804603
#' @param d1 data frame with columns value, prob (prob non-negative, sums to 1 or less), and optional nwin
#' @param d2 data frame with columns value, prob (prob non-negative, sums to 1 or less), and optional nwin
#' @return data frame with columns value and prob (prob non-negative, sums to 1) representing the probility any of sum of d1.value+d2.value would be seen when one event is draw from the first frame (with probability d1.prob) and another event is draw from the second frame (with probability d2.prob). This is commonly called a convolution of discrete probability distributions.
convoluteDist <- function(d1,d2) {
if(!('nwin' %in% colnames(d1))) {
d1$nwin <- ifelse(d1$value>0,1,0)
}
if(!('nwin' %in% colnames(d2))) {
d2$nwin <- ifelse(d2$value>0,1,0)
}
sqldf('SELECT
d1.nwin + d2.nwin as nwin,
d1.value + d2.value as value,
SUM(d1.prob * d2.prob) as prob
FROM
d1
JOIN
d2
WHERE
d1.prob>0 AND d2.prob>0
GROUP BY
d1.nwin + d2.nwin,
d1.value + d2.value
ORDER BY
d1.value + d2.value,
d1.nwin + d2.nwin')
}
#' @param d data frame with columns value and prob (prob non-negative, sums to 1)
#' @param k integer >0
#' @return d convoluted with itself k times. repersents the distibutions of outcomes of sums of k-events
convK <- function(d,k) {
d2 <- d
for(i in seq_len(k-1)) {
d2 <- convoluteDist(d,d2)
}
d2
}
# simulate playing at 4 casinos
dK <- convK(d,4)
# report
dens <- density(dK$value,weights=dK$prob,adjust=0.2)
ggplot(aes(x=value,y=density),data=data.frame(value=dens$x,density=dens$y)) + geom_line()
interestRate <- 1.05^(10/365)-1
print(interestRate)
## [1] 0.001337611
costOfMoney <- interestRate*stake
print(costOfMoney)
## [1] 5350.443
print(sum(dK$prob))
## [1] 1
print(sum(dK$prob[dK$value>=0]))
## [1] 0.6712816
print(sum(dK$prob*dK$value))
## [1] 2063388
xm <- (sum(dK$value*dK$prob)-costOfMoney)/sum(dK$prob)
var <- sum(dK$prob*(dK$value-xm)^2)/sum(dK$prob)
sd <- sqrt(var)
SharpeRatio <- xm/sd
print(SharpeRatio)
## [1] 0.3599846
# see what we know condition on winning exactly 3 times out of 4 casinos
d3 <- dK[dK$nwin==3,]
print(sum(d3$prob))
## [1] 0.4113407
print(sum(d3$prob*d3$value)/sum(d3$prob))
## [1] 3230584