Thompson sampling in Python¶
This notebook provides a step-by-step guide to implementing basic Thompson sampling in Python.
Using this implementation, we will then run simulations to test the theoretical results from class.
We will use the numpy package for all our numerical calculations.
import numpy as np
Let's go over some preliminaries to familiarize ourselves with the required commands.
Assume we have three treatment arms, 0, 1, and 2, and two possible outcomes, 0 and 1.
To get started, define two example vectors of treatment values and outcome values.
We just use these for illustration, and to develop our implementation.
treatments = np.array([0,1,0,1,0,2])
outcomes = np.array([0,0,0,1,0,1])
Which observations have treatment value 1?
np.equal(treatments, 1)
array([False, True, False, True, False, False])
What were the outcomes for these observations?
outcomes[np.equal(treatments, 1)]
array([0, 1])
What is the total number of successes (outcome 1) for these observations?
np.sum(outcomes[np.equal(treatments, 1)])
1
What is the total number of failures (outcome 0) for these observations?
np.sum(1 - outcomes[np.equal(treatments, 1)])
1
Now count total successes and total failures for each treatment arm.
The following uses Python's elegant list comprehension syntax, instead of an explicit loop over the k treatment arms.
k = 3
successes = np.array([np.sum( outcomes[np.equal(treatments, d)]) for d in range(k)])
failures = np.array([np.sum(1 - outcomes[np.equal(treatments, d)]) for d in range(k)])
print(successes)
print(failures)
[0 1 1]
[3 1 0]
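For comparison, here is a sketch of the same computation with an explicit loop; it is equivalent, just more verbose. (The variable names successes_loop and failures_loop are ours, chosen to avoid overwriting the arrays above.)
successes_loop = np.zeros(k, dtype = int)
failures_loop = np.zeros(k, dtype = int)
for d in range(k):
    arm_outcomes = outcomes[np.equal(treatments, d)]  # outcomes of units assigned to arm d
    successes_loop[d] = np.sum(arm_outcomes)          # number of 1s for arm d
    failures_loop[d] = np.sum(1 - arm_outcomes)       # number of 0s for arm d
print(successes_loop)
print(failures_loop)
This should print the same [0 1 1] and [3 1 0] as above.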
Generate a random draw from the beta distribution with parameters 1 and 2.
np.random.beta(1, 2)
0.20135263034038475
Assume a uniform prior for each treatment arm.
Then the true mean of each arm has a posterior distribution which is a Beta distribution with parameters 1 + successes and 1 + failures.
For example, treatment 2 in the above example has 1 success and 0 failures, so we get a Beta(2,1) posterior.
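As a quick sanity check of this formula, we can draw many samples from the Beta(2,1) posterior and compare their average to the theoretical posterior mean $2/(2+1) = 2/3$. (This cell is purely illustrative; the number of draws is arbitrary.)
posterior_samples = np.random.beta(2, 1, size = 100000)  # many draws from the Beta(2,1) posterior
print(posterior_samples.mean())  # should be close to 2/3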
We can sample one draw from the posterior for each arm as follows, since numpy functions are vectorized.
posterior_draw = np.random.beta(1 + successes, 1 + failures)
print(posterior_draw)
[0.19611936 0.42983356 0.3486667 ]
Which of these draws is the largest? (Remember that Python indices start at 0.)
np.argmax(posterior_draw)
1
Now we are ready to put everything together into one function.
This function takes vectors of treatment values and outcomes, and returns a treatment value according to Thompson sampling.
def thompson(outcomes, treatments, k):
    '''Given a vector of binary outcomes and discrete treatments,
    return a treatment value according to Thompson sampling.'''
    # Count successes (1s) and failures (0s) for each of the k arms.
    successes = np.array([np.sum(outcomes[np.equal(treatments, d)]) for d in range(k)])
    failures = np.array([np.sum(1 - outcomes[np.equal(treatments, d)]) for d in range(k)])
    # Draw once from each arm's Beta posterior; assign the arm with the largest draw.
    posterior_draw = np.random.beta(1 + successes, 1 + failures)
    return np.argmax(posterior_draw)
Now let's test this function.
Every time we call it, it returns a treatment value.
This is the treatment that we should assign to the next unit, according to Thompson sampling.
thompson(outcomes, treatments, k)
1
Let's do that again. Thompson sampling returns a random treatment, so the answer might be different each time.
thompson(outcomes, treatments, k)
0
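To see this randomness more systematically, we can call the function many times and tally how often each arm gets assigned; arm 2 should come up most often, since its Beta(2,1) posterior has the highest mean. (This is just a sketch for illustration; the exact counts will vary from run to run.)
assignments = np.array([thompson(outcomes, treatments, k) for _ in range(1000)])
print(np.bincount(assignments, minlength = k))  # assignment counts for arms 0, 1, 2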
Code for simulations¶
The function that we just built is all you need to run an experiment using Thompson sampling.
But to simulate how well Thompson sampling performs, we need to do more work.
Let us first build a function which simulates outcomes according to the parameter vector $\theta$, and assigns treatment according to Thompson sampling.
We can simulate outcomes using np.random.binomial.
The following does so for an example value of $\theta$.
theta = np.array([.2,.5,.8])
treatment = 2
np.random.binomial(1, theta[treatment])
1
We can pre-allocate memory by using np.empty. This is good practice for faster computation.
(Note that np.empty does not initialize the array; the values you see are whatever happened to be in memory, so do not rely on them being 0.)
np.empty(10, dtype = np.int8)
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8)
To simulate a trajectory of treatments and outcomes, we need to loop over time periods t going from 1 to T.
In each time period, the thompson function uses the trajectory generated thus far to give a new treatment assignment.
The outcome is then simulated using this assigned treatment.
The following function returns trajectories of treatment, outcome, and expected outcome for each time period, stored in a matrix.
def simulate_thompson_path(theta, T):
    '''Given a vector of expected potential outcomes,
    simulate a path of outcomes and treatments according to Thompson sampling.'''
    k = len(theta)
    treatment = np.empty(T, dtype = np.int8)
    outcome = np.empty(T, dtype = np.int8)
    for t in range(T):
        # Assign treatment based on the trajectory observed so far (periods 0, ..., t-1).
        treatment[t] = thompson(outcome[:t], treatment[:t], k)
        # Simulate the outcome of the assigned treatment.
        outcome[t] = np.random.binomial(1, theta[treatment[t]])
    return np.array([treatment, outcome, theta[treatment]])
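One optional note: if you want the simulated path below to be exactly reproducible across runs, you can seed numpy's random number generator first. (The seed value is arbitrary, and this only seeds the current process, so it does not control the parallel simulations further down.)
np.random.seed(42)  # arbitrary seed, purely for reproducibility; remove for fresh randomness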
Let's test this function.
theta = np.array([.2,.5,.8])
T = 10
print(simulate_thompson_path(theta, T))
[[0.  2.  2.  2.  2.  2.  2.  1.  1.  1. ]
 [0.  1.  1.  1.  1.  1.  1.  1.  1.  0. ]
 [0.2 0.8 0.8 0.8 0.8 0.8 0.8 0.5 0.5 0.5]]
Now we want to set up simulations where we evaluate the performance of Thompson sampling.
To do so, we want to run the simulation many times, from scratch, and to store the results.
Any time you have to do the same computation many times, parallel processing is a good idea to make things run faster.
Parallel processing is easy in Python with the multiprocess package.
(Unlike the standard library's multiprocessing, multiprocess uses dill for serialization, which is what lets us pass a lambda to Pool.map below.)
from multiprocess import Pool
The following function uses Pool to repeatedly (reps times) run simulate_thompson_path.
It stores the results in the list paths, and then returns the average across simulations.
Recall that we want to maximize the average of outcomes. If the algorithm works well, this average should go up over time.
def average_thompson_path(theta, T, reps = 5000):
    '''Simulate reps replicate Thompson paths and average them.'''
    with Pool() as pool:
        # Run the simulation reps times in parallel.
        paths = pool.map(lambda _: simulate_thompson_path(theta, T),
                         range(reps))
    # Average treatment, outcome, and expected outcome across replications, period by period.
    return np.array(paths).mean(axis = 0)
Let's try it, for a small number of reps (just so it runs quickly).
average_thompson_path(theta, 80, reps = 100)
array([[1.1  , 1.06 , 1.32 , 1.5  , 1.39 , 1.34 , 1.58 , 1.56 , 1.41 , 1.56 , 1.41 , 1.83 , 1.82 , 1.81 , 1.74 , 1.91 , 1.97 , 1.55 , 1.84 , 1.9  , 1.59 , 1.59 , 1.98 , 1.84 , 1.92 , 1.75 , 1.67 , 1.44 , 1.84 , 1.75 , 1.84 , 1.91 , 1.83 , 1.59 , 1.67 , 1.84 , 1.83 , 1.84 , 1.68 , 1.82 , 1.76 , 1.6  , 1.83 , 2.   , 1.76 , 1.83 , 1.92 , 2.   , 2.   , 2.   , 1.76 , 1.99 , 1.99 , 2.   , 1.76 , 1.99 , 1.84 , 2.   , 1.76 , 2.   , 2.   , 2.   , 2.   , 1.99 , 1.92 , 1.84 , 1.84 , 2.   , 1.84 , 1.92 , 1.52 , 1.99 , 2.   , 1.67 , 2.   , 1.92 , 1.99 , 2.   , 1.84 , 2.   ],
       [0.65 , 0.57 , 0.74 , 0.82 , 0.67 , 0.74 , 0.83 , 0.57 , 0.75 , 0.73 , 0.68 , 0.82 , 0.92 , 0.83 , 0.66 , 0.83 , 0.73 , 0.5  , 0.68 , 0.75 , 0.6  , 0.74 , 0.91 , 0.91 , 0.67 , 0.76 , 0.76 , 0.67 , 0.75 , 0.84 , 0.68 , 0.68 , 0.59 , 0.83 , 0.75 , 0.67 , 0.91 , 1.   , 0.92 , 0.59 , 0.83 , 0.68 , 0.73 , 0.91 , 0.76 , 0.75 , 0.68 , 0.6  , 0.89 , 0.68 , 0.83 , 0.83 , 0.76 , 0.66 , 1.   , 0.99 , 0.59 , 0.76 , 0.67 , 1.   , 0.83 , 0.75 , 0.84 , 0.92 , 0.75 , 0.59 , 0.83 , 0.9  , 0.84 , 0.75 , 0.58 , 0.84 , 0.6  , 0.83 , 0.82 , 0.83 , 0.9  , 0.68 , 0.92 , 0.91 ],
       [0.53 , 0.518, 0.596, 0.65 , 0.617, 0.602, 0.674, 0.668, 0.623, 0.668, 0.623, 0.749, 0.746, 0.743, 0.722, 0.773, 0.791, 0.665, 0.752, 0.77 , 0.677, 0.677, 0.794, 0.752, 0.776, 0.725, 0.701, 0.632, 0.752, 0.725, 0.752, 0.773, 0.749, 0.677, 0.701, 0.752, 0.749, 0.752, 0.704, 0.746, 0.728, 0.68 , 0.749, 0.8  , 0.728, 0.749, 0.776, 0.8  , 0.8  , 0.8  , 0.728, 0.797, 0.797, 0.8  , 0.728, 0.797, 0.752, 0.8  , 0.728, 0.8  , 0.8  , 0.8  , 0.8  , 0.797, 0.776, 0.752, 0.752, 0.8  , 0.752, 0.776, 0.656, 0.797, 0.8  , 0.701, 0.8  , 0.776, 0.797, 0.8  , 0.752, 0.8  ]])
This is a bit hard to read.
Let us instead plot the simulation output, using the Python library matplotlib.
import matplotlib.pyplot as plt
plt.style.use('ggplot')
The following function plots average regret across simulations, for each time period $t$.
Recall that regret in each period is defined as the difference between:
- the expected outcome for the best treatment, max(theta), and
- the expected outcome for the chosen treatment, which we stored in avg_thompson[2].
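For example, in the very first period every arm has the same Beta(1,1) posterior, so Thompson sampling assigns each of the three arms with probability 1/3. With $\theta = (0.2, 0.5, 0.8)$, the expected regret in that period is therefore $0.8 - (0.2 + 0.5 + 0.8)/3 = 0.3$, which is roughly where the regret curve should start.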
def plot_average_thompson_path(avg_thompson, theta, T):
    '''Plot average regret per period, given averaged simulation output.'''
    # Per-period regret: the best arm's expected outcome minus
    # the average expected outcome of the chosen arms.
    regret = max(theta) - avg_thompson[2]
    plt.plot(range(T), regret)
    plt.ylim(bottom = 0)
    plt.xlabel('Time $t$')
    plt.ylabel('Regret')
    plt.title(f'Expected regret of Thompson sampling for parameter vector {theta}')
Generate plots¶
Now we are ready to see how well Thompson sampling works for different values of $\theta$, over time.
T = 80
theta = np.array([.2,.5,.8])
plot_average_thompson_path(average_thompson_path(theta, T),
                           theta, T)
theta = np.array([.3,.4,.5,.6,.7])
plot_average_thompson_path(average_thompson_path(theta, T),
                           theta, T)
Regret as a function of $\theta$¶
In the theory lecture, we discussed how bandit problems are easy when the gap between arms is either large (the arms are easy to tell apart) or small (choosing the wrong arm costs little), and hardest for intermediate gaps.
Let's see how that plays out in simulations, for $T=200$.
In the following we hold $\theta_1$ fixed at 0.5, and vary the value of $\theta_2$.
For each value of $\theta_2$, we run average_thompson_path.
We then plot average per-period regret (cumulative regret divided by $T$) as a function of $\theta_2$.
This function takes a while to run, even on a fast computer.
def cumulative_average_regret(T):
    '''Plot average per-period regret of Thompson sampling as a function
    of theta_2, holding theta_1 fixed at .5.'''
    theta2s = np.linspace(0, 1, 21)
    # For each theta_2: per-period regret is the best arm's mean minus
    # the average expected outcome of the chosen arms (row 2 of the averaged paths).
    paths = [np.max([.5, theta2]) - average_thompson_path(np.array([.5, theta2]), T)[2]
             for theta2 in theta2s]
    regret_function = np.array([np.mean(path) for path in paths])
    plt.plot(theta2s, regret_function)
    plt.ylim(bottom = 0)
    plt.xlabel('$\\theta_2$')
    plt.ylabel('Regret')
    plt.title('Expected regret of Thompson sampling for different $\\theta_2$')
cumulative_average_regret(200)