# Two Years of Bayesian Bandits for E-Commerce

### Data Philly • March 20, 2018 • @AustinRochford

#### arochford@monetate.com • austin.rochford@gmail.com

• Founded 2008, web optimization and personalization SaaS

#### Simulating a bandit

In [12]:
class BetaBinomial:
    """Beta distribution over a success probability, updated from counts.

    The two shape parameters are stored as ``a`` and ``b``; ``update``
    adds observed successes to ``a`` and observed failures to ``b``.
    """

    def __init__(self, a0=1., b0=1.):
        """Start from a Beta(a0, b0) prior.

        Args:
            a0: Initial value of the first shape parameter.
            b0: Initial value of the second shape parameter.
        """
        self.a = a0
        self.b = b0

    def sample(self):
        """Return one random draw from the current Beta(a, b) distribution."""
        return sp.stats.beta.rvs(self.a, self.b)

    def update(self, n, x):
        """Fold new observations into the distribution, in place.

        Args:
            n: Number of trials observed.
            x: Number of successes among those trials; the remaining
                ``n - x`` are counted as failures.
        """
        self.a += x
        self.b += n - x

In [13]:
class Bandit:
    """Two-armed bandit that picks an arm by comparing posterior samples.

    Each arm is represented by an object exposing ``sample()`` and
    ``update(n, x)``. Arm A has index 0 and arm B has index 1.
    """

    def __init__(self, a_post, b_post):
        """Store the per-arm posterior objects.

        Args:
            a_post: Posterior for arm A (index 0).
            b_post: Posterior for arm B (index 1).
        """
        self.a_post = a_post
        self.b_post = b_post

    def assign(self):
        """Draw one sample from each arm and return the chosen arm index.

        Returns:
            1 when arm B's sample is strictly greater than arm A's,
            otherwise 0 (so an exact tie selects arm A).
        """
        return int(self.a_post.sample() < self.b_post.sample())

    def update(self, arm, reward):
        """Record a single observed reward for the given arm.

        Args:
            arm: Arm index. 0 updates arm A; any other value updates arm B.
            reward: Observed outcome, passed to the arm's ``update`` as the
                success count of a single trial.
        """
        arm_post = self.a_post if arm == 0 else self.b_post
        arm_post.update(1, reward)

In [15]:
# Settings for the simulated experiment. The two rates are presumably the
# true per-arm reward probabilities -- confirm against generate_rewards.
A_RATE = 0.05
B_RATE = 0.1
# Also used below to size the result arrays and the progress bar.
N = 1000

# generate_rewards is defined in another cell (not shown here).
rewards_gen = generate_rewards(A_RATE, B_RATE, N)

In [16]:
# Both arms start from the default BetaBinomial() prior.
bandit = Bandit(BetaBinomial(), BetaBinomial())
# Per-step records of the arm chosen and the reward observed; every slot is
# expected to be overwritten by the loop below.
arms = np.empty(N, dtype=np.int64)
rewards = np.empty(N)

# enumerate() hides the length of the iterable from tqdm, so pass it
# explicitly via total=.
for t, arm_rewards in tqdm(enumerate(rewards_gen), total=N):
    # Choose an arm, then look up that arm's entry in this step's rewards.
    arms[t] = bandit.assign()
    rewards[t] = arm_rewards[arms[t]]

    # Feed the outcome back so later assignments use the updated posterior.
    bandit.update(arms[t], rewards[t])

100%|██████████| 1000/1000 [00:00<00:00, 3333.85it/s]

In [18]:
# Display the figure; `fig` is presumably built in an earlier cell not shown here.
fig

Out[18]: