%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pymc3 import *
import theano
from patsy import dmatrices
data = pd.read_table("QT_data.txt")
data.head(5)
plt.figure(figsize=(6,6));
plt.plot("log.cmax", "log.herg", "bo", data=data[data["QT.increase"] == 0 ], label="No QT increase");
plt.plot("log.cmax", "log.herg", "r^", data=data[data["QT.increase"] == 1 ], label="QT increase");
plt.xlabel("Log10 Cmax");
plt.ylabel("Log10 hERG IC50");
plt.legend(loc="lower right");
def invlogit(x):
return np.exp(x) / (1 + np.exp(x))
with Model() as m1:
# priors
b0 = Normal("b0", 0, sd=10)
b1 = Normal("b1", 0, sd=10)
b2 = Normal("b2", 0, sd=10)
b3 = Normal("b3", 0, sd=10)
# likelihood
linpred = Deterministic("linpred", b0 + b1*data["log.herg"] +
b2*data["log.cmax"] + b3*data["log.herg"]*data["log.cmax"] )
theta = invlogit(linpred)
y = Bernoulli("y", p=theta, observed=data["QT.increase"])
# sample
post = sample(draws=10000, seed=123, trace=[b0, b1, b2, b3, linpred])
df_summary(post)
traceplot(post)
ppc = sample_ppc(post, samples=5000, model=m1)
preds = invlogit(post["linpred"])
preds.sort(axis=1)
plt.figure(figsize=(7,12));
plt.violinplot(preds, vert=False,
widths=0.9, showextrema=False, showmeans=True,
bw_method='silverman')
plt.xlabel("Prob of QT increase")
plt.ylabel("Compound")