In [9]:
## ---------------------------------------------------------
## import libraries
## ---------------------------------------------------------

using CSV
using DataFrames
using LinearAlgebra
using StatsModels
using MLPreprocessing
using StatsBase
using Turing
using StatsFuns: logistic, softplus
using Plots
using Distributions # for Laplace
using KernelDensity # for kernel density estimation
using Plots: Shape  # to plot polygons
import Random: AbstractRNG
In [4]:
## ---------------------------------------------------------
## Preprocessing
## ---------------------------------------------------------

# training data
df_train = CSV.read("DILI_raw_data.txt")

# rename columns
colnames = string.(names(df_train))
colnames = map(s -> replace(s, "." => "_"), colnames)
names!(df_train, Symbol.(colnames));

# select predictors
drug_names = df_train[!, :Drug]
df_train = df_train[!, [:Spher, :BSEP, :THP1, :Glu, :Glu_Gal, :ClogP, :log10_cmax, :BA, :dili_sev]];

# additional data
df_test = DataFrame(
    AZID = ["AZD123", "AZD456"], 
    Spher = [250, 250],
    BSEP =     [314, 1000],
    THP1 =    [250, 250],
    Glu =      [250, 250],
    Glu_Gal =  [1, 1],
    ClogP =   [2.501, 3.900],
    BA =  [0, 0],
    log10_cmax =  [0.5051500, 0.7075702])

# select variables for scaling
df_Xtrain = df_train[!, [:Spher, :BSEP, :THP1, :Glu, :Glu_Gal, :ClogP, :log10_cmax]]
df_Xtest  = df_test[!, [:Spher,:BSEP, :THP1, :Glu, :Glu_Gal, :ClogP, :log10_cmax]]

# standardize train and test data
scaler = fit(StandardScaler, df_Xtrain)
transform!(df_Xtrain, scaler)
transform!(df_Xtest, scaler)

# return Bioactivation (it did not require standardisation)
df_Xtrain[!, :BA] = df_train[!, :BA]
df_Xtest[!, :BA] = df_test[!, :BA]

# combine X and y in one column
df_yXtrain = hcat(df_train[!, :dili_sev], df_Xtrain);

f = @formula(x1 ~  0 + Spher + BSEP + THP1 + Glu + Glu_Gal + ClogP + BA  +  log10_cmax +
                      Spher * (BSEP + THP1 + Glu + Glu_Gal + ClogP + BA) +
                              BSEP * (THP1 + Glu + Glu_Gal + ClogP + BA) +
                                      THP1 * (Glu + Glu_Gal + ClogP + BA) +
                                             Glu * (Glu_Gal + ClogP + BA)+
                                                    Glu_Gal * (ClogP + BA)+
                                                                ClogP* BA);

# model frame
mf = ModelFrame(f, df_yXtrain)
coefnames(mf)
mm = modelmatrix(mf);

y = convert(Array, df_train[!, :dili_sev]);
X = convert(Matrix, mm);
println(countmap(y))

println(typeof(X))
println(typeof(y))
Dict(2=>40,3=>23,1=>33)
Array{Float64,2}
Array{Int64,1}
In [10]:
struct OrderedLogistic{T1, T2<:AbstractVector} <: DiscreteUnivariateDistribution
   η::T1
   cutpoints::T2

   function OrderedLogistic(η, cutpoints)
        if !issorted(cutpoints)
            error("cutpoints are not sorted")
        end
        return new{typeof(η), typeof(cutpoints)}(η, cutpoints)
   end

end

function Distributions.logpdf(d::OrderedLogistic, k::Int)

    K = length(d.cutpoints)+1

    c =  d.cutpoints

    if k==1
        logp= - softplus(-(c[k]-d.η))  #logp= log(logistic(c[k]-d.η))
    elseif k<K
        logp= log(logistic(c[k]-d.η) - logistic(c[k-1]-d.η))
    else
        logp= - softplus(c[k-1]-d.η)  #logp= log(1-logistic(c[k-1]-d.η))
    end

    return logp
end

function Distributions.rand(rng::AbstractRNG, d::OrderedLogistic)
    cutpoints = d.cutpoints
    η = d.η

    K = length(cutpoints)+1
    c = vcat(-Inf, cutpoints, Inf)
    
    ps = [logistic(η - i[1]) - logistic(η - i[2]) for i in zip(c[1:(end-1)],c[2:end])]
    
    k = rand(rng, Categorical(ps))

    if all(ps.>0)
        return(k)
    else
        return(-Inf)
    end
end
In [6]:
## ---------------------------------------------------------
## Define and fit model
## ---------------------------------------------------------

sigma_prior = 1

### Turing model
@model m(X, y, ::Type{VT}=Vector{Float64}) where {VT} = begin

    p = size(X, 2)
    
    # priors
    mu ~ Normal(0, 2)
    sigma ~ TruncatedNormal(0, sigma_prior, 0,Inf)
    
    beta = VT(undef, p)
    
    for i=1:p
       beta[i] ~ Laplace(mu , sigma)
    end

    c1 ~ Normal(0, 20)
    log_diff_c ~ Normal(0, 2)
    
    c2 = c1 + exp(log_diff_c)
    c = [c1, c2]

    eta = X * beta

    # likelihood
    for i = 1:length(y)
        y[i] ~ OrderedLogistic(eta[i], c)
    end
end

# sampling
steps = 10000
chain = sample(m(X, y), NUTS(steps, 0.65));

#show(chain)
┌ Info: Found initial step size
│   init_ϵ = 0.4
â”” @ Turing.Inference /Users/kcft114/.julia/packages/Turing/m05p3/src/inference/hmc.jl:365
┌ Warning: The current proposal will be rejected due to numerical error(s).
│   isfiniteθ = true
│   isfiniter = false
│   isfiniteℓπ = false
│   isfiniteℓκ = false
â”” @ AdvancedHMC /Users/kcft114/.julia/packages/AdvancedHMC/YWXfk/src/hamiltonian.jl:36
┌ Warning: The current proposal will be rejected due to numerical error(s).
│   isfiniteθ = true
│   isfiniter = false
│   isfiniteℓπ = false
│   isfiniteℓκ = false
â”” @ AdvancedHMC /Users/kcft114/.julia/packages/AdvancedHMC/YWXfk/src/hamiltonian.jl:36
┌ Info: Finished 1000 adapation steps
│   adaptor = StanHMCAdaptor(n_adapts=1000, pc=DiagPreconditioner, ssa=NesterovDualAveraging(γ=0.05, t_0=10.0, κ=0.75, δ=0.65, state.ϵ=0.13750253466199372), init_buffer=75, term_buffer=50)
│   τ.integrator = Leapfrog(ϵ=0.138)
│   h.metric = DiagEuclideanMetric([0.0215796, 0.0974323, 0.10 ...])
â”” @ AdvancedHMC /Users/kcft114/.julia/packages/AdvancedHMC/YWXfk/src/sampler.jl:67
┌ Info: Finished 10000 sampling steps in 216.359950816 (s)
│   h = Hamiltonian(metric=DiagEuclideanMetric([0.0215796, 0.0974323, 0.10 ...]))
│   τ = NUTS{Multinomial}(integrator=Leapfrog(ϵ=0.138), max_depth=5), Δ_max=1000.0)
│   EBFMI(Hs) = 5040.398523609766
│   mean(αs) = 0.7704226506700168
â”” @ AdvancedHMC /Users/kcft114/.julia/packages/AdvancedHMC/YWXfk/src/sampler.jl:77
In [11]:
## ---------------------------------------------------------
## Helper functions
## ---------------------------------------------------------

# posterior prediction for each category
function ps(y_pred_samps, ind)
    y_pred_ind = y_pred_samps[ind,:]
    p = [mean(y_pred_ind .== 1), mean(y_pred_ind .== 2), mean(y_pred_ind .== 3)]
    return p
end

# percent of distribution in each category (blue densities)
function percs(post, ind)
    post_ind = post[ind, :]
    prs = [mean(post_ind .<= c1), mean((post_ind .> c1) .* (post_ind .<= c2)), mean(post_ind .> c2)]
    return prs
end

function summary_stats(post_ind)
    post_ind_01 = vcat(0, post_ind, 1)
    dens = kde(post_ind_01, npoints=512, bandwidth=0.01)
    dens_y = dens.density
    dens_x = dens.x
    # there must be a better way to find the position of the maximal element
    pos = sum((dens_y .== maximum(dens_y)) .* range(1, stop=512, step=1))
    post_peak = round(dens_x[pos], digits=2)
    
    post_mean = round(mean(post_ind), digits = 2)
    post_median = round(median(post_ind), digits = 2)
    post_q025 = round(quantile(post_ind, 0.025), digits = 2)
    post_q975 = round(quantile(post_ind, 0.975), digits = 2)
    
    return post_peak, post_mean, post_median, post_q025, post_q975
end
Out[11]:
summary_stats (generic function with 1 method)
In [12]:
## ---------------------------------------------------------
## predict for training data
## ---------------------------------------------------------

## extract posterior samples

# fixed effects
beta_est = chain[:beta].value.data[:,:,1]';
eta_post = X * beta_est;

# cutpoints
e_log_diff_c = exp.(chain[:log_diff_c].value.data)[:,1,1]
c1_est = chain[:c1].value.data[:,1,1]
c2_est = c1_est + e_log_diff_c;

y_pred_samps = zeros(size(eta_post));
y_pred = zeros(size(eta_post, 1));

for i in 1:size(y_pred_samps,1)
    for j in 1:size(y_pred_samps,2)
        
        c1 = c1_est[j,1,1]
        c2 = c2_est[j,1,1]
        c = [c1, c2]
        
        dist = OrderedLogistic(eta_post[i,j], c)
        
        y_pred_samps[i,j] = rand(dist)
    end
    
    probs = [mean(y_pred_samps[i,:] .== 1), mean(y_pred_samps[i,:] .== 2), mean(y_pred_samps[i,:] .== 3)]
    y_pred[i] = sum((probs .== maximum(probs)) .* [1, 2, 3])
end
In [13]:
# accuracy (it is higher in Turing with normal priors for beta)
println("Accuracy: ", round(mean(y_pred .== y), digits=2))

y_bin = zeros(length(y))
y_pred_bin = zeros(length(y))

for i in 1:length(y)
  y_bin[i] = (y[i] == 1 ? 0 : 1)
  y_pred_bin[i] = (y_pred[i] == 1 ? 0 : 1)
end

# binary accuracy
println("Binary accuracy: ", round(mean(y_pred_bin .== y_bin), digits=2))
Accuracy: 0.8
Binary accuracy: 0.93
In [14]:
## ---------------------------------------------------------
## Plot results
## ---------------------------------------------------------

## extract predicted values and convert to 0-1 scale
post = logistic.(eta_post);

# cutpoints
c1 = logistic(mean(c1_est))
c2 = logistic(mean(c2_est))

## calculate average profile for DILI category 3 compounds
y_3 = findall(x->x==3, y)

kde_npoints = 2048
av3 = zeros(kde_npoints, length(y_3));

counter = 1
for ind in y_3
    post_ind = post[ind, :]
    post_ind_01 = vcat(0, post[ind, :], 1)
    dens = kde(post_ind_01, npoints=kde_npoints, bandwidth=0.01)
    av3[:,counter] = dens.density
    counter += 1
end

post_ind = post[y_3[1], :]
post_ind_01 = vcat(0, post[y_3[1], :], 1)
dens = kde(post_ind_01, npoints=kde_npoints, bandwidth=0.01)
av3_x = dens.x
av3_y = mean(av3, dims=2);
In [15]:
## ---------------------------------------------------------
## Plotting function
## ---------------------------------------------------------
function postplot(post, ind)
    post_ind = post[ind,:]
    kde(post_ind)

    p1 = plot(kde(post_ind).x,kde(post_ind).density,  
          fill = (0, 0.2, :blue), 
          title = drug_names[ind], 
          xlabel="P(DILI)", 
          xlims = (0,1.01),
          ylims = (0,12),
          legend=false,
          yticks = false,
          framestyle = :box)
    vline!([c1, c2], color = :black, linestyle = :dash)

    # define a function that returns a Plots.Shape
    rectangle(w, h, x, y) = Shape(x .+ [0,w,w,0], y .+ [0,0,h,h])

    plot!(rectangle(c1+0.05, 0.5,-0.05,0), color = :green, alpha = 0.9)
    plot!(rectangle(c2-c1,0.5,c1,0), color = :darkgoldenrod, alpha = 0.9)
    plot!(rectangle(1.05-c2,0.5,c2,0), color = :firebrick, alpha = 0.9)

    pers = percs(post, ind)
    s1 = string(convert(Int64, round(pers[1] * 100)), "%")
    s2 = string(convert(Int64, round(pers[2] * 100)), "%")
    s3 = string(convert(Int64, round(pers[3] * 100)), "%")

    annotate!(c1/2, 0.2, text(s1, :white, 8))
    annotate!(c1+(c2-c1)/2, 0.2, text(s2, :white, 8))
    annotate!(c2+0.04, 0.2, text(s3, :white, 8))

    plot!(av3_x, av3_y, color=:black)

    # =======  p2 =======================

    # ========= legend 1 ===============
    p2 = plot( ylim=(0,12), xlim=(0,10), border=:none) 
    #p2 = plot( ylim=(0,12), xlim=(0,10)) 

    x_title = 2.3
    y_title = 11.5

    w = 0.5
    h = 0.3

    x_green = 1.5
    y_green = y_title - 0.7

    x_golden = 1.5
    y_golden = y_title - 1.2

    x_red = 1.5
    y_red = y_title - 1.7

    x_box = 0.5
    y_box = 9.5
    w_box = 4.4
    h_box = 2.5

    probs = ps(y_pred_samps, ind)
    s1 = string(round(probs[1], digits=2), "0" ^ (4-length(string(round(probs[1], digits=2)))))
    s2 = string(round(probs[2], digits=2), "0" ^ (4-length(string(round(probs[2], digits=2)))))
    s3 = string(round(probs[3], digits=2), "0" ^ (4-length(string(round(probs[3], digits=2)))))

    plot!(Shape(x_box .+ [0,w_box,w_box,0], y_box .+ [0,0,h_box,h_box]), color = :white, alpha = 0.8, legend=false);
    annotate!(x_title, y_title, text(" Proportions ", :balck, 8))

    plot!(Shape(x_green .+ [0,w,w,0], y_green .+ [0,0,h,h]), color = :green, alpha = 0.8, legend=false)
    plot!(Shape(x_golden .+ [0,w,w,0], y_golden .+ [0,0,h,h]), color = :darkgoldenrod, alpha = 0.8, legend=false)
    plot!(Shape(x_red .+ [0,w,w,0], y_red .+ [0,0,h,h]), color = :firebrick, alpha = 0.8, legend=false);

    annotate!(x_green+ 1.1, y_green+0.1,  text(s1,  7))
    annotate!(x_green+ 1.1, y_golden+0.1, text(s2, 7))
    annotate!(x_green+ 1.1, y_red+0.1,    text(s3,  7))

    # ========= legend 2 ===============
    x_box = 0.5
    y_box = 7.0
    w_box = 4.4
    h_box = 1.5

    x_title = 2.3
    y_title = 8

    x_green = 1.5
    y_green = y_title - 0.7

    plot!(Shape(x_box .+ [0,w_box,w_box,0], y_box .+ [0,0,h_box,h_box]), color = :white, alpha = 0.8, legend=false);
    annotate!(x_title, y_title, text("True category", :balck, 8))

    if y[ind]==1
        plot!(Shape(x_green .+ [0,w,w,0], y_green .+ [0,0,h,h]), color = :green, alpha = 0.8, legend=false)
    elseif y[ind]==2
        plot!(Shape(x_green .+ [0,w,w,0], y_green .+ [0,0,h,h]), color = :darkgoldenrod, alpha = 0.8, legend=false)
    else
        plot!(Shape(x_green .+ [0,w,w,0], y_green .+ [0,0,h,h]), color = :firebrick, alpha = 0.8, legend=false)
    end

    annotate!(x_green+ 0.8, y_green+0.1,  text(string(y[ind]),  7))

    # =======  summary stats =================
    res = summary_stats(post_ind)
    
    x_box = 0.5
    y_box = 2.7
    w_box = 4.4
    h_box = 3.2
    
    x_title = 1.0
    y_title = 4.3
    plot!(Shape(x_box .+ [0,w_box,w_box,0], y_box .+ [0,0,h_box,h_box]), color = :white, alpha = 0.8, legend=false);
    s = string("  Summary stats \n \n Peak = ", res[1], "  \n  Mean = ", res[2], "  \n Median = ", res[3], "  \n 95% CI = ", res[4], "-", res[5])
    #s = "  Summary stats \n \n Peak = ", res[1], "  \n  Mean = 2 \n Median = 2 \n 95% CI = 2"
    #annotate!(x_title, y_title, text("  Summary stats \n \n Peak = ", res[1], "  \n  Mean = 2 \n Median = 2 \n 95% CI = 2", 
                                       #:balck, :left, 8))
    annotate!(x_title, y_title, text(s, 
                                       :balck, :left, 8))
   
    #annotate!(x_title, y_title - 1.5, text("   Peak = 2 \n  Mean = 2 \n Median = 2 \n 95% CI = 2", :balck, :left, 8))
    
    # =======  display =======================
    p = plot(p1,p2,layout=(1,2),legend=false)
    
    return p
end

for i=1:length(y)
#for i=1:1
    p = postplot(post, i)
    display(p)
end
0.00 0.25 0.50 0.75 1.00 Acetaminophen P(DILI) 0% 73% 27% Proportions 0.07 0.56 0.37 True category 2 Summary stats Peak = 0.95 Mean = 0.87 Median = 0.9 95% CI = 0.6-0.99
0.00 0.25 0.50 0.75 1.00 Acetylsalicylic_acid P(DILI) 58% 42% 0% Proportions 0.54 0.42 0.03 True category 2 Summary stats Peak = 0.15 Mean = 0.3 Median = 0.27 95% CI = 0.04-0.71
0.00 0.25 0.50 0.75 1.00 Acyclovir P(DILI) 4% 78% 18% Proportions 0.13 0.58 0.28 True category 2 Summary stats Peak = 0.96 Mean = 0.77 Median = 0.83 95% CI = 0.27-0.99
0.00 0.25 0.50 0.75 1.00 Albuterol P(DILI) 100% 0% 0% Proportions 0.98 0.02 0.00 True category 1 Summary stats Peak = 0.0 Mean = 0.01 Median = 0.0 95% CI = 0.0-0.06
0.00 0.25 0.50 0.75 1.00 Alendronate P(DILI) 100% 0% 0% Proportions 0.97 0.03 0.00 True category 1 Summary stats Peak = 0.0 Mean = 0.01 Median = 0.0 95% CI = 0.0-0.11
0.00 0.25 0.50 0.75 1.00 Ambrisentan P(DILI) 99% 1% 0% Proportions 0.86 0.13 0.01 True category 1 Summary stats Peak = 0.03 Mean = 0.07 Median = 0.05 95% CI = 0.01-0.25
0.00 0.25 0.50 0.75 1.00 Aminopyrine P(DILI) 0% 43% 57% Proportions 0.03 0.43 0.54 True category 2 Summary stats Peak = 0.97 Mean = 0.93 Median = 0.95 95% CI = 0.78-0.99
0.00 0.25 0.50 0.75 1.00 Amiodarone P(DILI) 5% 73% 22% Proportions 0.14 0.55 0.30 True category 3 Summary stats Peak = 0.97 Mean = 0.77 Median = 0.83 95% CI = 0.23-0.99
0.00 0.25 0.50 0.75 1.00 Amlodipine P(DILI) 11% 88% 1% Proportions 0.26 0.62 0.13 True category 2 Summary stats Peak = 0.77 Mean = 0.6 Median = 0.63 95% CI = 0.16-0.92
0.00 0.25 0.50 0.75 1.00 Amodiaquine P(DILI) 7% 90% 3% Proportions 0.22 0.63 0.15 True category 2 Summary stats Peak = 0.76 Mean = 0.65 Median = 0.67 95% CI = 0.21-0.94
0.00 0.25 0.50 0.75 1.00 Benoxaprofen P(DILI) 0% 21% 79% Proportions 0.02 0.32 0.66 True category 3 Summary stats Peak = 0.98 Mean = 0.96 Median = 0.97 95% CI = 0.88-0.99
0.00 0.25 0.50 0.75 1.00 Benzbromarone P(DILI) 0% 19% 81% Proportions 0.02 0.27 0.71 True category 3 Summary stats Peak = 0.99 Mean = 0.96 Median = 0.98 95% CI = 0.85-1.0
0.00 0.25 0.50 0.75 1.00 Benztropine P(DILI) 81% 19% 0% Proportions 0.72 0.26 0.02 True category 1 Summary stats Peak = 0.01 Mean = 0.17 Median = 0.11 95% CI = 0.0-0.67
0.00 0.25 0.50 0.75 1.00 Biperiden P(DILI) 100% 0% 0% Proportions 0.90 0.10 0.00 True category 1 Summary stats Peak = 0.02 Mean = 0.05 Median = 0.04 95% CI = 0.0-0.19
0.00 0.25 0.50 0.75 1.00 Bosentan P(DILI) 0% 65% 35% Proportions 0.06 0.52 0.42 True category 3 Summary stats Peak = 0.96 Mean = 0.89 Median = 0.92 95% CI = 0.62-0.99
0.00 0.25 0.50 0.75 1.00 Bromfenac P(DILI) 0% 93% 7% Proportions 0.10 0.64 0.26 True category 3 Summary stats Peak = 0.88 Mean = 0.82 Median = 0.84 95% CI = 0.54-0.96
0.00 0.25 0.50 0.75 1.00 Bumetanide P(DILI) 100% 0% 0% Proportions 0.96 0.04 0.00 True category 1 Summary stats Peak = 0.01 Mean = 0.02 Median = 0.01 95% CI = 0.0-0.08
0.00 0.25 0.50 0.75 1.00 Buspirone P(DILI) 57% 43% 0% Proportions 0.54 0.43 0.04 True category 1 Summary stats Peak = 0.2 Mean = 0.31 Median = 0.28 95% CI = 0.05-0.73
0.00 0.25 0.50 0.75 1.00 Captopril P(DILI) 2% 94% 3% Proportions 0.16 0.66 0.18 True category 2 Summary stats Peak = 0.85 Mean = 0.72 Median = 0.74 95% CI = 0.32-0.95
0.00 0.25 0.50 0.75 1.00 Carbamazepine P(DILI) 0% 61% 39% Proportions 0.04 0.51 0.45 True category 2 Summary stats Peak = 0.96 Mean = 0.91 Median = 0.93 95% CI = 0.73-0.99
0.00 0.25 0.50 0.75 1.00 Celecoxib P(DILI) 5% 78% 17% Proportions 0.15 0.58 0.27 True category 2 Summary stats Peak = 0.96 Mean = 0.75 Median = 0.81 95% CI = 0.22-0.99
0.00 0.25 0.50 0.75 1.00 Chlorpromazine P(DILI) 0% 78% 22% Proportions 0.06 0.58 0.36 True category 2 Summary stats Peak = 0.94 Mean = 0.87 Median = 0.89 95% CI = 0.62-0.98
0.00 0.25 0.50 0.75 1.00 Clomipramine P(DILI) 0% 81% 19% Proportions 0.08 0.60 0.33 True category 2 Summary stats Peak = 0.94 Mean = 0.85 Median = 0.88 95% CI = 0.55-0.98
0.00 0.25 0.50 0.75 1.00 Clozapine P(DILI) 0% 77% 23% Proportions 0.06 0.57 0.37 True category 2 Summary stats Peak = 0.93 Mean = 0.88 Median = 0.9 95% CI = 0.69-0.98
0.00 0.25 0.50 0.75 1.00 Cycloserine P(DILI) 78% 22% 0% Proportions 0.68 0.30 0.02 True category 1 Summary stats Peak = 0.04 Mean = 0.2 Median = 0.16 95% CI = 0.01-0.61
0.00 0.25 0.50 0.75 1.00 Dantrolene P(DILI) 0% 97% 3% Proportions 0.11 0.67 0.22 True category 3 Summary stats Peak = 0.84 Mean = 0.78 Median = 0.8 95% CI = 0.51-0.94
0.00 0.25 0.50 0.75 1.00 Desipramine P(DILI) 30% 67% 3% Proportions 0.37 0.52 0.11 True category 2 Summary stats Peak = 0.23 Mean = 0.5 Median = 0.49 95% CI = 0.05-0.95
0.00 0.25 0.50 0.75 1.00 Dexamethasone P(DILI) 23% 77% 0% Proportions 0.37 0.56 0.06 True category 1 Summary stats Peak = 0.43 Mean = 0.46 Median = 0.45 95% CI = 0.13-0.81
0.00 0.25 0.50 0.75 1.00 Diclofenac P(DILI) 0% 88% 12% Proportions 0.08 0.61 0.31 True category 2 Summary stats Peak = 0.92 Mean = 0.85 Median = 0.87 95% CI = 0.6-0.97
0.00 0.25 0.50 0.75 1.00 Entacapone P(DILI) 82% 18% 0% Proportions 0.68 0.31 0.02 True category 1 Summary stats Peak = 0.06 Mean = 0.19 Median = 0.16 95% CI = 0.02-0.54
0.00 0.25 0.50 0.75 1.00 Ethotoin P(DILI) 80% 20% 0% Proportions 0.67 0.31 0.02 True category 1 Summary stats Peak = 0.08 Mean = 0.2 Median = 0.16 95% CI = 0.02-0.57
0.00 0.25 0.50 0.75 1.00 Ezetimibe P(DILI) 89% 11% 0% Proportions 0.72 0.27 0.02 True category 2 Summary stats Peak = 0.07 Mean = 0.16 Median = 0.13 95% CI = 0.02-0.45
0.00 0.25 0.50 0.75 1.00 Felbamate P(DILI) 0% 43% 57% Proportions 0.03 0.42 0.55 True category 3 Summary stats Peak = 0.97 Mean = 0.93 Median = 0.95 95% CI = 0.76-0.99
0.00 0.25 0.50 0.75 1.00 Felodipine P(DILI) 92% 8% 0% Proportions 0.79 0.20 0.01 True category 1 Summary stats Peak = 0.02 Mean = 0.12 Median = 0.09 95% CI = 0.01-0.43
0.00 0.25 0.50 0.75 1.00 Fenclozic_Acid P(DILI) 0% 28% 72% Proportions 0.02 0.35 0.63 True category 3 Summary stats Peak = 0.98 Mean = 0.95 Median = 0.96 95% CI = 0.85-0.99
0.00 0.25 0.50 0.75 1.00 Flavoxate P(DILI) 85% 15% 0% Proportions 0.73 0.26 0.02 True category 1 Summary stats Peak = 0.05 Mean = 0.16 Median = 0.12 95% CI = 0.01-0.54
0.00 0.25 0.50 0.75 1.00 Flucloxacillin P(DILI) 0% 74% 26% Proportions 0.06 0.56 0.38 True category 2 Summary stats Peak = 0.94 Mean = 0.88 Median = 0.9 95% CI = 0.63-0.98
0.00 0.25 0.50 0.75 1.00 Fludarabine P(DILI) 97% 3% 0% Proportions 0.92 0.08 0.00 True category 1 Summary stats Peak = 0.0 Mean = 0.04 Median = 0.01 95% CI = 0.0-0.33
0.00 0.25 0.50 0.75 1.00 Flumazenil P(DILI) 81% 19% 0% Proportions 0.71 0.26 0.02 True category 1 Summary stats Peak = 0.03 Mean = 0.18 Median = 0.12 95% CI = 0.01-0.64
0.00 0.25 0.50 0.75 1.00 Fluoxetine P(DILI) 3% 94% 3% Proportions 0.19 0.65 0.17 True category 2 Summary stats Peak = 0.82 Mean = 0.69 Median = 0.72 95% CI = 0.29-0.94
0.00 0.25 0.50 0.75 1.00 Flutamide P(DILI) 0% 53% 47% Proportions 0.04 0.46 0.50 True category 3 Summary stats Peak = 0.96 Mean = 0.92 Median = 0.94 95% CI = 0.79-0.99
0.00 0.25 0.50 0.75 1.00 Furazolidone P(DILI) 11% 67% 22% Proportions 0.18 0.53 0.28 True category 2 Summary stats Peak = 0.98 Mean = 0.72 Median = 0.81 95% CI = 0.11-0.99
0.00 0.25 0.50 0.75 1.00 Guanethidine P(DILI) 100% 0% 0% Proportions 0.98 0.02 0.00 True category 1 Summary stats Peak = 0.0 Mean = 0.01 Median = 0.01 95% CI = 0.0-0.06
0.00 0.25 0.50 0.75 1.00 Ibufenac P(DILI) 0% 43% 57% Proportions 0.03 0.43 0.54 True category 3 Summary stats Peak = 0.97 Mean = 0.93 Median = 0.95 95% CI = 0.79-0.99
0.00 0.25 0.50 0.75 1.00 Ibuprofen P(DILI) 3% 88% 9% Proportions 0.15 0.62 0.23 True category 2 Summary stats Peak = 0.88 Mean = 0.75 Median = 0.79 95% CI = 0.29-0.97
0.00 0.25 0.50 0.75 1.00 Imipramine P(DILI) 3% 95% 2% Proportions 0.19 0.65 0.16 True category 2 Summary stats Peak = 0.78 Mean = 0.68 Median = 0.7 95% CI = 0.29-0.93
0.00 0.25 0.50 0.75 1.00 Indomethacin P(DILI) 1% 79% 21% Proportions 0.10 0.57 0.33 True category 2 Summary stats Peak = 0.95 Mean = 0.83 Median = 0.87 95% CI = 0.44-0.98
0.00 0.25 0.50 0.75 1.00 Indoramin P(DILI) 100% 0% 0% Proportions 0.91 0.09 0.00 True category 1 Summary stats Peak = 0.02 Mean = 0.04 Median = 0.03 95% CI = 0.0-0.14
0.00 0.25 0.50 0.75 1.00 Itraconazole P(DILI) 16% 79% 5% Proportions 0.27 0.57 0.16 True category 2 Summary stats Peak = 0.84 Mean = 0.6 Median = 0.63 95% CI = 0.11-0.96
0.00 0.25 0.50 0.75 1.00 Ketoconazole P(DILI) 0% 73% 27% Proportions 0.07 0.55 0.38 True category 3 Summary stats Peak = 0.95 Mean = 0.87 Median = 0.9 95% CI = 0.61-0.99
0.00 0.25 0.50 0.75 1.00 Lapatinib P(DILI) 1% 27% 72% Proportions 0.05 0.27 0.69 True category 3 Summary stats Peak = 0.99 Mean = 0.93 Median = 0.98 95% CI = 0.47-1.0
0.00 0.25 0.50 0.75 1.00 Liothyronine P(DILI) 99% 1% 0% Proportions 0.88 0.11 0.01 True category 1 Summary stats Peak = 0.02 Mean = 0.06 Median = 0.04 95% CI = 0.0-0.23
0.00 0.25 0.50 0.75 1.00 Mecamylamine P(DILI) 99% 1% 0% Proportions 0.89 0.11 0.01 True category 1 Summary stats Peak = 0.02 Mean = 0.06 Median = 0.04 95% CI = 0.0-0.23
0.00 0.25 0.50 0.75 1.00 Meclofenamate P(DILI) 5% 91% 4% Proportions 0.21 0.62 0.17 True category 1 Summary stats Peak = 0.75 Mean = 0.67 Median = 0.7 95% CI = 0.25-0.95
0.00 0.25 0.50 0.75 1.00 Metergoline P(DILI) 83% 17% 0% Proportions 0.69 0.29 0.02 True category 1 Summary stats Peak = 0.05 Mean = 0.19 Median = 0.15 95% CI = 0.02-0.56
0.00 0.25 0.50 0.75 1.00 Methotrexate P(DILI) 1% 26% 72% Proportions 0.04 0.26 0.69 True category 3 Summary stats Peak = 0.99 Mean = 0.93 Median = 0.98 95% CI = 0.46-1.0
0.00 0.25 0.50 0.75 1.00 Nadolol P(DILI) 98% 2% 0% Proportions 0.85 0.15 0.01 True category 1 Summary stats Peak = 0.03 Mean = 0.08 Median = 0.06 95% CI = 0.01-0.28
0.00 0.25 0.50 0.75 1.00 Naproxen P(DILI) 0% 40% 60% Proportions 0.03 0.41 0.56 True category 2 Summary stats Peak = 0.98 Mean = 0.94 Median = 0.95 95% CI = 0.76-0.99
0.00 0.25 0.50 0.75 1.00 Nefazodone P(DILI) 0% 8% 92% Proportions 0.01 0.12 0.87 True category 3 Summary stats Peak = 1.0 Mean = 0.98 Median = 1.0 95% CI = 0.84-1.0
0.00 0.25 0.50 0.75 1.00 Neostigmine P(DILI) 100% 0% 0% Proportions 0.98 0.02 0.00 True category 1 Summary stats Peak = 0.0 Mean = 0.01 Median = 0.0 95% CI = 0.0-0.07
0.00 0.25 0.50 0.75 1.00 Nicardipine P(DILI) 95% 5% 0% Proportions 0.87 0.12 0.01 True category 1 Summary stats Peak = 0.01 Mean = 0.07 Median = 0.03 95% CI = 0.0-0.41
0.00 0.25 0.50 0.75 1.00 Nifedipine P(DILI) 4% 95% 1% Proportions 0.22 0.65 0.13 True category 2 Summary stats Peak = 0.72 Mean = 0.64 Median = 0.66 95% CI = 0.26-0.92
0.00 0.25 0.50 0.75 1.00 Nimesulide P(DILI) 0% 66% 34% Proportions 0.04 0.52 0.44 True category 2 Summary stats Peak = 0.95 Mean = 0.91 Median = 0.92 95% CI = 0.77-0.98
0.00 0.25 0.50 0.75 1.00 Nitrofurantoin P(DILI) 21% 72% 8% Proportions 0.29 0.55 0.17 True category 2 Summary stats Peak = 0.91 Mean = 0.59 Median = 0.62 95% CI = 0.06-0.98
0.00 0.25 0.50 0.75 1.00 Olanzapine P(DILI) 13% 87% 0% Proportions 0.33 0.60 0.07 True category 2 Summary stats Peak = 0.53 Mean = 0.5 Median = 0.5 95% CI = 0.19-0.8
0.00 0.25 0.50 0.75 1.00 Orphenadrine P(DILI) 100% 0% 0% Proportions 0.93 0.07 0.00 True category 1 Summary stats Peak = 0.01 Mean = 0.03 Median = 0.02 95% CI = 0.0-0.13
0.00 0.25 0.50 0.75 1.00 Oxybutynin P(DILI) 82% 18% 0% Proportions 0.74 0.24 0.02 True category 1 Summary stats Peak = 0.02 Mean = 0.17 Median = 0.1 95% CI = 0.0-0.65
0.00 0.25 0.50 0.75 1.00 Pargyline P(DILI) 100% 0% 0% Proportions 0.94 0.06 0.00 True category 1 Summary stats Peak = 0.01 Mean = 0.03 Median = 0.02 95% CI = 0.0-0.11
0.00 0.25 0.50 0.75 1.00 Paroxetine P(DILI) 0% 85% 14% Proportions 0.10 0.61 0.29 True category 2 Summary stats Peak = 0.92 Mean = 0.82 Median = 0.85 95% CI = 0.47-0.98
0.00 0.25 0.50 0.75 1.00 Perhexiline P(DILI) 0% 54% 45% Proportions 0.05 0.47 0.48 True category 3 Summary stats Peak = 0.98 Mean = 0.89 Median = 0.93 95% CI = 0.56-1.0
0.00 0.25 0.50 0.75 1.00 Phenoxybenzamine P(DILI) 95% 5% 0% Proportions 0.74 0.25 0.01 True category 1 Summary stats Peak = 0.08 Mean = 0.14 Median = 0.12 95% CI = 0.03-0.36
0.00 0.25 0.50 0.75 1.00 Pimozide P(DILI) 23% 73% 4% Proportions 0.32 0.55 0.12 True category 2 Summary stats Peak = 0.77 Mean = 0.54 Median = 0.55 95% CI = 0.07-0.96
0.00 0.25 0.50 0.75 1.00 Pioglitazone P(DILI) 2% 82% 16% Proportions 0.12 0.59 0.29 True category 2 Summary stats Peak = 0.94 Mean = 0.79 Median = 0.84 95% CI = 0.35-0.98
0.00 0.25 0.50 0.75 1.00 Procyclidine P(DILI) 99% 1% 0% Proportions 0.88 0.11 0.00 True category 1 Summary stats Peak = 0.01 Mean = 0.06 Median = 0.04 95% CI = 0.0-0.24
0.00 0.25 0.50 0.75 1.00 Propantheline P(DILI) 100% 0% 0% Proportions 0.95 0.05 0.00 True category 1 Summary stats Peak = 0.01 Mean = 0.02 Median = 0.01 95% CI = 0.0-0.1
0.00 0.25 0.50 0.75 1.00 Pyridostigmine P(DILI) 98% 2% 0% Proportions 0.93 0.07 0.00 True category 1 Summary stats Peak = 0.01 Mean = 0.04 Median = 0.01 95% CI = 0.0-0.25
0.00 0.25 0.50 0.75 1.00 Rimonabant P(DILI) 7% 77% 16% Proportions 0.16 0.59 0.25 True category 1 Summary stats Peak = 0.97 Mean = 0.73 Median = 0.79 95% CI = 0.17-0.99
0.00 0.25 0.50 0.75 1.00 Rosiglitazone P(DILI) 1% 87% 12% Proportions 0.12 0.62 0.26 True category 2 Summary stats Peak = 0.92 Mean = 0.79 Median = 0.83 95% CI = 0.43-0.98
0.00 0.25 0.50 0.75 1.00 Simvastatin P(DILI) 13% 87% 0% Proportions 0.30 0.61 0.09 True category 2 Summary stats Peak = 0.63 Mean = 0.55 Median = 0.55 95% CI = 0.18-0.87
0.00 0.25 0.50 0.75 1.00 Sitaxsentan P(DILI) 0% 49% 51% Proportions 0.04 0.45 0.52 True category 3 Summary stats Peak = 0.97 Mean = 0.92 Median = 0.94 95% CI = 0.75-0.99
0.00 0.25 0.50 0.75 1.00 Stavudine P(DILI) 2% 60% 38% Proportions 0.09 0.49 0.42 True category 3 Summary stats Peak = 0.98 Mean = 0.85 Median = 0.91 95% CI = 0.35-1.0
0.00 0.25 0.50 0.75 1.00 Sudoxicam P(DILI) 0% 75% 25% Proportions 0.05 0.56 0.39 True category 3 Summary stats Peak = 0.94 Mean = 0.89 Median = 0.91 95% CI = 0.7-0.98
0.00 0.25 0.50 0.75 1.00 Sunitinib P(DILI) 0% 16% 84% Proportions 0.02 0.20 0.78 True category 3 Summary stats Peak = 1.0 Mean = 0.97 Median = 0.99 95% CI = 0.76-1.0
0.00 0.25 0.50 0.75 1.00 Suprofen P(DILI) 0% 57% 43% Proportions 0.04 0.50 0.46 True category 2 Summary stats Peak = 0.96 Mean = 0.92 Median = 0.93 95% CI = 0.76-0.99
0.00 0.25 0.50 0.75 1.00 Tacrine P(DILI) 30% 70% 0% Proportions 0.40 0.53 0.07 True category 2 Summary stats Peak = 0.45 Mean = 0.44 Median = 0.43 95% CI = 0.09-0.83
0.00 0.25 0.50 0.75 1.00 Tamoxifen P(DILI) 0% 70% 30% Proportions 0.06 0.55 0.39 True category 2 Summary stats Peak = 0.96 Mean = 0.87 Median = 0.9 95% CI = 0.57-0.99
0.00 0.25 0.50 0.75 1.00 Ticlopidine P(DILI) 0% 73% 27% Proportions 0.05 0.55 0.39 True category 2 Summary stats Peak = 0.94 Mean = 0.89 Median = 0.91 95% CI = 0.72-0.98
0.00 0.25 0.50 0.75 1.00 Tienilic_acid P(DILI) 0% 66% 34% Proportions 0.04 0.52 0.44 True category 3 Summary stats Peak = 0.94 Mean = 0.91 Median = 0.92 95% CI = 0.77-0.98
0.00 0.25 0.50 0.75 1.00 Tolcapone P(DILI) 0% 39% 61% Proportions 0.03 0.41 0.56 True category 3 Summary stats Peak = 0.97 Mean = 0.94 Median = 0.95 95% CI = 0.8-0.99
0.00 0.25 0.50 0.75 1.00 Tolmetin P(DILI) 0% 38% 62% Proportions 0.03 0.41 0.56 True category 2 Summary stats Peak = 0.97 Mean = 0.94 Median = 0.95 95% CI = 0.82-0.99
0.00 0.25 0.50 0.75 1.00 Troglitazone P(DILI) 0% 76% 24% Proportions 0.06 0.56 0.38 True category 3 Summary stats Peak = 0.94 Mean = 0.88 Median = 0.9 95% CI = 0.69-0.98
0.00 0.25 0.50 0.75 1.00 Verapamil P(DILI) 0% 100% 0% Proportions 0.18 0.69 0.13 True category 2 Summary stats Peak = 0.71 Mean = 0.69 Median = 0.69 95% CI = 0.45-0.87
0.00 0.25 0.50 0.75 1.00 Warfarin P(DILI) 0% 89% 10% Proportions 0.08 0.62 0.30 True category 2 Summary stats Peak = 0.9 Mean = 0.84 Median = 0.86 95% CI = 0.59-0.97
0.00 0.25 0.50 0.75 1.00 Ximelagatran P(DILI) 91% 9% 0% Proportions 0.76 0.23 0.02 True category 3 Summary stats Peak = 0.05 Mean = 0.14 Median = 0.11 95% CI = 0.01-0.45
0.00 0.25 0.50 0.75 1.00 Zileuton P(DILI) 0% 90% 10% Proportions 0.09 0.63 0.28 True category 2 Summary stats Peak = 0.91 Mean = 0.83 Median = 0.85 95% CI = 0.57-0.97
0.00 0.25 0.50 0.75 1.00 Zomepirac P(DILI) 0% 74% 26% Proportions 0.07 0.56 0.37 True category 1 Summary stats Peak = 0.95 Mean = 0.87 Median = 0.9 95% CI = 0.62-0.99
In [16]:
## ---------------------------------------------------------
## Predict for test data
## ---------------------------------------------------------

# combine X and y in one column: add dummy y
ytest = DataFrame(x1 = [0,0])
df_yXtest = hcat(ytest, df_Xtest)

# model frame
mf_test = ModelFrame(f, df_yXtest)
mm_test = modelmatrix(mf_test)
X_test = convert(Matrix, mm_test)

eta_test = X_test * beta_est
y_test_samps = zeros(size(eta_test))
y_test_pred = zeros(size(y_test_samps, 1));

for i in 1:size(y_test_samps,1)
    for j in 1:size(y_test_samps,2)
        
        c1 = c1_est[j,1,1]
        c2 = c2_est[j,1,1]
        c = [c1, c2]
        
        dist = OrderedLogistic(eta_test[i,j], c)
        
        y_test_samps[i,j] = rand(dist)
    end
    
    probs = [mean(y_test_samps[i,:] .== 1), mean(y_test_samps[i,:] .== 2), mean(y_test_samps[i,:] .== 3)]
    println(probs)
    y_test_pred[i] = sum((probs .== maximum(probs)) .* [1, 2, 3])

end
[0.466556, 0.496111, 0.0373333]
[0.496667, 0.459889, 0.0434444]
In [ ]: