Arcidiacono and Miller (2011, Ecta)
title: "Conditional Choice Probabilities Estimation" author: "Jaepil Lee" date: 'r format(Sys.Date(), "%Y-%m-%d")
' output: html_document:
# css: tables.css
# number_sections: yes
# theme: cerulean
# toc: yes
# toc_depth: 3
pdf_document: latex_engine: xelatex number_sections: yes toc: yes toc_depth: 3 mainfont: Calibri Light fontsize: 12pt header-includes:
- \usepackage{dcolumn}
- \usepackage{longtable}
- \usepackage{mathtools} editor_options: markdown: wrap: 80
This document introduces how Conditional Choice Probabilities (CCP) estimation works.
The class of dynamic discrete choice models we consider is characterized as follows:
Time and Choice
- Time is discrete: $t=1,\ldots,T$.
- For each time period $t=1,\ldots,T$, the economic agent chooses one of the choices in the choice set $\mathcal{J}:=\{1,2,\ldots,J\}$.
- Denote a $J$-dimensional vector $d_{t}:=(d_{1,t},d_{2,t},\ldots,d_{J,t})$ where $d_{j,t}$ is the indicator function such that: $$ d_{j,t} = \begin{cases} 1 & \text{action } j \text{ is chosen at } t \\ 0 & \text{otherwise} \end{cases} $$
- Choices in $\mathcal{J}$ are mutually exclusive: $\sum_{j\in\mathcal{J}}d_{j,t}=1$ for all $t=1,\ldots,T$.
State space
- The realization of the state at $t$ is $(x_t,\varepsilon_t)$.
- $x_t\in\mathcal{X}:=\{1,\ldots,X\}$ has finite support and is observed by the econometrician.
- $\varepsilon_t:=(\varepsilon_{1,t},\varepsilon_{2,t},\ldots,\varepsilon_{J,t})\in\mathbb{R}^{J}$ is a $J$-dimensional vector of disturbances with a known distribution with continuous support, absolutely continuous with respect to the Lebesgue measure, and unobserved by the econometrician.
Conditional Independence
- The joint mixed density function for the state in period $t+1$ conditional on $\left(x_{t},\varepsilon_{t},j\right)$, denoted by $g_{X_{t+1},G_{t+1} \vert X_t,G_{t},j}\left(x_{t+1},\varepsilon_{t+1}\left\vert x_{t},\varepsilon_{t},j\right.\right)$, satisfies the conditional independence assumption:
\begin{align*} g_{X_{t+1},G_{t+1} \vert X_{t},G_{t},j}(x_{t+1},\varepsilon_{t+1} \vert x_{t},\varepsilon_{t},j) &= \frac{g_{X_{t+1},G_{t+1},X_{t},G_{t} \vert j}(x_{t+1},\varepsilon_{t+1},x_{t},\varepsilon_{t} \vert j)}{g_{X_{t},G_{t} \vert j}(x_{t},\varepsilon_{t} \vert j)} \quad \text{by definition} \\ &= \frac{g_{X_{t+1},G_{t+1},X_{t},G_{t} \vert j}(x_{t+1},\varepsilon_{t+1},x_{t},\varepsilon_{t} \vert j)}{g_{X_{t+1}, X_{t},G_{t} \vert j}(x_{t+1},x_{t},\varepsilon_{t} \vert j)} \frac{g_{X_{t+1},X_{t},G_{t} \vert j}(x_{t+1},x_{t},\varepsilon_{t} \vert j)}{g_{X_{t},G_{t} \vert j}(x_{t},\varepsilon_{t} \vert j)} \\ &= g_{G_{t+1} \vert X_{t+1},X_{t},G_{t},j}(\varepsilon_{t+1} \vert x_{t+1},x_{t},\varepsilon_{t},j) g_{X_{t+1} \vert X_{t},G_{t},j}(x_{t+1} \vert x_{t},\varepsilon_{t},j) \\ &= g_{G_{t+1} \vert X_{t+1},G_{t}}(\varepsilon_{t+1} \vert x_{t+1},\varepsilon_{t}) g_{X_{t+1} \vert X_{t},G_{t},j}(x_{t+1} \vert x_{t},\varepsilon_{t},j) \quad \text{by Markov property} \\ &= g_{G_{t+1} \vert X_{t+1}}(\varepsilon_{t+1} \vert x_{t+1}) g_{X_{t+1} \vert X_{t},j}(x_{t+1} \vert x_{t},j) \quad \text{by conditional independence assumption} \end{align*}
- From here on, we use $g\left(\varepsilon_{t+1}|x_{t+1}\right)$ and $f_{j,t}(x_{t+1}|x_{t})$ to denote the first and the second terms in the last line:
- $g\left(\varepsilon_{t+1}|x_{t+1}\right)$ is the conditional probability density function for the disturbances.
- $f_{j,t}(x_{t+1}|x_{t})$ is the transition probability of $x_{t+1}$ occurring in period $t+1$ when action $j$ is taken in state $x_t$ at period $t$.
- Notice that there is no time subscript $t$ on $g\left(\varepsilon_{t+1}|x_{t+1}\right)$, while there is one on $f_{j,t}(x_{t+1}|x_{t})$. Even in nonstationary models, dynamic discrete choice models often assume that $\varepsilon$ has a time-invariant distribution conditional on $x_{t}$.
- In applied work, it is also often assumed that $g\left(\varepsilon_{t+1}|x_{t+1}\right)$ is the same across $x_{t+1}\in\mathcal{X}$ and that $\varepsilon$ has an i.i.d. type 1 extreme value (T1EV) distribution.
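Because this assumption delivers the logit formulas used throughout the code below, a quick simulation check may help fix ideas. This is a standalone sketch, not part of the model code; the pair of conditional values `v` is hypothetical.
# With i.i.d. T1EV (Gumbel) shocks, choice probabilities reduce to the logit formula
using Distributions, Statistics
v = [1.0, 0.3]                                    # hypothetical conditional values of two choices
eps = rand(Gumbel(0, 1), 1_000_000, 2)            # i.i.d. T1EV draws for each choice
p1_sim = mean(v[1] .+ eps[:,1] .> v[2] .+ eps[:,2])
p1_logit = exp(v[1]) / (exp(v[1]) + exp(v[2]))    # p1_sim ≈ p1_logit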
Preference
- The preferences of the optimizing agent are defined over states and actions by a utility function that is additively separable over time and between the contemporaneous disturbance and the Markovian state variables.
- Utility: per-period utility is characterized by $\bar{u}_{j,t}(x_{t},\varepsilon_{t}):=u_{j,t}(x_{t})+\varepsilon_{j,t}$ where $u_{j,t}(x_{t})$ is the systematic part of per-period utility from taking action $j\in\mathcal{J}$ at $t$.
- Denote the discount factor by $\beta \in (0,1)$.
- To ensure that the transversality condition is satisfied, we assume $\left\{u_{j,t}(x)\right\}_{t=1}^{T}$ is a bounded sequence for each $\left(j,x\right) \in \mathcal{J}\times\mathcal{X}$, and so is:
\begin{equation*} \left\{\int \max \left\{ \left\vert \varepsilon_{1,t}\right\vert, \left\vert \varepsilon_{2,t}\right\vert, \dots, \left\vert\varepsilon_{J,t}\right\vert \right\} g_{t}\left(\varepsilon_{t}|x_{t}\right) d\varepsilon_{t}\right\}_{t=1}^{T} \end{equation*}
At the beginning of each period $t\in\left\{1,\ldots ,T\right\}$, the agent observes the realization $\left(x_{t},\varepsilon_{t}\right)$ and chooses $d_{t}$ to sequentially maximize the discounted sum of payoffs:
\begin{equation} \mathbb{E}\left\{ \sum_{\tau=t}^{T}\sum_{j=1}^{J}\beta ^{\tau-1}d_{j,\tau }\left[u_{j,\tau }(x_{\tau })+\varepsilon _{j,\tau }\right] \left\vert x_{t},\varepsilon_{t}\right. \right\} \end{equation}
where at each period $t$ the expectation is taken over future realized values $x_{t+1},\ldots,x_{T}$ and $\varepsilon_{t+1},\ldots,\varepsilon_{T}$ conditional on $(x_{t},\varepsilon_{t})$.
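It will be useful later to have notation for the ex-ante and conditional value functions. These are the standard objects computed by `backward_induction` below and used in the CCP derivation: $$ \bar{V}_{t}(x_{t}) := \int \max_{j\in\mathcal{J}}\left\{ v_{j,t}(x_{t}) + \varepsilon_{j,t}\right\} g\left(\varepsilon_{t}|x_{t}\right) d\varepsilon_{t}, \qquad v_{j,t}(x_{t}) := u_{j,t}(x_{t}) + \beta\sum_{x_{t+1}\in\mathcal{X}}\bar{V}_{t+1}(x_{t+1}) f_{j,t}(x_{t+1}|x_{t}). $$ When $\varepsilon$ is i.i.d. T1EV, these reduce to $\bar{V}_{t}(x_{t}) = \log\left(\sum_{j\in\mathcal{J}}\exp(v_{j,t}(x_{t}))\right) + \gamma$ and $p_{j,t}(x_{t}) = \exp(v_{j,t}(x_{t}))/\sum_{k\in\mathcal{J}}\exp(v_{k,t}(x_{t}))$, where $\gamma\approx 0.5772$ is the Euler-Mascheroni constant.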
In this tutorial, let's consider a model with a renewal choice:
- There is a renewal choice $j\in\mathcal{J}$ such that whenever the renewal choice is chosen, the state is 'reset' to one element in the state space.
- The most popular application of a model with a renewal choice is the bus engine replacement problem in Rust (1987). In this model, mileage is reset to zero whenever the agent chooses to replace the engine; otherwise, mileage only increases.
As an application, I use a simplified version of the bus engine replacement model used in Arcidiacono and Miller (2011, Ecta).
- Time: $t=1,\ldots,T$
- State:
- Mileage: $x_{1,t}$
- Permanent route characteristic of the bus: $x_2$
- "Unobserved heterogeneity": $s$ (we treat this observed in this tutorial)
- Per-period utility:
- $u_1(x_{1,t})= 0 $
- $u_2(x_{1,t})= \theta_{0} + \theta_{1} \min\{x_{1,t},25\} + \theta_{2} s$
- We normalize the flow utility to zero when the engine is replaced.
- Maintenance costs, represented by $-\theta_1 \min\{x_{1,t},25\}$, increase linearly and then flatten out. Thus, it is redundant to track accumulated mileage beyond 25.
- Transition:
- Mileage accumulates in increments of 0.125, and the permanent route characteristic of the bus, $x_2$, lies on a grid with increments of 0.01.
- Accumulated mileage depends on the decision to replace the engine, the previous mileage, and the permanent route characteristic of the bus. $$ f_{j}(x_{1,t+1}|x_{t}) = \begin{cases} \exp(-x_{2}(x_{1,t+1}-x_{1,t}))-\exp(-x_{2}(x_{1,t+1}+0.125-x_{1,t})) &\text{if } j=2,\; x_{1,t}\leq x_{1,t+1} < 25 \\ \exp(-x_{2}(25-x_{1,t})) &\text{if } j=2,\; x_{1,t+1} = 25 \\ \exp(-x_{2}x_{1,t+1})-\exp(-x_{2}(x_{1,t+1}+0.125)) &\text{if } j=1,\; 0\leq x_{1,t+1} < 25 \\ \exp(-25x_{2}) &\text{if } j=1,\; x_{1,t+1} = 25 \\ 0 &\text{otherwise} \end{cases} $$
- i.e., the mileage increments follow a discretized exponential distribution, with accumulated mileage capped at 25.
- $\varepsilon$ follows T1EV.
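As a quick numeric illustration of the mileage transition above (a standalone sketch; the value $x_2=0.5$ is arbitrary), the probability that mileage stays in its current 0.125-wide bin for one more period is:
# Probability of a zero-increment bin under the transition above (x2 = 0.5 is illustrative)
x2_demo = 0.5
p_stay = exp(-x2_demo * 0.0) - exp(-x2_demo * 0.125)   # ≈ 0.0606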
# For REPL setting
ENV["LINES"] = 1000; ENV["COLUMNS"] = 250;
## In Julia, you can activate the project environment you set up in the REPL
using Pkg; Pkg.activate("./")
using Distributed;
addprocs(3; exeflags=`--project=$(Base.active_project())`);
@everywhere begin
using SharedArrays, LinearAlgebra, Optim, ForwardDiff, NLsolve, Ipopt, Zygote,
DataFrames, Printf, Random, Distributions, Statistics, HypothesisTests, TimeSeries,
Parameters, Base.MathConstants; # Base.MathConstants provides γ = 0.5772..., the Euler-Mascheroni constant
end
# two choices - j=1 replace j=2 continue
J = 2;
x1_min = 0.0 ; x1_max = 25.0; x1_int = 0.125; x1_len = Int64((x1_max-x1_min)/x1_int+1.0);
x2_min = 0.25; x2_max = 1.25; x2_int = 0.01 ; x2_len = Int64((x2_max-x2_min)/x2_int+1.0);
x1 = range(x1_min, x1_max, x1_len);
x2 = range(x2_min, x2_max, x2_len);
# transition matrix
# x_tran : Transition matrix for j=2 only. Note that the size is: |x1| \times |x1| \times |x2|
# x_tran_cumul: cdf for transition matrix. We will use this later.
# x1_next: x1 states for tomorrow. A row-wise repetition of states. Size: |x1| \times |x1|
# x1_tday: x1 states for today. A column-wise repetition of states. Size: |x1| \times |x1|
# x1_zero: matrix of zeros. |x1| \times |x1|
x_tran = zeros((x1_len, x1_len, x2_len));
x_tran_cumul = zeros((x1_len, x1_len, x2_len));
x1_tday = repeat(x1, 1, x1_len); # It's different from Python!
x1_next = x1_tday'; # notice the transpose!
x1_zero = zeros((x1_len, x1_len)); # "today" mileage after an engine replacement (all zeros); used in the general version sketched below
for k = 1:x2_len
x_tran[:,:,k] = (x1_next .>= x1_tday) .* exp.(-x2[k] * (x1_next - x1_tday)) .* (1 .- exp(-x2[k] * x1_int)); # j=2 continue
x_tran[:,end,k] = 1 .- sum(x_tran[:,1:(end-1),k], dims=2); # last column is set so that the sum of probs is 1
x_tran_cumul[:,:,k] = cumsum(x_tran[:,:,k], dims=2);
end;
# Note that we only compute the transition matrix for j=2 because
# the transition for j=1 is the first row of the transition matrix for j=2.
# In a more general setting, we would have to compute the transition matrix for each j=1 and 2, e.g.:
# x_tran = zeros((J,x1_len,x1_len,x2_len))
# x_tran_cumul = zeros((J,x1_len,x1_len,x2_len))
# for j in 1:J
#     x1_base = (j == 1) ? x1_zero : x1_tday  # replacement resets current mileage to zero
#     for k in 1:x2_len
#         x_tran[j,:,:,k] = (x1_next .>= x1_base) .* exp.(-x2[k] .* (x1_next - x1_base)) .* (1 .- exp(-x2[k] * x1_int))
#         x_tran[j,:,end,k] = 1 .- sum(x_tran[j,:,1:(end-1),k], dims=2)
#         x_tran_cumul[j,:,:,k] = cumsum(x_tran[j,:,:,k], dims=2)
#     end
# end
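As a quick sanity check on the construction above (a minimal sketch using only the `x_tran` array just built), every row of the transition matrix should be a probability distribution over next-period mileage:
# Entries are nonnegative and each row sums to one by construction of the last column
@assert all(x_tran .>= 0.0)
@assert all(abs.(sum(x_tran, dims=2) .- 1.0) .< 1e-12)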
In order to generate data from the specified DGP, we need to solve the individual optimization problem. `backward_induction` is a function that solves for the ex-ante value function of the agent at each state and time period.
# Solve for the ex-ante value function
@everywhere function backward_induction(arguments, theta)
T, S, _, x_tran, _, x1, x1_len, _, x2_len = arguments
value_function = zeros(eltype(theta),x1_len,x2_len,length(S),T+1)
for t in T:-1:1
for s in S
for k in 1:x2_len
for i in 1:x1_len
# replace
u1 = 0.0
v1 = u1 + theta[4] * (x_tran[1,:,k]'value_function[:,k,s,t+1])
# continue
u2 = theta[1] + theta[2] * x1[i] + theta[3] * s
v2 = u2 + theta[4] * (x_tran[i,:,k]'value_function[:,k,s,t+1])
value_function[i,k,s,t] = log(exp(v1)+exp(v2)) + γ
end
end
end
end
return value_function
end;
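A minimal illustration of how the solver is used (it assumes the `arguments` tuple and the true `theta` defined in the globals section further below):
# The solved value function is indexed by (mileage, route characteristic, s, period);
# the terminal slice value_function[:,:,:,T+1] is left at zero.
value_function = backward_induction(arguments, theta)
size(value_function)   # (x1_len, x2_len, length(S), T+1)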
Now, we are ready to generate the data. We generate `N` buses with initial mileage $x_{1t}=0$, route characteristic $x_{2}$, and additional heterogeneity $s$. We simulate the buses and the decision to replace the engine for `T` periods, and then keep only the data from periods `data_init` to `T`:
@everywhere function generate_data(N, arguments, theta)
T, S, data_init, x_tran, x_tran_cumul, x1, x1_len, x2, x2_len = arguments;
x1_index = Array{Int32}(ones(N, T+1)); # index #1 means mileage of 0
x2_index = rand(1:x2_len,N); # index: 1-101
simul_x1 = zeros(N,T+1);
simul_x2 = x2[x2_index];
simul_s = (rand(N) .> theta[5]) .+ 1;
simul_d = zeros(N, T); # a panel of decisions
draw_ccp = rand(N, T);
draw_x1 = rand(N, T);
value_function = backward_induction(arguments, theta);
for n in 1:N
for t in 1:T
# j=1 replace
u1 = 0.0
v1 = u1 + theta[4] * x_tran[1,:,x2_index[n]]'value_function[:,x2_index[n],simul_s[n],t+1]
# j=2 continue
u2 = theta[1] + theta[2] * x1[x1_index[n,t]] + theta[3] * simul_s[n]
v2 = u2 + theta[4] * x_tran[x1_index[n,t],:,x2_index[n]]'value_function[:,x2_index[n],simul_s[n],t+1]
# CCP under T1EV epsilon
p1 = exp(v1)/(exp(v1)+exp(v2)) # exp.(v) ./ sum(exp.(v)) for larger J where v is a vector of cvf's
p2 = 1.0-p1
# Simulate t+1
simul_d[n,t] = (draw_ccp[n,t] > p1) + 1 # 1: replace, 2: continue
x1_index[n,t+1] = (simul_d[n,t]==1) * sum((draw_x1[n,t] .> x_tran_cumul[1,:,x2_index[n]])) +
(simul_d[n,t]==2) * sum((draw_x1[n,t] .> x_tran_cumul[x1_index[n,t],:,x2_index[n]])) + 1
simul_x1[n,t+1] = x1[x1_index[n,t+1]]
end
end
# simul_x1_alt = getindex(x1,x1_index) # this works too instead of simul_x1[n,t+1]
data_x1 = simul_x1[:, data_init:T];
data_d = simul_d[:, data_init:T];
data_x2 = repeat(simul_x2, 1, T-data_init+1);
data_s = repeat(simul_s, 1, T-data_init+1);
data_t = repeat(transpose(data_init:T), N,1);
data_x1_index = indexin(data_x1,x1); # So elegant!
data_x2_index = indexin(data_x2,x2);
return data_t, data_x1, data_x2, data_s, data_d, data_x1_index, data_x2_index
end;
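For example, a small panel can be simulated and inspected as follows (a sketch; it assumes the globals defined further below, and the bus count of 100 is arbitrary):
# Draw a small panel and look at the replacement frequency
data_set_demo = generate_data(100, arguments, theta)
data_t, data_x1, data_x2, data_s, data_d, data_x1_index, data_x2_index = data_set_demo
mean(data_d .== 1)   # share of bus-period observations with an engine replacement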
FIML (full-information maximum likelihood) finds the structural parameters as follows:
- Solve the model at a candidate vector of structural parameters.
- Compute the log-likelihood of the observed data given the model solution.
- Update the candidate.
- Repeat until the log-likelihood is maximized.
`log_likelihood_FIML` implements the FIML estimation. Note that the last parameter, $\beta$, is reparameterized as a function of a real number $x$ with $\beta(x) = \exp(x) / (1+\exp(x))$, so that $\beta$ always lies in $(0,1)$.
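A minimal illustration of this reparameterization (the `logit` and `softmax` helpers defined further below implement exactly these two maps):
# Map beta = 0.9 to an unconstrained real number and back
x = log(0.9) - log(1 - 0.9)          # inverse map; equals logit(0.9)
beta_back = exp(x) / (1 + exp(x))    # recovers 0.9; equals softmax(x)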
@everywhere function log_likelihood_FIML(theta, N, data_set, arguments)
_, _, _, data_s, data_d, data_x1_index, data_x2_index = data_set;
T, S, data_init, x_tran, x_tran_cumul, x1, x1_len, x2, x2_len = arguments;
beta = softmax(theta[4]);
theta_converted = append!(theta[1:(end-1)],beta)
value_function = backward_induction(arguments, theta_converted);
log_likelihood = 0.0;
for t in 1:(T-data_init+1)
for n in 1:N
i = data_x1_index[n,t];
k = data_x2_index[n,t];
s = data_s[n,t];
# replace
u1 = 0.0;
v1 = u1 + beta * (x_tran[1,:,k]'value_function[:,k,s,data_init+t]);
# continue
u2 = theta[1] + theta[2] * x1[i] + theta[3] * s;
v2 = u2 + beta * (x_tran[i,:,k]'value_function[:,k,s,data_init+t]);
denominator = 1.0 + exp(v2-v1);
log_likelihood += (data_d[n,t]==2) * (v2-v1) - log(denominator);
end
end
return -log_likelihood
end;
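An illustrative single-dataset FIML run looks like the following (a sketch mirroring the call inside `monte_carlo` below; `N` and `data_set` are assumed to come from `generate_data`):
# Maximize the likelihood over (theta_0, theta_1, theta_2, x), where beta = softmax(x)
# opt_fiml = optimize(th -> log_likelihood_FIML(th, N, data_set, arguments), theta_init, BFGS(), autodiff = :forward);
# theta_fiml = append!(opt_fiml.minimizer[1:3], softmax(opt_fiml.minimizer[4]))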
CCP estimation follows these steps:
- Estimate the conditional choice probabilities, $\vec{p}(x)$.
- In this tutorial, this step is done with a logistic regression of the observed choices on a flexible polynomial in the state variables (and time).
- Find a pair of sequences of choices that lead to the same ex-ante value function in the future (say, $\rho$ periods ahead).
- In this example, we know that the transition matrix is independent of the current state when $j=1$ is chosen, i.e., $$ f_{1}(x_{1,t+1}|x_{t}) = f_{1}(x_{1,t+1}|x_{t}') = \begin{cases} \exp(-x_{2}x_{1,t+1})-\exp(-x_{2}(x_{1,t+1}+0.125)) &\text{if } 0 \leq x_{1,t+1} < 25 \\ \exp(-25x_{2}) &\text{if } x_{1,t+1} = 25 \end{cases} $$ for all $x_{t}$ and $x_{t}'$. Thus, for each $x_t$, the following two sequences of choices will have the same expected ex-ante value function at $t+2$: $$ \begin{cases} j_t = 2 \rightarrow j_{t+1} = 1 \\ j_t = 1 \rightarrow j_{t+1} = 1 \end{cases} $$ To see this, let's compute the transition probability of $x_{1,t+2}$ given $x_{t}$ implied by each sequence of choices: $$ f_{1}(x_{1,t+2})=\sum_{x_{t+1}}f_{1}(x_{1,t+2}|x_{t+1})f_{2}(x_{1,t+1}|x_{t}) = \sum_{x_{t+1}}f_{1}(x_{1,t+2}|x_{t+1})f_{1}(x_{1,t+1}|x_{t}')=f_{1}(x_{1,t+2}) $$ The first and third equalities hold because $f_{1}(x_{1,t+2}|x_{t+1})$ is independent of $x_{t+1}$ and $\sum_{x_{t+1}}f_{2}(x_{1,t+1}|x_{t})=\sum_{x_{t+1}}f_{1}(x_{1,t+1}|x_{t}')=1$. This guarantees that the two-period-ahead ex-ante value functions are the same: $$ \sum_{x_{t+2}}\bar{V}(x_{t+2})f_{1}(x_{t+2}|x_{t+1}) = \sum_{x_{t+1}}\sum_{x_{t+2}}\bar{V}(x_{t+2})f_{1}(x_{t+2}|x_{t+1}) f_{2}(x_{t+1}|x_{t}) = \sum_{x_{t+1}}\sum_{x_{t+2}}\bar{V}(x_{t+2})f_{1}(x_{t+2}|x_{t+1}) f_{1}(x_{t+1}|x_{t}) = \sum_{x_{t+2}}\bar{V}(x_{t+2})f_{1}(x_{t+2}|x_{t+1}) $$
- Take the difference of the conditional value functions implied by the two sequences of choices, and estimate the structural parameters using these differences as moment conditions.
- Let's write down the conditional value functions following each sequence of choices: $$ \begin{cases} j_t = 2 \rightarrow j_{t+1} = 1: &v_{2}(x_t) = u_{2}(x_{t}) + \beta \sum_{x_{t+1}} \bar{V}(x_{t+1}) f_{2}(x_{t+1}|x_{t}) \\ &\quad\quad\;\;\; = u_{2}(x_{t}) + \beta \sum_{x_{t+1}} (v_{1}(x_{t+1})+\psi_{1}(\vec{p}(x_{t+1})))f_{2}(x_{t+1}|x_{t}) \\ &\quad\quad\;\;\; = u_{2}(x_{t}) + \beta \sum_{x_{t+1}} (u_{1}(x_{t+1})+\psi_{1}(\vec{p}(x_{t+1})))f_{2}(x_{t+1}|x_{t}) + \beta^2 \sum_{x_{t+1}}\sum_{x_{t+2}}\bar{V}(x_{t+2})f_{1}(x_{t+2}|x_{t+1}) f_{2}(x_{t+1}|x_{t}) \\ &\quad\quad\;\;\; = \theta_{0} + \theta_{1} \min\{x_{1t},25\} + \theta_{2} s + \beta \sum_{x_{t+1}} (0+\psi_{1}(\vec{p}(x_{t+1})))f_{2}(x_{t+1}|x_{t}) + \beta^2 \sum_{x_{t+1}}\sum_{x_{t+2}}\bar{V}(x_{t+2})f_{1}(x_{t+2}|x_{t+1}) f_{2}(x_{t+1}|x_{t}) \\ &\quad\quad\;\;\; = \theta_{0} + \theta_{1} \min\{x_{1t},25\} + \theta_{2} s + \beta \sum_{x_{t+1}} \psi_{1}(\vec{p}(x_{t+1}))f_{2}(x_{t+1}|x_{t}) + \beta^2 \sum_{x_{t+1}}\sum_{x_{t+2}}\bar{V}(x_{t+2})f_{1}(x_{t+2}|x_{t+1}) f_{2}(x_{t+1}|x_{t}) \\ j_t = 1 \rightarrow j_{t+1} = 1: &v_{1}(x_t) = u_{1}(x_{t}) + \beta \sum_{x_{t+1}} \bar{V}(x_{t+1}) f_{1}(x_{t+1}|x_{t}) \\ &\quad\quad\;\;\; = u_{1}(x_{t}) + \beta \sum_{x_{t+1}} (v_{1}(x_{t+1})+\psi_{1}(\vec{p}(x_{t+1})))f_{1}(x_{t+1}|x_{t}) \\ &\quad\quad\;\;\; = u_{1}(x_{t}) + \beta \sum_{x_{t+1}} (u_{1}(x_{t+1})+\psi_{1}(\vec{p}(x_{t+1})))f_{1}(x_{t+1}|x_{t}) + \beta^2 \sum_{x_{t+1}}\sum_{x_{t+2}}\bar{V}(x_{t+2})f_{1}(x_{t+2}|x_{t+1}) f_{1}(x_{t+1}|x_{t}) \\ &\quad\quad\;\;\; = 0 + \beta \sum_{x_{t+1}} (0+\psi_{1}(\vec{p}(x_{t+1})))f_{1}(x_{t+1}|x_{t}) + \beta^2 \sum_{x_{t+1}}\sum_{x_{t+2}}\bar{V}(x_{t+2})f_{1}(x_{t+2}|x_{t+1}) f_{1}(x_{t+1}|x_{t}) \\ &\quad\quad\;\;\; = \beta \sum_{x_{t+1}} \psi_{1}(\vec{p}(x_{t+1}))f_{1}(x_{t+1}|x_{t}) + \beta^2 \sum_{x_{t+1}}\sum_{x_{t+2}}\bar{V}(x_{t+2})f_{1}(x_{t+2}|x_{t+1}) f_{1}(x_{t+1}|x_{t}) \end{cases} $$ where $\psi_{j}(\vec{p}(x_{t})) := \bar{V}(x_{t}) - v_{j}(x_{t})$. Note that when $\varepsilon$ follows T1EV, $\psi_{j}(\vec{p}(x_{t}))=\gamma - \log p_{j}(x_{t})$. 
$$ \begin{align*} v_{2}(x_{t}) - v_{1}(x_{t}) &= \theta_{0} + \theta_{1} \min\{x_{1t},25\} + \theta_{2} s + \beta \sum_{x_{t+1}} \psi_{1}(\vec{p}(x_{t+1}))f_{2}(x_{t+1}|x_{t}) - \beta \sum_{x_{t+1}} \psi_{1}(\vec{p}(x_{t+1}))f_{1}(x_{t+1}|x_{t}) \\ v_{2}(x_{t}) - v_{1}(x_{t}) &= (\bar{V}(x_{t}) - v_{1}(x_{t})) - (\bar{V}(x_{t}) - v_{2}(x_{t})) = \psi_{1}(\vec{p}(x_{t}))-\psi_{2}(\vec{p}(x_{t})) \end{align*} $$ Now we are ready to estimate the structural parameters by using the restriction below: $$ \begin{align*} \psi_{1}(\vec{p}(x_{t}))-\psi_{2}(\vec{p}(x_{t})) &=\theta_{0} + \theta_{1} \min\{x_{1t},25\} + \theta_{2} s + \beta \sum_{x_{t+1}} \psi_{1}(\vec{p}(x_{t+1}))f_{2}(x_{t+1}|x_{t}) - \beta \sum_{x_{t+1}} \psi_{1}(\vec{p}(x_{t+1}))f_{1}(x_{t+1}|x_{t}) \\ -\log p_{1}(x_t) + \log p_{2}(x_t) &= \theta_{0} + \theta_{1} \min\{x_{1t},25\} + \theta_{2} s + \beta \sum_{x_{t+1}} (\gamma-\log p_{1}(x_{t+1})) f_{2}(x_{t+1}|x_{t}) - \beta \sum_{x_{t+1}} (\gamma-\log p_{1}(x_{t+1}))f_{1}(x_{t+1}|x_{t}) \\ \log \frac{p_{2}(x_t)}{1-p_{2}(x_{t})} &= \theta_{0} + \theta_{1} \min\{x_{1t},25\} + \theta_{2} s + \beta \sum_{x_{t+1}} (\gamma-\log p_{1}(x_{t+1})) \left(f_{2}(x_{t+1}|x_{t}) - f_{1}(x_{t+1}|x_{t})\right) \end{align*} $$ You will see that this is exactly a logit regression for $j=2$ where the regressors are $\left(1, \min\{x_{1t},25\}, s, \sum_{x_{t+1}} (\gamma-\log p_{1}(x_{t+1})) (f_{2}(x_{t+1}|x_{t}) - f_{1}(x_{t+1}|x_{t}))\right)$ and the structural parameters are $(\theta_{0},\theta_{1},\theta_{2},\beta)$. The code below implements CCP estimation based on our discussion above.
@everywhere function softmax(x)
return exp.(x) ./ (1 .+ exp.(x))
end;
@everywhere function logit(x)
return log(x) - log(1-x)
end;
@everywhere function logistic(b,x)
v1 = x * b
return softmax(v1)
end;
# Given data, we compute the beta with logit
@everywhere function wlogit(b, y, X, W=ones(size(X)[1]))
# W is the weight for each observation to the log-likelihood
v1 = X * b;
p1 = softmax(v1)
p2 = 1.0 .- p1
weighted_log_likelihood = W'*((y.==1.0) .* log.(p1) .+ (y.==2.0) .* log.(p2))
return -weighted_log_likelihood
end;
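As a standalone check of `wlogit` (purely illustrative; the design matrix and coefficients here are made up), it recovers the coefficients of a simple logit on synthetic data:
# Simulate a binary logit with choices coded 1/2, as in the model data
Random.seed!(1);
b_true = [0.3, -0.5];
X_demo = [ones(5_000) randn(5_000)];
p1_demo = softmax(X_demo * b_true);
y_demo = (rand(5_000) .> p1_demo) .+ 1.0;   # equals 1 with probability p1_demo
opt_demo = optimize(b -> wlogit(b, y_demo, X_demo), zeros(2), BFGS(), autodiff = :forward);
opt_demo.minimizer   # close to b_true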
# Use this if you want to bound beta to be [0,1]
# @everywhere function wlogit_bounded_beta(b, y, X, W=ones(size(X)[1]))
# b[end] = exp(b[end])/(1 + exp(b[end]));
# v1 = X * b;
# p1 = softmax(v1)
# p2 = 1.0 .- p1
# weighted_log_likelihood = W'*((y.==1.0) .* log.(p1) .+ (y.==2.0) .* log.(p2))
# return -weighted_log_likelihood
# end;
@everywhere function ccp_estimation(theta, theta_init, N, data_set, arguments;
rescale=rescale, x_method="quadratic", t_method="quadratic")
@assert (x_method=="linear" || x_method=="quadratic" || x_method=="cubic" || x_method=="true_ccp") "Choose one of 'linear', 'quadratic', 'cubic', and 'true_ccp' for the first-stage estimation of CCP"
@assert (t_method=="none" || t_method=="linear" || t_method=="quadratic" || t_method=="cubic" || x_method=="true_ccp") "Choose one of 'none', 'linear', 'quadratic', and 'cubic' for t_method if x_method != 'true_ccp'"
data_t, data_x1, data_x2, data_s, data_d, data_x1_index, data_x2_index = data_set;
T, S, data_init, x_tran, x_tran_cumul, x1, x1_len, x2, x2_len = arguments;
### FIRST STAGE: CCP ESTIMATION
if x_method == "true_ccp"
# From the true theta
cvf1 = zeros(eltype(theta),x1_len,x2_len,length(S),T+1)
cvf2 = zeros(eltype(theta),x1_len,x2_len,length(S),T+1)
value_function = zeros(eltype(theta),x1_len,x2_len,length(S),T+1)
for t in T:-1:1
for s in S
for k in 1:x2_len
for i in 1:x1_len
# replace
u1 = 0.0
v1 = u1 + theta[4] * (x_tran[1,:,k]'value_function[:,k,s,t+1])
# continue
u2 = theta[1] + theta[2] * x1[i] + theta[3] * s
v2 = u2 + theta[4] * (x_tran[i,:,k]'value_function[:,k,s,t+1])
value_function[i,k,s,t] = log(exp(v1)+exp(v2)) + γ
cvf1[i,k,s,t] = v1
cvf2[i,k,s,t] = v2
end
end
end
end
grid_fv = (γ .- log.(softmax(cvf1.-cvf2)))[:,:,:,data_init:T];
else # Estimate the data
# Data
vec_one = ones(N*(T-data_init+1));
vec_d = vec(data_d' );
vec_s = vec(data_s' );
vec_t = vec(data_t' )./rescale;
vec_x1 = vec(data_x1')./rescale;
vec_x2 = vec(data_x2');
# Regressors for computing future value
grid_one = ones(x1_len * x2_len);
grid_x1 = kron(ones(x2_len), x1)./rescale;
grid_x2 = kron(x2, ones(x1_len));
if x_method=="linear"
data_x = [vec_one vec_x1 vec_x2];
grid_x = [grid_one grid_x1 grid_x2];
elseif x_method=="quadratic"
data_x = [vec_one vec_x1 vec_x2 vec_x1.^2 vec_x1.*vec_x2 vec_x2.^2];
grid_x = [grid_one grid_x1 grid_x2 grid_x1.^2 grid_x1.*grid_x2 grid_x2.^2];
else # "cubic"
data_x = [vec_one vec_x1 vec_x2 vec_x1.^2 vec_x1.*vec_x2 vec_x2.^2 vec_x1.^3 (vec_x1.^2).*vec_x2 vec_x1.*(vec_x2.^2) vec_x2.^3];
grid_x = [grid_one grid_x1 grid_x2 grid_x1.^2 grid_x1.*grid_x2 grid_x2.^2 grid_x1.^3 (grid_x1.^2).*grid_x2 grid_x1.*(grid_x2.^2) grid_x2.^3];
end
if t_method == "none"
data_xst = [data_x vec_s.*data_x];
elseif t_method == "linear"
data_xs = [data_x vec_s.*data_x];
data_xst = [data_xs vec_t.*data_xs];
elseif t_method == "quadratic"
data_xs = [data_x vec_s.*data_x];
data_xst = [data_xs vec_t.*data_xs (vec_t.^2).*data_xs];
else # cubic
data_xs = [data_x vec_s.*data_x];
data_xst = [data_xs vec_t.*data_xs (vec_t.^2).*data_xs (vec_t.^3).*data_xs];
end
# We use this to estimate beta with reverse differentiation
wlogit_b(b) = wlogit(b, vec_d, data_xst)
function wlogit_d!(G, b)
G .= wlogit_b'(b)
end;
beta_ccp_init = zeros(size(data_xst)[2]);
opt_ccp = optimize(wlogit_b, wlogit_d!, beta_ccp_init, BFGS())
# opt_ccp = optimize(b -> wlogit(b, vec_d, data_xst), beta_ccp_init, BFGS(), autodiff = :forward); # Forward differentiation
ccp_hat = opt_ccp.minimizer;
grid_fv = zeros(x1_len,x2_len,length(S),(T-data_init+1))
for t in 1:(T - data_init + 1)
for s in S
time = t + (data_init - 1)
if t_method == "none"
grid_xst = [grid_x s*grid_x];
elseif t_method == "linear"
grid_xst = kron([1 time/rescale], [grid_x s*grid_x]);
elseif t_method == "quadratic"
grid_xst = kron([1 time/rescale (time/rescale)^2], [grid_x s*grid_x]);
else # cubic
grid_xst = kron([1 time/rescale (time/rescale)^2 (time/rescale)^3], [grid_x s*grid_x]);
end
grid_fv[:,:,s,t] = γ .- reshape(log.(logistic(ccp_hat,grid_xst)),x1_len,x2_len)
end
end
end
# Estimate Future Value from the data
data_fv = zeros(N,(T-data_init));
for n = 1:N
for t = 1:(T-data_init)
i = data_x1_index[n,t]
k = data_x2_index[n,t]
s = data_s[n,t]
data_fv[n,t] = (x_tran[i,:,k] - x_tran[1,:,k])'grid_fv[:, k, s, t+1] # be careful about p_(x_{t+1})!
end
end
# construct the data set for structural estimation
vec_one_ccp = ones(N*(T-data_init));
vec_d_ccp = vec(data_d[:, 1:(T-data_init)]');
vec_s_ccp = vec(data_s[:, 1:(T-data_init)]');
vec_x1_ccp = vec(data_x1[:,1:(T-data_init)]');
vec_x2_ccp = vec(data_x2[:,1:(T-data_init)]');
vec_fv_ccp = vec(data_fv[:,1:(T-data_init)]');
data_ccp = [vec_one_ccp vec_x1_ccp vec_s_ccp vec_fv_ccp];
### SECOND STAGE: ESTIMATION FOR STRUCTURAL PARAMETERS
opt_structural = optimize(b -> wlogit(b, 3 .- vec_d_ccp, data_ccp), theta_init, BFGS(), autodiff = :forward) # or ConjugateGradient()
return opt_structural.minimizer
end
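A single-dataset run of the two-stage estimator looks like this (a sketch mirroring the calls inside `monte_carlo` and `bootstrap` below; `data_set` is assumed to come from `generate_data`):
# First stage: flexible logit for the CCPs in (x1, x2, s, t); second stage: logit in the CCP-implied regressors
# theta_ccp = ccp_estimation(theta, theta_init, N, data_set, arguments;
#                            rescale=rescale, x_method="quadratic", t_method="quadratic")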
### Global variables for simulation
T = 30;
S = 1:2; # "unobserved heterogeneity" in AM2011 for s in {1,2}
theta = [2.00, -0.15, 1.0, 0.90, 0.40]; # theta[4] is beta, theta[5] is Pi
data_init = 11; # extract data from periods t = "data_init",...,T
structural_param = append!(theta[1:(end-2)], logit(theta[end-1]));
deviation = randn(length(theta[1:(end-1)])) / length(theta[1:(end-1)]);
theta_init = structural_param + deviation;
rescale = 10.0; # prevents overflow of exp(.)
arguments = T, S, data_init, x_tran, x_tran_cumul, x1, x1_len, x2, x2_len;
### Monte Carlo Experiment
function monte_carlo(N,MC,arguments,theta_init,theta)
# Result storage
time_CCP_MC = SharedArray{Float64}(MC)
theta_CCP_MC = SharedArray{Float64}(MC,length(theta_init))
time_true_CCP_MC = SharedArray{Float64}(MC)
theta_true_CCP_MC = SharedArray{Float64}(MC,length(theta_init))
time_FIML_MC = SharedArray{Float64}(MC)
theta_FIML_MC = SharedArray{Float64}(MC,length(theta_init))
@sync @distributed for i = 1:MC
data_set = generate_data(N, arguments, theta)
time_CCP_MC[i] = @elapsed theta_CCP_MC[i,:] =
ccp_estimation(theta, theta_init, N, data_set, arguments; rescale=rescale, x_method="quadratic", t_method="quadratic")
time_true_CCP_MC[i] = @elapsed theta_true_CCP_MC[i,:] =
ccp_estimation(theta, theta_init, N, data_set, arguments; rescale=rescale, x_method="true_ccp")
time_FIML_MC[i] = @elapsed opt_FIML_MC =
optimize(theta -> log_likelihood_FIML(theta, N, data_set, arguments), theta_init, BFGS(), autodiff = :forward);
theta_FIML_MC[i,:] = append!(opt_FIML_MC.minimizer[1:3], softmax(opt_FIML_MC.minimizer[4]))
end
return time_CCP_MC, theta_CCP_MC, time_true_CCP_MC, theta_true_CCP_MC, time_FIML_MC, theta_FIML_MC
end;
Random.seed!(42)
N_MC = 1_000; MC = Int64(N_MC/10);
time_CCP_MC, theta_CCP_MC, time_true_CCP_MC, theta_true_CCP_MC, time_FIML_MC, theta_FIML_MC = monte_carlo(N_MC,MC,arguments,theta_init,theta);
### Bootstrap
function bootstrap(N,Bootstrap,arguments,theta_init,theta,x_method_list,t_method_list)
# Data storage
data_t, data_x1, data_x2, data_s, data_d, data_x1_index, data_x2_index = generate_data(N, arguments, theta)
data_t = convert(SharedArray,data_t);
data_x1 = convert(SharedArray,data_x1);
data_x2 = convert(SharedArray,data_x2);
data_s = convert(SharedArray,data_s);
data_d = convert(SharedArray,data_d);
data_x1_index = convert(SharedArray,Matrix{Int64}(data_x1_index));
data_x2_index = convert(SharedArray,Matrix{Int64}(data_x2_index));
# Result storage
time_CCP_Bootstrap = SharedArray{Float64}(Bootstrap,length(x_method_list),length(t_method_list))
theta_CCP_Bootstrap = SharedArray{Float64}(Bootstrap,length(x_method_list),length(t_method_list),length(theta_init))
time_true_CCP_Bootstrap = SharedArray{Float64}(Bootstrap)
theta_true_CCP_Bootstrap = SharedArray{Float64}(Bootstrap,length(theta_init))
@sync @distributed for i = 1:Bootstrap
sample = rand(1:N,N)
data_set = data_t[sample,:], data_x1[sample,:], data_x2[sample,:], data_s[sample,:], data_d[sample,:],
data_x1_index[sample,:], data_x2_index[sample,:]
for x in 1:length(x_method_list)
for t in 1:length(t_method_list)
time_CCP_Bootstrap[i,x,t] = @elapsed theta_CCP_Bootstrap[i,x,t,:] =
ccp_estimation(theta, theta_init, N, data_set, arguments;
rescale=rescale, x_method=x_method_list[x], t_method=t_method_list[t])
end
end
time_true_CCP_Bootstrap[i] = @elapsed theta_true_CCP_Bootstrap[i,:] =
ccp_estimation(theta, theta_init, N, data_set, arguments; rescale=rescale, x_method="true_ccp")
end
return time_CCP_Bootstrap, theta_CCP_Bootstrap, time_true_CCP_Bootstrap, theta_true_CCP_Bootstrap
end
Random.seed!(42)
N_Bootstrap = 2_000; Bootstrap = Int64(N_Bootstrap/10);
x_method_list = ["quadratic", "cubic"]
t_method_list = ["linear", "quadratic", "cubic"]
time_CCP_Bootstrap, theta_CCP_Bootstrap, time_true_CCP_Bootstrap, theta_true_CCP_Bootstrap =
bootstrap(N_Bootstrap,Bootstrap,arguments,theta_init,theta,x_method_list,t_method_list);
function dgp_result(theta)
print("DGP : ")
for i in 1:(length(theta)-1)
@printf(" b%1.0f ", i)
end
print(" time(min) \n")
print(" : ")
for i in 1:(length(theta)-1)
@printf(" %7.4f ", theta[i])
end
end;
function theta_time(theta,time)
theta_mean = mean(theta,dims=1)
theta_std = std(theta,dims=1)
time_mean = mean(time./60)
time_std = std(time./60)
print("theta_hat : ")
for i in 1:length(theta_mean)
@printf(" %7.4f ", theta_mean[i])
end
@printf(" %7.4f ", time_mean)
print("\n")
print("s_hat : ")
for i in 1:length(theta_std)
@printf(" (%7.4f) ", theta_std[i])
end
@printf(" (%7.4f) ", time_std)
print("\n")
end;
@printf("Monte Carlo Estimation : FIML vs CCP, N = %i, MC = %i \n", N_MC, MC)
dgp_result(theta)
print("\n")
print("-------------------------------------------------------------------\n")
print("CCP hat : \n")
theta_time(theta_CCP_MC, time_CCP_MC)
print("-------------------------------------------------------------------\n")
print("CCP true : \n")
theta_time(theta_true_CCP_MC, time_true_CCP_MC)
print("-------------------------------------------------------------------\n")
print("FIML : \n")
theta_time(theta_FIML_MC, time_FIML_MC)
Monte Carlo Estimation : FIML vs CCP, N = 5000, MC = 100
DGP       :     b1        b2        b3        b4     time(min)
          :  2.0000   -0.1500    1.0000    0.9000
-------------------------------------------------------------------
CCP hat :
theta_hat :  1.9850   -0.1368    0.9471    0.9304    0.1919
s_hat     : ( 0.1321) ( 0.0136) ( 0.0905) ( 0.0719) ( 0.4038)
-------------------------------------------------------------------
CCP true :
theta_hat :  2.0066   -0.1499    0.9993    0.9027    0.3589
s_hat     : ( 0.0657) ( 0.0092) ( 0.0495) ( 0.0521) ( 0.0614)
-------------------------------------------------------------------
FIML :
theta_hat :  2.0130   -0.1501    0.9945    0.9003   26.5429
s_hat     : ( 0.1022) ( 0.0059) ( 0.0726) ( 0.0258) ( 2.9381)
@printf("Bootstrap Estimation for CCP, N = %i, Bootstrap = %i \n", N_Bootstrap, Bootstrap)
dgp_result(theta)
for x in 1:length(x_method_list)
for t in 1:length(t_method_list)
print("\n")
print("-------------------------------------------------------------------\n")
print("X: ", x_method_list[x], " T: ", t_method_list[t], "\n")
theta_time(theta_CCP_Bootstrap[:,x,t,:], time_CCP_Bootstrap[:,x,t])
end
end
print("-------------------------------------------------------------------\n")
print("True CCP \n")
theta_time(theta_true_CCP_Bootstrap, time_true_CCP_Bootstrap)
Bootstrap Estimation for CCP, N = 2000, Bootstrap = 100
DGP       :     b1        b2        b3        b4     time(min)
          :  2.0000   -0.1500    1.0000    0.9000
-------------------------------------------------------------------
X: quadratic T: linear
theta_hat :  1.8893   -0.1310    1.0183    0.9672    0.0485
s_hat     : ( 0.0837) ( 0.0084) ( 0.0583) ( 0.0475) ( 0.0067)
-------------------------------------------------------------------
X: quadratic T: quadratic
theta_hat :  1.8983   -0.1476    0.9769    0.8569    0.0811
s_hat     : ( 0.0788) ( 0.0131) ( 0.0592) ( 0.0804) ( 0.0089)
-------------------------------------------------------------------
X: quadratic T: cubic
theta_hat :  1.9102   -0.1484    0.9647    0.8525    0.1303
s_hat     : ( 0.0831) ( 0.0127) ( 0.0594) ( 0.0773) ( 0.0127)
-------------------------------------------------------------------
X: cubic T: linear
theta_hat :  1.8692   -0.1549    1.0897    0.8588    0.1024
s_hat     : ( 0.0996) ( 0.0124) ( 0.0797) ( 0.0679) ( 0.0121)
-------------------------------------------------------------------
X: cubic T: quadratic
theta_hat :  1.8735   -0.1662    1.0450    0.7734    0.1747
s_hat     : ( 0.0963) ( 0.0144) ( 0.0837) ( 0.0873) ( 0.0174)
-------------------------------------------------------------------
X: cubic T: cubic
theta_hat :  1.8759   -0.1670    1.0358    0.7657    0.3258
s_hat     : ( 0.0987) ( 0.0132) ( 0.0827) ( 0.0808) ( 0.0323)
-------------------------------------------------------------------
True CCP
theta_hat :  1.9985   -0.1548    1.0132    0.8935    0.1875
s_hat     : ( 0.0510) ( 0.0070) ( 0.0377) ( 0.0390) ( 0.0174)