#!/usr/bin/env python
from __future__ import absolute_import, division, print_function, unicode_literals
from math import log
from numpy.random import exponential
from random import random
import sys

def main(input_filepath):
	real_data = read_data(input_filepath)
	real_results = analyze_data(real_data)
	real_lrt, real_null_mles, real_alt_mles = real_results
	num_sims = 100
	null_dist = simulate_null_dist_of_lrt(real_data, real_null_mles, num_sims)
	summarize_results(real_results, null_dist)

def analyze_data(data):
	with_summary_stats = calc_summaries(data)
	null_mle = calc_MLE_null(with_summary_stats)
	null_ln_like = calc_ln_likelihood(with_summary_stats, null_mle)
	alt_mle = calc_MLE_alt(with_summary_stats)
	alt_ln_like = calc_ln_likelihood(with_summary_stats, alt_mle)
	lrt = 2*(alt_ln_like - null_ln_like)
	return lrt, null_mle, alt_mle

def summarize_results(real_results, null_dist):
	''' Taken from http://phylo.bio.ku.edu/sites/default/files/logistic_regress_with_param_boot.py.txt'''
	real_lrt, real_null_mles, real_alt_mles = real_results
	null_dist.sort()
	num_sims = len(null_dist)
	five_percent_count = num_sims / 20.0
	p = 0.95
	print('Based on a simulated null distribution of for LRT:')
	index_cutoff = five_percent_count
	fmt = ' for P of about {:3.2f} the critical value is {:6.3f} (based on the {} out of {} values)'
	while p > 0.0:
		cutoff = null_dist[int(index_cutoff)]
		print(fmt.format(p, cutoff, index_cutoff, num_sims))
		index_cutoff += five_percent_count
		p -= 0.05
	p_value = len([i for i in null_dist if i > real_lrt])/float(num_sims)
	print('The approximation of the P-value is', p_value)

def read_data(fn):
	raise NotImplementedError('read_data not written')

def simulate_data_set(real_data, theta):
	'''Use real_data to get the size of the data. Use `theta`
	as the set of parameters to simulate under.
	'''
	waiting_times = []
	s_vals = {'H': 0, 'M': 0, 'L':0}
	#for ...:
	#	datum = exponential(mean)
	#	waiting_times.append(datum)
	#for ...:
	#	u = random()
	#	if u < prob_h:
	#		s_vals['H'] += 1
	#	elif u < prob_h + prob_m:
	#		s_vals['M'] += 1
	#	else:
	#		s_vals['M'] += 1
	sim_data = (waiting_times, s_vals)
	return sim_data

def simulate_null_dist_of_lrt(real_data, theta, num_sims):
	null_dist = []
	for i in xrange(num_sims):
		sim_data = simulate_data_set(real_data, theta)
		sim_summaries = analyze_data(sim_data)
		sim_lrt = sim_summaries[0]
		null_dist.append(sim_lrt)
	return null_dist

main(sys.argv[1])
