# Crossvalidate class
from __future__ import division
import util
import copy
import combiner
import numpy
import pdb
import cPickle
import os
import to_latex
from time import localtime
from rpy import r as rc

__author__ = "Brian Tomasik"
__date__ = "April/May 2009"


class Crossvalidate(object):
    """
    Perform crossvalidation using the Combiner class.
    """
    def __init__(self, verbosity=1, basedir="../../.."):
        self.verbosity = verbosity
        self.basedir = basedir
        self.cv_folds = self._read_cv_folds()

    def _progress(self, message):
        """
        Write info if high enough verbosity.
        """
        if self.verbosity > 0:
            util.info(message)

    def tune_param(self, values_to_try, param_type, kwargs, special_name=None,
                   tagtypes_to_run=["Pandora Genres", "Pandora Acoustic"]):
        """
        Runs crossvalidation for a number of different parameter values.

        Required params:
        - values_to_try: a list of the different parameter values at which to
          run (e.g., [100, 1000, 10000] for param_type=="mcmc_reps").
        - param_type: the name of a parameter that's accepted as an input
          argument to the Combiner class's constructor (e.g., "min_tag_count").
        - kwargs: a dictionary of parameter-value pairs that you want passed
          to the Combiner class's constructor in addition to the parameter
          currently being tuned. For instance, if you wanted to set
          "max_n_songs" at 50 while tuning "mcmc_reps", you would set this
          argument to {"max_n_songs": 50}. USUALLY, JUST SET THIS TO {}.

        Optional params:
        - special_name: If this isn't None, the output directory will have
          this string in place of param_type as an identifier. This parameter
          is useful when you want to differentiate a parameter sweep done with
          unusual kwargs values from one done with the default values.
        - tagtypes_to_run: Can be any list of items from this list:
          ["Pandora Genres", "Pandora Acoustic", "All", "Last.fm"]. The
          default is ["Pandora Genres", "Pandora Acoustic"] for the ISMIR '09
          paper runs.

        No return value. Rather, this function creates a new directory with a
        datestamp corresponding to the hour you ran this function (in EST).
        Lots of output files are generated and put there:
        - Files ending in ".pkl" are pickle files of all the results generated
          on that round of crossvalidation. Use these when you want to play
          around with the results further in Python rather than just viewing
          them dumped out.
        - Files ending in "_betas.tab" give readable results on the level of
          individual tags.
        - Files ending in "_formatted.txt" give dumps of the dictionaries in
          the corresponding pickle files, starting with the most salient
          results at the top.
        - In addition to per-param-setting files, there are "overall" files,
          which have the dictionary of individual-parameter-results
          dictionaries. Also, "overall.tex" has the same output as would be
          generated by running this command in the results directory:
              python ../to_latex.py -p "overall.pkl"
          This command generates roughly the right LaTeX code for inclusion as
          a table in a .tex document. Note that "overall.tex" does not itself
          compile into a LaTeX document, since it lacks the headings, etc.
        """
        # Transfer our parameter settings to the kwargs that will be used to
        # generate Combiner instances.
        kwargs["verbosity"] = self.verbosity
        kwargs["basedir"] = self.basedir
        datestamp = self._datestamp()
        if special_name is not None:
            # Name this a special directory, not just the current param_type.
dirname = "%s_tuning_%s" % (special_name, datestamp) else: dirname = "%s_tuning_%s" % (param_type, datestamp) self._make_dir(dirname) per_tagtype_results = dict() for tagtype in tagtypes_to_run: kwargs["tagtype"] = tagtype per_param_results = dict() for val in values_to_try: kwargs[param_type] = val param_settings = "%s_%s=%s" % (tagtype, param_type, val) cur_outfile_stem = "%s/%s" % (dirname, param_settings) self._progress("Doing CV for %s." % param_settings) per_param_results[val] = self.get_results(kwargs, outfile_stem=cur_outfile_stem) per_tagtype_results[tagtype] = per_param_results self._write_pickle("%s/overall" % dirname, per_tagtype_results) util.write_file("%s/overall.tex" % dirname, to_latex.LatexWriter().param_sweep_table(per_tagtype_results)) self._write_pickle("%s/ttests" % dirname, self._get_ttests_dict(per_tagtype_results)) def generate_readable_tab_files(self): """ Runs crossvalidation four times, with the regression model alternately set to All3&P, CB, CF, and WD. The desired files end with "_betas.tab". No parameters or return value. See the documentation for the tune_param function for an explanation of the directory and output files generated. """ self.tune_param(["CB and WD and CF and P", "CB", "CF", "WD"], "regmodel", {}) def try_all_regression_models(self): """ Runs crossvalidation on all the models, including subsets of the sources and the Random model. No parameters or return value. See the documentation for the tune_param function for an explanation of the directory and output files generated. """ self.tune_param(["CB and WD and CF and P", "CB and WD and CF", "CB and WD and CF and P and I", "CB", "CF", "WD", "CB and WD", "CB and CF", "WD and CF", "Random"], "regmodel", {}) def try_all_regression_types(self): """ Runs crossvalidation on all the models, including subsets of the sources and the Random model. No parameters or return value. See the documentation for the tune_param function for an explanation of the directory and output files generated. """ self.tune_param(combiner.ALL_REGRESSIONS, "regtype", {}) def _datestamp(self): cur_time = localtime() return "%d-%d-%d-%d" % (cur_time[0], cur_time[1], cur_time[2], cur_time[3]) def _make_dir(self, dirname): try: os.mkdir(dirname) except OSError, e: if e[0] == 17: # directory exists pass else: raise e def _write_pickle(self, filename_stem, results, also_save_txt=True): self._progress("Pickling results dictionary.") file = open("%s.pkl" % filename_stem, 'wb') cPickle.dump(results, file, cPickle.HIGHEST_PROTOCOL) file.close() if also_save_txt: util.write_file("%s_dict.txt" % filename_stem, str(results)) def get_results(self, kwargs, write_to_file=True, write_pickle=True, outfile_stem="results", write_normalized_betas=True): """ Runs 5 folds of crossvalidation and saves the results. Required params: - kwargs: a dictionary of parameter-value pairs that you want passed to the Combiner class's constructor. For instance, if you wanted to set "verbosity" to 0, you would set this argument to {"max_n_songs": 50}. USUALLY, JUST SET THIS TO {}. Optional params: - write_to_file: Should we write the results to a file called "%s_formatted.txt" % outfile_stem ? - write_pickle: Should we save the results in a pickle called "%s.pkl" % outfile_stem ? - outfile_stem: The beginning of the output file names. - write_normalized_betas: Should we write a human-readable file with per-tag results, including normalized beta values? If so, it will be stored in a file called "%s_betas.tab" % outfile_stem . 
""" self._progress("Getting CV results.") per_fold_results = dict() per_fold_beta = dict() per_fold_best_worst_songs = dict() end_results = dict() n_folds = len(self.cv_folds) # 5 folds combiner_kwargs = copy.deepcopy(kwargs) # The next line mutates the kwargs, so avoid doing that to the original. combiner_kwargs = self._prune_tags(combiner_kwargs) for fold_no in range(n_folds): train_index = fold_no % n_folds test_index = (train_index + 1) % n_folds self._progress("Doing CV with train_index=%d, test_index=%d." % (train_index, test_index)) c = combiner.Combiner(fold_no=fold_no, **combiner_kwargs) per_fold_results[fold_no] = c.evaluate_regression(training_songs=self.cv_folds[train_index], testing_songs=self.cv_folds[test_index]) per_fold_beta[fold_no] = c.beta per_fold_best_worst_songs[fold_no] = c.best_worst_songs (end_results["results_each_fold"], end_results["per_tag_avg"], end_results["overall_avg_list"], end_results["overall_avg"]) = self._combine_fold_vals(per_fold_results, input_is_beta=False) (end_results["beta_each_fold"], end_results["per_tag_avg_beta"], end_results["overall_avg_beta_list"], end_results["overall_avg_beta"]) = self._combine_fold_vals(per_fold_beta, input_is_beta=True) end_results["best_worst_songs"] = self._combine_folds_best_worst_songs(per_fold_best_worst_songs) # Build a string-buffer-like list of the text to output using "\n".join(output_list) output_list = [] output_list.append("-----") output_list.append("regtype = %s" % c.regtype) output_list.append("tagtype = %s" % c.tagtype) output_list.append("regmodel = %s" % c.regmodel) output_list.append("min tag count = %d" % c.min_tag_count) output_list.append("min feature count %d" % c.min_feature_count) output_list.append("\nOverall average stats:") output_list.append("--------------") output_list.append("%s\n" % str(end_results["overall_avg"])) output_list.append("Overall average beta:") output_list.append("--------------") output_list.append("%s\n" % str(end_results["overall_avg_beta"])) output_list.append("Best/worst songs:") output_list.append("--------------") output_list.append("%s\n" % str(end_results["best_worst_songs"])) output_list.append("Average stats:") output_list.append("--------------") output_list.append("%s\n" % str(end_results["per_tag_avg"])) output_list.append("Average beta:") output_list.append("--------------") output_list.append("%s\n" % str(end_results["per_tag_avg_beta"])) output_list.append("Per-fold stats:") output_list.append("--------------") output_list.append("%s\n" % str(end_results["results_each_fold"])) output_list.append("Per-fold beta:") output_list.append("--------------") output_list.append("%s\n" % str(end_results["beta_each_fold"])) output_list.append("\n\n") # Get the string, write it, and possibly print it. output_string = "\n".join(output_list) if write_to_file: util.write_file("%s_formatted.txt" % outfile_stem, output_string) if write_pickle: self._write_pickle(outfile_stem, end_results, also_save_txt=False) if write_normalized_betas: util.write_file("%s_betas.tab" % outfile_stem, self._readable_tab_file(end_results["per_tag_avg"], end_results["per_tag_avg_beta"], end_results["best_worst_songs"])) self._progress(output_string[:1000]) return end_results def _readable_tab_file(self, per_tag_avg, per_tag_avg_beta, per_tag_best_worst_songs): # Store a list of lists of the form [tag, autotag_beta, propagated_beta, web_beta, auc, map, rprec, 10prec, topsong1, song1ingroundtruth?, topsong2, ...], if our current run has all of those available. If not, just omit the betas. 
separator = "\t" example_tag = per_tag_avg_beta.keys()[0] sources = per_tag_avg_beta[example_tag].keys() have_all_three_betas = "CB" in sources and "CF" in sources and "WD" in sources # Make header row. header_list = ["Tag"] if have_all_three_betas: header_list.extend(["CB-BetaFrac", "CF-BetaFrac", "WD-BetaFrac"]) header_list.extend(["AUC", "MAP", "10-Prec", "R-Prec"]) for i in range(len(per_tag_best_worst_songs[example_tag]["Best Song"])): header_list.append("Top5Songs#%d" % i) header_list.append("#%dCorrect?" % i) for i in range(len(per_tag_best_worst_songs[example_tag]["Worst Song"])): header_list.append("Bottom5Songs#%d" % i) header_list.append("#%dCorrect?" % i) # Start the output list, which will become the output string with a "\n".join(output_list) output_list = [separator.join(header_list)] # Get the list of lists to add and sort by CB beta fraction. tag_lists = [] for (tag, source_dict) in per_tag_avg_beta.iteritems(): list_for_cur_line = [tag] if have_all_three_betas: try: autotag_beta = source_dict["CB"]["beta"] propagated_beta = source_dict["CF"]["beta"] web_beta = source_dict["WD"]["beta"] sum_of_betas = propagated_beta + autotag_beta + web_beta list_for_cur_line.extend([autotag_beta / sum_of_betas, propagated_beta / sum_of_betas, web_beta / sum_of_betas]) except: list_for_cur_line.extend(["(missing)" for counter in range(3)]) # Now add results info. results_dict = per_tag_avg[tag] list_for_cur_line.extend([results_dict["AUC"], results_dict["MAP"], results_dict["10-Prec"], results_dict["R-Prec"]]) # Now add best/worst songs. list_for_cur_line.extend(self._convert_best_or_worst_songs_list(per_tag_best_worst_songs[tag]["Best Song"])) list_for_cur_line.extend(self._convert_best_or_worst_songs_list(per_tag_best_worst_songs[tag]["Worst Song"])) # Done with this line. tag_lists.append(list_for_cur_line) # Now, sort by CB's beta fraction in decreasing order. tag_lists.sort(key=lambda tuple: tuple[1], reverse=True) for tag_list in tag_lists: output_list.append(separator.join(map(lambda x: str(x), tag_list))) return "\n".join(output_list) def _convert_best_or_worst_songs_list(self, best_or_worst_songs_list): """ Prepare the contents of best_song_list or worst_song_list for output. """ N_BEST_OR_WORST_SONGS = 5 out_list = [] for i in range(N_BEST_OR_WORST_SONGS): try: (artist_and_song, in_ground_truth) = best_or_worst_songs_list[i] out_list.append(artist_and_song) out_list.append(in_ground_truth) except IndexError: out_list.append("(missing)") out_list.append("(missing)") return out_list def _prune_tags(self, combiner_kwargs): # Figure out which tags to use. self._progress("Pruning CV tags.") nonrare_tags = None # IMPORTANT: We need to make sure we use the same set of tags for all regression models, both those that use fewer and those that use more features. So we need to act as though we're going to use all the features here, so that we get the most restrictive tag set. Therefore, change combiner_kwargs. temp_kwargs_for_pruning = copy.deepcopy(combiner_kwargs) temp_kwargs_for_pruning["regmodel"] = "CB and WD and CF and P and I" for (fold_no, song_set) in self.cv_folds.items(): c = combiner.Combiner(fold_no=fold_no, **temp_kwargs_for_pruning) cur_fold_nonrare_tags = c.nonrare_tags(song_set) # Update using that info. 
            if nonrare_tags is None:
                nonrare_tags = cur_fold_nonrare_tags
            else:
                nonrare_tags = nonrare_tags.intersection(cur_fold_nonrare_tags)
        try:
            orig_value_only_these_tags = temp_kwargs_for_pruning["only_these_tags"]
            assert util.is_subset(nonrare_tags, orig_value_only_these_tags), \
                "Nonrare tags shouldn't include any more than you started with...."
        except KeyError:
            pass
        combiner_kwargs["only_these_tags"] = nonrare_tags
        return combiner_kwargs

    def _combine_fold_vals(self, per_fold_results, input_is_beta):
        results_each_fold = dict()
        for fold_results in per_fold_results.values():
            for (tag, val_dict) in fold_results.iteritems():
                cur_tag_dict = results_each_fold.get(tag, {})
                for (val_id, val) in val_dict.iteritems():
                    if not input_is_beta:
                        cur_tag_dict.setdefault(val_id, []).append(val)
                    else:
                        # beta dictionaries have an extra level
                        cur_source_dict = cur_tag_dict.get(val_id, {})
                        for (stat, number) in val.iteritems():
                            cur_source_dict.setdefault(stat, []).append(number)
                        cur_tag_dict[val_id] = cur_source_dict
                results_each_fold[tag] = cur_tag_dict
        per_tag_avg = self._per_tag_avg(results_each_fold, input_is_beta)
        (overall_avg_list, overall_avg) = self._overall_avg(per_tag_avg,
                                                            input_is_beta)
        return (results_each_fold, per_tag_avg, overall_avg_list, overall_avg)

    def _per_tag_avg(self, results_each_fold, input_is_beta):
        N_FOLDS = 5
        per_tag_avg = dict()
        for (tag, val_dict) in results_each_fold.iteritems():
            for (val_id, val_list) in val_dict.iteritems():
                cur_tag_dict = per_tag_avg.get(tag, {})
                if not input_is_beta:
                    cur_tag_dict[val_id] = util.mean_if_numeric(val_list)
                else:
                    # beta dictionaries have an extra level
                    cur_source_dict = dict()
                    for (stat, number_list) in val_list.iteritems():
                        cur_source_dict[stat] = util.mean_if_numeric(number_list)
                    cur_tag_dict[val_id] = cur_source_dict
                per_tag_avg[tag] = cur_tag_dict
        return per_tag_avg

    def _overall_avg(self, per_tag_avg, input_is_beta):
        """
        NOTE: The std errors returned here are over each tag, but don't count
        the 5 folds of CV for each tag. So actual std errors are those divided
        by sqrt(5).
        """
        overall_avg_list = dict()
        for val_dict in per_tag_avg.values():
            for (val_id, avg_val) in val_dict.iteritems():
                if not input_is_beta:
                    overall_avg_list.setdefault(val_id, []).append(avg_val)
                else:
                    dict_of_avg_lists = overall_avg_list.get(val_id, {})
                    for (stat, number) in avg_val.iteritems():
                        dict_of_avg_lists.setdefault(stat, []).append(number)
                    overall_avg_list[val_id] = dict_of_avg_lists
        overall_avg = dict()
        for (val_id, avg_list) in overall_avg_list.iteritems():
            if not input_is_beta:
                overall_avg[val_id] = util.summary_stats(avg_list)
            else:
                dict_of_averages = dict()
                for (stat, number_list) in avg_list.iteritems():
                    dict_of_averages[stat] = util.summary_stats(number_list)
                overall_avg[val_id] = dict_of_averages
        return (overall_avg_list, overall_avg)

    def _combine_folds_best_worst_songs(self, orig_dict):
        combined_dict = dict()
        for (fold_no, tag_dict) in orig_dict.iteritems():
            for (tag, info_dict) in tag_dict.iteritems():
                combined_dict.setdefault(tag, {})
                for (key, val) in info_dict.iteritems():
                    combined_dict[tag].setdefault(key, []).append(val)
        return combined_dict

    def _read_cv_folds(self):
        """
        Returns a dict: fold # -> set of songs for that fold.
        """
        self._progress("Reading CV folds.")
        cv_folds = dict()
        N_FOLDS = 5
        for i_fold in range(N_FOLDS):
            cur_file = open("%s/lists/crossFold/part%i.tab"
                            % (self.basedir, i_fold + 1), "r")
            cv_folds[i_fold] = self._cv_get_songs(cur_file)
            cur_file.close()
        return cv_folds

    def _cv_get_songs(self, file):
        """
        Read the song ids, one per line.
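
        Each line is assumed to begin with an integer song id; any further
        tab-separated fields on the line are ignored.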
""" songs = set() for line in file: line_list = line.rstrip().split("\t") songs.add(int(line_list[0])) return songs def _get_ttests_dict(self, per_tagtype_results): per_tagtype = dict() for (tagtype, per_param_results) in per_tagtype_results.iteritems(): per_tagtype.setdefault(tagtype, {}) for (val1, val1_results) in per_param_results.items(): for (val2, val2_results) in per_param_results.items(): cur_pair = str(sorted([val1, val2])) if val1 != val2 and cur_pair not in per_tagtype[tagtype]: per_tagtype[tagtype][cur_pair] = self._individual_ttest_dict(val1_results["per_tag_avg"], val2_results["per_tag_avg"]) return per_tagtype def _individual_ttest_dict(self, per_tag_dict1, per_tag_dict2): STATS_TO_COMPARE = ["AUC", "MAP", "R-Prec", "10-Prec"] cur_dict = dict() for stat in STATS_TO_COMPARE: differences = [] for (tag, dict1_stats) in per_tag_dict1.iteritems(): try: dict2_stats = per_tag_dict2[tag] except KeyError: continue try: val1 = dict1_stats[stat] val2 = dict2_stats[stat] differences.append(val1-val2) except KeyError: continue if len(differences) > 0: ttest_dict = rc.t_test(differences) del ttest_dict["data.name"] # That's a long and ugly value. cur_dict[stat] = ttest_dict return cur_dict def main(): cv = Crossvalidate() cv.generate_readable_tab_files() #cv.try_all_regression_models() #cv.try_all_regression_types() if __name__ == "__main__": main()