#!/usr/bin/env python

#
# =============================================================================
#
#                                   Preamble
#
# =============================================================================
#

from optparse import OptionParser
try:
    import sqlite3
except ImportError:
    # pre 2.5.x
    from pysqlite2 import dbapi2 as sqlite3
import sys
import os
import bisect
import math

from glue.ligolw import lsctables
from glue.ligolw import dbtables
from glue.ligolw import ligolw
from glue.ligolw import table
from glue.ligolw import utils
from glue.ligolw.utils import print_tables
from glue.ligolw.utils import process
from glue.ligolw.utils.ligolw_sqlite import extract
from glue import git_version

from pylal import ligolw_sqlutils as sqlutils
from pylal import printutils

__prog__ = "ligolw_cbc_printlc"
__author__ = "Collin Capano <cdcapano@physics.syr.edu>"

description = \
"Prints the largest triggers in either foreground or background along with " + \
"relevant statistics and parameters to a specified file or to stdout."

# =============================================================================
#
#                                   Set Options
#
# =============================================================================


def parse_command_line():
    """
    Build the option parser, parse the command line and check the options
    for consistency.

    Returns a tuple (options, argv): options is the optparse Values instance
    and argv is the raw argument list sys.argv[1:] (not the left-over
    positional arguments).

    Raises ValueError if a required option is missing, has an unrecognized
    value, or is inconsistent with another option.
    """
    parser = OptionParser(
        version = git_version.verbose_msg,
        usage   = "%prog [options]",
        description = description
        )

    # following are related to file input and output naming
    parser.add_option( "-i", "--input", action = "store", type = "string",
        default = None,
        help =
            "Input database to read. Can only input one at a time."
            )
    parser.add_option("-o", "--output", action = "store", type = "string",
        default = None,
        help =
            "Save summary table to file. If no output specified, result " +
            "will be printed to stdout."
            )
    parser.add_option( "-t", "--tmp-space", action = "store", type = "string",
        default = None, metavar = "path",
        help =
            "Location of local disk on which to do work. " +
            "This is used to enhance performance in a networked " +
            "environment and is needed if extracting an xml file."
            )
    parser.add_option("-f", "--output-format", action = "store", type = "string",
        default = "xml", metavar = "wiki, html, OR xml",
        help =
            "Format of output summary table. Choices are 'wiki', 'html', or 'xml'. " +
            "Default is xml."
            )
    parser.add_option("-x", "--extract-to-xml", action = "store", type = "string",
        default = None, metavar = "filename",
        help =
            "Will extract a full xml file from the database containing only the " +
            "triggers that are printed. Requires --tmp-space. "
            )
    parser.add_option("-d", "--extract-to-database", action = "store", type = "string",
        default = None, metavar = "filename",
        help =
            "Will extract a SQLite database from the input database containing only the " +
            "triggers that are printed. Requires --tmp-space. "
            )
    parser.add_option( "-v", "--verbose", action = "store_true", default = False,
        help =
            "Print the SQLite query that is used to stderr."
            )
    # following are generic inspiral_sql options
    parser.add_option( "", "--param-name", metavar = "PARAMETER",
        action = "store", default = None,
        help =
            "Can be any parameter in the coinc table. " +
            "Specifying this and param-ranges will only select " +
            "triggers that fall within the parameter ranges. "
            )
    parser.add_option("", "--sngl-table", action="store", type="string",
        default=None,
        help =
          "Set to table containing single event information. Required if " +
          "--get-sngl-info is also specified. Ex. sngl_inspiral, sngl_ringdown."
        )
    parser.add_option("", "--coinc-table", action="store", type="string",
        default=None,
        help =
          "Required. Can be any table with a coinc_event_id and a time column. " +
          " Ex. coinc_inspiral, coinc_ringdown."
        )
    parser.add_option("", "--time-column", action="store", type="string",
        default=None,
        help =
          "Required. Should be set to end_time for inspiral searches or start_time for ringdown searches."
        )
    parser.add_option( "", "--param-ranges", action = "store", default = None,
        metavar = " [ LOW1, HIGH1 ); ( LOW2, HIGH2]; !VAL3; etc.",
        help =
            "Requires --param-name. Specify the parameter ranges " +
            "to select triggers in. A '(' or ')' implies an open " +
            "boundary, a '[' or ']' a closed boundary. To specify " +
            "multiple ranges, separate each range by a ';'. To " +
            "specify a single value, just type that value with no " +
            "parentheses or brackets. To specify not equal to a single " +
            "value, put a '!' before the value. If " +
            "multiple ranges are specified, the triggers picked for " +
            "ranking will come from the union of the ranges."
            )
    parser.add_option( "", "--exclude-coincs", action = "store", type = "string", default = None,
        metavar = " [COINC_INSTRUMENTS1 + COINC_INSTRUMENTS2 in INSTRUMENTS_ON1];"
            "[ALL in INSTRUMENTS_ON2]; etc.",
        help =
            "Exclude coincident types in specified detector times, " +
            "e.g., '[H2,L1 in H1,H2,L1]'. Some rules: " +
                "* Coinc-types and detector time must be separated by " +
                "an ' in '. When specifying a coinc_type or detector " +
                "time, detectors and/or ifos must be separated by " +
                "commas, e.g. 'H1,L1' not 'H1L1'. " +
                "* To specify multiple coinc-types in one type of time, " +
                "separate each coinc-type by a '+', e.g., " +
                "'[H1,H2 + H2,L1 in H1,H2,L1]'. " +
                "* To exclude all coincs in a specified detector time " +
                "or specific coinc-type in all times, use 'ALL'. E.g., " +
                "to exclude all H1,H2 triggers, use '[H1,H2 in ALL]' " +
                "or to exclude all H2,L1 time use '[ALL in H2,L1]'. " +
                "* To specify multiple exclusions, separate each " +
                "bracket by a ';'. " +
                "* Order of the instruments nor case of the letters " +
                "matter. So if your pinky is broken and you're " +
                "dyslexic you can type '[h2,h1 in all]' without a " +
                "problem."
            )
    parser.add_option( "", "--include-only-coincs", action = "store", type = "string", default = None,
        metavar = " [COINC_INSTRUMENTS1 + COINC_INSTRUMENTS2 in INSTRUMENTS_ON1];" +
            "[ALL in INSTRUMENTS_ON2]; etc.",
        help =
            "Opposite of --exclude-coincs: only rank the specified coinc types. "
            )
    # following are options specific to this program
    parser.add_option("-c", "--columns", action = "store", type = "string", default = None,
        help =
            "If output-format is set to html or wiki, will only print the given columns. " +
            "To specify multiple columns, separate column names with a comma. " +
            "Default is to print the columns listed in pylal/printutils.get_columns_to_print."
            )
    parser.add_option("", "--datatype", action = "store", type = "string", default = None,
        metavar = "slide, all_data, playground, exclude_play, OR simulation",
        help =
            "Required. Specify what datatype to print triggers from. "
            )
    parser.add_option( "", "--ranking-stat", action = "store", type = "string", default = None,
        help =
            "Required. Statistic to rank by (can be any column " +
            "in the coinc table)."
            )
    parser.add_option( "", "--rank-by", action = "store", type = "string", default = None,
        metavar = "MAX or MIN",
        help =
            "Required. Options are MAX or MIN. " +
            "This specifies whether to rank triggers by maximum or " +
            "minimum stat value."
            )
    parser.add_option( "", "--limit", action = "store", type = "int", default = 10,
        help =
            "Specify how many triggers to print. Default is 10. "
            )
    parser.add_option( "", "--convert-durations", action = "store", type = "string", default = "s",
        metavar = "s, min, hr, days, OR yr",
        help =
            "Convert the duration from seconds to a different unit of time. Options are: " +
            "s (seconds), min (minutes), hr (hours), days (days), or yr (years). " +
            "(Setting to s is the equivalent of a no-op.)"
            )
    parser.add_option( "", "--sim-tag", action = "store", type = "string", default = 'ALLINJ',
        help =
            "If the --datatype is simulation, limits the selection to the specified " +
            "simulation name, e.g., 'BNSINJ'. If not specified, will group all injections together."
            )
    parser.add_option("-H", "--daily-ihope-pages-location", action = "store",
        default = "https://ldas-jobs.ligo.caltech.edu/~cbc/ihope_daily",
        help =
            "Web address of the daily ihope pages. " +
            "Default is https://ldas-jobs.ligo.caltech.edu/~cbc/ihope_daily"
            )
    parser.add_option("-S", "--get-sngl-info", action = "store_true", default = False,
        help =
            "Retrieve single event information along with coincident info. All sngl columns " +
            "will be prefixed with 'sngl_'. A single gps_time_utc column will also be included. "
            )

    # positional arguments are not used by this program
    options, _ = parser.parse_args()

    # check for required options and for self-consistency
    if not options.input:
        raise ValueError("No input specified.")
    if (options.extract_to_xml or options.extract_to_database) and not options.tmp_space:
        raise ValueError("extract-to-xml and extract-to-database require tmp-space")
    if not options.ranking_stat:
        raise ValueError("No ranking stat specified.")
    # rank_by defaults to None, so test for that before calling string methods
    if options.rank_by is None or options.rank_by.strip().upper() not in ('MIN', 'MAX'):
        raise ValueError("--rank-by must be specified and set to either MIN or MAX.")
    if options.param_name and not options.param_ranges:
        raise ValueError("--param-name requires --param-ranges")
    if not options.datatype:
        raise ValueError("--datatype must be specified.")
    if options.datatype.strip().lower() not in lsctables.ExperimentSummaryTable.datatypes:
        raise ValueError("Unrecognized datatype %s. See help for options." % options.datatype)
    if options.convert_durations.strip().lower() not in ('s', 'min', 'hr', 'days', 'yr'):
        raise ValueError("--convert-durations must be either s, min, hr, days, or yr")
    # options documented as required in the help text above
    if not options.coinc_table:
        raise ValueError("--coinc-table must be specified.")
    if not options.time_column:
        raise ValueError("--time-column must be specified.")
    if options.get_sngl_info and not options.sngl_table:
        raise ValueError("--get-sngl-info requires --sngl-table")

    # the raw argument list is returned alongside the parsed options
    return options, sys.argv[1:]


# =============================================================================
#
#                                     Main
#
# =============================================================================

# Parse the command line; note this runs at module level (no __main__ guard)
opts, args = parse_command_line()

# get input database filename
filename = opts.input
if not os.path.isfile( filename ):
    raise ValueError("The input file, %s, cannot be found." % filename)

# Setup working databases and connections
if opts.verbose and opts.tmp_space: 
    print >> sys.stderr, "Setting up temp. database..."
working_filename = dbtables.get_connection_filename( 
    filename, tmp_path = opts.tmp_space, verbose = opts.verbose )
connection = sqlite3.connect( working_filename )
if opts.tmp_space:
    dbtables.set_temp_store_directory(connection, opts.tmp_space, verbose = opts.verbose)

# --sngl-table is optional (defaults to None), so only validate it when it
# was given. NOTE(review): validate_option presumably expects a string --
# confirm against pylal.ligolw_sqlutils.
if opts.sngl_table is not None:
    sngl_table = sqlutils.validate_option( opts.sngl_table )
else:
    sngl_table = None
coinc_table = sqlutils.validate_option( opts.coinc_table )
time_column = sqlutils.validate_option( opts.time_column )

# Get ranking stat and prepend the coinc table name (table.column)
if " " in opts.ranking_stat.strip():
    raise ValueError("ranking-stat must not contain spaces")
ranking_stat =  '.'.join([ coinc_table, opts.ranking_stat.strip() ])

# Get rank_by: MIN ranks in ascending order, anything else (MAX) descending
if opts.rank_by.strip().upper() == "MIN":
    rank_by = 'ASC'
else:
    rank_by = 'DESC'

#
#
#   Set filter
#

# Set datatype
datatype = opts.datatype.strip().lower()

in_this_filter = ''.join([ 'experiment_summary.datatype == "', datatype, '"' ])

# Get param and param-ranges if specified
if opts.param_name:
    param_filters = sqlutils.parse_param_ranges( coinc_table, opts.param_name, 
        opts.param_ranges, verbose = opts.verbose ).get_param_filters()
    # a trigger is selected if it falls within any one of the ranges (the
    # union of the ranges), so the individual range filters are OR'ed together
    param_filters = '\n\t\tOR '.join( param_filters )
    in_this_filter = ''.join([ in_this_filter, '\n\tAND (\n\t\t', param_filters, '\n\t)' ])

# Get exclude_coincs list if specified
if opts.exclude_coincs:
    exclude_coinc_filters = sqlutils.parse_coinc_options( opts.exclude_coincs, 
        verbose = opts.verbose ).get_coinc_filters(coinc_instruments_table = coinc_table)
    # concatenate exclude_coinc_filters
    exclude_coinc_filters = '\n\t\tOR '.join( exclude_coinc_filters )
    # add to in_this_filter
    in_this_filter = ''.join([ in_this_filter, '\n\tAND NOT (\n\t\t', exclude_coinc_filters, '\n\t)' ]) 

# Get include_only_coincs list if specified
if opts.include_only_coincs:
    include_coinc_filters = sqlutils.parse_coinc_options( opts.include_only_coincs, 
        verbose = opts.verbose ).get_coinc_filters(coinc_instruments_table = coinc_table)
    # concatenate include_coinc_filters
    include_coinc_filters = '\n\t\tOR '.join( include_coinc_filters )
    # add to in_this_filter
    in_this_filter = ''.join([ in_this_filter, '\n\tAND (\n\t\t', include_coinc_filters, '\n\t)' ])

# if datatype is simulation and sim-tag specified add the sim-tag to the filter
if datatype == 'simulation' and opts.sim_tag != 'ALLINJ':
    # create a map between sim_proc_id and sim-tag
    sim_map = sqlutils.sim_tag_proc_id_mapper( connection )
    # check that opts.sim_tag is in the map
    opts.sim_tag = sqlutils.validate_option(opts.sim_tag, lower = False).upper()
    if opts.sim_tag not in sim_map.tag_id_map.keys():
        raise ValueError("sim-tag %s not found in database" % opts.sim_tag)
    # create the filter; get_sim_tag is made callable from within SQL
    connection.create_function( 'get_sim_tag', 1, sim_map.get_sim_tag )
    sim_filter = ''.join(['get_sim_tag(experiment_summary.sim_proc_id) == "', opts.sim_tag.strip().upper(), '"' ])
    # add to in_this_filter
    in_this_filter = ''.join([ in_this_filter, '\n\tAND ', sim_filter ])

# join the needed tables to in_this_filter
where_in_this_filter = ''.join([ sqlutils.join_experiment_tables_to_coinc_table(coinc_table), "\n    WHERE\n\t", in_this_filter ])

#
#   Set needed functions
#

# The unit that durations are converted to is fixed by --convert-durations.
# The appropriate conversion flag cannot be set from within the SQL
# statement, so sqlutils.convert_duration is wrapped in a one-argument
# function and that wrapper is registered with SQLite.
def convert_duration( duration ):
    """
    Convert duration to the unit requested with --convert-durations by
    handing it to sqlutils.convert_duration.
    """
    target_unit = opts.convert_durations.strip().lower()
    return sqlutils.convert_duration( duration, target_unit )

connection.create_function( 'convert_duration', 1, convert_duration )

# Set up the ranking. SQLite does not support the RANK and DENSE_RANK SQL
# functions, so the statistics used for ranking are collected first and a
# 'rank' function is registered with the connection; the main query below
# can then compute ranks on the fly.
if opts.verbose:
    sys.stderr.write("Getting statistics for ranking...\n")
ranker = sqlutils.rank_stats(coinc_table, ranking_stat, rank_by)
ranker.populate_stats_list(connection, opts.limit, filter = where_in_this_filter)
connection.create_function( 'rank', 1, ranker.get_rank )


#
#   Create and prepare the LoudestEventsTable to store summary information
#

# names of the extra rank and duration columns; the rank column is named
# after the ranking stat without its table prefix
rankname = 'rank__Px_using_' + ranking_stat.rsplit('.', 1)[-1] + '_xP_'
durname = ''.join([ datatype, u'_duration__Px_', opts.convert_durations.strip().lower(), '_xP_' ])

# Get list of column names from coinc table
coinc_table_cols = sqlutils.get_column_names_from_table( connection, coinc_table )

# the summary table has every coinc table column, followed by rank,
# duration, instruments on, mini_followup, omega_scan, elog and daily ihope
column_names = list(coinc_table_cols)
column_names += [
    rankname,
    durname,
    u'instruments_on',
    u'mini_followup',
    u'omega_scan',
    'ifos__Px_click_for_elog_xP_',
    'gps_time_utc__Px_click_for_daily_ihope_xP_',
    ]

#
# define the table
#
class LoudestEventsTable(table.Table):
    """
    Summary table holding one row per printed trigger. The valid columns
    are built from the module-level column_names list, plus two false alarm
    probability columns (fap and fap_1yr).
    """
    tableName = "loudest_events:table"
    validcolumns = {}
    for col_name in column_names:
        # the rank column is named 'rank__Px_using_<stat>_xP_' (see rankname),
        # which does not contain 'rank_using_'; compare against rankname so
        # that the rank is stored as an integer rather than falling through
        # to the lstring default. The old substring test is kept as well.
        if col_name == rankname or 'rank_using_' in col_name:
            validcolumns[col_name] = "int_4u"
        elif 'instruments_on' in col_name:
            validcolumns[col_name] = lsctables.ExperimentTable.validcolumns['instruments']
        elif '_duration_' in col_name:
            validcolumns[col_name] = "real_8"
        # columns taken from the coinc table keep the type declared there
        elif col_name in lsctables.TableByName[coinc_table].validcolumns.keys():
            validcolumns[col_name] = lsctables.TableByName[coinc_table].validcolumns[col_name]
        # if custom columns exist in the database, just set them to lstrings
        else:
            validcolumns[col_name] = "lstring"
    # add FAP columns
    validcolumns['fap'] = "real_8"
    validcolumns['fap_1yr'] = "real_8"

class LoudestEvents(object):
    """
    Row type of the LoudestEventsTable. The attributes a row can carry are
    restricted (via __slots__) to the columns of LoudestEventsTable.
    """
    __slots__ = LoudestEventsTable.validcolumns.keys()

    def get_gps_time(self):
        """
        Return the row's start_time if the table has a start_time column,
        otherwise its end_time. Raises AttributeError if the table has
        neither column.
        """
        if 'start_time' in self.__slots__:
            return self.start_time
        elif 'end_time' in self.__slots__:
            return self.end_time
        else:
            raise AttributeError("could not find a coinc table start_time or end_time")

    def get_pyvalue(self):
        """
        Return the row's value attribute converted to the Python type named
        by its type attribute ("lstring" if type is empty), or None if the
        value is None. Because of __slots__, this only works when the table
        has both a 'value' and a 'type' column.
        """
        # ligolwtypes is not imported at the top of this file; import it
        # here so that the lookup below does not raise a NameError
        from glue.ligolw import types as ligolwtypes
        if self.value is None:
            return None
        return ligolwtypes.ToPyType[self.type or "lstring"](self.value)

# rows of the loudest_events table are LoudestEvents instances
LoudestEventsTable.RowType = LoudestEvents

#
# create a document
#
lcdoc = ligolw.Document()
# the LIGO_LW element is the document's first child; the table is attached
# to it further down
ligolw_root = ligolw.LIGO_LW()
lcdoc.appendChild(ligolw_root)
# record this program and the options it was run with in the document
lcproc_id = process.register_to_xmldoc(lcdoc, __prog__, vars(opts), version = git_version.id)
# create the (still empty) summary table
lctable = lsctables.New(LoudestEventsTable)


#
#   Get the Data
#

# Select every coinc table column plus the rank, the instruments that were
# on and the converted duration, for the triggers that pass the filter and
# rank within the requested limit. The three extra values come last in each
# result row, in that order.
sqlquery = ''.join([ """
    SELECT
        """, coinc_table, """.*,
        rank(""", ranking_stat, """),
        experiment.instruments,
        convert_duration(experiment_summary.duration)
    FROM
        """, coinc_table, """
    """, where_in_this_filter, """
        AND rank(""", ranking_stat, """) <= """, str(opts.limit), """
    ORDER BY
        """, ranking_stat, ' ',  rank_by ])

if opts.verbose:
    print >> sys.stderr, "Getting coincs..."
    print >> sys.stderr, "SQLite query used is:"
    print >> sys.stderr, sqlquery

# values that are the same for every row
duration_unit = opts.convert_durations.strip().lower()
has_time_column = time_column in coinc_table_cols
has_ifos_column = 'ifos' in coinc_table_cols

for values in connection.cursor().execute( sqlquery ):
    lcrow = LoudestEvents()
    # the leading values are the coinc table columns, in table order
    for ii, column in enumerate(coinc_table_cols):
        setattr(lcrow, column, values[ii])
    # set rank, on_instruments, duration
    setattr(lcrow, rankname, values[-3])
    lcrow.instruments_on = values[-2]
    setattr(lcrow, durname, values[-1])
    # the followup columns are left empty by this program
    lcrow.mini_followup = None
    lcrow.omega_scan = None
    # links to the elog and daily ihope pages need the trigger's gps time
    if has_time_column:
        gps_time = lcrow.get_gps_time()
    if has_ifos_column and has_time_column:
        elog_pages = [(ifo, printutils.get_elog_page(ifo, gps_time)) for ifo in lsctables.instrument_set_from_ifos(lcrow.ifos)]
        lcrow.ifos__Px_click_for_elog_xP_ = ','.join([ printutils.create_hyperlink(elog[1], elog[0]) for elog in sorted(elog_pages) ])
    else:
        lcrow.ifos__Px_click_for_elog_xP_ = None
    if has_time_column:
        gps_time_utc = printutils.format_end_time_in_utc( gps_time )
        daily_ihope_address = printutils.get_daily_ihope_page(gps_time, pages_location = opts.daily_ihope_pages_location)
        lcrow.gps_time_utc__Px_click_for_daily_ihope_xP_ = printutils.create_hyperlink( daily_ihope_address, gps_time_utc )
    else:
        lcrow.gps_time_utc__Px_click_for_daily_ihope_xP_ = None
    # calculate and add faps; the coinc table may be any table, so it need
    # not have a combined_far column (the slot is then never set)
    combined_far = getattr(lcrow, 'combined_far', None)
    if combined_far is not None:
        # undo the unit conversion applied in the query to get seconds back
        t_in_s = float(values[-1]) / sqlutils.convert_duration(1, duration_unit)
        lcrow.fap = 1 - math.exp(-sqlutils.convert_duration(t_in_s, 'yr') * combined_far)
        lcrow.fap_1yr = 1 - math.exp(-combined_far)
    else:
        lcrow.fap = None
        lcrow.fap_1yr = None
    # add the row
    lctable.append(lcrow)

#
#   Add sngl table info if desired
#
if opts.get_sngl_info:
    lctable = printutils.get_sngl_info(connection, lctable, opts.sngl_table, opts.daily_ihope_pages_location, verbose = opts.verbose)

# Connect the table to the document
lcdoc.childNodes[0].appendChild(lctable)

#
#   Print the summary data
#

if opts.verbose and not opts.output:
    print >> sys.stderr, "\nResults are:\n"

if opts.output_format != "xml":
    tableList = ['loudest_events']
    columnList, row_span_columns, rspan_break_columns = printutils.get_columns_to_print(lcdoc, tableList[0], with_sngl = opts.get_sngl_info)
    # remove the mini_followup column since it'll be empty
    if 'mini_followup' in columnList:
        columnList.remove('mini_followup')
    if opts.columns is not None:
        # overwrite columnList with specified columns
        columnList = [col.strip() for col in opts.columns.split(',')]
        # 'rank' and 'duration' are shorthands for the full column names
        for ii, column in enumerate(columnList):
            if column == 'rank':
                columnList[ii] = rankname
            if column == 'duration':
                columnList[ii] = durname
    # set output; None is passed through to print_tables when no output
    # file was requested
    if opts.output is not None:
        outfile = open(opts.output, 'w')
    else:
        outfile = None
    # print the loudest_events table in the specified format, making sure
    # the output file is closed even if printing fails
    try:
        print_tables.print_tables(lcdoc, outfile, opts.output_format, tableList = tableList,
            columnList = columnList, round_floats = True, decimal_places = 2, title = None,
            row_span_columns = row_span_columns, rspan_break_columns = rspan_break_columns)
    finally:
        if outfile is not None:
            outfile.close()
else:
    # gzip the xml output if the output file name ends in "gz"
    utils.write_filename(lcdoc, opts.output,
                         gz=(opts.output or "stdout").endswith("gz"),
                         xsl_file = "ligolw.xsl")


#
#   If extract-to-xml or extract-to-database, prepare for extraction
#

if opts.extract_to_xml or opts.extract_to_database:
    if not opts.sngl_table:
        raise ValueError("No single table specified.")
    # double check that a temporary database is being used, since triggers
    # are deleted from the working database below
    if opts.input == working_filename:
        raise ValueError("Cannot extract without a temporary database; check that tmp-space is specified in arguments.")
    # compare absolute paths so that different spellings of the same file
    # (e.g. "./x.sqlite" and "x.sqlite") are caught as well
    if opts.extract_to_database and os.path.abspath(opts.extract_to_database) == os.path.abspath(opts.input):
        raise ValueError("Extracted database and input database cannot be the same name. Rename the extracted database.")
    if opts.verbose:
        print >> sys.stderr, "\nPreparing temp. database for xml/database extraction..."
        print >> sys.stderr, "Deleting unwanted triggers from coinc table..."
    # use the coinc_event_ids to figure out what to keep
    save_ids = [''.join(['"', row.coinc_event_id, '"']) for row in lctable]
    # delete all other ids from the coinc table
    sqlquery = ''.join([ """
        DELETE
        FROM 
            """, coinc_table, """
        WHERE
            coinc_event_id NOT IN (
            """, ','.join(save_ids), """
            )""" ])
    connection.cursor().execute(sqlquery)

    # clean up the other tables that point to the deleted ids
    sqlutils.clean_using_coinc_table( connection, coinc_table, verbose = opts.verbose,
        clean_experiment_map = True, clean_coinc_event_table = True, clean_coinc_definer = True,
        clean_coinc_event_map = True, clean_mapped_tables = True )

    # also remove experiments that don't have loud events in them
    sqlutils.clean_experiment_tables( connection, verbose = opts.verbose )

    # clean the metadata, keyed on the time column of the single table
    key_table = sngl_table
    key_column = time_column
    sqlutils.clean_metadata_using_end_time(connection, key_table, key_column, verbose = opts.verbose)

    # extract to the xml file
    if opts.extract_to_xml:
        extract(connection, opts.extract_to_xml, verbose = opts.verbose, xsl_file = "ligolw.xsl")


# close connection and exit
if opts.extract_to_database:
    # vacuum the extracted database
    if opts.verbose:
        print >> sys.stderr, "Vacuuming..."
    connection.cursor().execute("VACUUM")
    connection.close()
    # the working database becomes the extracted database
    dbtables.put_connection_filename(opts.extract_to_database, working_filename, verbose = opts.verbose)
else:
    connection.close()
    dbtables.discard_connection_filename( filename, working_filename, verbose = opts.verbose)

if opts.verbose and opts.tmp_space:
    print >> sys.stderr, "Finished!"

sys.exit(0)
