#!/usr/bin/env python
#
# Copyright (C) 2011--2013 Kipp Cannon, Chad Hanna, Drew Keppel
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

## @file
# Compute FAR and FAP distributions from the likelihood CCDFs.
#
# ### Command line interface
#
#	+ `--background-bins-file` [filename]: Set the name of the xml file containing the marginalized likelihood (required).
#	+ `--tmp-space` [dir]: Set the name of the tmp space if working with sqlite.
#	+ `--non-injection-db` [filename]: Provide the name of a database from a non-injection run.  Can be given multiple times (default = []).
#	+ `--injection-db` [filename]: Provide the name of a database from an injection run.  Can be given multiple times.  Databases are assumed to be over the same time period as the non-injection databases and to use the same templates.  If not, the results will be nonsense (default = []).
#	+ `--force`, `-f`: Force script to reevaluate FARs and FAPs.
#	+ `--verbose`, `-v`: Be verbose.

#
# =============================================================================
#
#                                   Preamble
#
# =============================================================================
#


from optparse import OptionParser
import os
try:
	import sqlite3
except ImportError:
	# pre 2.5.x
	from pysqlite2 import dbapi2 as sqlite3
sqlite3.enable_callback_tracebacks(True)
import sys


from glue.ligolw import ligolw
from glue.ligolw import dbtables
from glue.ligolw import lsctables
from glue.ligolw import utils as ligolw_utils
from glue.ligolw.utils import process as ligolw_process
from glue.ligolw.utils import search_summary as ligolw_search_summary
from pylal import ligolw_thinca
from gstlal import far


#
# =============================================================================
#
#                                 Command Line
#
# =============================================================================
#


def parse_command_line():
	"""
	Parse and validate the command line.

	Returns the tuple (options, filenames).  options is the optparse
	values object holding the parsed options.  filenames is the list of
	positional arguments, which is always empty on a successful return
	because trailing arguments are rejected.

	Raises ValueError if --background-bins-file was not given, if no
	database was named with either --non-injection-db or
	--injection-db, or if positional arguments follow the options.
	"""
	# (flags, keyword arguments) for each option, in the order they are
	# registered with the parser
	option_table = (
		(("--background-bins-file",), dict(metavar = "filename", help = "Set the name of the xml file containing the marginalized likelihood (required).")),
		(("--tmp-space",), dict(metavar = "dir", help = "Set the name of the tmp space if working with sqlite.")),
		(("--non-injection-db",), dict(metavar = "filename", default = [], action = "append", help = "Provide the name of a database from a non-injection run.  Can be given multiple times.")),
		(("--injection-db",), dict(metavar = "filename", default = [], action = "append", help = "Provide the name of a database from an injection run.  Can be given multiple times.  Databases are assumed to be over the same time period as the non injection databases using the same templates.  If not the results will be nonsense.")),
		(("--force", "-f"), dict(action = "store_true", help = "Force script to reevaluate FARs and FAPs.")),
		(("--verbose", "-v"), dict(action = "store_true", help = "Be verbose.")),
	)

	parser = OptionParser()
	for flags, keywords in option_table:
		parser.add_option(*flags, **keywords)
	options, filenames = parser.parse_args()

	# sanity checks, reported in this order
	if options.background_bins_file is None:
		raise ValueError("must set --background-bins-file")
	if not (options.non_injection_db or options.injection_db):
		raise ValueError("must provide at least one database file to process")
	if filenames:
		raise ValueError("unrecognized trailing arguments")

	return options, filenames


#
# =============================================================================
#
#                                     Main
#
# =============================================================================
#


#
# Parse command line
#


options, filenames = parse_command_line()


#
# Retrieve distribution data
#


# load the --background-bins-file document and extract the event parameter
# distributions, the ranking statistic data and the segment lists from it.
# the document is passed straight through so that no reference to it is
# retained here
coinc_params_distributions, ranking_data, seglists = far.parse_likelihood_control_doc(
	ligolw_utils.load_filename(
		options.background_bins_file,
		contenthandler = far.ThincaCoincParamsDistributions.LIGOLWContentHandler,
		verbose = options.verbose
	)
)

# both kinds of data are required for what follows
if coinc_params_distributions is None:
	raise ValueError("\"%s\" does not contain event parameter PDFs" % options.background_bins_file)
if ranking_data is None:
	raise ValueError("\"%s\" does not contain likelihood ratio PDFs" % options.background_bins_file)


#
# Count the number of above-threshold events
#


if options.verbose:
	print >>sys.stderr, "beginning count of above-threshold events"

# reset every zero-lag likelihood rate array to 0 before collecting the
# rates from the non-injection databases
for binnedarray in ranking_data.zero_lag_likelihood_rates.values():
	binnedarray.array[:] = 0.

n_non_injection_dbs = len(options.non_injection_db)
inspiral_coinc_def = ligolw_thinca.InspiralCoincDef

for n, filename in enumerate(options.non_injection_db, start = 1):
	if options.verbose:
		print >>sys.stderr, "%d/%d: %s" % (n, n_non_injection_dbs, filename)

	# get working copy of database.  scratch space is not used
	# (tmp_path = None) because the query is very fast
	working_filename = dbtables.get_connection_filename(filename, tmp_path = None, verbose = options.verbose)
	connection = sqlite3.connect(working_filename)

	# look up the ID of the inspiral coinc definition.  the XML view
	# of the database is needed only for this look-up, so it is
	# released immediately afterwards
	xmldoc = dbtables.get_xml(connection)
	coinc_def_id = lsctables.CoincDefTable.get_table(xmldoc).get_coinc_def_id(
		inspiral_coinc_def.search,
		inspiral_coinc_def.search_coinc_type,
		create_new = False
	)
	xmldoc.unlink()

	# update counts
	ranking_data.collect_zero_lag_rates(connection, coinc_def_id)

	# done.  the working copy is discarded rather than put back
	connection.close()
	dbtables.discard_connection_filename(filename, working_filename, verbose = options.verbose)


# Build the ranking statistic PDFs from the bin counts.
#
# FIXME:  need to do this to get the combined PDFs populated.  the XML file
# contains both bin counts and PDFs for all instrument sets but none for
# the "combined" set.  the bin counts for the combined set are populated by
# the .from_xml() method but the PDFs are built in the .finish() method.
# because all the other PDFs come out of the file we normally would not
# need to invoke the .finish() method here at all.  look into getting the
# combined PDFs built by the .from_xml() method as well.  NOTE: that method
# can't just call .finish() itself because that's a huge waste of time when
# many files need to be read and summed.
ranking_data.finish(verbose = options.verbose)


#
# Initialize the FAP & FAR assignment machine
#


# the livetime is computed from the segment lists that were read from the
# --background-bins-file document
fapfar = far.FAPFAR(ranking_data, livetime = far.get_live_time(seglists))


#
# Iterate over databases
#


if options.verbose:
	print >>sys.stderr, "assigning FAPs and FARs"

# name under which this program records itself in each database's process
# table.  also used to recognize databases that have already been processed
program_name = u"gstlal_compute_far_from_snr_chisq_histograms"

# non-injection databases are processed first, then injection databases
databases = options.non_injection_db + options.injection_db

for n, filename in enumerate(databases, start = 1):
	if options.verbose:
		print >>sys.stderr, "%d/%d: %s" % (n, len(databases), filename)

	#
	# unless --force was given, skip databases whose process table
	# already contains an entry for this program.  the check is done
	# on the original file, before any working copy is made, and the
	# connection used for it is always closed again so that it is not
	# leaked
	#

	if not options.force:
		probe = sqlite3.connect(filename)
		try:
			already_processed, = probe.cursor().execute("""SELECT EXISTS(SELECT * FROM process WHERE program == ?);""", (program_name,)).fetchone()
		finally:
			probe.close()
		if already_processed:
			if options.verbose:
				print >>sys.stderr, "already processed, skipping"
			continue

	#
	# get working copy of database
	#

	working_filename = dbtables.get_connection_filename(filename, tmp_path = options.tmp_space, verbose = options.verbose)
	connection = sqlite3.connect(working_filename)

	#
	# record our passage
	#

	xmldoc = dbtables.get_xml(connection)
	process = ligolw_process.register_to_xmldoc(xmldoc, program_name, {})

	#
	# assign FAPs and FARs
	#

	fapfar.assign_faps(connection)
	fapfar.assign_fars(connection)

	#
	# done, restore file to original location.  the end time is set
	# on the process object and then written to the database's
	# process table explicitly
	#

	ligolw_process.set_process_end_time(process)
	connection.cursor().execute("UPDATE process SET end_time = ? WHERE process_id == ?", (process.end_time, process.process_id))

	connection.commit()
	connection.close()
	dbtables.put_connection_filename(filename, working_filename, verbose = options.verbose)

if options.verbose:
	print >>sys.stderr, "FAP and FAR assignment complete"


#
# Rewrite parameter and ranking statistic distribution file but with
# zero-lag counts replaced with count-above-threshold.
#


# build a new document holding the parameter distributions and the ranking
# data (whose zero-lag counts were replaced above), together with process
# and search summary metadata spanning the full extent of the segment lists
xmldoc = ligolw.Document()
xmldoc.appendChild(ligolw.LIGO_LW())
process = ligolw_process.register_to_xmldoc(xmldoc, u"gstlal_compute_far_from_snr_chisq_histograms", {})
ligolw_search_summary.append_search_summary(xmldoc, process, ifos = seglists.keys(), inseg = seglists.extent_all(), outseg = seglists.extent_all())
far.gen_likelihood_control_doc(xmldoc, process, coinc_params_distributions, ranking_data, seglists)
ligolw_process.set_process_end_time(process)

# the output is written next to the input with "post_" prepended to the
# file's name.  the prefix is applied to the basename only, so that an
# input given with a directory component (e.g. "dir/bins.xml.gz") yields
# "dir/post_bins.xml.gz" instead of a path into a "post_dir" directory.
# for a bare filename the result is "post_<filename>" as before
input_dirname, input_basename = os.path.split(options.background_bins_file)
outname = os.path.join(input_dirname, "post_%s" % input_basename)
ligolw_utils.write_filename(xmldoc, outname, gz = outname.endswith(".gz"), verbose = options.verbose)

if options.verbose:
	print >>sys.stderr, "done"
