#!/usr/bin/env python
#
# Copyright (C) 2010--2014  Kipp Cannon, Chad Hanna
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

## @file
# A program to compute the likelihood ratios of inspiral triggers
#
# ### Command line interface
#
#	+ `--input-cache` [filename]: Also process the files named in this LAL cache.  See lalapps_path2cache for information on how to produce a LAL cache file.
#	+ `--likelihood-url` [URL]: Set the name of the likelihood ratio data file to use.  Can be given more than once.  Filenames and URLs are accepted.
#	+ `--likelihood-cache` [filename]: Also load the likelihood ratio data files listed in this LAL cache.  See lalapps_path2cache for information on how to produce a LAL cache file.
#	+ `--tmp-space` [path]: Path to a directory suitable for use as a work area while manipulating the database file.  The database file will be worked on in this directory, and then moved to the final location when complete.  This option is intended to improve performance when running in a networked environment, where there might be a local disk with higher bandwidth than is available to the filesystem on which the final output will reside.
#	+ `--vetoes-name` [name]: Set the name of the segment lists to use as vetoes (default = do not apply vetoes).
#	+ `--force`: Force recomputation of likelihood values.
#	+ `--verbose`: Be verbose.
#	+ `--write-rates` [filename]: Write combined event parameter histograms together with histograms of assigned zero-lag ranking statistic values to this file.


#
# =============================================================================
#
#                                   Preamble
#
# =============================================================================
#


from optparse import OptionParser
import sys


from glue import iterutils
from glue.lal import CacheEntry
from glue.text_progress_bar import ProgressBar
from glue.ligolw import ligolw
from glue.ligolw import lsctables
from glue.ligolw import utils as ligolw_utils
from glue.ligolw.utils import process as ligolw_process
from glue.ligolw.utils import search_summary as ligolw_search_summary
from glue.ligolw.utils import segments as ligolw_segments
from glue import segments
from pylal import ligolw_burca2
from pylal import ligolw_thinca
from pylal import snglcoinc
from gstlal import far


process_name = u"gstlal_inspiral_calc_likelihood"


__author__ = "Kipp Cannon <kipp.cannon@ligo.org>"
__version__ = "git id %s" % ""	# FIXME
__date__ = ""	# FIXME


#
# =============================================================================
#
#                                 Command Line
#
# =============================================================================
#


def parse_command_line():
	"""
	Parse the command line.

	Returns the tuple (options, paramdict, filenames):

	options is the optparse values object, with an additional
	attribute, likelihood_urls, holding the URLs given with
	--likelihood-url followed by the URLs read from the
	--likelihood-cache file.

	paramdict is the attribute dictionary of options.  It is the
	dictionary itself, not a copy, so the likelihood_urls attribute
	added here is visible through it as well.

	filenames is the list of positional arguments followed by the
	paths read from the --input-cache file.

	Raises ValueError if no likelihood URL is obtained from either
	--likelihood-url or --likelihood-cache.
	"""
	parser = OptionParser(
		version = "Name: %%prog\n%s" % "" # FIXME
	)
	parser.add_option("-c", "--input-cache", metavar = "filename", help = "Also process the files named in this LAL cache.  See lalapps_path2cache for information on how to produce a LAL cache file.")
	parser.add_option("-l", "--likelihood-url", metavar = "URL", action = "append", help = "Set the name of the likelihood ratio data file to use.  Can be given more than once.  Filenames and URLs are accepted.")
	parser.add_option("--likelihood-cache", metavar = "filename", help = "Also load the likelihood ratio data files listsed in this LAL cache.  See lalapps_path2cache for information on how to produce a LAL cache file.")
	parser.add_option("-t", "--tmp-space", metavar = "path", help = "Path to a directory suitable for use as a work area while manipulating the database file.  The database file will be worked on in this directory, and then moved to the final location when complete.  This option is intended to improve performance when running in a networked environment, where there might be a local disk with higher bandwidth than is available to the filesystem on which the final output will reside.")
	parser.add_option("--vetoes-name", metavar = "name", help = "Set the name of the segment lists to use as vetoes (default = do not apply vetoes).")
	parser.add_option("-f", "--force", action = "store_true", help = "Force recomputation of likelihood values.")
	parser.add_option("-v", "--verbose", action = "store_true", help = "Be verbose.")
	parser.add_option("-w", "--write-rates", metavar = "filename", help = "Write combined event parameter histograms together with histograms of asssigned zero-lag ranking statistic values to this file.")
	options, filenames = parser.parse_args()

	# NOTE:  this aliases the options object's attribute dictionary.
	paramdict = options.__dict__

	# collect the likelihood URLs from the command line and from the
	# cache file, in that order
	options.likelihood_urls = []
	if options.likelihood_url is not None:
		options.likelihood_urls += options.likelihood_url
	if options.likelihood_cache is not None:
		# use a with block so the cache file is closed as soon as
		# it has been read
		with open(options.likelihood_cache) as cachefile:
			options.likelihood_urls += [CacheEntry(line).url for line in cachefile]
	if not options.likelihood_urls:
		raise ValueError("no likelihood URLs specified")

	# append the input files named in the cache to those given as
	# positional arguments
	if options.input_cache:
		with open(options.input_cache) as cachefile:
			filenames += [CacheEntry(line).path for line in cachefile]

	return options, paramdict, filenames


#
# =============================================================================
#
#                   Support Funcs for Likelihood Ratio Code
#
# =============================================================================
#


def sngl_inspiral_veto_func(event, vetoseglists):
	"""
	Return True if event should be *retained*, False if it is vetoed.

	An event is retained when vetoseglists has no entry for the
	event's instrument (event.ifo), or when the event's end time
	(event.get_end()) is not contained in that instrument's entry.
	"""
	instrument = event.ifo
	if instrument not in vetoseglists:
		# nothing to test this instrument's events against
		return True
	return event.get_end() not in vetoseglists[instrument]


#
# =============================================================================
#
#                                     Main
#
# =============================================================================
#


#
# command line
#


options, paramdict, filenames = parse_command_line()


#
# read the parameter distribution data from every likelihood URL, summing
# the distributions and taking the union of the segment lists
#


seglists = segments.segmentlistdict()
coincparamsdistributions = None
for n, likelihood_url in enumerate(options.likelihood_urls, start = 1):
	if options.verbose:
		print >>sys.stderr, "%d/%d:" % (n, len(options.likelihood_urls)),
	xmldoc = ligolw_utils.load_url(
		likelihood_url,
		contenthandler = far.ThincaCoincParamsDistributions.LIGOLWContentHandler,
		verbose = options.verbose
	)
	loaded_distributions, discarded, loaded_seglists = far.parse_likelihood_control_doc(xmldoc)
	xmldoc.unlink()
	if loaded_distributions is None:
		raise ValueError("%s does not contain parameter distribution data" % likelihood_url)
	# the first document's data becomes the accumulator, the rest are
	# added to it
	if coincparamsdistributions is not None:
		coincparamsdistributions += loaded_distributions
	else:
		coincparamsdistributions = loaded_distributions
	seglists |= loaded_seglists
if options.verbose:
	livetimes = ",\n\t".join("%s = %s s" % (instrument, str(abs(segs))) for instrument, segs in seglists.items())
	print >>sys.stderr, "total livetime:\n\t%s" % livetimes

# probability of instruments given signal
coincparamsdistributions.populate_prob_of_instruments_given_signal(
	segs = seglists,
	n = 1.0,
	verbose = options.verbose
)

# instrument combination counts
coincparamsdistributions.add_instrument_combination_counts(
	segs = seglists,
	verbose = options.verbose
)

#
# rebuild event parameter PDFs (+= method has not constructed these
# correctly, and we might have added additional priors to the histograms),
# then initialize likelihood ratio evaluator
#


# finalize the accumulated distributions, then wrap them in the object
# used to evaluate the log likelihood ratio
coincparamsdistributions.finish(verbose = options.verbose)
ln_likelihood_ratio_func = snglcoinc.LnLikelihoodRatio(coincparamsdistributions)


#
# ranking statistic histograms are collected in a RankingData object only
# when --write-rates was given;  otherwise ranking_data stays None
#


ranking_data = None
if options.write_rates is not None:
	ranking_data = far.RankingData(None, seglists.keys())


#
# iterate over files
#


# names of the input files that could not be loaded.  they are reported
# together, after all other files have been processed
failed = []
for n, filename in enumerate(filenames, 1):
	#
	# open the file.  be lazy and use the content handler for the
	# distribution data files because it's fine for this, too.  if a
	# file can't be loaded because of a filesystem failure or CRC
	# failure, or whatever, try to do the rest of the files before
	# exiting instead of crashing right away to reduce the time spent
	# in rescue dags.
	#

	if options.verbose:
		print >>sys.stderr, "%d/%d:" % (n, len(filenames)),
	try:
		xmldoc = ligolw_utils.load_filename(filename, contenthandler = far.ThincaCoincParamsDistributions.LIGOLWContentHandler, verbose = options.verbose)
	except Exception as e:
		# NOTE:  the reason for the failure is only reported in
		# verbose mode;  the filename is always remembered
		if options.verbose:
			print >>sys.stderr, "failed to load '%s': %s.  trying to continue with remaining files" % (filename, str(e))
		failed.append(filename)
		continue

	# unless --force was given, leave documents that already record a
	# process entry for this program untouched
	if not options.force and ligolw_process.doc_includes_process(xmldoc, process_name):
		if options.verbose:
			print >>sys.stderr, "already processed, skipping"
		xmldoc.unlink()
		continue

	#
	# summarize the database, and record our passage.
	#

	# create_new = False:  a missing inspiral coinc definition is
	# reported as KeyError instead of being added to the document
	try:
		coinc_def_id = lsctables.CoincDefTable.get_table(xmldoc).get_coinc_def_id(ligolw_thinca.InspiralCoincDef.search, ligolw_thinca.InspiralCoincDef.search_coinc_type, create_new = False)
	except KeyError:
		if options.verbose:
			print >>sys.stderr, "document does not contain inspiral coincidences.  skipping."
		xmldoc.unlink()
		continue

	process = ligolw_process.register_to_xmldoc(xmldoc, process_name, paramdict)
	search_summary = ligolw_search_summary.append_search_summary(xmldoc, process, ifos = seglists.keys(), inseg = seglists.extent_all(), outseg = seglists.extent_all())

	if options.verbose:
		print >>sys.stderr, "indexing document ...",
	# event_id --> sngl_inspiral row
	sngl_inspiral_table_index = dict((row.event_id, row) for row in lsctables.SnglInspiralTable.get_table(xmldoc))
	# coinc_event_id --> list of the sngl_inspiral rows mapped to that
	# coinc.  only coincs with the inspiral coinc_def_id get an entry,
	# map rows for any other coinc are ignored
	coinc_event_map_index = dict((row.coinc_event_id, []) for row in lsctables.CoincTable.get_table(xmldoc) if row.coinc_def_id == coinc_def_id)
	for row in lsctables.CoincMapTable.get_table(xmldoc):
		if row.coinc_event_id not in coinc_event_map_index:
			continue
		coinc_event_map_index[row.coinc_event_id].append(sngl_inspiral_table_index[row.event_id])
	# only needed to construct the map above
	del sngl_inspiral_table_index
	# coinc_event_id --> coinc_inspiral row
	coinc_inspiral_index = dict((row.coinc_event_id, row) for row in lsctables.CoincInspiralTable.get_table(xmldoc))

	offset_vectors = lsctables.TimeSlideTable.get_table(xmldoc).as_dict()

	# without --vetoes-name an empty segmentlistdict is used, in which
	# case sngl_inspiral_veto_func() retains every event
	if options.vetoes_name is not None:
		vetoseglists = ligolw_segments.segmenttable_get_by_name(xmldoc, options.vetoes_name).coalesce()
	else:
		vetoseglists = segments.segmentlistdict()
	if options.verbose:
		print >>sys.stderr, "done"

	#
	# run likelihood ratio calculation.
	#

	# events_func ignores its first argument and looks the events up in
	# the index built above
	ligolw_burca2.assign_likelihood_ratios_xml(
		xmldoc = xmldoc,
		coinc_def_id = coinc_def_id,
		offset_vectors = offset_vectors,
		vetoseglists = vetoseglists,
		events_func = lambda _, coinc_event_id: coinc_event_map_index[coinc_event_id],
		veto_func = sngl_inspiral_veto_func,
		ln_likelihood_ratio_func = ln_likelihood_ratio_func,
		likelihood_params_func = coincparamsdistributions.coinc_params,
		verbose = options.verbose
	)

	#
	# collect ranking statistic values if that's what we're doing.
	#

	if ranking_data is not None:
		for row in lsctables.CoincTable.get_table(xmldoc):
			if row.coinc_def_id != coinc_def_id:
				continue
			instruments = lsctables.instrument_set_from_ifos(coinc_inspiral_index[row.coinc_event_id].ifos)
			ln_likelihood_ratio = row.likelihood
			# a coinc whose time slide has any non-zero offset
			# is counted as background, otherwise as zero-lag
			if any(offset_vectors[row.time_slide_id].values()):
				ranking_data.background_likelihood_rates[frozenset(instruments)][ln_likelihood_ratio,] += 1.
			else:
				ranking_data.zero_lag_likelihood_rates[frozenset(instruments)][ln_likelihood_ratio,] += 1.

	#
	# close out process metadata.
	#

	ligolw_process.set_process_end_time(process)

	#
	# clean up.
	#

	# the document is written back to the file it was loaded from,
	# gzip-compressed if the name ends in ".gz"
	ligolw_utils.write_filename(xmldoc, filename, gz = (filename or "stdout").endswith(".gz"), verbose = options.verbose)
	xmldoc.unlink()


#
# crash if any input files were broken
#


if failed:
	raise ValueError("%s could not be processed" % ", ".join("'%s'" % broken for broken in failed))


#
# all input files were processed:  if --write-rates was given, write the
# combined distribution and ranking data to that file
#


if options.write_rates is not None:
	# build a new, empty, document to hold the output
	xmldoc = ligolw.Document()
	xmldoc.appendChild(ligolw.LIGO_LW())
	process = ligolw_process.register_to_xmldoc(xmldoc, process_name, paramdict)
	search_summary = ligolw_search_summary.append_search_summary(
		xmldoc,
		process,
		ifos = seglists.keys(),
		inseg = seglists.extent_all(),
		outseg = seglists.extent_all()
	)
	far.gen_likelihood_control_doc(xmldoc, process, coincparamsdistributions, ranking_data, seglists)
	# gzip-compress the output if the name ends in ".gz"
	ligolw_utils.write_filename(
		xmldoc,
		options.write_rates,
		gz = (options.write_rates or "stdout").endswith(".gz"),
		verbose = options.verbose
	)
