#!/usr/bin/env python
#
# Copyright (C) 2009-2013  Kipp Cannon, Chad Hanna, Drew Keppel
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

##@file gstlal_inspiral_marginalize_likelihood
# A program to marginalize the likelihood pdfs in noise across mass bins for a gstlal inspiral analysis.
#
# ### Command line interface
#
#	+ `--ignore-missing`: Ignore and skip missing input documents.
#	+ `--output`, `-o` [filename]: Set the output file name (default = write to stdout).
#	+ `--likelihood-cache` [filename]: Set the cache file name from which to read likelihood files.
#	+ `--verbose`: Be verbose.
#
# ### Review status
#
# | Names 	                    | Hash 					                   | Date       | Diff to Head of Master      |
# | --------------------------- | ---------------------------------------- | ---------- | --------------------------- |
# | Florent, Jolien, Kipp, Chad | 1dbbbd963c9dc076e1f7f5f659f936e44005f33b | 2015-05-14 | <a href="@gstlal_inspiral_cgit_diff/bin/gstlal_inspiral_marginalize_likelihood?id=HEAD&id2=1dbbbd963c9dc076e1f7f5f659f936e44005f33b">gstlal_inspiral_marginalize_likelihood</a> |
#
# #### Action
# - none
#
# =============================================================================
#
#                                   Preamble
#
# =============================================================================
#


from optparse import OptionParser
import sys


from glue.ligolw import ligolw
from glue.ligolw import utils as ligolw_utils
from glue.ligolw.utils import process as ligolw_process
from glue.ligolw.utils import search_summary as ligolw_search_summary
from glue import segments
from glue import lal


from gstlal import far


#
# =============================================================================
#
#                                 Command Line
#
# =============================================================================
#


def parse_command_line():
	"""
	Parse the command line and collect the input document URLs.

	Positional arguments are taken as input URLs.  If
	--likelihood-cache is given, the URL of each entry in that cache
	file is appended to the list, after the positional arguments.

	Returns the tuple (options, urls), where options is the optparse
	values object and urls is the list of input URLs.

	Raises ValueError if no input documents were named and
	--ignore-missing was not given.
	"""
	parser = OptionParser(
	)
	parser.add_option("--ignore-missing", action = "store_true", help = "Ignore and skip missing input documents.")
	parser.add_option("-o", "--output", metavar = "filename", help = "Set the output file name (default = write to stdout).")
	parser.add_option("--likelihood-cache", metavar = "filename", help = "Set the cache file name from which to read likelihood files.")
	parser.add_option("--verbose", action = "store_true", help = "Be verbose.")

	options, urls = parser.parse_args()

	if options.likelihood_cache:
		# read the cache inside a with block so the file is closed
		# as soon as its entries have been parsed instead of being
		# left open until the interpreter gets around to it
		with open(options.likelihood_cache) as cache_file:
			urls += [lal.CacheEntry(line).url for line in cache_file]
	if not urls and not options.ignore_missing:
		raise ValueError("no input documents")

	return options, urls


#
# =============================================================================
#
#                                     Main
#
# =============================================================================
#


#
# parse command line
#


options, urls = parse_command_line()


#
# initialize output document.  the process and search_summary rows
# created here are filled in further just before the document is written
#


xmldoc = ligolw.Document()
xmldoc.appendChild(ligolw.LIGO_LW())
process = ligolw_process.register_to_xmldoc(xmldoc, u"gstlal_inspiral_marginalize_likelihood", options.__dict__)
search_summary = ligolw_search_summary.append_search_summary(xmldoc, process)


#
# loop over input documents
#


# running totals.  the PDF objects stay None until the first input
# document has been loaded successfully
distributions = None
ranking_data = None
seglists = segments.segmentlistdict()
for n, url in enumerate(urls, start = 1):
	#
	# load input document
	#

	if options.verbose:
		print >>sys.stderr, "%d/%d:" % (n, len(urls)),
	try:
		in_xmldoc = ligolw_utils.load_url(url, verbose = options.verbose, contenthandler = far.ThincaCoincParamsDistributions.LIGOLWContentHandler)
	except IOError:
		# IOError is raised when an on-disk file is missing.
		# urllib2.URLError is raised when a URL cannot be loaded,
		# but this is subclassed from IOError so IOError will catch
		# those, too.
		if not options.ignore_missing:
			raise
		if options.verbose:
			print >>sys.stderr, "Could not load \"%s\" ... skipping as requested" % url
		continue

	#
	# compute weighted sum of ranking data PDFs
	#

	this_distributions, this_ranking_data, this_seglists = far.parse_likelihood_control_doc(in_xmldoc)
	# the parsed objects are all that is needed from the document
	in_xmldoc.unlink()

	# every input is required to carry both kinds of PDF data
	if this_distributions is None:
		raise ValueError("\"%s\" contains no parameter PDF data" % url)
	if this_ranking_data is None:
		raise ValueError("\"%s\" contains no ranking statistic PDF data" % url)

	# the first document seeds the totals, later ones are added in
	if distributions is None:
		distributions = this_distributions
	else:
		distributions += this_distributions
	if ranking_data is None:
		ranking_data = this_ranking_data
	else:
		ranking_data += this_ranking_data
	seglists |= this_seglists


#
# make sure at least one input document was actually loaded.  with
# --ignore-missing it is possible to get here with nothing:  either no
# inputs were named or every one of them was skipped.  there is nothing to
# marginalize in that case, so fail with a clear message instead of with
# an AttributeError on None in the calls below.
#


if distributions is None or ranking_data is None:
	raise ValueError("no input documents could be loaded")


#
# regenerate event parameter PDFs.  += method doesn't compute these
# correctly.
#
# NOTE:  the FAP/FAR code doesn't actually use any of the information in
# the parameter PDF file.  that code does need to know the "count above
# threshold" but in the offline case that count isn't known until after the
# coincs are ranked (the files loaded here have the count of all coincs,
# not just coincs above the ranking stat FAP/FAR normalization threshold).
# the compute_far_from_snr_chisq_histograms program will count the
# above-threshold events itself and replace the coinc counts in the
# parameter PDF file with its counts and write a new file.  we wouldn't
# need to load the parameter PDF files here at all except that the Farr et
# al. rate estimation code needs the marginalized numerator and denominator
# parameter PDFs.
#
# also re-generate likelihood ratio (ranking data) PDFs from the combined
# bin counts.  this shouldn't be needed but it's fast and maybe more
# accurate than relying on the weighted sum of the PDFs to have been
# numerically stable.
#


distributions.finish(verbose = options.verbose)
ranking_data.finish(verbose = options.verbose)


#
# write output document.  the combined PDFs are tagged with this
# program's process ID, and the search summary row is filled in from the
# union of the input segment lists.  the output is gzip-compressed if
# the output file name ends in ".gz";  when no output file name is set
# the stand-in name "stdout" is tested, so no compression is used.
#


distributions.process_id = ranking_data.process_id = process.process_id
far.gen_likelihood_control_doc(xmldoc, process, distributions, ranking_data, seglists)
search_summary.instruments = seglists.keys()
search_summary.in_segment = search_summary.out_segment = seglists.extent_all()
ligolw_process.set_process_end_time(process)
ligolw_utils.write_filename(xmldoc, options.output, gz = (options.output or "stdout").endswith(".gz"), verbose = options.verbose)
