#!/usr/bin/env python
#
# Copyright (C) 2009-2014  Kipp Cannon, Chad Hanna, Drew Keppel
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
"""Stream-based inspiral analysis tool"""


#
# =============================================================================
#
#                                   Preamble
#
# =============================================================================
#

## @file gstlal_inspiral
# A program to analyze gravitational wave data for compact binary coalescence in real time or in an offline mode
#
# @dot
# digraph llpipe {
# 	labeljust = "r";
# 	label="gstlal_inspiral"
# 	rankdir=LR;
# 	graph [fontname="Roman", fontsize=24];
# 	edge [ fontname="Roman", fontsize=10 ];
# 	node [fontname="Roman", shape=box, fontsize=11];
# 
# 	gracedb [label="GW\nCandidate\nDatabase", shape=oval, color=tomato3, style=filled];
# 
# 
# 	subgraph clusterNodeN {
# 
# 		style=rounded;
# 		label="gstreamer pipeline";
# 		labeljust = "r";
# 		fontsize = 14;
# 
# 		H1src [label="H1 data source:\n mkbasicsrc()", color=red4, URL="\ref pipeparts.mkbasicsrc()"];
# 		L1src [label="L1 data source:\n mkbasicsrc()", color=green4, URL="\ref pipeparts.mkbasicsrc()"];
# 		V1src [label="V1 data source:\n mkbasicsrc()", color=magenta4];
# 		
# 		H1multirate [label="H1 whitening and downsampling:\nmkwhitened_multirate_src()", color=red4, URL="\ref multirate_datasource.mkwhitened_multirate_src()"];
# 		L1multirate [label="L1 whitening and downsampling:\nmkwhitened_multirate_src()", color=green4, URL="\ref multirate_datasource.mkwhitened_multirate_src()"];
# 		V1multirate [label="V1 whitening and downsampling:\nmkwhitened_multirate_src()", color=magenta4, URL="\ref multirate_datasource.mkwhitened_multirate_src()"];
# 		
# 		H1LLOID [label="H1 LLOID filtering engine:\nmkLLOIDmulti()", color=red4, URL="\ref lloidparts.mkLLOIDmulti()"];
# 		L1LLOID [label="L1 LLOID filtering engine:\nmkLLOIDmulti()", color=green4, URL="\ref lloidparts.mkLLOIDmulti()"];
# 		V1LLOID [label="V1 LLOID filtering engine:\nmkLLOIDmulti()", color=magenta4, URL="\ref lloidparts.mkLLOIDmulti()"];
# 
# 		H1Trig1 [label="H1 Triggering:\nsub bank 1", color=red4];
# 		L1Trig1 [label="L1 Triggering:\nsub bank 1", color=green4];
# 		V1Trig1 [label="V1 Triggering:\nsub bank 1", color=magenta4];
# 		H1Trig2 [label="H1 Triggering:\nsub bank 2", color=red4];
# 		L1Trig2 [label="L1 Triggering:\nsub bank 2", color=green4];
# 		V1Trig2 [label="V1 Triggering:\nsub bank 2", color=magenta4];
# 		H1TrigN [label="H1 Triggering:\nsub bank N", color=red4];
# 		L1TrigN [label="L1 Triggering:\nsub bank N", color=green4];
# 		V1TrigN [label="V1 Triggering:\nsub bank N", color=magenta4];
# 		
# 		H1src -> H1multirate;
# 		L1src -> L1multirate;
# 		V1src -> V1multirate;
# 
# 		H1multirate -> H1LLOID [label="h(t) 4096Hz"];
# 		L1multirate -> L1LLOID [label="h(t) 4096Hz"];
# 		V1multirate -> V1LLOID [label="h(t) 4096Hz"];
# 		H1multirate -> H1LLOID [label="h(t) 2048Hz"];
# 		L1multirate -> L1LLOID [label="h(t) 2048Hz"];
# 		V1multirate -> V1LLOID [label="h(t) 2048Hz"];
# 		H1multirate -> H1LLOID [label="h(t) Nth-pow-of-2 Hz"];
# 		L1multirate -> L1LLOID [label="h(t) Nth-pow-of-2 Hz"];
# 		V1multirate -> V1LLOID [label="h(t) Nth-pow-of-2 Hz"];
# 	
# 		H1LLOID -> H1Trig1 [label="SNRs sub bank 1"];
# 		L1LLOID -> L1Trig1 [label="SNRs sub bank 1"];
# 		V1LLOID -> V1Trig1 [label="SNRs sub bank 1"];
# 		H1LLOID -> H1Trig2 [label="SNRs sub bank 2"];
# 		L1LLOID -> L1Trig2 [label="SNRs sub bank 2"];
# 		V1LLOID -> V1Trig2 [label="SNRs sub bank 2"];
# 		H1LLOID -> H1TrigN [label="SNRs sub bank N"];
# 		L1LLOID -> L1TrigN [label="SNRs sub bank N"];
# 		V1LLOID -> V1TrigN [label="SNRs sub bank N"];
# 	}
# 
# 
# 	Coincidence [label="Coincidence\nO(1)s latency"];
# 	SigEst [label="Significance\nEstimation\nO(1)s latency"];
# 	Thresh [label="Thresholding\nO(1)s latency"];
# 	EventGen [label="Event\nGeneration\nO(1)s latency"];
# 		
# 	H1Trig1 -> Coincidence [label="Trigs sub bank 1"];
# 	L1Trig1 -> Coincidence [label="Trigs sub bank 1"];
# 	V1Trig1 -> Coincidence [label="Trigs sub bank 1"];
# 	H1Trig2 -> Coincidence [label="Trigs sub bank 2"];
# 	L1Trig2 -> Coincidence [label="Trigs sub bank 2"];
# 	V1Trig2 -> Coincidence [label="Trigs sub bank 2"];
# 	H1TrigN -> Coincidence [label="Trigs sub bank N"];
# 	L1TrigN -> Coincidence [label="Trigs sub bank N"];
# 	V1TrigN -> Coincidence [label="Trigs sub bank N"];
# 	
# 	Coincidence -> SigEst -> Thresh -> EventGen;
# 
# 	EventGen -> gracedb;
# 	
# }
# @enddot
#
# ### Command line interface
#
#	+ `--local-frame-caching`
#	+ `--psd-fft-length` [s] (int): FFT length, default 16s.
#	+ `--veto-segments-file` [filename]: Set the name of the LIGO light-weight XML file from which to load vetoes (optional).
#	+ `--veto-segments-name` [name]: Set the name of the segments to extract from the segment tables and use as the veto list, default = "vetoes".
#	+ `--nxydump-segment` [start:stop]: Set the time interval to dump from nxydump elements (optional).  The default is ":", i.e. dump all time.
#	+ `--output` [filename]: Set the name of the LIGO light-weight XML output file *.{xml,xml.gz} or an SQLite database *.sqlite (required).
#	+ `--reference-psd` [filename]: Instead of measuring the noise spectrum, load the spectrum from this LIGO light-weight XML file (optional).
#	+ `--track-psd`: Track PSD even if a reference is given.
#	+ `--svd-bank` [filename]: Set the name of the LIGO light-weight XML file from which to load the svd bank for a given instrument in the form ifo:file, These can be given as a comma separated list such as H1:file1,H2:file2,L1:file3 to analyze multiple instruments.  This option can be given multiple times in order to analyze bank serially.  At least one svd bank for at least 2 detectors is required.
#	+ `--time-slide-file` [filename]: Set the name of the xml file to get time slide offsets.
#	+ `--control-peak-time` [time] (int): Set a time window in seconds to find peaks in the control signal.
#	+ `--fir-stride` [time] (int): Set the length of the fir filter stride in seconds, default = 8.
#	+ `--ht-gate-threshold` [threshold] (float): Set the threshold on whitened h(t) to mark samples as gaps (glitch removal), default = infinity.
#	+ `--chisq-type` [type]: Choose the type of chisq computation to perform. Must be one of (autochisq|timeslicechisq). The default is autochisq.
#	+ `--coincidence-threshold` [value] (float): Set the coincidence window in seconds (default = 0.005).  The light-travel time between instruments will be added automatically in the coincidence test.
#	+ `--write-pipeline` [filename]: Write a DOT graph description of the as-built pipeline to this file (optional).  The environment variable GST_DEBUG_DUMP_DOT_DIR must be set for this option to work.
#	+ `--comment`: Set the string to be recorded in comment and tag columns in various places in the output file (optional).
#	+ `--check-time-stamps`: Turn on time stamp checking.
#	+ `--verbose`: Be verbose (optional).
#	+ `--tmp-space` [path]: Path to a directory suitable for use as a work area while manipulating the database file.  The database file will be worked on in this directory, and then moved to the final location when complete.  This option is intended to improve performance when running in a networked environment, where there might be a local disk with higher bandwidth than is available to the filesystem on which the final output will reside.
#	+ `--blind-injections` [filename]: Set the name of an injection file that will be added to the data without saving the sim_inspiral_table or otherwise processing the data differently.  Has the effect of having hidden signals in the input data.  --injections must not be specified in this case.
#	+ `--job-tag`: Set the string to identify this job and register the resources it provides on a node.  Should be 4 digits of the form 0001, 0002, etc..
#	+ `--likelihood-file` [filename]: Set the name of the likelihood ratio data file to use for ranking events (either --likelihood-file or --reference-likelihood-file must be provided).
#	+ `--reference-likelihood-file` [filename]: Set the name of the likelihood ratio data file to use for ranking events (--data-source must be lvshm or framexmit) (--likelihood-snapshot-interval must be provided) (either --likelihood-file or --reference-likelihood-file must be provided).
#	+ `--likelihood-snapshot-interval` [seconds] (float): How often to reread the marginalized likelihood data. If --likelihood-file is provided, the likelihood file will be overwritten by a snapshot of the trigger files and a duplicate snapshot will be generated to keep a record of past ranking statistics.
#	+ `--marginalized-likelihood-file` [filename]: Set the name of the file from which to load initial marginalized likelihood ratio data (required).
#	+ `--gracedb-far-threshold` (float): False alarm rate threshold for gracedb (Hz), if not given gracedb events are not sent.
#	+ `--gracedb-search`: gracedb type (default is LowMass).
#	+ `--gracedb-pipeline`: gracedb pipeline (default is gstlal).
#	+ `--gracedb-group`: gracedb group (default is Test).
#	+ `--gracedb-service-url`: gracedb service url (default is https://gracedb.ligo.org/api/)
#	+ `--thinca-interval` [secs] (float): Set the thinca interval, default = 4s.
#	+ `--singles-threshold` [SNR] (float): Set the SNR threshold at which to record single-instrument events in the output (default = 8).
#
# ### Review Status
#
# | Names                                          | Hash                                        | Date       | Diff to Head of Master      |
# | -------------------------------------------    | ------------------------------------------- | ---------- | --------------------------- |
# | Florent, Sathya, Duncan Me, Jolien, Kipp, Chad | 9074294d6b57f43651143b5f93210751de1fe55a    | 2014-05-02 | <a href="@gstlal_inspiral_cgit_diff/bin/gstlal_inspiral?id=HEAD&id2=9074294d6b57f43651143b5f93210751de1fe55a">gstlal_inspiral</a> |
#
# #### Actions
#
# - Consider cleaning up the nxydump segment option.  Currently it only works with modifying source code
# - Consider changing the thinca-interval name to thinca-cadence
# - consider replacing  the 'in ("framexmitsrc", "lvshmsrc")' tests with a property or method in the data source class
# - Consider allowing a single detector analysis
# - Consider deleting timeslicechisq


import os
import resource
import sys
from optparse import OptionParser
import signal
import socket
import time
import tempfile
import math
from collections import namedtuple

# The following snippet is taken from http://gstreamer.freedesktop.org/wiki/FAQ#Mypygstprogramismysteriouslycoredumping.2Chowtofixthis.3F
import pygtk
pygtk.require("2.0")
import gobject
gobject.threads_init()
import pygst
pygst.require("0.10")
import gst

import lal

from glue.lal import CacheEntry
from glue import segments
from glue import segmentsUtils
from glue.ligolw import ligolw
from glue.ligolw import lsctables
from glue.ligolw import utils as ligolw_utils
from glue.ligolw.utils import segments as ligolw_segments
from pylal.datatypes import LIGOTimeGPS
from pylal import series as lalseries
from gstlal import bottle
from gstlal import datasource
from gstlal import lloidparts
from gstlal import far
from gstlal import httpinterface
from gstlal import hoftcache
from gstlal import inspiral
from gstlal import inspiral_pipe
from gstlal import pipeparts
from gstlal import simulation

# Content handler class passed to ligolw_utils.load_filename() when this
# program loads LIGO light-weight XML documents.  The subclass adds
# nothing of its own;  it exists so that lsctables.use_in() can be
# applied to a class private to this program.
# NOTE(review):  presumably use_in() makes the handler build lsctables
# table classes while parsing --- confirm against glue.ligolw.lsctables.
class LIGOLWContentHandler(ligolw.LIGOLWContentHandler):
	pass
lsctables.use_in(LIGOLWContentHandler)

#
# Make sure we have sufficient resources
# We allocate far more memory than we need, so this is okay
#

def setrlimit(res, lim):
	"""
	Set the soft limit of the resource res to lim, leaving the hard
	limit as it is.  If lim is None the soft limit is raised to the
	current hard limit.
	"""
	_, ceiling = resource.getrlimit(res)
	if lim is None:
		soft = ceiling
	else:
		soft = lim
	resource.setrlimit(res, (soft, ceiling))

# raise the soft limits on the number of processes, the address space and
# the resident set size to their hard limits (a limit of None selects the
# hard limit), and shrink the stack size soft limit to 1 MiB.  the limits
# are applied in the order listed.
for _res, _lim in (
	(resource.RLIMIT_NPROC, None),
	(resource.RLIMIT_AS, None),
	(resource.RLIMIT_RSS, None),
	(resource.RLIMIT_STACK, 1024 * 1024),	# 1 MiB per thread
):
	setrlimit(_res, _lim)
# don't leave the loop variables behind in the module namespace
del _res, _lim


def now():
	"""
	Return the current time as a LIGOTimeGPS.  The value is built from
	lal.UTCToGPS() applied to the current UTC broken-down time, with 0
	passed as the second argument of the LIGOTimeGPS constructor.
	"""
	utc_now = time.gmtime()
	gps_seconds = lal.UTCToGPS(utc_now)
	return LIGOTimeGPS(gps_seconds, 0)


#
# =============================================================================
#
#                                 Command Line
#
# =============================================================================
#


def parse_command_line():
	"""
	Parse and validate the command line.

	Builds the option parser (the data source options are appended by
	datasource.append_options()), expands the optional cache files
	into the corresponding list-valued options, checks for missing,
	mutually exclusive and inconsistent options, and performs the
	set-up that depends only on the command line (local frame caching
	and the reconstruction segments around injections).

	Returns the tuple

		(options, filenames, process_params, svd_banks, detectors,
		reconstruction_segment_list, likelihood_namedtuples_list)

	where process_params is a copy of the option dictionary taken
	before any option value is converted to another type, svd_banks
	is the list of parsed --svd-bank strings, detectors is the
	datasource.GWDataSourceInfo object built from the options, and
	likelihood_namedtuples_list is a list of (likelihood_file,
	reference_likelihood_file) named tuples.

	Raises ValueError if the options fail validation.
	"""
	parser = OptionParser(
		description = __doc__
	)

	# append all the datasource specific options
	datasource.append_options(parser)

	# local caching to help with I/O for offline running
	parser.add_option("--local-frame-caching", action = "store_true", help = "Cache the frame data locally to help with I/O when running offline (can only be used with --data-source frames).")

	parser.add_option("--psd-fft-length", metavar = "s", default = 16, type = "int", help = "FFT length, default 16s")
	parser.add_option("--veto-segments-file", metavar = "filename", help = "Set the name of the LIGO light-weight XML file from which to load vetoes (optional).")
	parser.add_option("--veto-segments-name", metavar = "name", help = "Set the name of the segments to extract from the segment tables and use as the veto list.", default = "vetoes")
	parser.add_option("--nxydump-segment", metavar = "start:stop", default = ":", help = "Set the time interval to dump from nxydump elments (optional).  The default is \":\", i.e. dump all time.")
	parser.add_option("--output", metavar = "filename", action = "append", default = [], help = "Set the name of the LIGO light-weight XML output file *.{xml,xml.gz} or an SQLite database *.sqlite (required).")
	parser.add_option("--output-cache", metavar = "filename", help = "Provide a cache file with the names of the LIGO light-weight XML output file *.{xml,xml.gz} or an SQLite database *.sqlite (required).")
	parser.add_option("--reference-psd", metavar = "filename", help = "Instead of measuring the noise spectrum, load the spectrum from this LIGO light-weight XML file (optional).")
	parser.add_option("--track-psd", action = "store_true", help = "Track PSD even if a reference is given")
	parser.add_option("--svd-bank", metavar = "filename", action = "append", default = [], help = "Set the name of the LIGO light-weight XML file from which to load the svd bank for a given instrument in the form ifo:file, These can be given as a comma separated list such as H1:file1,H2:file2,L1:file3 to analyze multiple instruments.  This option can be given multiple times in order to analyze bank serially.  At least one svd bank for at least 2 detectors is required.")
	parser.add_option("--svd-bank-cache", metavar = "filename", help = "Provide a cache file of svd-bank files")
	parser.add_option("--time-slide-file", metavar = "filename", help = "Set the name of the xml file to get time slide offsets")
	parser.add_option("--control-peak-time", metavar = "time", type = "int", help = "Set a time window in seconds to find peaks in the control signal")
	parser.add_option("--fir-stride", metavar = "time", type = "int", default = 8, help = "Set the length of the fir filter stride in seconds. default = 8")
	parser.add_option("--ht-gate-threshold", metavar = "threshold", type = "float", default = float("inf"), help = "Set the threshold on whitened h(t) to mark samples as gaps (glitch removal)")
	parser.add_option("--chisq-type", metavar = "type", default = "autochisq", help = "Choose the type of chisq computation to perform. Must be one of (autochisq|timeslicechisq). The default is autochisq.")
	parser.add_option("--coincidence-threshold", metavar = "value", type = "float", default = 0.005, help = "Set the coincidence window in seconds (default = 0.005).  The light-travel time between instruments will be added automatically in the coincidence test.")
	parser.add_option("--write-pipeline", metavar = "filename", help = "Write a DOT graph description of the as-built pipeline to this file (optional).  The environment variable GST_DEBUG_DUMP_DOT_DIR must be set for this option to work.")
	parser.add_option("--comment", help = "Set the string to be recorded in comment and tag columns in various places in the output file (optional).")
	parser.add_option("--check-time-stamps", action = "store_true", help = "Turn on time stamp checking")
	parser.add_option("-v", "--verbose", action = "store_true", help = "Be verbose (optional).")
	parser.add_option("-t", "--tmp-space", metavar = "path", help = "Path to a directory suitable for use as a work area while manipulating the database file.  The database file will be worked on in this directory, and then moved to the final location when complete.  This option is intended to improve performance when running in a networked environment, where there might be a local disk with higher bandwidth than is available to the filesystem on which the final output will reside.")
	parser.add_option("--blind-injections", metavar = "filename", help = "Set the name of an injection file that will be added to the data without saving the sim_inspiral_table or otherwise processing the data differently.  Has the effect of having hidden signals in the input data.  --injections must not be specified in this case")

	# Online options

	parser.add_option("--job-tag", help = "Set the string to identify this job and register the resources it provides on a node.  Should be 4 digits of the form 0001, 0002, etc..")
	parser.add_option("--likelihood-file", metavar = "filename", action = "append", default = [], help = "Set the name of the likelihood ratio data file to use for ranking events (either --likelihood-file or --reference-likelihood-file must be provided)")
	parser.add_option("--likelihood-file-cache", metavar = "filename", help = "Cache file for likelihood ratio data to use for ranking events")
	parser.add_option("--reference-likelihood-file", metavar = "filename", help = "Set the name of the likelihood ratio data file to use for ranking events (--data-source must be lvshm or framexmit) (--likelihood-snapshot-interval must provided) (either --likelihood-file or --reference-likelihood-file must be provided)")
	parser.add_option("--likelihood-snapshot-interval", type = "float", metavar = "seconds", help = "How often to reread the marginalized likelihoood data. If --likelihood-file is provided, the likelihood file will be overwritten by a snapshot of the trigger files and a duplicate snapshot will be generated to keep a record of past ranking statistics.")
	parser.add_option("--marginalized-likelihood-file", metavar = "filename", help = "Set the name of the file from which to load initial marginalized likelihood ratio data (required).")
	parser.add_option("--gracedb-far-threshold", type = "float", help = "false alarm rate threshold for gracedb (Hz), if not given gracedb events are not sent")
	parser.add_option("--gracedb-search", default = "LowMass", help = "gracedb search, default is LowMass")
	parser.add_option("--gracedb-pipeline", default = "gstlal", help = "gracedb pipeline, default is gstlal")
	parser.add_option("--gracedb-group", default = "Test", help = "gracedb group, default is Test")
	parser.add_option("--gracedb-service-url", default = "https://gracedb.ligo.org/api/", help = "gracedb service url, default is https://gracedb.ligo.org/api/")
	parser.add_option("--thinca-interval", metavar = "secs", type = "float", default = 4.0, help = "Set the thinca interval, default = 4s")
	# NOTE:  gstlal_inspiral_calc_likelihood searches for this option
	# in the process_params table to determine the threshold below
	# which it can delete uninteresting singles after the coincs are
	# ranked.  if the name of this option is changed, be sure to update
	# gstlal_inspiral_calc_likelihood
	parser.add_option("--singles-threshold", metavar = "SNR", type = "float", default = 8.0, help = "Set the SNR threshold at which to record single-instrument events in the output (default = 8).")

	options, filenames = parser.parse_args()

	#
	# expand the cache files into the list-valued options they
	# complement.  the files are closed as soon as they have been read.
	#

	if options.svd_bank_cache:
		# entries sharing a description are merged into a single
		# comma-separated "ifo:url,ifo:url,..." svd bank string
		svd_bank_dict = {}
		with open(options.svd_bank_cache) as cache_file:
			for line in cache_file:
				svd_bank_cache_entry = CacheEntry(line)
				svd_bank_dict.setdefault(svd_bank_cache_entry.description, []).append("{0}:{1}".format(svd_bank_cache_entry.observatory, svd_bank_cache_entry.url))
		options.svd_bank.extend([",".join(svd_string) for svd_string in svd_bank_dict.itervalues()])

	if options.output_cache:
		with open(options.output_cache) as cache_file:
			options.output.extend([CacheEntry(line).url for line in cache_file])

	if options.likelihood_file_cache:
		with open(options.likelihood_file_cache) as cache_file:
			options.likelihood_file.extend([CacheEntry(line).url for line in cache_file])

	#
	# check for options, files that are always required
	#

	# NOTE:  --svd-bank, --output and --likelihood-file are "append"
	# options whose default is an empty list, so "not given" must be
	# tested for with truthiness;  they are never None.
	missing_options = ["--%s" % option.replace("_", "-") for option in ["svd_bank", "output"] if not getattr(options, option)]
	if not options.likelihood_file and options.reference_likelihood_file is None:
		missing_options.append("either --likelihood-file or --reference-likelihood-file")

	if missing_options:
		raise ValueError("missing required option(s) %s" % ", ".join(sorted(missing_options)))

	detectors = datasource.GWDataSourceInfo(options)
	if len(detectors.channel_dict) < 2:
		raise ValueError("only coincident searches are supported:  must process data from at least two antennae")

	# FIXME Put all svd banks for different detectors in one file.
	try:
		svd_banks = [inspiral.parse_svdbank_string(svdbank) for svdbank in options.svd_bank]
	except ValueError:
		print >>sys.stderr, "Unable to parse svd banks"
		raise

	# FIXME: should also check for read permissions
	required_files = [svd_bank_set[instrument] for svd_bank_set in svd_banks for instrument in svd_bank_set]
	if options.veto_segments_file:
		required_files.append(options.veto_segments_file)
	missing_files = [filename for filename in required_files if not os.path.exists(filename)]

	if missing_files:
		raise ValueError("files %s do not exist" % ", ".join("'%s'" % filename for filename in sorted(missing_files)))

	#
	# check for mutually exclusive options
	#

	# every entry must be a string:  they are joined into the error
	# message below
	bad_combos = []
	if options.blind_injections and options.injections:
		bad_combos.append("(--blind-injections, --injections)")
	if options.likelihood_file and options.reference_likelihood_file:
		bad_combos.append("(--likelihood-file, --reference-likelihood-file)")

	if bad_combos:
		raise ValueError("must use only one option from each set: %s" % ', '.join(bad_combos))

	#
	# check sanity of options
	#

	# Online specific initialization
	# FIXME someday support other online sources
	if options.data_source in ("lvshm", "framexmit"):
		missed_options = []
		for option in ["job_tag", "marginalized_likelihood_file"]:
			if getattr(options, option) is None:
				missed_options.append("--%s" % option.replace("_", "-"))

		if missed_options:
			raise ValueError("%s required for --data-source is lvshm or framexmit" % ", ".join(missed_options))

		if len(options.svd_bank) > 1:
			raise ValueError("more than one --svd-bank not allowed for --data-source lvshm or framexmit, %d given" % len(options.svd_bank))

		# make an "infinite" extent segment
		detectors.seg = segments.segment(LIGOTimeGPS(0), LIGOTimeGPS(2000000000))

		# this gets set so that if you log into a node you can find out what the job id is easily
		os.environ['GSTLAL_LL_JOB'] = options.job_tag
	else:
		bad_options = []
		for option in ["job_tag", "marginalized_likelihood_file", "likelihood_snapshot_interval", "reference_likelihood_file"]:
			if getattr(options, option) is not None:
				bad_options.append("--%s" % option.replace("_", "-"))
		if bad_options:
			raise ValueError("%s options can only be given for --data-source is lvshm or framexmit " % ", ".join(bad_options))

	if options.reference_psd is None and not options.track_psd:
		raise ValueError("must use --track-psd if no reference psd is given, you can use both simultaneously")
	if options.local_frame_caching and not options.data_source == "frames":
		raise ValueError('--local-frame-caching can only be used if --data-source = "frames"')
	if options.chisq_type not in ["autochisq", "timeslicechisq"]:
		raise ValueError("--chisq-type must be one of (autochisq|timeslicechisq), given %s" % (options.chisq_type))

	# one (likelihood_file, reference_likelihood_file) pair per
	# analysis;  the tuple type is created once and shared by all
	# entries
	likelihood_namedtuple = namedtuple('likelihood_namedtuple', ('likelihood_file', 'reference_likelihood_file'))
	if options.reference_likelihood_file:
		likelihood_namedtuples_list = [likelihood_namedtuple(None, options.reference_likelihood_file)]
	else:
		likelihood_namedtuples_list = [likelihood_namedtuple(likelihood_file, None) for likelihood_file in options.likelihood_file]

	if not (len(options.svd_bank) == len(options.output) == len(likelihood_namedtuples_list)):
		raise ValueError("must have equal numbers of svd banks, output files and likelihood files")

	#
	# Option checks complete
	#

	# do this before converting option types
	process_params = options.__dict__.copy()

	options.nxydump_segment, = segmentsUtils.from_range_strings([options.nxydump_segment], boundtype = LIGOTimeGPS)

	if options.blind_injections is not None:
		detectors.injection_filename = options.blind_injections

	# Setup local caching
	if options.local_frame_caching:
		# mkstemp() returns an already-open OS-level file
		# descriptor together with the file's name.  the
		# descriptor is wrapped in a file object below instead of
		# opening the file a second time by name, which would leak
		# the descriptor.
		fd, fname = tempfile.mkstemp(".cache")
		if options.verbose:
			print >> sys.stderr, "caching frame data locally to ", fname
		# the with statement closes (and therefore flushes) the
		# cache file before its name is handed to the data source,
		# also if caching the frames fails
		with os.fdopen(fd, "w") as f:
			# FIXME:  should try to down-sample if possible.  there are
			# MDCs data sets floating around whose streams do not start
			# on integer second boundaries, however, and it's possible
			# to trigger a failure in the frame muxer if those get
			# down-sampled so for now we're not doing any resampling.
			# later, when we don't care about those MDCs, we can go
			# back to down-sampling.  if not being able to down-sample
			# is a problem in the meantime, I think the best way
			# forward is to clip the start of said streams using a drop
			# element and a (to be written) buffer probe that figures
			# out how many samples to clip off the start of the stream.
			# FIXME shouldn't use tempfile.gettempdir() directly, use
			# _CONDOR_SCRATCH_DIR like glue??
			# FIXME, note that at least for now condor sets TMPDIR to the
			# run scratch space so this *will* work properly
			detectors.local_cache_list = hoftcache.cache_hoft(detectors, output_path = tempfile.gettempdir(), verbose = options.verbose)
			for cacheentry in detectors.local_cache_list:
				# Guarantee a lal cache compliant file with only integer starts and durations.
				cacheentry.segment = segments.segment( int(cacheentry.segment[0]), int(math.ceil(cacheentry.segment[1])) )
				print >>f, str(cacheentry)
		detectors.frame_cache = fname

		# the injections are now present in the data so we don't want to do them twice
		detectors.injection_filename = None

	# Choose to optionally reconstruct segments around injections (not blind injections!)
	if options.injections:
		reconstruction_segment_list = simulation.sim_inspiral_to_segment_list(options.injections)
	else:
		reconstruction_segment_list = None

	# we're done
	return options, filenames, process_params, svd_banks, detectors, reconstruction_segment_list, likelihood_namedtuples_list


#
# =============================================================================
#
#                                Signal Handler
#
# =============================================================================
#


class OneTimeSignalHandler(object):
	"""
	Callable suitable for installation with signal.signal().  The first
	time an instance is invoked it posts a checkpoint message to the
	pipeline's bus and sends EOS to the pipeline;  every later invocation
	only reports how many times the instance has been called.
	"""
	def __init__(self, pipeline):
		# number of times this instance has been invoked
		self.count = 0
		self.pipeline = pipeline

	def __call__(self, signum, frame):
		self.count += 1

		# anything after the first invocation is reported and
		# otherwise ignored
		if self.count != 1:
			print >>sys.stderr, "*** received SIG %d %d times... ***" % (signum, self.count)
			return

		print >>sys.stderr, "*** SIG %d attempting graceful shutdown (this might take several minutes) ... ***" % signum
		try:
			#FIXME how do I choose a timestamp?
			bus = self.pipeline.get_bus()
			bus.post(inspiral.message_new_checkpoint(self.pipeline, timestamp=now().ns()))
			eos_accepted = self.pipeline.send_event(gst.event_new_eos())
			if not eos_accepted:
				raise Exception("pipeline.send_event(EOS) returned failure")
		except Exception as e:
			# no graceful way out is left:  terminate immediately,
			# bypassing normal interpreter shutdown
			print >>sys.stderr, "graceful shutdown failed: %s\naborting." % str(e)
			os._exit(1)


#
# =============================================================================
#
#                                     Main
#
# =============================================================================
#


#
# parse command line
#


# everything the rest of the program needs is derived from the command line
# in one place
options, filenames, process_params, svd_banks, detectors, reconstruction_segment_list, likelihood_namedtuples_list = parse_command_line()

# when time stamp checking is disabled, replace the element factory with a
# pass-through that hands back the source element unchanged.  extra
# positional *and* keyword arguments are accepted and ignored so that any
# call written for the real function keeps working with the stand-in
if not options.check_time_stamps:
	pipeparts.mkchecktimestamps = lambda pipeline, src, *args, **kwargs: src


#
# Parse the veto segments file(s) if provided
#


if options.veto_segments_file is None:
	# no veto file was given
	veto_segments = None
else:
	# load the document, extract the segments stored under the requested
	# name and coalesce the result
	veto_segments = ligolw_segments.segmenttable_get_by_name(
		ligolw_utils.load_filename(
			options.veto_segments_file,
			verbose = options.verbose,
			contenthandler = LIGOLWContentHandler
		),
		options.veto_segments_name
	).coalesce()


#
# set up the PSDs
#
# There are three modes for psds in this program
# 1) --reference-psd without --track-psd - a fixed psd (provided by the user) will be used to whiten the data
# 2) --track-psd without --reference-psd - a psd will be measured and used on the fly
# 3) --track-psd with --reference-psd - a psd will be measured on the fly, but the first guess will come from the user-provided psd
#


if options.reference_psd is None:
	# no reference PSD file:  every instrument starts out with None
	psd = dict.fromkeys(detectors.channel_dict)
else:
	# read the PSDs from the user-supplied document
	psd = lalseries.read_psd_xmldoc(
		ligolw_utils.load_filename(
			options.reference_psd,
			verbose = options.verbose,
			contenthandler = lalseries.LIGOLWContentHandler
		)
	)


#
# Process svd banks in serial
#


for svd_bank, output_filename, likelihood_namedtuple in zip(svd_banks, options.output, likelihood_namedtuples_list):
	#
	# every bank gets its own, initially empty, Bottle application.  it
	# is made the current default application and then handed to the
	# http server(s) that serve it up
	#


	bottle.default_app.push()
	# uncomment the next line to show tracebacks when something fails
	# in the web server
	#bottle.app().catchall = False

	# the job tag, when there is one, appears in both the service name
	# and the service properties;  otherwise an empty string is used
	if options.job_tag is not None:
		service_name = "gstlal_inspiral" + " (%s)" % options.job_tag
		service_properties = {"job_tag": options.job_tag}
	else:
		service_name = "gstlal_inspiral" + ""
		service_properties = {"job_tag": ""}
	httpservers = httpinterface.HTTPServers(
		0,
		bottle_app = bottle.default_app(),
		service_name = service_name,
		service_properties = service_properties,
		verbose = options.verbose
	)


	#
	# Set up a registry of the resources that this job provides
	#


	@bottle.route("/")
	@bottle.route("/index.html")
	def index(job_tag = options.job_tag, instruments = set(svd_bank.keys())):
		"""
		Generate an HTML page listing a link for every GET route
		registered with the current default Bottle application, other
		than the index page itself.  The job tag and instrument set
		are bound as default arguments so that the page reports the
		values in effect when the route was created.
		"""
		host = socket.gethostname()
		server_address = "http://%s:%d" % (host, httpservers[0][0].port)
		yield "<html><body>\n<h3>%s %s %s</h3>\n<p>\n" % (job_tag, host, " ".join(sorted(instruments)))
		for route in sorted(bottle.default_app().routes, key = lambda route: route.rule):
			if route.rule in ("/", "/index.html"):
				# don't create links back to this page
				continue
			if route.method != "GET":
				# only create links for GET methods
				continue
			yield "<a href=\"%s%s\">%s</a><br>\n" % (server_address, route.rule, route.rule)
		yield "</p>\n</body></html>"
	# FIXME:  get service-discovery working, then don't do this
	if "GSTLAL_LL_JOB" in os.environ:
		# record the URL of this job's web server in a per-job registry
		# file.  the file is closed explicitly so the URL is flushed to
		# disk straight away instead of whenever the file object happens
		# to be garbage collected
		with open("%s_registry.txt" % os.environ["GSTLAL_LL_JOB"], "w") as registry_file:
			registry_file.write("http://%s:%s/\n" % (socket.gethostname(), httpservers[0][0].port))


	#
	# parse SVD template bank and expose bank metadata
	#


	banks = inspiral.parse_bank_files(svd_bank, verbose = options.verbose)

	@bottle.route("/bank.txt")
	def get_filter_length_and_chirpmass(banks = banks):
		"""
		Yield one line of text holding the current time, the filter
		length and the chirp mass of the first template of one bank.
		"""
		#FIXME maybe shouldn't just take the first ones
		first_bank = banks.values()[0][0]
		fields = (float(now()), first_bank.filter_length, first_bank.sngl_inspiral_table[0].mchirp)
		yield '%.14g %.4g %.4g' % fields


	#
	# Build pipeline
	#


	if options.verbose:
		print >>sys.stderr, "assembling pipeline ...",

	pipeline = gst.Pipeline("gstlal_inspiral")
	mainloop = gobject.MainLoop()

	# gather the keyword arguments for the pipeline builder in one place.
	# the dictionary is deleted again right after the call so that it
	# does not keep the banks (or anything else) referenced
	lloid_kwargs = dict(
		detectors = detectors,
		banks = banks,
		psd = psd,
		psd_fft_length = options.psd_fft_length,
		ht_gate_threshold = options.ht_gate_threshold,
		veto_segments = veto_segments,
		verbose = options.verbose,
		nxydump_segment = options.nxydump_segment,
		chisq_type = options.chisq_type,
		track_psd = options.track_psd,
		control_peak_time = options.control_peak_time,
		fir_stride = options.fir_stride,
		reconstruction_segment_list = reconstruction_segment_list
	)
	triggersrc = lloidparts.mkLLOIDmulti(pipeline, **lloid_kwargs)
	del lloid_kwargs


	if options.verbose:
		print >>sys.stderr, "done"


	#
	# Load likelihood ratio data, assume injections are present!
	#


	if options.data_source in ("lvshm", "framexmit"):
		# online:  the likelihood data is read from the first entry of
		# the likelihood tuple if it is set, otherwise from the second.
		# the choice is made once so that the file that gets loaded
		# and the file named in the error message below cannot differ
		likelihood_filename = likelihood_namedtuple[0] if likelihood_namedtuple[0] is not None else likelihood_namedtuple[1]
		coinc_params_distributions, ranking_data, seglists = far.parse_likelihood_control_doc(ligolw_utils.load_filename(likelihood_filename, verbose = options.verbose, contenthandler = far.ThincaCoincParamsDistributions.LIGOLWContentHandler))
		# the segment lists in the file must be for exactly the
		# instruments being analyzed
		assert set(seglists) == set(detectors.channel_dict)
		if coinc_params_distributions is None:
			# NOTE:  the file name is computed before formatting.
			# "%" binds more tightly than a conditional expression,
			# so writing the conditional inline after the "%" would
			# turn the whole message into the bare fallback name
			raise ValueError("\"%s\" does not contain parameter distribution data" % likelihood_filename)
	else:
		# offline:  start from empty parameter distributions, no
		# ranking data, and an empty segment list for each instrument
		coinc_params_distributions, ranking_data, seglists = far.ThincaCoincParamsDistributions(), None, segments.segmentlistdict((instrument, segments.segmentlist()) for instrument in detectors.channel_dict)


	#
	# build output document
	#


	if options.verbose:
		print >>sys.stderr, "initializing output document ..."

	# an explicitly requested output file name takes precedence.  only
	# when there is none is a name constructed from the instruments, the
	# job tag and the analysis segment
	if output_filename:
		data_filename = output_filename
	else:
		data_filename = "%s-%s_LLOID-%d-%d.xml.gz" % (lsctables.ifos_from_instrument_set(detectors.channel_dict.keys()).replace(",", ""), options.job_tag, int(detectors.seg[0]), int(abs(detectors.seg)))

	# online data doesn't have a segment so make it all possible time
	data_segment = detectors.seg
	if not data_segment:
		data_segment = segments.segment(LIGOTimeGPS(0), LIGOTimeGPS(2000000000))

	output = inspiral.Data(
		filename = data_filename,
		process_params = process_params,
		pipeline = pipeline,
		instruments = set(detectors.channel_dict),
		seg = data_segment,
		coincidence_threshold = options.coincidence_threshold,
		coinc_params_distributions = coinc_params_distributions,
		ranking_data = ranking_data,
		marginalized_likelihood_file = options.marginalized_likelihood_file,
		likelihood_files_namedtuple = likelihood_namedtuple,
		injection_filename = options.injections,
		time_slide_file = options.time_slide_file,
		comment = options.comment,
		tmp_path = options.tmp_space,
		likelihood_snapshot_interval = options.likelihood_snapshot_interval,	# seconds
		thinca_interval = options.thinca_interval,
		sngls_snr_threshold = options.singles_threshold,
		gracedb_far_threshold = options.gracedb_far_threshold,
		gracedb_group = options.gracedb_group,
		gracedb_search = options.gracedb_search,
		gracedb_pipeline = options.gracedb_pipeline,
		gracedb_service_url = options.gracedb_service_url,
		verbose = options.verbose
	)
	if options.verbose:
		print >>sys.stderr, "... output document initialized"

	# the handler is given the main loop, the pipeline and the output
	# document, along with the segment lists obtained above
	handler = lloidparts.Handler(
		mainloop,
		pipeline,
		output,
		instruments = set(detectors.channel_dict),
		tag = options.job_tag,
		seglistdict = seglists,
		verbose = options.verbose
	)


	# attach one appsink, behind a queue, to every trigger source in
	# triggersrc.  the sinks are named "<instrument>_sink_<n>" where n
	# counts the sources of that instrument, and all of them are added
	# through one AppSync object constructed with the output document's
	# appsink_new_buffer method as its new-buffer callback
	if options.verbose:
		print >>sys.stderr, "attaching appsinks to pipeline ...",
	appsync = pipeparts.AppSync(appsink_new_buffer = output.appsink_new_buffer)
	# NOTE:  a generator expression is used so that the loop variables
	# (which refer to pipeline elements) are not left bound at module
	# scope afterwards
	appsinks = set(appsync.add_sink(pipeline, pipeparts.mkqueue(pipeline, src), caps = gst.Caps("application/x-lal-snglinspiral"), name = "%s_sink_%d" % (instrument, n)) for instrument, srcs in triggersrc.items() for n, src in enumerate(srcs))
	if options.verbose:
		print >>sys.stderr, "attached %d, done" % len(appsinks)


	#
	# if we request a dot graph of the pipeline, set it up
	#


	if options.write_pipeline is not None:
		# hook the dot graph dumping up to the appsinks, then write the
		# graph of the pipeline as it is now, under the "NULL" suffix
		pipeparts.connect_appsink_dump_dot(pipeline, appsinks, options.write_pipeline, options.verbose)
		null_dot_name = "%s.%s" % (options.write_pipeline, "NULL")
		pipeparts.write_dump_dot(pipeline, null_dot_name, verbose = options.verbose)


	#
	# Run pipeline
	#


	if options.data_source in ("lvshm", "framexmit"):
		#
		# setup signal handler to shutdown pipeline.  this is how
		# the program stops gracefully, it is the only way to stop
		# it.  Otherwise it runs forever man.
		#
		# this is only done in the online case because we want
		# offline jobs to just die and not write partial databases
		#
		# one handler instance is shared by SIGINT and SIGTERM so
		# that its invocation count covers both signals together:
		# whichever arrives first starts the graceful shutdown and
		# anything that follows, of either kind, is only reported.
		# with one instance per signal a SIGINT followed by a
		# SIGTERM would start the shutdown sequence a second time
		#

		shutdown_handler = OneTimeSignalHandler(pipeline)
		signal.signal(signal.SIGINT, shutdown_handler)
		signal.signal(signal.SIGTERM, shutdown_handler)


	if options.verbose:
		print >>sys.stderr, "setting pipeline state to playing ..."
	if pipeline.set_state(gst.STATE_PLAYING) != gst.STATE_CHANGE_SUCCESS:
		raise RuntimeError("pipeline did not enter playing state")

	# second dot graph, this time of the pipeline in the playing state
	if options.write_pipeline is not None:
		pipeparts.write_dump_dot(pipeline, "%s.%s" % (options.write_pipeline, "PLAYING"), verbose = options.verbose)

	if options.verbose:
		print >>sys.stderr, "running pipeline ..."
	# execution continues past this call only once the main loop has
	# been stopped
	mainloop.run()
	

	#
	# write output file
	#


	# the document goes to the explicitly requested file if there is one,
	# otherwise to a name generated from the document's own contents
	lloid_description = "%s_LLOID" % options.job_tag
	if output_filename:
		final_filename = output_filename
	else:
		final_filename = output.coincs_document.T050017_filename(lloid_description, "xml.gz")
	output.write_output_file(filename = final_filename, description = lloid_description, verbose = options.verbose)


	#
	# Cleanup template bank temp files
	#


	# delete the file named by the template_bank_filename attribute of
	# every bank of every instrument.  NOTE:  the loop variables stay
	# bound at module scope when the loop finishes, and "bank" is
	# explicitly deleted during the teardown further down, so it must
	# remain the name of the inner loop variable
	for ifo in banks:
		for bank in banks[ifo]:
			if options.verbose:
				print >> sys.stderr, "removing file: ", bank.template_bank_filename
			os.remove(bank.template_bank_filename)


	#
	# Shutdown the web interface servers and garbage collect the Bottle
	# app.  This should release the references the Bottle app's routes
	# hold to the pipeline's data (like template banks and so on).
	#


	del httpservers
	bottle.default_app.pop()


	#
	# Put the pipeline into the NULL state, then drop the references
	# that the handler and the output document hold to it, followed by
	# the handler itself and the template banks
	#


	null_state_result = pipeline.set_state(gst.STATE_NULL)
	if null_state_result != gst.STATE_CHANGE_SUCCESS:
		raise RuntimeError("pipeline could not be set to NULL")
	del handler.pipeline, output.pipeline
	del handler, bank, banks


#
# Cleanup local caches
#


if options.local_frame_caching:
	# remove the temporary cache file and drop the list of locally
	# cached entries
	temporary_cache = detectors.frame_cache
	if options.verbose:
		print >>sys.stderr, "deleting temporary cache file ", temporary_cache
	os.remove(temporary_cache)
	del detectors.local_cache_list


#
# done.  online pipeline always ends with an error code so that dagman does
# not mark the job "done" and the job will be restarted when the dag is
# restarted.
#


# the data sources for which the job always finishes with a non-zero exit
# status
online_data_sources = ("lvshm", "framexmit")
if options.data_source in online_data_sources:
	sys.exit(1)
