#!/usr/bin/env python
#
# Copyright (C) 2011  Chad Hanna
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

import sys
import cgi
import cgitb
import os
import urllib
os.environ["MPLCONFIGDIR"] = "/tmp"
import matplotlib
matplotlib.use('Agg')
import numpy
import matplotlib.pyplot as plt
import time
import StringIO
import base64
from urlparse import urlparse
cgitb.enable()
form = cgi.FieldStorage()

## @file gstlal_llcbcsummary
# This program will monitor the output of a gstlal inspiral low latency
# analysis; See gstlal_llcbcsummary for help and usage.

## @package gstlal_llcbcsummary
#
# This program is designed to be placed in the cgi-bin directory of the user's
# public_html directory on the cluster that is running the gstlal inspiral low
# latency analysis
#
# ## USAGE:
# This program is never meant to be executed by a user, but rather on a
# webserver via a url such as:
# 
#	 	https://hostname.domain/path/to/cgi-bin/gstlal_llcbcsummary?id=<start>,<stop>&dir=/path/to/analysis/directory&ifos=<ifo>,<ifo>,...
#
# e.g.,
#
# 		https://ldas-jobs.ligo.caltech.edu/~gstlalcbc/cgi-bin/gstlal_llcbcsummary?id=0001,0010&dir=/home/gstlalcbc/engineering/5/bns_trigs_40Hz&ifos=H1,L1,V1
#
# <start> and <stop> are the 4 digit id numbers corresponding to the first and
# last job, respectively.  The ifos parameter is a comma separated list of the
# instruments to show, e.g. H1,L1,V1; all three parameters are required.
# 
# ## Interpretation of the output page
#
# General remarks.  Red bars indicate that no data is available for the job in
# question.  If you are sure the url is correct, this could mean that a job has
# failed.  Each bar contains the node and job number.
# 
# ### Header information
#
# \image html gstlal_llcbcsummary01.png
#
# This displays the current ifos involved ("H1L1V1") as well as the current GPS
# time and local time
#
# ### Latency
#
# \image html gstlal_llcbcsummary02.png
#
# This plot indicates the time since the last event (gray) as well as the
# latency of the last event (white).  If things are behaving well the latency
# should be ~60 seconds and the time since last event should be no more than
# ~300 seconds. 
#
# #### Reasons that this might not be the case:
#  
# -# At least two detectors are not functioning
# -# The gstlal inspiral jobs are in a periodic "update" mode where they pause to
# update background statistics.  This should only last for a few minutes every
# 4 hours and should not cause more than a few hundred seconds of delay
# -# Something more serious is wrong, e.g. the jobs are not able to keep up with the data.
#
# ### SNR
#
# \image html gstlal_llcbcsummary03.png
#
# This plot indicates the coincident SNR (root-sum-squares) of the last 1000
# events.  The median SNR is white and the maximum SNR is gray.  The median SNR
# should typically be small (< 8), however the maximum SNR might be large due
# to the presence of a glitch or a signal.
#
# 
# ### Livetime plots
# 
# \image html gstlal_llcbcsummary04.png
#
# These IFO specific plots can be used to infer the amount of up-time,
# down-time and periods of lost data for a given job.  This is *only* since the
# job has been running.  Thus it is an indicator of how long the job has been
# running without crashing.  Ideally jobs would only be restarted during
# maintenance periods.  If the livetime is low for one or more jobs it could
# indicate instability that is causing crashes
# 
# ### Template Parameters
#
# \image html gstlal_llcbcsummary05.png
# 
# \image html gstlal_llcbcsummary06.png
#
# These plots give the chirp mass and template duration bins of the given job.
# 
# ### RAM usage
#
# \image html gstlal_llcbcsummary07.png
#
# This plot gives the maximum RAM usage seen over the duration of the job. If
# it is near the limit of the resource it might indicate that the jobs are
# "swapping" and could be related to problems such as falling behind.
# 
# ### Links to more information about a given job
# 
# \image html gstlal_llcbcsummary08.png
#
# To drill down into specific information of a job click on the links at the
# bottom of the screen.  These will bring up pages created by gstlal_llcbcnode.
# Please see gstlal_llcbcnode for more information

def now():
	"""Return the current time, in seconds, on the clock used for the page header.

	The value is the Unix wall-clock time shifted by a fixed offset.
	NOTE(review): the offset is presumably the GPS epoch (315964800 Unix
	seconds) less 15 leap seconds -- confirm; being a constant it cannot
	follow leap seconds added later.

	FIXME use pylal when available
	"""
	unix_clock_offset = 315964785
	return time.time() - unix_clock_offset

def to_png_image():
	f = StringIO.StringIO()
	plt.savefig(f, format="png")
	print '<img src="data:image/png;base64,',base64.b64encode(f.getvalue()),'"></img>'
	f.close()

def read_registry(dir, dataurl, ids):
	"""Map each job id to the network location named in its registry file.

	For every id the file "<dir>/<id><dataurl>" is opened and its first
	line is parsed as a URL; the netloc part (host[:port]) is stored.

	Parameters:
	dir -- directory holding the registry files
	dataurl -- suffix appended to the id to form the file name
	ids -- iterable of job id strings

	Returns a dictionary {id: netloc}.  Ids whose registry file cannot
	be opened or read map to the empty string.
	"""
	nodedict = {}
	for job_id in ids:
		path = '%s/%s%s' % (dir, job_id, dataurl)
		try:
			# the context manager closes the file even if parsing fails
			with open(path, "r") as registry_file:
				first_line = registry_file.readline()
		except IOError:
			nodedict[job_id] = ""
		else:
			# strip the line terminator so it can never end up inside
			# the netloc when the URL has no path component
			nodedict[job_id] = urlparse(first_line.strip()).netloc
	return nodedict

	
def load_data(directory, idrange, type):
	"""Load "<directory>/<id>/<type>.txt" for every id in idrange.

	Parameters:
	directory -- base directory of the analysis
	idrange -- sequence of job id strings
	type -- base name (without ".txt") of the file to read in each job
	        directory

	Returns a pair of dictionaries (found, missed), both keyed by the
	position of the id within idrange.  found maps to a 2-dimensional
	array (a file holding a single row is promoted to one row of
	columns); missed maps to an empty array and collects every job whose
	file is absent, unreadable, unparsable or contains no data.
	"""
	found = {}
	missed = {}
	for i, job_id in enumerate(idrange):
		fname = "%s/%s/%s.txt" % (directory, job_id, type)
		try:
			data = numpy.atleast_2d(numpy.loadtxt(fname))
		except (IOError, ValueError):
			data = None
		# depending on the numpy version an empty file either raises or
		# yields a zero-size array; the latter must not reach the plot
		# functions, which index the last/first row unconditionally
		if data is None or data.size == 0:
			missed[i] = numpy.array([])
		else:
			found[i] = data
	return found, missed
			
def setup_plot():
	"""Create the figure shared by all summary plots.

	The figure is 20 x 5 with a fully transparent background patch and
	holds a single axes with a black background; margins are adjusted to
	leave room below the axes.

	Returns the pair (figure, axes).
	"""
	figure = plt.figure(figsize=(20, 5))
	figure.patch.set_alpha(0.0)
	axes = figure.add_subplot(111, axisbg='k')
	# the figure just created is the current one, so adjusting it
	# directly is the same as going through pyplot
	figure.subplots_adjust(left=.062, right=0.98, bottom=0.3)
	return figure, axes

def finish_plot(ids, registry, ylim, title=''):
	"""Decorate the current plot and print it as an inline image.

	Parameters:
	ids -- sequence of job ids, one x tick per id
	registry -- mapping from job id to the text shown next to it
	ylim -- [lower, upper] y axis limits, in log10 units
	title -- plot title

	The y axis carries 8 evenly spaced ticks labelled with 10**value, so
	the log10 heights drawn by the callers read as plain numbers.
	"""
	x_labels = ["%s : %s " % (job, registry[job]) for job in ids]
	y_positions = numpy.linspace(ylim[0], ylim[1], 8)
	y_labels = ["%.1e" % (10.**position,) for position in y_positions]

	plt.grid(color=(0.1,0.4,0.5), linewidth=2)
	plt.xticks(numpy.arange(len(ids))+.3, x_labels, rotation=90, fontsize=10)
	plt.xlim([0, len(ids)])
	plt.ylim(ylim)
	plt.yticks(y_positions, y_labels, fontsize=14)
	plt.title(title, fontsize=18)
	to_png_image()

def plot_latency(found, missed, ids, registry):
	"""Plot, per job, the time since the last event and its latency.

	Parameters:
	found -- mapping from bar position to a 2-dimensional array; only the
	         last row is used, column 0 as the time of the event and
	         column 1 as its latency
	missed -- mapping whose keys are the bar positions of jobs without data
	ids, registry -- passed on to finish_plot() for the x axis labels

	Heights are log10 of the values.  Jobs in missed get a red bar as
	tall as the largest value plotted.
	"""
	fig, axes = setup_plot()

	slots = list(found)
	# only the most recent (last) row of every job is shown
	newest = [found[slot][-1] for slot in slots]
	log_latency = numpy.log10(numpy.array([row[1] for row in newest]))
	log_elapsed = numpy.log10(now() - numpy.array([row[0] for row in newest]))

	# without any job there is no maximum to take; use a unit height
	if slots:
		top = max(log_elapsed.max(), log_latency.max())
	else:
		top = 1

	absent = list(missed)
	axes.bar(absent, numpy.ones(len(absent)) * top, color='r', alpha=0.9, linewidth=2)
	axes.bar(slots, log_latency, color='w', alpha=0.9, linewidth=2)
	axes.bar(slots, log_elapsed, color='w', alpha=0.7, linewidth=2)
	finish_plot(ids, registry, [0, top], 'Time (s) since last event (gray) and latency (white)')

def plot_snr(found, missed, ids, registry):
	"""Plot, per job, the maximum and the median of column 1 of its data.

	Parameters:
	found -- mapping from bar position to a 2-dimensional array whose
	         column 1 holds the SNR values
	missed -- mapping whose keys are the bar positions of jobs without data
	ids, registry -- passed on to finish_plot() for the x axis labels

	Heights are log10 of the values and the y axis starts at log10(5.5).
	Jobs in missed get a red bar as tall as the largest maximum.
	"""
	fig, axes = setup_plot()

	slots = list(found)
	snr_columns = [found[slot][:,1] for slot in slots]
	log_max = numpy.log10(numpy.array([column.max() for column in snr_columns]))
	log_median = numpy.log10(numpy.array([numpy.median(column) for column in snr_columns]))

	# without any job there is no maximum to take; use a unit height
	if slots:
		top = max(log_max)
	else:
		top = 1

	absent = list(missed)
	axes.bar(absent, numpy.ones(len(absent)) * top, color='r', alpha=0.9, linewidth=2)
	axes.bar(slots, log_median, color='w', alpha=0.9, linewidth=2)
	axes.bar(slots, log_max, color='w', alpha=0.7, linewidth=2)
	finish_plot(ids, registry, [numpy.log10(5.5), top], 'SNR of last 1000 events: max (gray) and median (white)')

def plot_livetime(found, missed, ids, registry, ifo):
	"""Plot, per job, the on, off and gap counters of one instrument.

	Parameters:
	found -- mapping from bar position to a 2-dimensional array; columns
	         1, 2 and 3 of the first row are used as the on, off and gap
	         values respectively
	missed -- mapping whose keys are the bar positions of jobs without data
	ids, registry -- passed on to finish_plot() for the x axis labels
	ifo -- instrument name, shown in the title

	Heights are log10 of the values.  Jobs in missed get a red bar as
	tall as the largest value plotted.
	"""
	fig, axes = setup_plot()

	slots = list(found)

	# FIXME Hack to adjust for the high sample rate state vector: every
	# instrument except V1 is scaled down by a factor of 16.
	# NOTE(review): the earlier wording named "L1 and V1", which does not
	# match the condition below -- confirm which instruments are meant.
	if ifo != "V1":
		rescale = numpy.log10(16)
	else:
		rescale = None

	def log_column(col):
		# Handle log of 0 by setting it to max of (actual value, 1)
		values = numpy.log10(numpy.array([max(found[slot][0][col], 1) for slot in slots]))
		if rescale is not None:
			values -= rescale
		return values

	on_y = log_column(1)
	off_y = log_column(2)
	gap_y = log_column(3)

	if slots:
		series = (on_y, off_y, gap_y)
		top = max(values.max() for values in series)
		bottom = min(values.min() for values in series)
	else:
		top, bottom = 1, 0

	absent = list(missed)
	axes.bar(absent, numpy.ones(len(absent)) * top, color='r', alpha=0.9, linewidth=2)
	axes.bar(slots, off_y, color='w', alpha=0.7, linewidth=2)
	axes.bar(slots, gap_y, color='b', alpha=0.5, linewidth=2)
	axes.bar(slots, on_y, color='w', alpha=0.5, linewidth=2)
	finish_plot(ids, registry, [bottom*.9, top], '%s Up time (gray) Down time (white) Dropped time (blue)' % (ifo,))

def plot_ram(found, missed, ids, registry):
	"""Plot the RAM usage bar chart and print it as an inline image.

	Parameters:
	found -- mapping from bar position to a 2-dimensional array; column 1
	         of the first row is the value plotted
	missed -- mapping whose keys are the bar positions of jobs without data
	ids, registry -- used for the x axis labels

	This is exactly the chart plot_single_col() draws for col = 1, so the
	work is delegated to it instead of keeping a second copy of the same
	plotting code.
	"""
	plot_single_col(found, missed, ids, registry, col = 1, title = 'RAM usage GB')

def plot_single_col(found, missed, ids, registry, col = 0, title = ''):
	"""Plot, per job, one column of the first row of its data.

	Parameters:
	found -- mapping from bar position to a 2-dimensional array
	missed -- mapping whose keys are the bar positions of jobs without data
	ids, registry -- passed on to finish_plot() for the x axis labels
	col -- index of the column to plot
	title -- plot title

	Heights are log10 of the values; the y axis spans from 0.9 times the
	smallest height to the largest one.  Jobs in missed get a red bar as
	tall as the largest value plotted.
	"""
	fig, axes = setup_plot()

	slots = list(found)
	heights = numpy.log10(numpy.array([found[slot][0][col] for slot in slots]))

	# without any job there are no extrema to take; use a unit range
	if slots:
		top, bottom = max(heights), min(heights)
	else:
		top, bottom = 1, 0

	absent = list(missed)
	axes.bar(absent, numpy.ones(len(absent)) * top, color='r', alpha=0.9, linewidth=2)
	axes.bar(slots, heights, color='w', alpha=0.9, linewidth=2)
	finish_plot(ids, registry, [0.9 * bottom, top], title)


def get_ids(form):
	"""Expand the "id" form value into the list of job id strings.

	Parameters:
	form -- object whose getvalue("id") returns "<start>,<stop>" or a
	        single "<start>"

	Returns the zero padded, 4 digit ids from start to stop inclusive.
	A lone id selects just that job instead of failing.

	Raises ValueError if a value is not an integer.
	"""
	idrange = [int(n) for n in form.getvalue("id").split(",")]
	first = idrange[0]
	if len(idrange) > 1:
		last = idrange[1]
	else:
		last = first
	#FIXME relies on 4 digit ids
	return ['%04d' % (job,) for job in range(first, last + 1)]

# Every parameter read below must be present; checking them all here turns
# a missing "ifos" into the same explicit error as a missing "dir" or "id"
# instead of an attribute error on None.
for required in ("dir", "id", "ifos"):
	if required not in form:
		raise ValueError("must specify %s" % (required,))

ids = get_ids(form)
directory = form.getvalue("dir")
# comma separated list of instrument names
ifos = form.getvalue("ifos").split(",")
# job id -> network location taken from "<dir>/<id>_registry.txt"
reg = read_registry(directory, "_registry.txt", ids)

# Header
print >>sys.stdout, 'Cache-Control: no-cache, must-revalidate'
print >>sys.stdout, 'Expires: Mon, 26 Jul 1997 05:00:00 GMT'
print >>sys.stdout, 'Content-type: text/html\r\n'

# HTML preamble
print """
<html>
<head>
<meta http-equiv="Pragma" content="no-cache">
<meta http-equiv="Expires" content="-1">
<meta http-equiv="CACHE-CONTROL" content="NO-CACHE">
<meta http-equiv="refresh" content="300">
  <link rel="stylesheet" href="//code.jquery.com/ui/1.10.0/themes/base/jquery-ui.css" />
  <script src="//code.jquery.com/jquery-1.8.3.js"></script>
  <script src="//code.jquery.com/ui/1.10.0/jquery-ui.js"></script>
  <script type="text/javascript"> $(function() {
    $("#accordion").accordion({
    });

  });</script>
</head>
<body>
"""

# title
print """
<font size=10><img src="http://www.lsc-group.phys.uwm.edu/cgit/gstlal/plain/gstlal/doc/gstlal.png">gstlal_inspiral online </font><font size=6 color=#707070><b><right>%s: %d - %s </right></b><br></font><hr><br>
""" % ("".join(sorted(ifos)), int(now()), time.strftime("%a, %d %b %Y %H:%M:%S %Z", time.localtime()))

print '<div id="accordion">'


#
# latency history
#
print "<h1>Latency</h1>"
print "<div id='canvaslatency'>"
found, missed = load_data(directory, ids, "latency_history")
plot_latency(found, missed, ids, reg)
print "</div>"

#
# snr history
#

print "<h1>SNR</h1>"
print "<div id='canvassnr'>"
found, missed = load_data(directory, ids, "snr_history")
plot_snr(found, missed, ids, reg)
print "</div>"

#
# live time by ifo
#

for ifo in ifos:
	print "<h1>Livetime for %s</h1>" % ifo
	print "<div id='%scanvastime'>" % (ifo,)
	found, missed = load_data(directory, ids, "%s/state_vector_on_off_gap" % (ifo,))
	plot_livetime(found, missed, ids, reg, ifo)
	print "</div>"

#
# Template Duration
#

print "<h1>Template Duration</h1>"
print "<div id='canvasdur'>"
found, missed = load_data(directory, ids, "bank")
plot_single_col(found, missed, ids, reg, 1, "Template Duration (s)")
print "</div>"

#
# Chirp Mass
#

print "<h1>Chirp Mass</h1>"
print "<div id='canvasmchirp'>"
found, missed = load_data(directory, ids, "bank")
plot_single_col(found, missed, ids, reg, 2, "Chirp Mass")
print "</div>"

#
# RAM
#


print "<h1>RAM</h1>"
print "<div id='canvasram'>"
found, missed = load_data(directory, ids, "ram_history")
plot_ram(found, missed, ids, reg)
print "</div>"

print "<h1>SNR History</h1>"
print "<div id='canvassnrhistory'>"
print "<img src=gstlal_llsnrhistory?dir=%s&id=%s,%s></img>" % (form.getvalue("dir"), ids[0], ids[-1])
print "</div>"

print "</div>"

# links at bottom
print "<h3>Node summary info:</h3><hr>"
for id in ids:
	url = os.path.join(form.getvalue("dir"), "%s/likelihood.xml" % (id,))
	print '<font size=5><a target="_blank" href="gstlal_llcbcnode?dir=%s&id=%s&url=%s"> %s </a></font>' % (form.getvalue("dir"), id, url, id)

print "</body>"

