#!/usr/bin/env python
#
# Time Drive - based on duplicity - Encrypted bandwidth efficient backup.
#
# Copyright 2009 Rob Oakes	<LyX-Devel@oak-tree>
#
# Backend Methods that communicate with duplicity.  Because duplicity is
# a command line utility, it assumes that the entire environment will be 
# reset between each run.  The methods here have been slightly refactored so
# that multiple tasks can be performed within the same session.

import os
import subprocess
import sys, time

from duplicity import backend
from duplicity import collections
from duplicity import commandline
from duplicity import diffdir
from duplicity import dup_time
from duplicity import dup_temp
from duplicity import file_naming
from duplicity import patchdir
from duplicity import globals
from duplicity import gpg
from duplicity import log
from duplicity import path
import duplicity.backends
import duplicity.errors

# Module-level switch read by start_backup(): when it is true, the option
# "--allow-source-mismatch" is appended to the duplicity command line.
allow_source_mismatch = False

def get_snapshot_list(archive_url, gnu_passphrase, sync_remote = True,
	S3_AccessId = None, S3_SecretKey = None, S3_BucketType = None):
	"""
	Retrieve the list of snapshots currently available in an archive.
	
	@type archive_url: String
	@param archive_url: valid duplicity archive url
	@type gnu_passphrase: String
	@param gnu_passphrase: the passphrase for the specified url
	@type sync_remote: Boolean
	@param sync_remote: Default = True: passed to get_collection_status,
		which synchronizes the local cache with the remote first.
	@type S3_AccessId: String
	@param S3_AccessId: Default = None: Amazon S3 Access Id.
	@type S3_SecretKey: String
	@param S3_SecretKey: Default = None: Amazon S3 secret key.
	@type S3_BucketType: String
	@param S3_BucketType: Default = None: Amazon S3 Bucket Type.
	
	@return: the result of to_log_info('') called on the second element
		of col_stats.matched_chain_pair, or None when there is no matched
		chain pair or the listing fails.
	"""
	
	log.setup()
	
	col_stats, filelist_backend, filelist_archive = get_collection_status(archive_url,
		gnu_passphrase, sync_remote, None, S3_AccessId, S3_SecretKey, S3_BucketType)
	
	backup_list = None
	try:
		try:
			chain_pair = col_stats.matched_chain_pair
			if chain_pair:
				backup_list = chain_pair[1].to_log_info('')
		except Exception:
			# Listing is best effort: any failure is reported as None.
			backup_list = None
	finally:
		# Cleanup Globals and Shutdown Backend.  Done in a finally clause
		# so that it also runs when a non-Exception error passes through.
		cleanup_globals()
		filelist_backend.close()
		log.shutdown()
		
		if S3_AccessId is not None and S3_SecretKey is not None:
			_unset_AmazonS3()
	
	return backup_list



def delete_backup(archive_url, gnu_passphrase, backup_time, sync_remote = True, 
	S3_AccessId = None, S3_SecretKey = None, S3_BucketType = None):
	"""
	Delete the backup sets selected by archive_url and backup_time.
	
	@type archive_url: String
	@param archive_url: valid duplicity archive url
	@type gnu_passphrase: String
	@param gnu_passphrase: the passphrase for the specified url
	@param backup_time: time handed to get_backup_sets to select the
		backup chain whose sets are deleted.
	@type sync_remote: Boolean
	@param sync_remote: Default = True: passed to get_collection_status.
	@type S3_AccessId: String
	@param S3_AccessId: Default = None: Amazon S3 Access Id.
	@type S3_SecretKey: String
	@param S3_SecretKey: Default = None: Amazon S3 secret key.
	@type S3_BucketType: String
	@param S3_BucketType: Default = None: Amazon S3 Bucket Type.
	
	@rtype: Boolean
	@return: True when every selected set was deleted and the signatures
		were cleaned up, False when any step failed.
	"""
	
	log.setup()
	
	col_stats, filelist_backend, filelist_archive = get_collection_status(archive_url,
		gnu_passphrase, sync_remote, None, S3_AccessId, S3_SecretKey, S3_BucketType)
	
	operation_success = False
	try:
		try:
			# Selecting the sets is part of the guarded operation: a failure
			# here must not skip the cleanup below.
			result_sets = get_backup_sets(col_stats, backup_time)[0]
			for backup_set in result_sets:
				backup_set.delete()
			col_stats.set_values(sig_chain_warning = None).cleanup_signatures()
			operation_success = True
		except (Exception, SystemExit):
			# SystemExit stays in the list because the original bare except
			# caught it as well.  NOTE(review): duplicity's log.FatalError
			# appears to terminate the interpreter -- confirm.
			operation_success = False
	finally:
		# Restore Values to Default, Unset Amazon S3 and Close Log
		cleanup_globals()
		if S3_AccessId is not None and S3_SecretKey is not None:
			_unset_AmazonS3()
		filelist_backend.close()
		log.shutdown()
	
	return operation_success


def get_backup_sets(col_stats, backup_time1, backup_time2 = None):
	"""
	Retrieve backup sets from the backup chain that covers backup_time1.
	
	@param col_stats: collection status; must provide all_backup_chains
		and get_backup_chain_at_time().
	@param backup_time1: time used to look up the backup chain.
	@param backup_time2: Default = None: when given, only the sets of the
		chain whose end_time is <= backup_time2 are returned, followed by
		the chain's full set.  When None, all sets of the chain are
		returned.
	
	@rtype: (list, backup chain)
	@return: the selected sets and the chain they belong to.  When the
		collection holds no backup chains the result is ([], None).
	@raise AssertionError: when chains exist but none is found for
		backup_time1.
	"""
	
	if not col_stats.all_backup_chains:
		# Nothing has been backed up yet: no sets and no chain to report.
		return [], None
	
	backup_chain = col_stats.get_backup_chain_at_time(backup_time1)
	if not backup_chain:
		# Raised explicitly instead of through an assert statement so the
		# check is not stripped by "python -O"; the exception type is the same.
		raise AssertionError(col_stats.all_backup_chains)
	
	all_sets = backup_chain.get_all_sets()
	if backup_time2 is None:
		return all_sets, backup_chain
	
	# A list comprehension always yields a list, so the concatenation below
	# does not depend on filter() returning one.
	older_sets = [s for s in all_sets if s.end_time <= backup_time2]
	return older_sets + [backup_chain.fullset], backup_chain


def get_file_list(archive_url, gnu_passphrase, sync_remote = True, time = None,
	S3_AccessId = None, S3_SecretKey = None, S3_BucketType = None):
	"""
	Retrieve the file_list from the specified archive url.
	@type archive_url: String
	@param archive_url: valid duplicity archive url
	@type gnu_passphrase: String
	@param gnu_passphrase: the passphrase for the specified url
	@type sync_remote: Boolean
	@param sync_remote: Default = True: Will first make sure that
		the local file list is in sync with the remote file list.
	@type time: String
	@param time: Default = None: Specify the time from which to list
		files.  Date should be provided in the format year-month-date.
		Ex: 2009-04-10
	@type S3_AccessId: String
	@param S3_AccessId: Default = None: Amazon S3 Access Id.
	@type S3_SecretKey: String
	@param S3_SecretKey: Default = None: Amazon S3 secret key.
	@type S3_BucketType: String
	@param S3_BucketType: Default = None: Amazon S3 Bucket Type.
	@rtype: path_iter
	@return: list of files contained in the archive, as returned by
		list_current (None when no signature data is found).  Can be
		parsed with utils.ParseDirectoryStructure.
	"""
	
	log.setup()
	
	col_stats, filelist_backend, filelist_archive = get_collection_status(archive_url,
		gnu_passphrase, sync_remote, time, S3_AccessId, S3_SecretKey, S3_BucketType)
	
	try:
		# Fetch the Filelist and File Listing
		path_iter = list_current(col_stats)
	finally:
		# Restore Values to Default and Close Log.  Runs even when the
		# listing raises, so the backend, the log and the S3 credentials
		# are never left behind.
		cleanup_globals()
		if S3_AccessId is not None and S3_SecretKey is not None:
			_unset_AmazonS3()
		
		filelist_backend.close()
		log.shutdown()
	
	return path_iter


def run_cleanup_backupset(archive_url, gnu_passphrase, sync_remote = True,
	S3_AccessId = None, S3_SecretKey = None, S3_BucketType = None):
	"""
	Method that checks the specified backup set to see if files left
	from failed backups need to be removed.
	
	@type archive_url: String
	@param archive_url: valid duplicity archive url
	@type gnu_passphrase: String
	@param gnu_passphrase: the passphrase for the specified url
	@type sync_remote: Boolean
	@param sync_remote: Default = True: Will first make sure that
		the local file list is in sync with the remote file list.
	@type S3_AccessId: String
	@param S3_AccessId: Default = None: Amazon S3 Access Id.
	@type S3_SecretKey: String
	@param S3_SecretKey: Default = None: Amazon S3 secret key.
	@type S3_BucketType: String
	@param S3_BucketType: Default = None: Amazon S3 Bucket Type.
	
	@rtype: Boolean, String
	@return: the (result, message) pair produced by cleanup_backupset.
	"""
	
	log.setup()
	
	col_stats, filelist_backend, filelist_archive = get_collection_status(archive_url,
		gnu_passphrase, sync_remote, None, S3_AccessId, S3_SecretKey, S3_BucketType)
	
	try:
		cleanup_result, cleanup_message = cleanup_backupset(col_stats)
	finally:
		# Cleanup Globals and Shutdown Backend, also when the cleanup raises.
		cleanup_globals()
		filelist_backend.close()
		log.shutdown()
		
		if S3_AccessId is not None and S3_SecretKey is not None:
			_unset_AmazonS3()
	
	return cleanup_result, cleanup_message


def get_collection_status(archive_url, gnu_passphrase = None,
	sync_remote = True, time = None, S3_AccessId = None,
	S3_SecretKey = None, S3_BucketType = None):
	"""
	Build the backend, the local archive directory and the collection
	status for archive_url.  The log must already be set up.  The backend
	is left open for further operations; closing it is the caller's job.
	
	@type archive_url: String
	@param archive_url: valid duplicity archive url
	@type gnu_passphrase: String
	@param gnu_passphrase: the passphrase for the specified url
	@type sync_remote: Boolean
	@param sync_remote: Default = True: run sync_archive first so that the
		local metadata cache matches the remote file list.
	@type time: String
	@param time: Default = None: when given, it is converted with
		dup_time.genstrtotime and stored in globals.restore_time.
		Date should be provided in the format year-month-date.
		Ex: 2009-04-10
	@type S3_AccessId: String
	@param S3_AccessId: Default = None: Amazon S3 Access Id.
	@type S3_SecretKey: String
	@param S3_SecretKey: Default = None: Amazon S3 secret key.
	@type S3_BucketType: String
	@param S3_BucketType: Default = None: Amazon S3 Bucket Type.
	
	@rtype: tuple
	@return: (collection status, backend, archive directory)
	"""
	
	duplicity.backend.import_backends()
	
	# Amazon S3 credentials are exported only when both halves are given
	have_s3_credentials = S3_AccessId != None and S3_SecretKey != None
	if have_s3_credentials:
		_set_AmazonS3(S3_AccessId, S3_SecretKey)
	
	# European Amazon S3 buckets need two duplicity globals switched on
	european_bucket = S3_BucketType != None and S3_BucketType == "European"
	if european_bucket:
		globals.s3_european_buckets = True
		globals.s3_use_new_style = True
	
	remote_backend = backend.get_backend(archive_url)
	archive_cache = get_archive_dir(archive_url)
	
	# Publish the archive directory and the GnuPG profile through the globals
	globals.archive_dir = archive_cache
	profile = gpg.GPGProfile()
	globals.gpg_profile = profile
	profile.passphrase = gnu_passphrase
	
	if sync_remote:
		sync_archive(remote_backend, gnu_passphrase)
	if time != None:
		globals.restore_time = dup_time.genstrtotime(time)
	
	status = collections.CollectionsStatus(remote_backend, archive_cache)
	col_stats = status.set_values()
	
	return col_stats, remote_backend, archive_cache


def test_connection_url(archive_url, S3_AccessId = None, 
	S3_SecretKey = None, S3_BucketType = None):
	"""
	Test the remote connection to ensure that it is valid.  The test
	creates a backend for the url and asks it for its file list.
	
	@type archive_url: String
	@param archive_url: valid duplicity archive url
	@type S3_AccessId: String
	@param S3_AccessId: Default = None: Amazon S3 Access Id.
	@type S3_SecretKey: String
	@param S3_SecretKey: Default = None: Amazon S3 secret key.
	@type S3_BucketType: String
	@param S3_BucketType: Default = None: Amazon S3 Bucket Type.
	
	@rtype: Boolean
	@return: True when the backend could be created and listed,
		False on any failure.
	"""
	
	log.setup()
	duplicity.backend.import_backends()
	
	use_s3 = S3_AccessId is not None and S3_SecretKey is not None
	european = S3_BucketType is not None and S3_BucketType == "European"
	
	# Set Appropriate Environment Variables for Amazon S3
	if use_s3:
		_set_AmazonS3(S3_AccessId, S3_SecretKey)
	
	if european:
		globals.s3_european_buckets = True
		globals.s3_use_new_style = True
	
	tstResult = False
	try:
		try:
			filelist_backend = backend.get_backend(str(archive_url))
			filelist_backend.list()
			tstResult = True
		except (Exception, SystemExit):
			# SystemExit stays in the list because the original bare except
			# caught it as well.  NOTE(review): duplicity's log.FatalError
			# appears to terminate the interpreter -- confirm.
			tstResult = False
	finally:
		# Put the European bucket flags back to the values cleanup_globals
		# uses, so a later call for another bucket type does not inherit them.
		if european:
			globals.s3_european_buckets = False
			globals.s3_use_new_style = False
		
		# Unset Amazon S3 Environment Variables
		if use_s3:
			_unset_AmazonS3()
		
		log.shutdown()
	
	return tstResult


def restore_file(relative_path, archive_url, gnu_passphrase, destination, 
	restore_time = None, S3_AccessId = None, S3_SecretKey = None, S3_BucketType = None):
	"""
	Restore the file specified by relative_path to destination by running
	the duplicity command line tool.
	
	@param relative_path: path of the file inside the archive; its base
		name is also used as the file name below destination.
	@param archive_url: valid duplicity archive url
	@param gnu_passphrase: the passphrase for the specified url.  When it
		is None no passphrase is exported and "--no-encryption" is passed,
		the same way start_backup treats a missing passphrase.
	@param destination: directory the file is restored into
	@param restore_time: Default = None: value for "--restore-time"
	@param S3_AccessId: Default = None: Amazon S3 Access Id.
	@param S3_SecretKey: Default = None: Amazon S3 secret key.
	@param S3_BucketType: Default = None: Amazon S3 Bucket Type.
	
	@rtype: tuple
	@return: (standard output, standard error) of the duplicity process
	"""
	restoreCmd = ["duplicity"]
	
	# Command line switches for European Amazon S3 Buckets
	if S3_BucketType is not None and S3_BucketType == "European":
		restoreCmd.append("--s3-european-buckets")
		restoreCmd.append("--s3-use-new-style")
	
	if restore_time is not None:
		restoreCmd.append("--restore-time")
		restoreCmd.append(str(restore_time))
	
	if gnu_passphrase is None:
		restoreCmd.append("--no-encryption")
	
	restoreCmd.append("--file-to-restore")
	restoreCmd.append(str(relative_path))
	restoreCmd.append(str(archive_url))
	restoreCmd.append(str(destination + '/' + os.path.basename(str(relative_path))))
	
	use_s3 = S3_AccessId is not None and S3_SecretKey is not None
	try:
		# Secrets are exported only for the lifetime of the child process
		if use_s3:
			_set_AmazonS3(S3_AccessId, S3_SecretKey)
		if gnu_passphrase is not None:
			_set_Passphrase(gnu_passphrase)
		
		result = _execute_output(restoreCmd)
	finally:
		if use_s3:
			_unset_AmazonS3()
		if gnu_passphrase is not None:
			_unset_Passphrase()
	
	return result


def start_backup(path, archive_url, gnu_passphrase, ExcludeList = None, 
	FullBackupInterval = None, S3_AccessId = None, S3_SecretKey = None, 
	S3_BucketType = None):
	"""
	Begins a new backup operation for the specified path and archive url
	by running the duplicity command line tool.
	
	@param path: directory to back up (trailing whitespace is stripped)
	@param archive_url: valid duplicity archive url
	@param gnu_passphrase: the passphrase for the archive; None selects
		"--no-encryption".
	@param ExcludeList: Default = None: iterable of (item, itemtype)
		pairs.  itemtype 1 becomes "--exclude-regexp", itemtype 2 or 3
		becomes "--exclude"; other types are ignored.
	@param FullBackupInterval: Default = None: number of days after which
		a new full backup is made; ignored unless greater than zero.
	@param S3_AccessId: Default = None: Amazon S3 Access Id.
	@param S3_SecretKey: Default = None: Amazon S3 secret key.
	@param S3_BucketType: Default = None: Amazon S3 Bucket Type.
	
	@rtype: tuple
	@return: (standard output, standard error) of the duplicity process,
		or ("", message) when a process for the same path and url is
		already running.
	"""
	# Comparing the version as a tuple orders releases correctly; testing
	# minor and revision independently rejects e.g. 0.7.0.
	new_option_syntax = tuple(get_version(globals.version)) >= (0, 6, 7)
	
	backupCmd = ["duplicity"]
	if ExcludeList is not None:
		for item, itemtype in ExcludeList:
			if itemtype == 1:
				if new_option_syntax:
					# NOTE(review): the command is run without a shell, so
					# these quotes reach duplicity literally -- confirm that
					# this is intended.
					backupCmd.append(str("--exclude-regexp=")+'"'+str(item)+'"')
				else:
					backupCmd.append(str("--exclude-regexp"))
					backupCmd.append(str(item))
			elif itemtype == 2 or itemtype == 3:
				backupCmd.append(str("--exclude"))
				backupCmd.append(str(item))
			# Any other itemtype is skipped; appending the bare item would
			# hand duplicity an unexpected positional argument.
	
	if FullBackupInterval is not None and FullBackupInterval > 0:
		if new_option_syntax:
			# Version 0.6.07 is unable to process strings such as 365D.  It is necessary to 
			# convert the value to a valid Duplicity time string.
			backupString = dup_time.timetostring(time.time()-(FullBackupInterval*24*60*60))
			backupCmd.append(str("--full-if-older-than="+str(backupString)))
		else:
			backupCmd.append(str("--full-if-older-than"))
			backupCmd.append(str(FullBackupInterval) + "D")
	
	if S3_BucketType is not None and S3_BucketType == "European":
		backupCmd.append(str("--s3-european-buckets"))
		backupCmd.append(str("--s3-use-new-style"))
	
	# allow_source_mismatch is a module-level flag; it is not reset here.
	if allow_source_mismatch:
		backupCmd.append(str("--allow-source-mismatch"))
	
	if gnu_passphrase is None:
		backupCmd.append(str("--no-encryption"))
	
	source_path = str(path.rstrip())
	target_url = str(archive_url)
	backupCmd.append(source_path)
	backupCmd.append(target_url)
	
	use_s3 = S3_AccessId is not None and S3_SecretKey is not None
	try:
		# Secrets are exported only for the lifetime of the child process
		if gnu_passphrase is not None:
			_set_Passphrase(gnu_passphrase)
		if use_s3:
			_set_AmazonS3(S3_AccessId, S3_SecretKey)
		
		if _check_process_duplicity(source_path, target_url):
			message = "Same process is already running, stopped"
			print(message)
			result = "", message
		else:
			result = _execute_output(backupCmd)
	finally:
		if use_s3:
			_unset_AmazonS3()
		if gnu_passphrase is not None:
			_unset_Passphrase()
	
	return result


def cleanup_backupset(col_stats, dry_run = False):
	"""
	Delete the extraneous files in the current backend
	
	@type col_stats: CollectionStatus object
	@param col_stats: collection status
	
	@type dry_run: Boolean
	@param dry_run: default = False: when true, nothing is deleted and
		only the number of extraneous files is reported.
	
	@rtype: Boolean, String
	@return: First: Returns true if the operation completes without
	errors.  Otherwise returns false Second: A summary of the action taken.
	"""
	ext_local, ext_remote = col_stats.get_extraneous()
	extraneous = ext_local + ext_remote
	
	try:
		if not extraneous:
			returnMessage = _("No extraneous files found, nothing deleted in cleanup.")
			log.Warn(returnMessage)
			return True, returnMessage
		
		if dry_run:
			# Report only; the backend and the local cache stay untouched.
			return True, _("Dry run performed successfully.\n"
				"Detected %d un-needed files.") % len(extraneous)
		
		col_stats.backend.delete(ext_remote)
		for fn in ext_local:
			globals.archive_dir.append(fn).delete()
		return True, _("Deleted %d files from backend.") % len(extraneous)
	except (Exception, SystemExit):
		# SystemExit stays in the list because the original bare except
		# caught it as well.  NOTE(review): duplicity's log.FatalError
		# appears to terminate the interpreter -- confirm.
		return False, _("Cleanup operation failed")


def cleanup_globals():
	"""Put the duplicity globals changed by this module back to their defaults."""
	
	default_archive_dir = os.path.expandvars("$XDG_CACHE_HOME/duplicity")
	
	# (attribute name, default value), applied in this order
	defaults = (
		("gpg_profile", None),
		("backend", None),
		("archive_dir", default_archive_dir),
		("restore_time", None),
		("s3_use_new_style", False),
		("s3_european_buckets", False),
	)
	for attribute, value in defaults:
		setattr(globals, attribute, value)


def get_version(version_string):
	"""
	@param version_string: duplicity version string, from duplicity globals structure
	@returns: list with the [major, minor, revision] version numbers for Duplicity, e.g.
	version 0.6.02 would be major = 0, minor = 6, revision = 2.  Components
	that are missing ("0.7") count as 0; text around the first dotted
	number ("0.6.08b", "0.6.18-1") is ignored.
	@raise ValueError: when the string contains no number at all.
	"""
	import re
	
	# First dotted run of up to three numbers; minor and revision are optional.
	match = re.search(r"(\d+)(?:\.(\d+))?(?:\.(\d+))?", version_string)
	if match is None:
		raise ValueError("no version number found in %r" % (version_string,))
	
	return [int(part or 0) for part in match.groups()]


def get_archive_dir(archive_url):
	"""
	@param archive_url: URL to archive
	@returns: Path to the directory where the local archive cache is stored.
		The directory is created when it does not exist yet; log.FatalError
		is called when it is still not a directory afterwards.
	"""
	base_url = os.path.expandvars("$XDG_CACHE_HOME/duplicity")
	backup_name = commandline.generate_default_backup_name(archive_url)
	archive_expanded_url = commandline.expand_fn(os.path.join(base_url, backup_name))
	
	if not os.path.exists(archive_expanded_url):
		# Check archive dir and construct path.  A failure to create it is
		# deliberately ignored here: the isdir() test below reports it.
		try:
			os.makedirs(archive_expanded_url)
		except OSError:
			pass
	
	archive_dir = path.Path(archive_expanded_url)
	
	if not archive_dir.isdir():
		log.FatalError(_("Specified archive directory '%s' does not exist, "
			"or is not a directory") % (archive_dir.name,),
			log.ErrorCode.bad_archive_dir)
	
	return archive_dir


def list_current(col_stats):
	"""
	Build the listing of the files currently held in the archive, using
	the signature chain only.
	@type col_stats: CollectionStatus object
	@param col_stats: collection status
	
	@rtype: path_iter (duplicity class)
	@return: path_iter with the contents of the file collection, or None
		when check_sig_chain finds no signature chain.
	"""
	sig_chain = check_sig_chain(col_stats)
	if sig_chain:
		# globals.restore_time is passed through as is; None means "latest"
		# according to the original comment.
		restore_time = globals.restore_time
		fileobjs = sig_chain.get_fileobjs(restore_time)
		return diffdir.get_combined_path_iter(fileobjs)
	
	log.Notice(_("No signature data found, unable to list files."))
	return None


def check_sig_chain(col_stats):
	"""
	Return the first element of col_stats.matched_chain_pair (the signature
	chain), or None when there is no matched chain pair.
	
	Without a matched pair, log.FatalError is called when
	globals.incremental is set, otherwise a warning is logged.
	
	@type col_stats: CollectionStatus object
	@param col_stats: collection status
	"""
	chain_pair = col_stats.matched_chain_pair
	if chain_pair:
		return chain_pair[0]
	
	if globals.incremental:
		log.FatalError(_("Fatal Error: Unable to start incremental backup.  "
			"Old signatures not found and incremental specified"),
			log.ErrorCode.inc_without_sigs)
	else:
		log.Warn(_("No signatures found, switching to full backup."))
	return None
	

def sync_archive(backend, gnu_passphrase):
	"""
	Synchronize local archive manifest file and sig chains to remote archives.
	Copy missing files from remote to local as needed to make sure the local
	archive is synchronized to remote storage.  Local metadata files that
	have no remote counterpart are removed.
	
	@type backend: backend class
	@param backend: backend of the remote archive
	
	@type gnu_passphrase: string
	@param gnu_passphrase: the encryption key to the archive
	
	@rtype: void
	@return: void
	"""
	suffixes = [".g", ".gpg", ".z", ".gz"]
	
	def get_metafiles(filelist):
		"""
		Return metafiles of interest from the file list.
		Files of interest are:
		  sigtar - signature files
		  manifest - manifest files
		Files excluded are:
		  non-duplicity files
		  duplicity partial files
		  
		@rtype: (dict, Boolean)
		@return: dictionary that maps the file name without a compression
			or encryption suffix to the full file name, and a flag that
			is True when any parsed, non-partial file is encrypted
		"""
		metafiles = {}
		need_passphrase = False
		for fn in filelist:
			pr = file_naming.parse(fn)
			if not pr:
				continue
			if pr.partial:
				continue
			if pr.encrypted:
				need_passphrase = True
			if pr.type in ["full-sig", "new-sig"] or pr.manifest:
				base, ext = os.path.splitext(fn)
				if ext in suffixes:
					metafiles[base] = fn
				else:
					metafiles[fn] = fn
		return metafiles, need_passphrase
		
	def copy_raw(src_iter, filename):
		"""
		Copy data from src_iter to the file at filename
		"""
		block_size = 128 * 1024
		outfile = open(filename, "wb")
		try:
			while True:
				try:
					data = src_iter.next(block_size).data
				except StopIteration:
					break
				outfile.write(data)
		finally:
			# Close the file even when reading or writing fails
			outfile.close()
	
	def resolve_basename(fn, backend):
		"""
		@return: (parsedresult, local_name, remote_name)
		"""
		pr = file_naming.parse(fn)
		if pr.manifest:
			suffix = file_naming.get_suffix(globals.encryption, False)
		else:
			suffix = file_naming.get_suffix(globals.encryption, not globals.encryption)
		rem_name = fn + suffix
		
		if pr.manifest:
			suffix = file_naming.get_suffix(False, False)
		else:
			suffix = file_naming.get_suffix(False, True)
		loc_name = fn + suffix
		
		return (pr, loc_name, rem_name)
		
	def remove_local(fn):
		"""
		Delete the local cache file that belongs to fn.
		"""
		pr, loc_name, rem_name = resolve_basename(fn, backend)
		
		del_name = globals.archive_dir.append(loc_name).name
		log.Notice(_("Deleting local %s (not authoritative at backend).") % del_name)
		os.unlink(del_name)
	
	def copy_to_local(fn, backend):
		"""
		Copy remote file fn to local cache.
		"""
		class Block:
			"""
			Data block to return from SrcIter
			"""
			def __init__(self, data):
				self.data = data
		
		class SrcIter:
			"""
			Iterate over source and return Block of data.
			"""
			def __init__(self, fileobj):
				self.fileobj = fileobj
				
			def next(self, size):
				try:
					res = Block(self.fileobj.read(size))
				except Exception:
					log.FatalError(_("Failed to read %s: %s") %
						(self.fileobj.name, sys.exc_info()),
						log.ErrorCode.generic)
				
				if not res.data:
					self.fileobj.close()
					raise StopIteration
				return res
			
			def get_footer(self):
				return ""
			
		log.Notice(_("Copying %s to local cache.") % fn)
		
		pr, loc_name, rem_name = resolve_basename(fn, backend)
			
		fileobj = backend.get_fileobj_read(rem_name)
		src_iter = SrcIter(fileobj)
		if pr.manifest:
			copy_raw(src_iter, globals.archive_dir.append(loc_name).name)
		else:
			gpg.GzipWriteFile(src_iter, globals.archive_dir.append(loc_name).name, 
				size = sys.maxint)
	
	# get remote metafile list
	remlist = backend.list()
	remote_metafiles, rem_needpass = get_metafiles(remlist)
	
	# get local metafile list
	loclist = globals.archive_dir.listdir()
	local_metafiles, loc_needpass = get_metafiles(loclist)
	
	if rem_needpass or loc_needpass:
		globals.gpg_profile.passphrase = gnu_passphrase
	
	# we have the list of metafiles on both sides. remote is always
	# authoritative. figure out which are local spurious (should not
	# be there) and missing (should be there but are not).  Membership
	# is tested against the dictionaries themselves, which is a hash
	# lookup instead of a scan over a list of keys.
	local_missing = sorted([key for key in remote_metafiles
		if key not in local_metafiles])
	local_spurious = sorted([key for key in local_metafiles
		if key not in remote_metafiles])
	
	# finally finish the process
	if not local_missing and not local_spurious:
		log.Notice(_("Local and Remote metadata are synchronized, no sync needed."))
	else:
		if not globals.dry_run:
			log.Notice(_("Synchronizing remote metadata to local cache..."))
			for fn in local_spurious:
				remove_local(fn)
			for fn in local_missing:
				copy_to_local(fn, backend)
		else:
			if local_missing:
				log.Notice(_("Sync would copy the following from remote to local:")
					+ "\n" + "\n".join(local_missing))
			if local_spurious:
				log.Notice(_("Sync would remove the following spurious local files:")
					+ "\n" + "\n".join(local_spurious))

def _execute_output(cmd):
	"""
	run a single command that returns output
	
	@type cmd: String []
	@param cmd: the command with it's parameters
	
	@rtype: String
	@return: First: standard output from the command
		Second: standard error from the command
	"""
	output_out = ""
	output_err = ""

	pipe = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
	output_out, output_err = pipe.communicate()
	
	return output_out, output_err

def _check_process_duplicity(location, archiveurl):
	for line in os.popen("ps xa"):
		if location in line and archiveurl in line:
			return True
	return False

def _set_AmazonS3(S3_AccessId, S3_SecretKey):
	os.environ["AWS_ACCESS_KEY_ID"] = str(S3_AccessId)
	os.environ["AWS_SECRET_ACCESS_KEY"] = str(S3_SecretKey)

def _unset_AmazonS3():
	os.unsetenv("AWS_ACCESS_KEY_ID")
	os.unsetenv("AWS_SECRET_ACCESS_KEY")

def _set_Passphrase(passphrase):
	os.putenv("PASSPHRASE", passphrase)
	
def _unset_Passphrase():
	os.unsetenv("PASSPHRASE")