#!/usr/bin/perl
# MySQL Binlog purge script.
# Copyright 2008-2014 Robin H. Johnson 
# - <robbat2@isohunt.com> (Isohunt 2008-2013)
#   - Initial implementation
#   - Multi-slave
# - <robbat2@gentoo.org> (Gentoo changes 2010-2014+)
#   - No hardcoded passwords
#   - Help
#   - Cleanups
#   - Configurable warning
# - <rjohnson@sitka.bclibraries.ca (BC Libraries Cooperative 2014+)
#   - Galera support framework
#   - keep min X logs / Y bytes
#
# Run as: ... --master $MASTER --slaves $SLAVE1,$SLAVE2

use DBI;
use strict;
use warnings;
use Data::Dumper;
use File::Path::Expand;
use Carp qw(confess);
use Getopt::Long;
use Getopt::Long::Descriptive;
use Config::Tiny;
use Clone qw(clone);

my $master = undef;
my @slaves = ();
my %slave_status;
my %dbh;
my @binlogs_order;
my %binlogs;
my %binlog_progress;
my $msg_buffer;
my $active_logs = 999999; # high number so we print on failure
my $inactive_logs = 0;
my $error = 0;

# Extract from config files
my @mysql_cnf = ('/etc/mysql/my.cnf', '~/.my.cnf');
my ($mycnf_username,$mycnf_password) = (undef, undef);
my ($default_username, $default_password) = ('root', undef);
foreach my $cfgfile (@mysql_cnf) {
	$cfgfile = expand_filename($cfgfile);
	if(-f $cfgfile) {
		#printf "File %s\n", $cfgfile;
		my $cfg = Config::Tiny->read($cfgfile);
		next unless $cfg;
		foreach my $sect (keys %$cfg) {
			next unless $sect =~ /^client$/ or $sect =~ /^mysql$/;
			$default_username = $mycnf_username = $cfg->{$sect}->{username} if defined $cfg->{$sect}->{username};
			$default_password = $mycnf_password = $cfg->{$sect}->{password} if defined $cfg->{$sect}->{password};
		}
	}
}

# GetOpt funtime
my ($opt,$usage) = describe_options(
  'mysql-binlog-purger %o <some-arg>',
  [ "debug|verbose", "Enable verbose output", { default => 0 } ],
  [ "dry-run|dryrun", "Perform dry-run only", { default => 0 } ],
  [ "confirm-no-slaves", "Yes, I have no slaves and wish to purge data", { default => 0 } ],
  [ "slaves=s@", "Comma-seperated list of slave hosts (and optionally ports), or specify multiple times", {} ],
  [ "master=s", "Master host (and optionally port)", { required => 1 } ],
  [ "username=s", "Username", {default => $default_username } ],
  [ "password=s", "Password", {default => $default_password } ],
  [ "warn-active-logs|warn-max-logs=i", "Verbosely warn if more than this number of active logs exist", { default => 3, callbacks => { positive => sub { shift() > 0 } } }] ,
  [ "warn-inactive-logs=i", "Verbosely warn if more than this number of inactive logs exist", { default => undef, callbacks => { valid => sub { $_ = shift; !defined($_) || $_ > 0 } } }] ,
  [ "preserve-min-logs=i", "Keep at least this many old (inactive) logfiles", { default => 0, callbacks => { positive => sub { shift() > 0 } } } ],
  [ "preserve-min-bytes=i", "Keep at least this many bytes of old (inactive) logfiles", { default => 0, callbacks => { positive => sub { shift() > 0 } } } ],
  [ "galera", "Enable Galera mode",  { default => 0 } ],
  [],
  [ "help|usage", "Print this usage message", { shortcircuit => 1 } ],
);

print($usage->text), exit if $opt->help;
print("TODO: Galera mode not yet implemented\n"), exit if $opt->galera;

my $opt_slaves_ref = $opt->slaves;
foreach my $slave_opt (@{$opt_slaves_ref}) {
	push @slaves, split /[,\s]+/,$slave_opt if $slave_opt;
}
$master = $opt->master;
unless(length($master) > 0) {
	print STDERR "No master given!\n";
	exit(-2);
}
# printf "slaves = %s (%s) %d\n",$opt->slaves, join('/', @slaves), $slaves_size;
unless(@slaves > 0 or $opt->confirm_no_slaves) {
	print STDERR "No slaves given!\n";
	exit(-3);
}

# end of globals

dprintf("# Installing driver\n");
my $drh = DBI->install_driver("mysql");

foreach my $hostname (@slaves, $master) {
	dprintf("# Connecting to $hostname\n");
	my $dsn = 'DBI:mysql:database=mysql;host='.$hostname;
	my $tmp_dbh = DBI->connect($dsn, $opt->username, $opt->password);
	if($tmp_dbh) {
		$dbh{$hostname} = $tmp_dbh;
	} else {
		$dbh{$hostname} = -1;
		$error = 1;
	}
}

# Get the full list of logs from the master
{
	dprintf("# Grabbing master binlog list on $master\n");
	my $sth = $dbh{$master}->prepare("SHOW BINARY LOGS");
	$sth->execute;
	while (my $ref = $sth->fetchrow_hashref()) {
		push @binlogs_order, $ref->{Log_name};
		$binlogs{$ref->{Log_name}} = clone($ref);
	}
	$sth->finish;
	$msg_buffer .= dprintf("# Available logs:\n");
	my $i = 0;
	$msg_buffer .= dprintf(join('',map { sprintf "# %d => %s (%d bytes)\n", $i++, $_, $binlogs{$_}{File_size}; } @binlogs_order));

	$sth = $dbh{$master}->prepare("SHOW STATUS");
	$sth->execute;
	while (my $ref = $sth->fetchrow_hashref()) {
		my $k = $ref->{Variable_name}; my $v = $ref->{Value};
		$slave_status{$master}{$k} = $v;
	}
	$sth->finish;
}
# Record master position in binlog progress implicitly.
$binlog_progress{$master} = $#binlogs_order;

foreach my $hostname (@slaves) {
	dprintf("# Check slave binlog list on $hostname\n");
	my $pos = 0;
	if($dbh{$hostname} == -1) {
		$msg_buffer .= dprintf(sprintf "# Host %s is not reachable, assuming completely out of date\n", $hostname);
		$pos = -1;
		$error = 1;
	} else {
		my $sth;
		$sth = $dbh{$hostname}->prepare("SHOW SLAVE STATUS");
		$sth->execute;
		my $ref = $sth->fetchrow_hashref();
		#print Dumper($ref);
		my $logfile1 = $ref->{Relay_Master_Log_File};
		my $logfile2 = $ref->{Master_Log_File};
		$slave_status{$hostname} = clone($ref);
		$sth->finish;

		my ($logfile1_pos, $logfile2_pos);
		$logfile1_pos = compare($logfile1, \@binlogs_order);
		$logfile2_pos = compare($logfile2, \@binlogs_order);
		$pos = min($logfile1_pos, $logfile2_pos);
		my %badlogs;
		$badlogs{$logfile1} = 1 if $logfile1_pos == -1;
		$badlogs{$logfile2} = 1 if $logfile2_pos == -1;

		if($logfile1_pos == -1 || $logfile2_pos == -1) {
			$msg_buffer .= dprintf(sprintf "# Host %s has probably lost sync due to too-fast binlog purging (looking for: %s)\n", $hostname, join(', ', keys(%badlogs)));
			$error = 1;
		}

		# Fetch all status data, will include galera stuff with wsrep_ prefix
		$sth = $dbh{$hostname}->prepare("SHOW STATUS");
		$sth->execute;
		while (my $ref = $sth->fetchrow_hashref()) {
			my $k = $ref->{Variable_name}; my $v = $ref->{Value};
			$slave_status{$hostname}{$k} = $v;
		}
		$sth->finish;
		
		# DEBUG
		#printf "%s %s %d\n",$hostname,$logfile1,$logfile1_pos;
		#printf "%s %s %d\n",$hostname,$logfile2,$logfile2_pos;
	}
	$binlog_progress{$hostname} = $pos;
}


# DEBUG
#print join(',',keys(%dbh))."\n";
my $min_pos = 999999999;
my $min_pos_valid = 0;
$msg_buffer .= dprintf("# Box status:\n");
# TODO: this is where we have to implement the Galera decision logic
foreach my $hostname (@slaves, $master) {
	my $pos = $binlog_progress{$hostname};
	$msg_buffer .= dprintf(sprintf "# %s: %d => %s\n", $hostname, $pos, ($pos >= 0) ? $binlogs_order[$pos] : '');
	$min_pos = min($min_pos,$pos) if defined($pos);
	$min_pos_valid = 1;
}
# Sanity check
unless($min_pos_valid) {
	#print Dumper(@binlogs_order);
	$min_pos = 1;
}
$active_logs = ($#binlogs_order-$min_pos+1);
$inactive_logs = $#binlogs_order-$active_logs;

$msg_buffer .= dprintf(sprintf "# Earliest active log position is %d\n", $min_pos);
$msg_buffer .= dprintf(sprintf "# Preserving at least %d logs / %d bytes\n", $opt->preserve_min_logs, $opt->preserve_min_bytes);
$msg_buffer .= dprintf(sprintf "# Active logs: %d\n", $active_logs);

# Work out actually how many logs to keep
my $keepcount_logbytes = 0;
my $bytecounter = $opt->preserve_min_bytes;
#print Dumper(%binlogs);
COUNTER: while($bytecounter > 0 && $keepcount_logbytes < $#binlogs_order) {
  #printf "bytes to keep = %d, files to keep = %d\n", $bytecounter, $keepcount_logbytes;
  $keepcount_logbytes++;
  my $pos = $#binlogs_order-$keepcount_logbytes;
  my $binlog_file = $binlogs_order[$pos];
  my $binlogsize = $binlogs{$binlog_file}->{File_size};
  $bytecounter -= $binlogsize;
  #break COUNTER;
}
#printf "bytes to keep = %d, files to keep = %d\n", $bytecounter, $keepcount_logbytes;
my $keepcount = max($opt->preserve_min_logs, $keepcount_logbytes);
$msg_buffer .= dprintf(sprintf "# Actually keeping %d logs\n", $keepcount);

# Keep them
$min_pos -= $keepcount;

# Now process
if($error == 0 && (($active_logs == 0) || ($active_logs == 1)) && $min_pos <= 0) {
	$msg_buffer .= dprintf(sprintf "# Not running, no logs to purge.\n");
} elsif($error == 0 && $min_pos > 0) {
	my $sql = sprintf "PURGE MASTER LOGS TO '%s';", $binlogs_order[$min_pos];
	if($opt->dry_run == 0) {
		$msg_buffer .= dprintf(sprintf "# Running SQL: %s\n", $sql);
		my $sth = $dbh{$master}->prepare($sql);
		my $failed = 0;
		$sth->execute or $failed = 1;
		if($failed) {
			$msg_buffer .= dprintf(sprintf "# Failed to execute SQL %s\n",$sql); 
			$error = 1;
		};
		$sth->finish;
	} else {
		$msg_buffer .= dprintf(sprintf "# Pretend running SQL: %s\n", $sql);
	}
} else {
	$msg_buffer .= dprintf(sprintf "# Not running, some logs are not present!\n");
	$error = 1;
}

#print Dumper(%binlogs);
#print "Slave status\n";
#print Dumper(%slave_status);

sub compare {
	my $needle = shift;
	$_ = shift;
	my @haystack = @$_;
	if($needle lt $haystack[0]) {
		return -1;
	}
	my $i = 0;
	foreach my $hay (@haystack) {
		if($needle eq $hay) {
			return $i;
		}
		$i++;
	}
	return $i;
}
sub cmpfunc {
	my ($a,$b, $def_a, $def_b);
	$a = shift;
	$b = shift;
	$def_a = defined($a);
	$def_b = defined($b);
	if($def_a and not $def_b) {
		return -1;
	} 
	if(not $def_a and $def_b) {
		return 1;
	}
	if($def_a and $def_b) {
		if($a > $b) {
			return 1;
		} elsif($a < $b) {
			return -1;
		} else {
			return 0;
		}
	}
	confess;
}

sub min {
  my ($a, $b, $cmpval) = (shift, shift);
  $cmpval = cmpfunc($a,$b);
  return $a if($cmpval < 0);
  return $b if($cmpval > 0);
  return $a;
}
sub max {
  my ($a, $b, $cmpval) = (shift, shift);
  $cmpval = cmpfunc($a,$b);
  return $b if($cmpval < 0);
  return $a if($cmpval > 0);
  return $a;
}

#my $cmptest = [
# [2, 3, -1],
# [3, 2, 1],
# [2, 2, 0],
# [undef, 2, -1],
# [2, undef, 1],
# [undef, undef, undef],
#];
#for my $testcmpref (@$cmptest) {
#  my @t = @$testcmpref;
#  printf("%d cmp %d = %d == %d\n", $t[0], $t[1], $t[2], cmpfunc($t[0],$t[1]));
#}
#exit;

sub dprintf {
	my $s = shift;
	if($opt->debug == 1) {
		print STDERR $s;
	}
	return $s;
}

END {
	if((defined($opt->warn_active_logs) && $active_logs >= $opt->warn_active_logs) || 
	   (defined($opt->warn_inactive_logs) && $inactive_logs >= $opt->warn_inactive_logs) || 
	   $error == 1) {
		print $msg_buffer if $msg_buffer;
		$msg_buffer = '';
	}
}

