#!/usr/bin/env perl
# vim: set filetype=perl :
use strict;
use warnings;
use 5.010;
use English qw( -no_match_vars);
use autodie;
use File::stat;

# Run the backup only when this file is executed as a script.  When the file
# is loaded from other code, caller(0) is true and only the subs get defined.
main() unless caller(0);

# Entry point: dump the LDAP directory as LDIF, write one file per entry into
# the backup directory and record the result as a commit in the Git
# repository kept in that directory.
#
# Options are read from @ARGV (see the POD at the end of this file):
#   --ldif-cmd, --backup-dir, --commit-msg, --commit-date, --[no-]gc, --help
#
# Dies when the dump command, a file operation (autodie) or a git command
# (exception thrown by Git.pm) fails.
sub main {
    use Pod::Usage;
    use Getopt::Long qw( :config auto_help pass_through );
    use File::Path qw( make_path );
    use Git;

    my $ldif_cmd    = '/usr/sbin/safe-ldif';
    my $backup_dir  = '/var/backups/ldap';
    my $commit_msg  = 'ldap-git-backup';
    my $commit_date = time();
    my $gc = 1;
    my $help;
    GetOptions(
        'ldif-cmd=s'    => \$ldif_cmd,
        'backup-dir=s'  => \$backup_dir,
        'commit-msg=s'  => \$commit_msg,
        'commit-date=s' => \$commit_date,
        'gc!'           => \$gc,
        'help'          => \$help,
    );
    pod2usage('-verbose' => 2, '-exit_status' => 0) if $help;
    # pass_through leaves unrecognised arguments in @ARGV; treat them as a
    # usage error.
    pod2usage() if @ARGV;

    # --commit-date may name a file; in that case its mtime is the date.
    if ( -e $commit_date ) {
        $commit_date = stat($commit_date)->mtime();
    }

    my $ldif_aref = LDAP::Utils::read_ldif($ldif_cmd);
    make_path($backup_dir, {mode => 0700});
    my $mode = stat($backup_dir)->mode;
    # 05 = read or search permission for "others", 02 = write permission.
    warn "backup directory $backup_dir is world readable\n" if $mode & 05;
    warn "backup directory $backup_dir is world writable\n" if $mode & 02;
    # 'git init' acts on the current directory, hence the chdir.
    chdir($backup_dir);
    Git::command('init');
    my $repo = Git->repository(Directory => $backup_dir);

    # Tracked LDIF files that are not rewritten below belong to entries that
    # no longer exist and have to be removed from the repository.
    my @filelist = $repo->command('ls-files', '*.ldif');
    my %files_before = ();
    for my $f (@filelist) { $files_before{$f} = 1 }

    @filelist = ();
    for my $ldif (@$ldif_aref) {
        my $filename = LDAP::Utils::filename($ldif);
        open(my $fh, '>', "$backup_dir/$filename");
        print {$fh} $ldif;
        close($fh);
        push(@filelist, $filename);
        delete($files_before{$filename});
    }
    $repo->command('add', @filelist) if @filelist;
    $repo->command('rm', (keys %files_before)) if %files_before;

    # 'git commit' exits non-zero when nothing is staged and Git.pm turns
    # that into an exception.  An unchanged directory is a normal outcome of
    # a backup run, so only commit when the index really differs from HEAD.
    my @staged = $repo->command('diff', '--cached', '--name-only');
    if (@staged) {
        $repo->command('commit', "--message=$commit_msg", "--date=$commit_date");
    }
    $repo->command('gc', '--quiet') if $gc;
}

package LDAP::Utils;
use strict;
use warnings;
use 5.010;
use English qw( -no_match_vars);
use Carp;
use autodie;
use MIME::Base64;

# Dump the directory with $ldif_cmd and return the records as an array ref.
#
# The dump is repeated until a run yields the same number of records as the
# run before it (a first run that yields no records ends the loop at once).
# The result of the last run is returned.
sub read_ldif {
    my ($ldif_cmd) = @_;

    my $records        = [];
    my $previous_count = -1;    # never equals a real count, forces a first run
    while (1) {
        my $current_count = scalar @{$records};
        last if $previous_count == $current_count;

        $previous_count = $current_count;
        $records        = read_ldif_raw($ldif_cmd);
    }

    return $records;
}

# Run $ldif_cmd once and return its standard output as an array ref of
# records.  A record is everything up to and including the next "\n\n".
# Dies when the command cannot be started or the pipe cannot be closed
# cleanly.
sub read_ldif_raw {
    my ($ldif_cmd) = @_;

    # Read in chunks terminated by an empty line instead of line by line.
    local $INPUT_RECORD_SEPARATOR = "\n\n";

    open( my $pipe, '-|', $ldif_cmd) or die "Can't exec '$ldif_cmd': $!";
    my @records = <$pipe>;
    close($pipe) or die "$ldif_cmd exited with $?: $!";

    return \@records;
}

# Per base name: the highest collision suffix handed out so far (0 means that
# only the plain, unsuffixed name has been used).
our %filename_list = ();

# Build the file name for an LDIF entry: "<timestamp>-<7 hex digits>.ldif",
# where the hex digits are the start of the MD5 of the canonical DN.  When
# the same base name comes up again, "-1", "-2", ... is inserted before the
# extension.
sub filename {
    my ($ldif) = @_;

    use Digest::MD5 qw( md5_hex );
    my $stamp     = timestamp($ldif);
    my $dn_digest = substr(md5_hex(dn($ldif)), 0, 7);
    my $base      = $stamp . '-' . $dn_digest;

    if (!defined($filename_list{$base})) {
        # First occurrence: remember it and use the plain name.
        $filename_list{$base} = 0;
        return $base . '.ldif';
    }

    $filename_list{$base} += 1;
    return $base . '-' . $filename_list{$base} . '.ldif';
}

# Return the canonicalized DN of an LDIF entry, taken from its first line
# that starts with "dn:" (case-insensitive).  Returns '' when there is no
# such line.
sub dn {
    my ($ldif) = @_;

    # A newline followed by one space continues the previous line; unfold.
    (my $unfolded = $ldif) =~ s{\n }{}gs;

    my ($dn_line) = grep { $_ =~ m{\A dn:}xmsi } split("\n", $unfolded);
    return '' unless defined $dn_line;

    return canonicalize_dn( get_value_from_attribute($dn_line) );
}

# Canonicalize a DN by canonicalizing each comma-separated part and joining
# the parts with ',' again.  The split happens on every comma; commas that
# are escaped inside a value are not treated specially.
sub canonicalize_dn {
    my ($dn) = @_;

    return join(',', map { canonicalize_rdn($_) } split(',', $dn));
}

# Canonicalize one "key=value" part of a DN: strip leading and trailing
# whitespace from key and value and lower-case both.
#
# A part without '=' yields "key=", an empty part yields "=".  These inputs
# occur when the caller splits a DN on an escaped comma; a missing half is
# treated as the empty string so that no "uninitialized value" warnings are
# emitted.  The returned string is what the undef halves used to produce.
sub canonicalize_rdn {
    my ($rdn) = @_;

    my ($key, $value) = split('=', $rdn // '', 2);
    for my $part ($key, $value) {    # $part aliases $key, then $value
        $part //= '';
        $part =~ s{\A\s+}{}xms;
        $part =~ s{\s+\z}{}xms;
    }
    return lc($key) . '=' . lc($value);
}

# Return the value of the first "createtimestamp:" line (case-insensitive)
# of an LDIF entry, or '00000000000000Z' when the entry has no such line.
sub timestamp {
    my ($ldif) = @_;

    # A newline followed by one space continues the previous line; unfold.
    (my $unfolded = $ldif) =~ s{\n }{}gs;

    my @stamp_lines
        = grep { $_ =~ m{\A createtimestamp:}xmsi } split("\n", $unfolded);

    return @stamp_lines
        ? get_value_from_attribute($stamp_lines[0])
        : '00000000000000Z';
}

# Extract the value from a single (already unfolded) LDIF attribute line.
#
#   "attr: value"     -> "value"
#   "attr:: BASE64"   -> the decoded value
#   "attr:"           -> ""  (empty value)
#
# LDIF allows any number of spaces, including none, between the colon(s)
# and the value, so the separator is parsed rather than assumed to be
# exactly ": " or ":: ".  Only the first colon of the line separates name
# and value; later colons belong to the value.
#
# Returns undef when the line contains no colon, and for the URL form
# ("attr:< url"), whose value is not stored inline and is not fetched here.
sub get_value_from_attribute {
    my ($attribute) = @_;

    my $value;
    if ( defined $attribute
        && $attribute =~ m{\A [^:]+ : (?<marker> [:<]? ) [ ]* (?<raw> .*) \z}xms )
    {
        my $marker = $+{marker};
        my $raw    = $+{raw};
        if ( $marker eq ':' ) {
            $value = decode_base64($raw);
        }
        elsif ( $marker eq '' ) {
            $value = $raw;
        }
        # marker '<': URL reference, leave $value undefined
    }
    return $value;
}

1;

__END__

=head1 NAME

ldap-git-backup - check in LDIF from an LDAP server into a GIT repository

=head1 SYNOPSIS

ldap-git-backup [options]

ldap-git-backup --help

=head1 DESCRIPTION

ldap-git-backup takes an LDIF dump of an LDAP server and updates a GIT repository
that serves as a versioned backup.

ldap-git-backup splits the LDIF data into separate entries.  It constructs
unique but stable file names using a combination of the creation time stamp for
ordering and the DN as the unique key for an entry.

=head1 OPTIONS

=over 4

=item B<--ldif-cmd E<lt>dump_ldif_commandE<gt>>

Specify a command to create a complete LDIF dump of the LDAP directory suitable
for a backup.  It should contain all entries necessary to restore the LDAP
database.  By default C</usr/sbin/safe-ldif> is taken which calls
C</usr/sbin/slapcat> from OpenLDAP.

=item B<--backup-dir E<lt>backup_directoryE<gt>>

Specify the directory where the GIT repository for the versioned backup is held.
Default: F</var/backups/ldap>

=item B<--commit-msg E<lt>commit_stringE<gt>>

Specify a custom commit message.  Default: ldap-git-backup

=item B<--commit-date E<lt>date_stringE<gt>>

=item B<--commit-date E<lt>fileE<gt>>

Specify a custom commit date.  If the argument names an existing file, that
file's modification time is used as the commit date.

=item B<--no-gc>

Do not perform a garbage collection (git gc) after checking in the new backup.
By default gc is done so as to keep the size of the backup down.  You may want
to skip gc for the occasional backup run but leave it on for the scheduled
backups.

=item B<--help>

Prints this page.

=back

=head1 AUTHOR

Elmar S. Heeb <elmar@heebs.ch>

=cut
