#!/usr/bin/perl -w
########################################################

=head1 RIPS

B<R>edundancy B<I>nformation B<P>rotection B<S>ystem

$Revision: 1.5 $ (Version 0.5)

=head1 Purpose

Have you ever teared out your hair because one damaged block of one of your discs screwed 
your whole archive ? Have you ever tried to restore an old tape backup with really important data
and cursed yourself for using tar -cB<z> and gzip stopped after the second block which is the
only broken one at the whole tape? Well then this is probably what you need.
RIPS can protect your files in a manner that minor corruptions can be balanced out
by adding some redundancy to your file.

=head1 Usage

=head2 rips [options]

=over 4

=item B<-v> or B<--verbose>

Be noisy.
Default is not verbose.

=item B<-d> or B<--decode>

Decode given input file.
Default is encoding.

=item B<-t> or B<--test>

Test archive; the same as specifying /dev/null as output.

=item B<-h> or B<--help>

Show this help.

=item B<-i> F<file> or B<--input=>F<file>

Specify the input file. 
If nothing or the file '-' is given, C<stdin> is used.

=item B<-o> F<file> or B<--output=>F<file>

Specify the output file. 
If nothing or the file '-' is given, C<stdout> is used.

=item B<-b> I<size> or B<--blocksize=>I<size>

Defines the size of one slice.
Default is 8192 bytes.

=item B<-s> I<number> or B<--slices=>I<number>

Defines the number of slices-1 that will be combined to one subarchive.
Default are four (plus one) slices, which means your file will be roughly extended by one fourth of it's original size.
Valid values range from 1-255.

=back


=head1 Requirements

Rips uses Getopt::Std, Getopt::Long, Digest::MD5 which are usually part of the standard perl installation
and should not bother you.

=head1 Archive format

A rips archive consists practically of several rips archives concatenated,
each of these archives has I<slices>+1 chunks, each chunk has the size of 
I<blocksize>+16 (md5) + 4 bytes I<blocksize>.
Each archive has a header: 4 bytes ID ("RIPS") + 1 byte archive version
+ 1 byte # of slices.
So a stabdard archive would be:
(4+1) * (8192+20) + 6 = 41066 bytes long.

=head1 ToDo

Make archive decoding continue on more errors.

=head1 Bugs

Currently RIPS is not able to recover from errors in it's metadata.

=head1 Author

Alexander Kuehn E<lt>I<ak@papendorf-se.de>E<gt>

L<R.I.P.S.>

=cut

package main;
# includes
use strict;
use Getopt::Std;
use Getopt::Long;
use Digest::MD5  qw(md5 md5_hex md5_base64);
my $header="RIPS";
my $caller=$0;

# protos
sub verbprt (@);
sub errlog (@);

$SIG{__WARN__} = \&catcherror;

use vars qw/ $opt_v $opt_d $opt_t $opt_h $opt_i $opt_o $opt_b $opt_s /;

getopts('vdthi:o:b:s:');

my $verbose=($opt_v ? 1 : 0);
my $decode=($opt_d ? 1 : 0);
my $testmode=($opt_t ? 1 : 0);
my $help=($opt_h ? 1 : 0);
my $input=($opt_i ? $opt_i : "-");
my $output=($opt_o ? $opt_o : "-");
my $blocksize=($opt_b ? $opt_b : 8192);
my $slices=($opt_s ? $opt_s : 4);

my %args=( "verbose"	=>	\$verbose,		# --verbose
		"decode"		=>	\$decode,		# --decode
		"test"			=>	\$testmode,		# --test
		"help"			=>	\$help,			# --help
		"input:s"		=>	\$input,		# --input=<file>
		"output:s"		=>	\$output,		# --output=<file>
		"blocksize:i"	=>	\$blocksize,	# --blocksize=<size>
		"slices:i"		=>	\$slices);		# --slices

GetOptions(%args);

my $msg_rderr="unexpected end of archive.";
my $msg_archerr="input file does not look like a RIPS archive.";
my $msg_unkarch="unknown archive version, try to get a newer version.";
my $msg_archbroken="found second broken bock in archive giving up, sorry.";

verbprt ("%args:", map { "$_ = ${$args{$_}}" } keys %args);

if ($help) {
	system ("perldoc $caller");	
} else {
	$output="/dev/null" if ($testmode);
	open (INFILE, "< $input") || die ("Can't open $input for reading: $!\n");
	binmode(INFILE);
	open (OUTFILE, "> $output") || die ("Can't open $output for reading: $!\n");
	binmode(OUTFILE);
	my ($red, $wrote, $bufferA, $bufferB, $digest);
	my $slice=0;
	if ($decode) {	# archive decoding
		$red = read(INFILE, $bufferA, 4);
		while ($red eq 4) {
			if ($bufferA eq $header) {		# simple check passed
				$red = read(INFILE, $bufferA, 1);
				if ($red eq 1) {
					if (unpack ("CC", $bufferA ) eq 1) {
						verbprt("This archive is type 1.");
						$red = read(INFILE, $slices, 1);
						if ($red eq 1) {
							$slices=unpack("CC", $slices);
							verbprt("This archive uses $slices slices per archive.");
							for ($slice=0; $slice le $slices; $slice++) {
								$red = read(INFILE, $bufferA, 4);	# size
								if ($red eq 4) {
									$blocksize=unpack("N", $bufferA);
									verbprt("Blocksize is $blocksize.");
									my $md5_sum;
									$red = read(INFILE, $md5_sum, 16);	# md5
									if ($red eq 16) {
										$red = read(INFILE, $bufferA, $blocksize);	# block
										if (md5($bufferA) eq $md5_sum) {
											if ($slice eq 0) {
												$bufferB=$bufferA;
											}
											if ($slice ne $slices) { #not last slice
												$bufferB=$bufferB^$bufferA;
												print OUTFILE $bufferA;
											}
										} else {	# oops broken block!
											verbprt("broken block!");
#											my @temp_buffer=[];
											if ($slice eq $slices) {	#phu, nothing important hit
												verbprt("redundancy block corrupt - no worries.");
											} else {					# red alert
												verbprt("data block corrupt, trying to recover. :-o");
												my $bufferC;		# let's collect the rest of the archive here
												if ($slice eq 0) {
													$bufferB="\0" x $blocksize;
												}
												while ($slice le $slices) {
													$red = read(INFILE, $bufferA, 4);	# size
														if ($red eq 4) {
															$blocksize=unpack("N", $bufferA);
															verbprt("Blocksize is $blocksize.");
															my $md5_sum;
															$red = read(INFILE, $md5_sum, 16);	# md5
															if ($red eq 16) {
																$red = read(INFILE, $bufferA, $blocksize);	# block
																if (md5($bufferA) eq $md5_sum) {
																	$bufferC.=$bufferA
																} else {	# the heavens are broken
																	errlog($msg_archbroken);
																}
															} else { errlog($msg_rderr); }
														} else { errlog($msg_rderr); }
													$slice++;
												}												
											}
										}
									} else { errlog($msg_rderr); }
								} else { errlog($msg_rderr); }
							}
						} else { errlog($msg_rderr); }
					} else { errlog($msg_unkarch); }
				} else { errlog($msg_rderr); }
			} else { errlog($msg_archerr); }
			$red = read(INFILE, $bufferA, 4);
		}
		if ($testmode) {
			errlog("$input Ok.");
		}
	} else {	# archive creation
		$red = read(INFILE, $bufferA, $blocksize);
		while ($red eq $blocksize) {
			if ($slice eq 0) {
				print OUTFILE $header . (pack "CC", 1, $slices); 
				$bufferB=$bufferA;
			} else {
				$bufferB=$bufferB^$bufferA;
			}
			print OUTFILE (pack "N", $blocksize) . md5($bufferA);
			print OUTFILE $bufferA;
			$slice++;
			if ($slice eq $slices) {	#last slice
				print OUTFILE (pack "N", $blocksize) . md5($bufferB);	#save current slice
				print OUTFILE $bufferB;
				$slice=0;
			}
			$red = read(INFILE, $bufferA, $blocksize);
		}
		if ($red gt 0) {
			if ($slice eq 0) {
				print OUTFILE "RIPS" . (pack "CC", 1, 1); 	#version 1, 1+1 slices
				$bufferB=$bufferA;
				$blocksize=$red;
			} else {
				$bufferB=$bufferB^$bufferA;
			}
			print OUTFILE (pack "N", $red) . md5($bufferA);
			print OUTFILE $bufferA;
			print OUTFILE (pack "N", $blocksize) . md5($bufferB);
			print OUTFILE $bufferB;
		}
	}
	close(INFILE);
	close(OUTFILE);
}

exit(0);

###############################################################################
# sets $dberror if an error in a query occurs
# called by error handler
###############################################################################
sub catcherror {
	errlog($_[0]);
	$help=1;
}

##############################################################################
# print error messages to stderr
# parameters: text for output
##############################################################################
sub errlog (@) {
	print STDERR "$caller: $_[0]\n";
}

##############################################################################
# print verbose messages to stderr
# parameters: text for output
##############################################################################
sub verbprt (@) {
	for (@_) {
		print STDERR "$_\n" if $verbose;
	}
}
