#!/usr/bin/perl

use warnings;                             # warn me about the sh.. I wrote
use strict;                               # catch errors as early as possible
use Getopt::Std;                          # helps you to parse the command line
use Data::Dumper;                         # for debugging

##### configuration ###########################################################

my $archiveName = "hadesraw";               # TSM archive that is queried
my $archivePath = "sep05/hld";              # path inside that archive
my $archiveFile = "be*.hld";                # file name pattern (not used below)
my $TSMClient    = "/usr/local/bin/tsmcli"; # TSM command line client
my $TSMFlags     = "0";                     # extra argument used by archiveFile()

##### main ####################################################################
#
# Reads the file names listed in <inputfile>, queries each of them in
# $archiveName:$archivePath and writes every name for which queryFile()
# returns less than 1 (not found, or query error) to <outputfile>.

# the command line option flags - imported from Getopt::Std module
our ($opt_h, $opt_v);

# getopts() stops at the first non-option argument.  Parse once before and
# once after the positional arguments so that both
#    script -v in out     and     script in out -v
# work.  getopts() only ever sets flags, so calling it twice is harmless.
getopts( 'hv' );
my $inputFile  = shift( @ARGV );
my $outputFile = shift( @ARGV );
getopts( 'hv' );

if ($opt_h)
{
   showHelp();
   exit( 0 );
}

if (defined $inputFile  &&  length $inputFile)
{
   # fail early instead of after the whole input has been read
   die "ERROR: No output file given\n"
      unless (defined $outputFile  &&  length $outputFile);

   my @fileList;

   # 3-arg open: the file name can never be mistaken for an open mode
   open( my $input, '<', $inputFile )
      or die "ERROR: Can not open file: $inputFile\n";

   while (my $line = <$input>)
   {
      chomp $line;                 # no newline
      $line =~ s/#.*//;            # no comments
      $line =~ s/^\s+//;           # no leading white
      $line =~ s/\s+$//;           # no trailing white
      next unless length $line;    # anything left?

      # first column is the file name; the rest (number of events) is
      # ignored.  Split on any whitespace so tab separated lines work too.
      my ($file) = split( /\s+/, $line, 2 );
      push @fileList, $file;
   }
   close( $input );

   print Dumper( @fileList ) if ($opt_v);

   open( my $output, '>', $outputFile )
      or die "ERROR could not open file: $outputFile\n";

   # iterate instead of shift-while-true: a file named "0" must not end
   # the loop early
   for my $file (@fileList)
   {
      my $size = queryFile( $archiveName, $archivePath, $file, 100 );

      # 0 = not in archive, -1 = query error; both are reported as missing
      if ($size < 1)
      {
         # plain print: a '%' in a file name must not be taken as a format
         print {$output} "$file\n";
         print "$file not on taperobot\n";
      }
   }

   # buffered write errors only show up on close
   close( $output )
      or die "ERROR: could not write file: $outputFile\n";
}
exit( 0 );



##### sub function definitions ################################################


# Query a file on the TSM system.
#
# Synopsis: queryFile( ArchiveName, ArchivePath, Filename, SecondsTimeout )
# Return:   File size in bytes if successful, 0 if not found in archive and
#           -1 in case of an error (dies in case of a fatal error)
#
# The query is attempted at most 3 times; an attempt that takes longer than
# SecondsTimeout is aborted and retried.  Dies if SecondsTimeout is below 3,
# if an attempt fails for any reason other than a timeout, or if all 3
# attempts time out.
#
sub queryFile
{
   my ($archive, $path, $file, $timeout) = @_;

   # NOTE(review): this string is executed by the shell, so shell
   # metacharacters in the arguments are interpreted - only pass trusted names.
   my $tsmcommand = "$TSMClient query $file $archive $path";

   die "ERROR: Timeout too short (< 3 sec.)" if ($timeout < 3);

   # do not try to query a file more than 3 times
   for my $try (1 .. 3)
   {
      my $tsmoutput;

      eval {
         # Without a handler SIGALRM terminates the whole script; turn the
         # signal into an exception that is caught right below.
         local $SIG{ALRM} = sub { die "timeout\n" };
         alarm( $timeout );

         # query files in the archive
         print "==> Exec: $tsmcommand\n" if ($opt_v);
         $tsmoutput = `$tsmcommand`;

         # clear the still-pending alarm
         alarm( 0 );
      };
      my $error = $@;

      # make sure no alarm survives an exception raised inside the eval
      alarm( 0 );

      if ($error)
      {
         # propagate unexpected exception
         die "ERROR: TSM query on $archive:$path/$file totally failed!"
            if ($error !~ /timeout/);

         # The timed out client may still be running.  Kill every process
         # whose command line contains our command.  Filtering in Perl
         # (instead of "ps | grep") cannot match a grep process, and kill()
         # is only called with a pid that was really parsed - an undefined
         # pid would count as 0 and signal our whole process group.
         foreach my $psline (`ps -x`)
         {
            next if (index( $psline, $tsmcommand ) < 0);
            next unless ($psline =~ /^\s*(\d+)\s/);
            kill( 9, $1 ) if ($1 != $$);
         }

         print "WARNING: The TSM query " .
            "operation on file $file timed out!\n" if ($opt_v);
         next;
      }

      # backticks yield undef if the command could not be started
      $tsmoutput = "" unless (defined $tsmoutput);

      # no file found
      return( 0 ) if ($tsmoutput =~ /no matching objects/);

      # \Q..\E: the file name is literal text, not a pattern ('.' etc.)
      # filesize is the last number of the output ...
      return( $1 ) if ($tsmoutput =~ /\Q$file\E.*\s(\d+)\Z/s);

      # ... or the number following the STAGE column
      return( $1 ) if ($tsmoutput =~ /\Q$file\E.*STAGE  (\d+)  /);

      # error: output not understood
      print "WARNING: TSM query on $archive:$path/$file " .
         "returned malicious results!\n" if ($opt_v);
      return( -1 );
   }

   die "ERROR: TSM query operations timed out 3 times!";
}



# Stage a file on the TSM system.
#
# Synopsis: stageFile( ArchiveName, ArchivePath, Filename, SecondsTimeout )
# Return:   File size in bytes if successful, 0 if not found in archive and
#           -1 in case of an error (dies in case of a fatal error)
#
# The stage request is attempted at most 3 times; an attempt that takes
# longer than SecondsTimeout is aborted and retried.  Dies if SecondsTimeout
# is below 3, if an attempt fails for any reason other than a timeout, or if
# all 3 attempts time out.
#
sub stageFile
{
   my ($archive, $path, $file, $timeout) = @_;

   # NOTE(review): this string is executed by the shell, so shell
   # metacharacters in the arguments are interpreted - only pass trusted names.
   my $tsmcommand = "$TSMClient stage $file $archive $path";

   die "ERROR: Timeout too short (< 3 sec.)!" if ($timeout < 3);

   # do not try to stage a file more than 3 times
   for my $try (1 .. 3)
   {
      my $tsmoutput;

      eval {
         # Without a handler SIGALRM terminates the whole script; turn the
         # signal into an exception that is caught right below.
         local $SIG{ALRM} = sub { die "timeout\n" };
         alarm( $timeout );

         # stage file in the archive
         print( "==> Exec: $tsmcommand \n" ) if ($opt_v);
         $tsmoutput = `$tsmcommand`;

         # clear the still-pending alarm
         alarm( 0 );
      };
      my $error = $@;

      # make sure no alarm survives an exception raised inside the eval
      alarm( 0 );

      if ($error)
      {
         # propagate unexpected exception
         die "ERROR: TSM stage on $archive:$path/$file totally failed!"
            if ($error !~ /timeout/);

         # The timed out client may still be running.  Kill every process
         # whose command line contains our command.  Filtering in Perl
         # (instead of "ps | grep") cannot match a grep process, and kill()
         # is only called with a pid that was really parsed - an undefined
         # pid would count as 0 and signal our whole process group.
         foreach my $psline (`ps -x`)
         {
            next if (index( $psline, $tsmcommand ) < 0);
            next unless ($psline =~ /^\s*(\d+)\s/);
            kill( 9, $1 ) if ($1 != $$);
         }

         print "WARNING: The TSM stage " .
            "operation on file $file timed out!\n" if ($opt_v);
         next;
      }

      # backticks yield undef if the command could not be started
      $tsmoutput = "" unless (defined $tsmoutput);

      # no file found
      return( 0 ) if ($tsmoutput =~ /no objects found/);

      # \Q..\E: the file name is literal text, not a pattern ('.' etc.)
      # OK: already on disk
      return( $1 )
         if ($tsmoutput =~
             /\Q$file\E available on central disk.*\((\d+) bytes\)/s);

      # freshly staged: return filesize
      return( $1 )
         if ($tsmoutput =~
             /\Q$file\E staged to central disk.*\((\d+) bytes\)/s);

      # error: output not understood
      print "WARNING: TSM stage on $archive:$path/$file " .
         "returned malicious results!\n" if ($opt_v);
      return( -1 );
   }

   die "ERROR: TSM stage operations timed out 3 times!";
}



# Retrieve a file from the TSM system and save it to a local directory.
#
# Synopsis: retrieveFile( ArchiveName, ArchivePath, Filename, 
#                         LocalDir, SecondsTimeout )
# Return:   File size in bytes if successful, 0 if not found in archive and
#           -1 in case of an error (dies in case of a fatal error)
#
# The client is run from within LocalDir.  The retrieval is attempted at most
# 3 times; an attempt that takes longer than SecondsTimeout is aborted and
# retried.  Dies if SecondsTimeout is below 30, if LocalDir is not an
# existing writable directory, if an attempt fails for any reason other than
# a timeout, or if all 3 attempts time out.
#
sub retrieveFile
{
   my ($archive, $path, $file, $localdir, $timeout) = @_;

   # NOTE(review): this string (and $localdir below) is executed by the
   # shell, so shell metacharacters in the arguments are interpreted - only
   # pass trusted names.
   my $tsmcommand = "$TSMClient retrieve $file $archive $path";

   die "ERROR: Timeout too short (< 30 sec.)" if ($timeout < 30);

   unless (-d $localdir  &&  -w $localdir)
   {
      die "ERROR: $localdir is not existing/writable!"
   }

   # do not try to retrieve a file more than 3 times
   for my $try (1 .. 3)
   {
      my $tsmoutput;

      eval {
         # Without a handler SIGALRM terminates the whole script; turn the
         # signal into an exception that is caught right below.
         local $SIG{ALRM} = sub { die "timeout\n" };
         alarm( $timeout );

         # retrieve the file into the local directory
         print( "==> Exec: cd $localdir && $tsmcommand \n" ) if ($opt_v);
         $tsmoutput = `cd $localdir && $tsmcommand`;

         # clear the still-pending alarm
         alarm( 0 );
      };
      my $error = $@;

      # make sure no alarm survives an exception raised inside the eval
      alarm( 0 );

      if ($error)
      {
         # propagate unexpected exception
         die "ERROR: TSM retrieve totally failed!"
            if ($error !~ /timeout/);

         # The timed out client (and the shell wrapped around it) may still
         # be running.  Kill every process whose command line contains our
         # command.  Filtering in Perl (instead of "ps | grep") cannot match
         # a grep process, and kill() is only called with a pid that was
         # really parsed - an undefined pid would count as 0 and signal our
         # whole process group.
         foreach my $psline (`ps -x`)
         {
            next if (index( $psline, $tsmcommand ) < 0);
            next unless ($psline =~ /^\s*(\d+)\s/);
            kill( 9, $1 ) if ($1 != $$);
         }

         print "WARNING: The TSM retrieve " .
            "operation on file $file timed out!\n" if ($opt_v);
         next;
      }

      # backticks yield undef if the command could not be started
      $tsmoutput = "" unless (defined $tsmoutput);

      # no file found
      return( 0 ) if ($tsmoutput =~ /no objects found/);

      # \Q..\E: the file name is literal text, not a pattern ('.' etc.)
      # return filesize
      return( $1 ) if ($tsmoutput =~ /\Q$file\E.* (\d+) Bytes in/s);

      # file already exists - return that filesize
      return (lstat( "$localdir/$file" ))[7]
         if ($tsmoutput =~ /\Q$file\E exists already/);

      # error: output not understood
      print "WARNING: TSM retrieve on $archive:$path/$file " .
         "returned malicious results!\n" if ($opt_v);
      return( -1 );
   }

   die "ERROR: TSM retrieve operations timed out 3 times!";
}



# Archive a local file to the TSM system
#
# Synopsis: archiveFile( ArchiveName, ArchivePath, FullFilename, 
#                        SecondsTimeout )
# Return:   Number of bytes saved if successful, 0 if the file is already
#           archived at the specified path and -1 in case of an error
#           (dies in case of a fatal error)
#
# The archiving is attempted at most 3 times; an attempt that takes longer
# than SecondsTimeout is aborted and retried.  Dies if SecondsTimeout is
# below 30, if FullFilename is not readable, if an attempt fails for any
# reason other than a timeout, or if all 3 attempts time out.
#
sub archiveFile
{
   my ($archive, $path, $file, $timeout) = @_;

   # NOTE(review): this string is executed by the shell, so shell
   # metacharacters in the arguments are interpreted - only pass trusted names.
   my $tsmcommand = "$TSMClient archive $file $archive $path $TSMFlags";

   die "ERROR: Timeout too short (< 30 sec.)"   if ($timeout < 30);
   die "ERROR: $file is not existing/readable!" unless (-r $file);

   # do not try to archive a file more than 3 times
   for my $try (1 .. 3)
   {
      my $tsmoutput;

      eval {
         # Without a handler SIGALRM terminates the whole script; turn the
         # signal into an exception that is caught right below.
         local $SIG{ALRM} = sub { die "timeout\n" };
         alarm( $timeout );

         # archive the file
         print( "==> Exec: $tsmcommand \n" ) if ($opt_v);
         $tsmoutput = `$tsmcommand`;

         # clear the still-pending alarm
         alarm( 0 );
      };
      my $error = $@;

      # make sure no alarm survives an exception raised inside the eval
      alarm( 0 );

      if ($error)
      {
         # propagate unexpected exception
         die "ERROR: TSM archive totally failed!"
            if ($error !~ /timeout/);

         # The timed out client may still be running.  Kill every process
         # whose command line contains our command.  Filtering in Perl
         # (instead of "ps | grep") cannot match a grep process, and kill()
         # is only called with a pid that was really parsed - an undefined
         # pid would count as 0 and signal our whole process group.
         foreach my $psline (`ps -x`)
         {
            next if (index( $psline, $tsmcommand ) < 0);
            next unless ($psline =~ /^\s*(\d+)\s/);
            kill( 9, $1 ) if ($1 != $$);
         }

         print "WARNING: The TSM archiving " .
            "operation on file $file timed out!\n" if ($opt_v);
         next;
      }

      # backticks yield undef if the command could not be started
      $tsmoutput = "" unless (defined $tsmoutput);

      # \Q..\E: the file name is literal text, not a pattern ('.', '/' ...)
      # nothing saved: file is already in the archive
      return( 0 )
         if ($tsmoutput =~ /\Q$file\E already archived at specified path/);

      # return amount of saved bytes.  The \b is essential: without it the
      # greedy .* swallows all but the last digit of the byte count.
      return( $1 ) if ($tsmoutput =~ /\Q$file\E.*\b(\d+) Bytes in/s);

      # error: output not understood
      print "WARNING: TSM archiving on $archive:$path/$file " .
         "returned malicious results!\n" if ($opt_v);
      return( -1 );
   }

   die "ERROR: TSM archive operations timed out 3 times!";
}

# Print the usage/help text to STDOUT.
#
# Synopsis: showHelp()
# Return:   nothing of interest
#
sub showHelp
{
   # single quoted heredoc: the text is printed verbatim, nothing interpolated
   print <<'EOF';

	 checkFileList - A tool to check which files from a textfile are in a given directory on the taperobot.

   Usage: checkFileList <inputfile> <outputfile> [-h] [-v]

   <inputfile>  : Textfile with the files to check (format see below).
   <outputfile> : Receives the names of all files not found on the taperobot.

   -h         : Show the help.
   -v         : Verbose mode.

   The textfile must contain two entries per line:
     the filename,
     and the number of events.

   The second part is not used in this script, but normaly the output of an sql script is used as input to this script.
   This SQL script puts those two informations in one line.
EOF
   return;
}
