Gitlab Community Edition Instance

Commit 6f3d5c18 authored by bnachtw's avatar bnachtw
Browse files

...

parent 321a34ca
##############################################################################
#
# dsmci.pl
#
# script for multi stream backup of a given path
# -- limited depth approach
#
# (C) 2014 -- 2019 GWDG Göttingen, Bjørn Nachtwey
# mailto:bjoern.nachtwey@gwdg.de
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
##############################################################################
#
# changelog
# date version remark
# 2014-12-19 0.1 initial coding using bash
# 2018-02-05 0.6.0.0 fork from dsmcis.pl
# 2018-02-13 0.6.1.0 first version working, but profiling skipped out
# 2018-02-16 0.6.1.1 added global logfile, copying logs from each thread, avoiding lots of childlog files
# 2018-02-19 0.6.1.3 removed further unused lines and settings for testing
# 2018-02-20 0.6.1.4 wrong variable for depth: fixed
# 2018-02-20 0.6.1.5 eliminate grouping delimiter for bytes transferred
# 2019-03-18 0.6.1.6 several fixes suggested by Salvatore Bonaccorso <bonaccos@ee.ethz.ch> -- Thanks, Salvatore!
# - removed "*" from dsmc i command line
# - fixed comparison operator from "lt" to "<="
# - added "dsmc i -subdir --dirsonly" for each starting folder to remove all deleted folder from server_name
# 2019-03-25 0.6.1.7 some more fixes
# - put lockfile in first startpath
# - moved progress reporting to start of a thread instead of waiting
# 2019-03-27 0.6.2 some major changes
# - use File::Find::Rule instead of system based approaches for finding folders
# - switched from "\" to "/" in path names to skip masking it
# 2019-03-27 0.6.2.1 fixed wrong variable for path in last loop
# 2019-06-25 0.6.2.2 fixed wrong mindepth for folders processed with "-su=n"
# 2019-06-27 0.6.3 collocate both loops in one subroutine
# 2019-07-05 0.6.3.1 added escape sequence to handle with "$" in file / folder names
# Thanks to Salvatore again for this fix
# 2019-07-17 0.6.3.2 fixed issue on trailing "*" on dsmc command line: last character is needed to keep trailing "/"
# renamed last relics of name "dsmcis" to "dsmci"
# 2019-08-19 0.6.4 added file:find:rule expression to omit symlinks
# 2019-11-20 0.6.5 added a patch collecting all error and warning lines and write to an errorlog file
# Thanks to Salvatore again for this fix
# 2019-11-26 0.6.5.1 added some return codes
# 2019-11-29 0.6.5.2 added a new sequence for escaping "[" and "]" -- patch distributed by Salvatore again
# 2019-12-06 0.6.5.3 removed used sub "array_minus"
# 2019-12-06 0.6.6 changed form of path arrays to "path;SU-switch":
# removing @sund, @suyd, replaced by @allpaths
# 2019-12-10 0.6.6.1 add the number of threads to top of ".all.log" file
# 2019-12-10 0.6.6.2 add missing recording of severe errors to logfile -- patch by Salvatore
# 2020-02-04 0.6.7 switch path of pid-file from first startpath to path of dsmci run
# 2020-02-04 0.6.7.1 add switch / option to disable check for pid file (so called sched mode, where the scheduler itself prevents multiple runs)
# 2020-02-04 0.6.7.2 typos removed
# 2020-02-05 0.6.7.3 starting to implement debug mode switch, giving some extra output on run
# 2020-02-06 0.6.8 encapsulate escaping of special characters for non-windows systems
# 2020-02-12 0.6.8.1 add trailing "*" to backup path for windows again
# 2020-03-17 0.6.9 adding some statistics to profiling file to identify number of processed objects and data
#
# important notes
#
# => if not installed, install the module "File::Find::Rule" by issuing: cpan -i File::Find::Rule
#
##############################################################################
my $debugmode = 0; # 1 equals TRUE, enables some printf debugging and prevents removing logfiles
##############################################################################
##############################################################################
# global settings / global variables
##############################################################################
##############################################################################
use strict;
use warnings;
use File::Spec::Functions;
use File::Find::Rule;
use Time::Piece;
use Fcntl qw(:flock);
# Platform dependent defaults for the dsmc client.
# Only linux and windows (MSWin32 / msys) are known; everything else stops
# the script right here.
my $osname = $^O;    # name of the operating system this script is run on
my $dsmcbin;         # path and binary of "dsmc"
my $optfile;         # optfile to be used
if ( $osname eq 'MSWin32' or $osname eq 'msys' )
{
    $dsmcbin = 'C:\Program Files\Tivoli\TSM\baclient\dsmc.exe';
    $optfile = 'C:\Program Files\Tivoli\TSM\baclient\dsm.opt';    # default path to optfile
}
elsif ( $osname =~ m/linux/ )
{
    $dsmcbin = '/usr/bin/dsmc';
    $optfile = '/opt/tivoli/tsm/client/ba/bin/dsm.opt';           # default path to optfile
}
else
{
    die " Operation System \"$osname\" is not supported :-(";
}
# Boolean helpers, the retry limit for starting threads and the exit codes
# of this script, declared in one go.
use constant {
    FALSE           => 0,
    TRUE            => 1,
    THREADFAILMAX   => 10,    # max number of attempts to start a new thread
    RETURNALLOKAY   => 0,
    RETURNWARNING   => 4,
    RETURNERROR     => 12,    # backup failed due to errors
    RETURNNOARGS    => 13,    # no arguments given with call
    RETURNNOCFGFILE => 14,    # no cfg file found
    RETURNPIDFOUND  => 15,    # stopped due to existing pid file
    RETURNNOTHREAD  => 21,    # cannot start new threads
};
# ----- date and time bookkeeping --------------------------------------------
# the date / time strings are assembled by hand, because
# localtime->strftime ('%F') and ('%F %R') do not work on windows
my $starttime = time();                    # time in seconds since Jan 1, 1970
my ( $date, $starttimestring );            # date for statistics file, starting time
my ( $endtime, $endtimestring );
my ( $min, $hour, $mday, $mon, $year );    # minute, hour, day of month, month and year part of localtime array
my ( $proctime, $pproctime, $datatransfertime ) = ( 0.0, 0.0, 0.0 );
                                           # $proctime         : total processing time in seconds
                                           # $pproctime        : partial processing time in seconds per subdir "Elapsed processing time: 00:00:01"
                                           # $datatransfertime : total data transfer time in seconds "Data transfer time: 0.00 sec"
#my $pdatatranstime = 0.0;                 # partial data transfer time in seconds per subdir
my ( $wallclocktime, $speedup );           # total wallclock time in seconds, ratio of processing time and wallclock time
my $dttratio = 0.0;                        # ratio between datatransfer time and total time

# ----- process ids ----------------------------------------------------------
my ( $pidfile, $ppid, $cpid );             # name of the pid file, parent process id (this script originally), child process id (copy of this script)

# ----- paths ----------------------------------------------------------------
my $startpath;                             # pathroot where the backup should start from
my @startpaths = (undef);                  # array for multiple startpaths (starts with one undefined placeholder element)
my ( $actpath, $actpathdir );              # starting path actually processed, name of the actually processed path for logging
my $maxdepth = 3;                          # number of directory level to dive into, right here all folders are processed with "-su=y"
my $depth    = ( $maxdepth - 1 );          # just one level above $maxdepth, down to here all folders are processed with "-su=n"

# ----- file names and collected lines ---------------------------------------
my $cfgfilename = "dsmci.cfg";             # name of config file, should be located in current folder
my ( $log_filename, $err_filename );       # filename for logging, filename for error logging
my @errorlines;                            # an array holding all the logged warnings, errors and severe errors
my $errorlogfilename;                      # filename for collected error messages
my $globallog_filename;                    # filename for collected status logs
my @logfiles;                              # array for names of all logfiles
my $logfile;                               # name of single logfile
my $childlogapx = ".child.log";            # appendix of child files
my ( $statfilename, $proffilename );       # path and name of statistics file, path and name of profiling file
my $profapx = ".prof";                     # appendix of profiling file
my @proftimes;                             # an array holding all subfolders and processing times
my @profdirs;                              # an array holding all subfolders with profiling times available

# ----- running variables and folder lists -----------------------------------
my ( $arg, $val );                         # name part and value part of commandline arguments
my ( $item, $line, $tline );               # running variables
# my @sund = undef;                        # array for all folders below $startpath to be processed with "su=n"
# my @suyd = undef;                        # array for all folders below $startpath to be processed with "su=y"
my @allpaths = (undef);                    # array for all folders below $startpath to be processed (starts with one undefined placeholder element)
my @tdirs    = (undef);                    # temporary array for all folders below $startpath
my $dir;                                   # running variable
my $numdir;                                # number of dirs to be processed
my $dircount = 0;                          # number of dirs already processed
my @switcher = (undef);                    # switcher for backup with and without subfolders

# ----- threads and run mode -------------------------------------------------
my $maxthreads = 4;                        # max number of threads running parallel
my $threads;                               # number of threads running
my $threadfail = 0;                        # counter of threads unable to be started
my $pidmode;                               # reading the pid check from config file
my $pidcheck = TRUE;                       # setting the mode for checking for a pid file, default is check
my $os;                                    # switch for "Linux" vs "MSWin"
my $command;                               # for system calls
my $childreturnvalue;                      # return value from child process

# ----- message counters and return value ------------------------------------
my ( $errorcount, $warncount, $sevecount, $SooScount ) = ( 0, 0, 0, 0 );
                                           # amount of ANS....E, ANS....W, ANS....S and ANS1329S "Server out of Space" messages
my $returnval = -1;

# ----- totals taken from the ISP statistics ---------------------------------
my $objects_inspected = 0;                 # "Total number of objects inspected"
my $objects_backed_up = 0;                 # "Total number of objects backed up"
my $objects_updated   = 0;                 # "Total number of objects updated"
my $objects_deleted   = 0;                 # "Total number of objects deleted"
my $objects_expired   = 0;                 # "Total number of objects expired"
my $objects_failed    = 0;                 # "Total number of objects failed"
my $bytes_inspected   = 0;                 # "Total number of bytes inspected"
my $bytes_transferred = 0;                 # "Total number of bytes transferred"

# ----- filehandles ----------------------------------------------------------
my (
    $PPIDFILE,     $LOGFILE,       $ERRFILE,
    $DIRHANDLE,    $CHILDLOGFILE,  $GLOBALLOGFILE,
    $STATFILE,     $CFGFILE,       $PROFFILE,
);
##############################################################################
##############################################################################
# some tests for given command line arguments
##############################################################################
##############################################################################
# "--help" given as first argument: show the short usage note and leave
if ( defined $ARGV[0] and $ARGV[0] eq "--help" )
{
    print "usage: perl -f dsmci.pl\n",
          "use file ./dsmci.cfg for further configuration!\n",
          "\n";
    exit RETURNNOARGS;
}
##############################################################################
##############################################################################
# read cfg file
##############################################################################
##############################################################################
# Read the configuration file $cfgfilename.
#
# Recognised keys (matched at the start of a line, the value follows the
# first "="):
#   MAXTHREADS  max number of threads running parallel
#   OPTFILE     optfile to be used
#   DEPTH       sets $maxdepth; $depth is always one level above
#   MODE        a value containing "SCHED" or "sched" disables the pid file check
#   STARTPATH   may be given several times, surrounding quotation marks are removed
# Lines starting with "*" or "#" are comments; everything from a later "#"
# or "*" up to the end of a line is taken as inline comment and dropped.
#
# open() autovivifies $CFGFILE even if it fails, so "defined $CFGFILE" does
# not tell whether the file could be opened -- the return value of open()
# is tested instead. Without a cfg file the script ends with RETURNNOCFGFILE.
unless ( open( $CFGFILE, '<', $cfgfilename ) )
{
    warn "cannot open cfg file";
    exit RETURNNOCFGFILE;
}
while ( my $cfgline = <$CFGFILE> )
{
    # skip all lines starting with comment sign
    next if ( $cfgline =~ /^\*/ or $cfgline =~ /^#/ );

    # remove inline comments: keep everything in front of the comment sign
    # (numeric comparison, and no extra character is cut off anymore)
    foreach my $commentsign ( "#", "*" )
    {
        my $pos = index $cfgline, $commentsign, 1;
        $cfgline = substr( $cfgline, 0, $pos ) if ( $pos >= 0 );
    }

    # split at the first "=" only, so values may contain "=" themselves;
    # lines without "=value" cannot set anything and keep the current settings
    my ( undef, $value ) = split /=/, $cfgline, 2;
    next unless defined $value;
    $value =~ s/^\s+|\s+$//g;

    if ( $cfgline =~ /^MAXTHREADS/ )
    {
        $maxthreads = $value;
    }
    elsif ( $cfgline =~ /^OPTFILE/ )
    {
        $optfile = $value;
    }
    elsif ( $cfgline =~ /^DEPTH/ )
    {
        $maxdepth = $value;
        $depth    = ( $maxdepth - 1 );
    }
    elsif ( $cfgline =~ /^MODE/ )
    {
        $pidmode = $value;
        if ( $pidmode =~ /SCHED/ or $pidmode =~ /sched/ )
        { $pidcheck = FALSE; }
    }
    elsif ( $cfgline =~ /^STARTPATH/ )
    {
        $startpath = $value;
        $startpath =~ s/^"*//;    # remove leading quotation marks
        $startpath =~ s/"*$//;    # remove trailing quotation marks
        push @startpaths, $startpath;
    }
}
close $CFGFILE;

# @startpaths is declared holding one undefined placeholder element; remove
# exactly the undefined entries instead of blindly shifting the first one
@startpaths = grep { defined $_ } @startpaths;
foreach my $path (@startpaths)
{
    printf "STARTPATH >>%s<<\n", $path;
}
###############################################################################
#
# some preparation
#
###############################################################################
# names derived from this script's process id
$ppid               = $$;                  # parents process id (this script's PID)
$pidfile            = "dsmci.pid";         # name of the pid file
$log_filename       = $ppid . "log";
$err_filename       = $ppid . "err";
$globallog_filename = $ppid . ".all.log";

# unless the pid check was disabled by the cfg file: refuse to run while a
# pid file exists, otherwise create it and store our own pid in it
if ( $pidcheck == TRUE )
{
    if ( -e $pidfile )
    {
        warn "Found PID file ($pidfile) ! script stopped!";
        exit RETURNPIDFOUND;
    }
    open( $PPIDFILE, '>', $pidfile )
        or die "cannot open PIDFILE ". $pidfile;
    printf $PPIDFILE "%d", $ppid;
    close $PPIDFILE;
}
# localtime() returns (sec, min, hour, mday, mon, year, wday, yday, isdst);
# only elements 1 .. 5 are needed for the date and the start time string
( $min, $hour, $mday, $mon, $year ) = ( localtime() )[ 1 .. 5 ];
$date = sprintf "%d-%2.2d-%2.2d", $year+1900, $mon+1, $mday;
$starttimestring = sprintf "%d-%2.2d-%2.2d %2.2d:%2.2d", $year+1900, $mon+1, $mday, $hour, $min;
###############################################################################
###############################################################################
# read foldernames and optimize using profiling
###############################################################################
###############################################################################
###############################################################################
# find all folders below $startpath and seperate for processing with "su=n" or "su=y"
###############################################################################
# Collect the folders to be backed up below every start path. Each entry of
# @allpaths has the form "path;SU-switch":
#   ";Y"  folders exactly $maxdepth levels below the start path (su=y)
#   ";N"  the start path itself and all folders down to $depth   (su=n)
# Symlinks are left out in both cases.
for my $root (@startpaths)
{
    # add su=y information to each path entry
    my @with_subdirs = File::Find::Rule->new
                                       ->directory
                                       ->maxdepth($maxdepth)
                                       ->mindepth($maxdepth)
                                       ->not( File::Find::Rule->new->symlink )
                                       ->in($root);
    push @allpaths, map { sprintf "%s;Y", $_ } @with_subdirs;

    # add su=n information to each path entry
    my @without_subdirs = File::Find::Rule->new
                                          ->directory
                                          ->maxdepth($depth)
                                          ->mindepth(0)
                                          ->not( File::Find::Rule->new->symlink )
                                          ->in($root);
    push @allpaths, map { sprintf "%s;N", $_ } @without_subdirs;
}
# remove empty top element
shift @allpaths;
###############################################################################
# read profiling
###############################################################################
$proffilename = File::Spec->canonpath("dsmci".$profapx);
## code snipped off to "profiling_v2.pl"
# else take the dirs as they are listed
###############################################################################
###############################################################################
# main loop
###############################################################################
###############################################################################
#
# start loop on all folders
#
$threads  = 0;                      # reset number of child threads to zero
$numdir   = ( scalar @allpaths );   # number of dirs to be processed
$dircount = 0;                      # counter of dirs already processed
# write the header line to the global logfile; a logfile that cannot be
# opened is reported once instead of printing to an unopened handle
# NOTE(review): the line is labelled "Number of Threads" but the value
# written is the number of folders ($numdir) -- confirm which one is meant
if ( open( $GLOBALLOGFILE, ">>", $globallog_filename ) )
{
    printf $GLOBALLOGFILE "Number of Threads: %d\n\n", $numdir;
    close $GLOBALLOGFILE;
}
else
{
    warn "cannot open global log file $globallog_filename: $!";
}
# hand all collected folders over to the backup threads
fork_backup_threads(\@allpaths);
# wait for all child threads exiting
wait_for_threads();
###############################################################################
#
# get endtime and calculate wallclocktime
#
###############################################################################
$endtime = time();
# localtime() returns (sec, min, hour, mday, mon, year, wday, yday, isdst);
# only elements 1 .. 5 are needed for the end time string
( $min, $hour, $mday, $mon, $year ) = ( localtime() )[ 1 .. 5 ];
$endtimestring = sprintf "%d-%2.2d-%2.2d %2.2d:%2.2d", $year+1900, $mon+1, $mday, $hour, $min;
# wallclock time of the whole run, converted by convert_time()
$wallclocktime = convert_time( $endtime - $starttime );
###############################################################################
###############################################################################
# do some statistics for return code
###############################################################################
###############################################################################
# open global logfile for analyzation
# Scan the global logfile line by line and
#   * remember the folder the current job belongs to ($actpathdir),
#   * count and collect warnings, errors and severe errors,
#   * add up the summary statistics of all jobs,
#   * record one profiling line per "Elapsed processing time" line.
#
# Statistics of the job currently being read. They have to survive from one
# log line to the next (declaring them inside the loop reset them for every
# line, so the profiling lines always carried zeros) and are reset once the
# "Elapsed processing time" line of the job has been turned into a
# profiling entry.
my $tinspected         = 0;
my $tbytes_inspected   = 0;
my $tbytes_transferred = 0;

# "Total number of objects <kind>" lines => global total the value is added to
my %object_total_for = (
    'inspected' => \$objects_inspected,
    'backed up' => \$objects_backed_up,
    'updated'   => \$objects_updated,
    'deleted'   => \$objects_deleted,
    'expired'   => \$objects_expired,
    'failed'    => \$objects_failed,
);

# only read the logfile if it could be opened
if ( open( $GLOBALLOGFILE, '<', $globallog_filename ) )
{
    while ( my $logline = <$GLOBALLOGFILE> )
    {
        #
        # identify subdir
        #
        if ( $logline =~ /Incremental backup of volume/ )
        {
            ( undef, $actpathdir, undef ) = split '\'', $logline;
            $actpathdir =~ s/\*//g;
        }
        #
        # collect Errors and Warnings
        #
        if ( $logline =~ /^AN[RS][0-9]{4}E/ )       # Errors
        {
            # ANS1228E and ANS1802E are neither counted nor collected
            unless ( $logline =~ /^ANS1228E/ or $logline =~ /^ANS1802E/ )
            { $errorcount++; push @errorlines, $logline; }
        }
        elsif ( $logline =~ /^AN[RS][0-9]{4}W/ )    # Warnings
        { $warncount++; push @errorlines, $logline; }
        elsif ( $logline =~ /^AN[RS][0-9]{4}S/ )    # Severe Errors
        {
            $sevecount++;
            push @errorlines, $logline;
            # Server-out-of-Space errors are severe errors as well: count
            # them separately, but do not collect the same line twice
            $SooScount++ if ( $logline =~ /^ANS1329S/ );
        }
        #
        # collect statistics of all jobs
        #
        if ( $logline =~ /Total number of objects (inspected|backed up|updated|deleted|expired|failed)/ )
        {
            my $kind  = $1;
            my $count = ( split ':', $logline )[-1];
            $count =~ s/,//g;    # just to eliminate grouping delimiter
            ${ $object_total_for{$kind} } += $count;
            $tinspected = $count if ( $kind eq 'inspected' );
        }
        elsif ( $logline =~ /Total number of bytes (inspected|transferred)/ )
        {
            my $kind      = $1;
            my $lastfield = ( split ':', $logline )[-1];
            my ( $amount, $unit ) = split ' ', $lastfield;
            $amount =~ s/,//g;    # just to eliminate grouping delimiter
            $amount *= get_unit_mupliplier($unit);
            if ( $kind eq 'inspected' )
            {
                $bytes_inspected += $amount;
                $tbytes_inspected = $amount;
            }
            else
            {
                $bytes_transferred += $amount;
                $tbytes_transferred = $amount;
            }
        }
        elsif ( $logline =~ /Data transfer time/ )
        {
            my $lastfield = ( split ':', $logline )[-1];
            my ( $seconds, undef ) = split ' ', $lastfield;
            $seconds =~ s/,//g;    # just to eliminate grouping delimiter
            $datatransfertime += $seconds;
        }
        elsif ( $logline =~ /Elapsed processing time/ )
        {
            # "Elapsed processing time: hh:mm:ss"
            my ( undef, $thour, $tmin, $tsec ) = split ':', $logline;
            $pproctime = $tsec + ( 60 * $tmin ) + ( 3600 * $thour );
            $proctime += $pproctime;
            # no "Incremental backup of volume" line seen so far: use an
            # empty folder name instead of an undefined value
            my $jobdir   = defined $actpathdir ? $actpathdir : '';
            my $rootdir  = defined $startpath  ? $startpath  : '';
            my $profline = sprintf "%10.10d ; %s ; % 15.15d ; %15.3lf ; %15.3lf", $pproctime, $jobdir, $tinspected, $tbytes_inspected, $tbytes_transferred;
            if ( $jobdir ne $rootdir )
            { push @proftimes, $profline; }
            # the job is complete, start over for the next one
            $tinspected         = 0;
            $tbytes_inspected   = 0;
            $tbytes_transferred = 0;
        }
    }
    close $GLOBALLOGFILE;
}
else
{
    warn "cannot open global log file $globallog_filename";
}
###############################################################################
# write new profiling infos
###############################################################################
# @proftimes is declared empty and only receives complete profiling lines,
# so there is no empty top element to remove -- shifting dropped the entry
# of the first job. Undefined elements (if any) are filtered out instead.
# reorder elements: descending string order
my @sproflines = sort { $b cmp $a } grep { defined $_ } @proftimes;
# open profiling file and write lines; nothing is printed to an unopened
# handle if the file cannot be opened
if ( open( $PROFFILE, '>', $proffilename ) )
{
    foreach my $profline (@sproflines)
    { printf $PROFFILE "%s\n", $profline; }
    close $PROFFILE;
}
else
{
    warn "Cannot open Profiling file $proffilename";
}
###############################################################################
# write error info log
###############################################################################
# write all collected warning / error / severe error lines to
# "<date>-dsmci.errors.log"; a lexical filehandle is used instead of the
# bareword ERRORFILE and nothing is printed if the file cannot be opened
$errorlogfilename = File::Spec->canonpath($date."-dsmci.errors.log");
if ( open( my $errorlog_fh, '>', $errorlogfilename ) )
{
    foreach my $errorline (@errorlines)
    {
        print $errorlog_fh $errorline;
    }
    close($errorlog_fh);
}
else
{
    warn "Cannot open Error/Warning logfile: $errorlogfilename";
}
###############################################################################
###############################################################################
# summarize stats
###############################################################################
###############################################################################
# speedup: ratio of summed-up processing time and wallclock time.
# time() counts whole seconds, so a run finishing within the same second has
# a wallclock difference of 0 -- this must not be used as divisor
my $wallclockseconds = $endtime - $starttime;
$speedup = ( $wallclockseconds > 0 ) ? $proctime / $wallclockseconds : 0.0;
# share of the data transfer time in the processing time (percent)
if ( $datatransfertime < 0.1 or $proctime < 0.1 )
{ $dttratio = 0.0; }
else
{
    $dttratio = $datatransfertime / $proctime * 100.;
}
# from here on $proctime holds the value returned by convert_time()
$proctime = convert_time($proctime);
$statfilename = File::Spec->canonpath($date."-stats.txt");
open $STATFILE, ">", $statfilename
    or warn "cannot open $statfilename";
printf $STATFILE "Process ID : %20d\n", $ppid;
foreach my $path (@startpaths)
{
    printf $STATFILE "Path processed : %20s\n", $path;
}