#!/usr/bin/env perl
#
# lostTables.pl - report "lost" tables in the customTrash database.
#
# A table is "lost" when MySQL lists it in customTrash ('show tables')
# but it has no row in the customTrash.metaInfo table.
#
# stdout:
#   - a header, then one line per lost table whose .frm file is older than
#     the -age limit: table name, seconds since the .frm mtime/ctime/atime,
#     and the sizes of the .frm, .MYI and .MYD files
#   - '# ' prefixed lines for lost tables that are not yet old enough
#     (these show the raw mtime/ctime/atime stamps, not ages)
#   - a '#' prefixed summary of counts and byte totals
# stderr: progress messages, warnings and errors.
#
# Source path: src/hg/dbTrash/lostTables.pl
# (initial source from running cron job on hgcustom)

use strict;
use warnings;
use File::stat;
use Getopt::Long;
use Math::BigInt;

# hgsql binary used for all database queries
my $hgsql = "/cluster/bin/x86_64/hgsql";

# Print the usage message to stderr and exit with status 255.
sub usage {
    print STDERR <<_END_;
usage: lostTables.pl -age=N [-wigVarFile=pathName.wigVar]
required argument:
-age=N - specify N hours to age out tables, must be exactly 72

optional argument:
-wigVarFile=pathName.wigVar - to output a wiggle variable
                            - step file with date vs. size information.
_END_
    exit 255;
}

# Run one SQL statement against the customTrash database via hgsql and
# return its output lines with the newlines removed.
#
# Dies when hgsql can not be started or does not exit cleanly.  This
# matters: an unnoticed failure of the metaInfo query would leave the
# metaInfo list empty and make every table look lost.
sub hgsqlLines {
    my ($sql) = @_;
    # list-form pipe open: no shell involved, so the exit status seen at
    # close() is hgsql's own
    open(my $fh, '-|', $hgsql, '-N', '-e', $sql, 'customTrash')
        or die "can not run hgsql '$sql' command: $!";
    my @lines = <$fh>;
    chomp @lines;
    if (!close($fh)) {
        die $! ? "error closing hgsql pipe for '$sql': $!"
               : "hgsql '$sql' exited with status " . ($? >> 8);
    }
    return @lines;
}

# for a graph offset, this date will be X=0, resolution 1 second per base
#	2010-01-01 12:00:00 1262376000
#	(that epoch value is 20:00:00 UTC, i.e. 12:00:00 at UTC-8)
#	from mktime command in kent source src/utils/timing/
my $timeZero = 1262376000;
my $wigVarFile = '';
my %wigData;    # key: seconds since $timeZero, value: Math::BigInt byte sum
my $help = '';

my $ageHours = -1;

my $optResult = GetOptions(
    "age=i"        => \$ageHours,
    "help"         => \$help,
    "wigVarFile=s" => \$wigVarFile,
);

# report a bad option before anything else so the message is never
# swallowed by the plain usage exit below
if (!$optResult) {
    print STDERR "ERROR: incorrect option given to script.\n";
    usage();
}

if ($ageHours == -1 || $help) {
    usage();
}

if ($ageHours != 72) {
    printf STDERR "ERROR: given age: %d\n", $ageHours;
    printf STDERR "ERROR: age must be 72 hours\n";
    usage();
}
my $ageSeconds = $ageHours * 60 * 60;

my $wigFh;    # only opened when -wigVarFile was given
if ($wigVarFile) {
    # the file name is passed as an argument, never as part of the format,
    # so a '%' in the path can not corrupt the message
    printf STDERR "#\toutput graph data to wiggle file: %s\n", $wigVarFile;
    open($wigFh, '>', $wigVarFile)
        or die "can not write to $wigVarFile: $!";
    print $wigFh "track type=wiggle_0 name=customTrashSize\n";
    print $wigFh "variableStep chrom=chr1\n";
}
printf STDERR "#\tage: %d hours, %d seconds\n", $ageHours, $ageSeconds;

# default configuration file, used only when the caller has not set one
# (presumably read by hgsql to find the customTrash server - confirm)
if (!defined($ENV{'HGDB_CONF'})) {
    $ENV{'HGDB_CONF'} = "/data/home/qateam/.ct.hg.conf";
}

my %showTables;    # list of table names from MySQL show tables
my %metaTables;    # list of table names from metaInfo
my %lostTables;    # list of tables from MySQL but not in metaInfo
my $tableCount = 0;
my $metaTableCount = 0;
my $lostCount = 0;

###########################################################################
## read in table list from metaInfo name column
###########################################################################

foreach my $table (hgsqlLines('select name from metaInfo;')) {
    if (exists($metaTables{$table})) {
        printf STDERR "WARNING: metaInfo duplicate table name ? %s\n", $table;
    }
    ++$metaTableCount;
    $metaTables{$table} = 1;
}

printf STDERR "# metaInfo table count: %d\n", $metaTableCount;

###########################################################################
## read in table list from "show tables" MySQL operation
## ignore table names: history, extFile, metaInfo
###########################################################################

foreach my $table (hgsqlLines('show tables;')) {
    # substring match, same selection the former "egrep -v" filter made
    next if ($table =~ m/history|extFile|metaInfo/);
    if (exists($showTables{$table})) {
        printf STDERR "WARNING: duplicate table name ? %s\n", $table;
    }
    $showTables{$table} = 1;
    ++$tableCount;
    if (!exists($metaTables{$table})) {
        $lostTables{$table} = 1;
        ++$lostCount;
    }
}

printf STDERR "# customTrash table count: %d, lost table count: %d\n",
    $tableCount, $lostCount;

###########################################################################
## finally, scan MySQL files to determine date and size information
###########################################################################

my $custTrashDir = "/data/mysql/customTrash";
# must not continue on failure: find would scan the wrong directory
chdir($custTrashDir) or die "can not chdir to $custTrashDir: $!";
printf "working in: '%s'\n", $custTrashDir;
my $fileCount = 0;
my $agedOut = 0;
my $notAgedOut = 0;
my $totalBytes = Math::BigInt->bzero();
my $agedBytes = Math::BigInt->bzero();
my $notAgedBytes = Math::BigInt->bzero();
open(my $findFh, '-|', 'find', '.', '-type', 'f', '-name', '*.frm')
    or die "can not run find in $custTrashDir: $!";
my $nowTimeStamp = time();
printf "#\ttimestamp now: %d\n", $nowTimeStamp;
printf "# table name\tage of:\t\tmtime\tctime\t\tatime\tsizes: fyi MYI MYD\n";
while (my $table = <$findFh>) {
    ++$fileCount;
    chomp $table;
    $table =~ s#\A\./##;      # drop the leading "./" that find prints
    $table =~ s/\.frm\z//;    # drop only a real trailing ".frm" suffix
    next if ($table =~ m/metaInfo/);
    next if ($table =~ m/extFile/);
    next if ($table =~ m/history/);
    my $frm = stat("$table.frm");
    my $myi = stat("$table.MYI");
    my $myd = stat("$table.MYD");
    # a table can be dropped while the scan is running, or may lack the
    # .MYI/.MYD files; skip it rather than die on an undefined stat result
    if (!($frm && $myi && $myd)) {
        printf STDERR "WARNING: can not stat all files of table %s, skipping\n",
            $table;
        next;
    }
    # combined size of the three files belonging to this table
    my $tableBytes = Math::BigInt->new($frm->size);
    $tableBytes->badd($myi->size);
    $tableBytes->badd($myd->size);
    $totalBytes->badd($tableBytes);
    if ($wigVarFile) {
        my $start = $frm->mtime - $timeZero;
        # tables created at or before $timeZero are left out of the graph
        if ($start > 0) {
            if (!exists($wigData{$start})) {
                $wigData{$start} = Math::BigInt->bzero();
            }
            $wigData{$start}->badd($tableBytes);
        }
    }
    next if (!exists($lostTables{$table}));    # checking for lost tables
    my $tableAgeSeconds = $nowTimeStamp - $frm->mtime;    # what is their age
    if ($tableAgeSeconds > $ageSeconds) {
        ++$agedOut;
        $agedBytes->badd($tableBytes);
        printf "%s\t%12d%12d%12d\t%d %d %d\n", $table,
            $nowTimeStamp - $frm->mtime, $nowTimeStamp - $frm->ctime,
            $nowTimeStamp - $frm->atime,
            $frm->size, $myi->size, $myd->size;
    } else {
        ++$notAgedOut;
        $notAgedBytes->badd($tableBytes);
        # NOTE(review): this branch prints raw time stamps while the aged
        # out branch prints ages; kept as it was - confirm the intent
        printf "# %s\t%12d%12d%12d\t%d %d %d\n", $table,
            $frm->mtime, $frm->ctime, $frm->atime,
            $frm->size, $myi->size, $myd->size;
    }
}
# every line find produced has been handled by now, so a bad exit status
# (for example a file that vanished during the scan) is only reported
if (!close($findFh)) {
    printf STDERR "WARNING: find in %s did not exit cleanly, status: %d\n",
        $custTrashDir, $? >> 8;
}
printf "#\ttable count: %d, total bytes in all tables: %s\n#\tlost tables aged out: %d, bytes aged: %s, not aged out: %d, bytes: %s\n",
    $fileCount, $totalBytes->bstr(), $agedOut, $agedBytes->bstr(),
    $notAgedOut, $notAgedBytes->bstr();

# if requested, output wiggle file for time vs. size of data created
#	at that second.
if ($wigVarFile) {
    foreach my $start (sort { $a <=> $b } keys %wigData) {
        printf $wigFh "%d\t%s\n", $start, $wigData{$start}->bstr();
    }
    # buffered write errors only show up at close
    close($wigFh) or die "error writing to $wigVarFile: $!";
}