#! /usr/local/bin/perl
################################################################################
## thorsummary.pl - produce summary of db contents in format similar
##                  to sthorman.
##
## The report goes to STDOUT; progress reports, prompts and diagnostics go
## to STDERR so that the report can be redirected to a file.
##
## AUTHOR: Jeremy Yang
## Rev: 27 Jul 2000
################################################################################
$| = 1;

use DayPerl;
require DU_LIB;
require DU_TLIB;

### Program name without its directory part (no shell needed for this).
($PROG = $0) =~ s/^.*\///;

####################################################################
### Associative arrays with $tag as keys
####################################################################
%data_count       = ( );  ## Total number of datatype occurrences
%data_thiscount   = ( );  ## Number of datatype occurrences in this TDT
%data_maxcount    = ( );  ## Max number of datatype occurrences
%data_bytes       = ( );  ## Total bytes for datatype
%data_tdts        = ( );  ## Total number of TDTs with datatype
%data_pri_ids     = ( );  ## Total number of TDTs rooted with datatype
%data_in_this_tdt = ( );  ## Flag: does this TDT have this datatype?
%data_fields_per  = ( );  ## How many fields

$quiet = $verbose = 0;
$MINORCOUNT = 20;         ## print "." every $MINORCOUNT TDTs
$MAJORCOUNT = 1000;       ## print running count every $MAJORCOUNT TDTs

################################################################################
## HELP - print the usage message on STDOUT and exit.
################################################################################
sub HELP
{
  print
    "\t******************************************************\n",
    "\t* tdtsummary [options] db [>out.txt] *\n",
    "\t* -h ....... help *\n",
    "\t* -minor N ....... minor progress report *\n",
    "\t* -major N ....... major progress report *\n",
    "\t* -v ....... verbose (extra stuff) *\n",
    "\t* -q ....... quiet (no progress reports) *\n",
    "\t******************************************************\n";
  exit;
}

################################################################################
## PROMPT_PASSWORD - ask for a password on the terminal without echo.
##
##   $prompt  text shown to the user (written to STDERR, because STDOUT
##            carries the report and is usually redirected).
##
## Returns the line read from STDIN without its trailing newline, or ""
## when STDIN is at end-of-file.
################################################################################
sub PROMPT_PASSWORD
{
  my($prompt) = @_;
  my $answer;

  system 'stty', '-echo';
  print STDERR $prompt;
  $answer = <STDIN>;
  print STDERR "\n";
  system 'stty', 'echo';

  $answer = "" unless defined($answer);
  chomp($answer);
  return $answer;
}

### Parse args ###
### The last argument is taken as the database spec unless it is an option.
for ($i = 0; $i <= $#ARGV; $i++)
{
  $arg = $ARGV[$i];
  if    ($arg =~ /^-h$/i)     { &HELP(); }
  elsif ($arg =~ /^-minor$/i) { $MINORCOUNT = $ARGV[++$i]; }
  elsif ($arg =~ /^-major$/i) { $MAJORCOUNT = $ARGV[++$i]; }
  elsif ($arg =~ /^-v$/i)     { $verbose = 1; }
  elsif ($arg =~ /^-q$/i)     { $quiet = 1; }
  elsif ($i == $#ARGV)        { $db = $arg; }
  else
  {
    print STDERR "Bad option \"$arg\" ...\n";
    &HELP();
  }
}

### Both intervals are used as modulus operands below, so they must be
### positive integers (a zero or missing value would abort the TDT loop).
foreach $interval ($MINORCOUNT, $MAJORCOUNT)
{
  next if (defined($interval) && $interval =~ /^\d+$/ && $interval > 0);
  printf STDERR "Bad progress interval \"%s\" ...\n",
         defined($interval) ? $interval : "";
  &HELP();
}

### Running totals and maxima; initialised so that an empty database
### still yields numeric output.
$tdtcount = $idcount = 0;
$total_idcount = $total_nonidcount = 0;
$max_itemcount = $max_idcount = $max_nonidcount = 0;

if (!(($dbname, $dbpw, $host, $service, $user, $userpw) = &DU_PARSE_DB($db)))
{
  print STDERR "DB syntax error...\n";
  &HELP();
}
if (!$service) { $service = "thor"; }

##########################################################
## Connect to Thorserver, open database.
## Each step is tried once with the password from the db spec and, on
## failure, once more with a password typed by the user.
##########################################################
$server = dt_thor_server($host, $service, $user, $userpw, $isnew);
if (NULL_OB == $server)
{
  $userpw = &PROMPT_PASSWORD("Enter server password for $host:$service:$user >> ");
  $server = dt_thor_server($host, $service, $user, $userpw, $isnew);
  if (NULL_OB == $server)
  {
    &DU_PRINTERRORS();
    die "Can't connect to Thorserver: \"$host:$service:$user\"\n";
  }
}

$thordb = dt_open($server, $dbname, "r", $dbpw, $isnew);
if (NULL_OB == $thordb)
{
  $dbpw = &PROMPT_PASSWORD("Enter database password for $dbname\@$host >> ");
  $thordb = dt_open($server, $dbname, "r", $dbpw, $isnew);
  if (NULL_OB == $thordb)
  {
    &DU_PRINTERRORS();
    die "Can't open database: \"$dbname\@$host:$service:$user\"\n";
  }
}
print STDERR "Opened database: \"$dbname\@$host:$service:$user\"\n";
printf STDERR "Total TDT count: %d\n", dt_count($thordb, TYP_DATATREE);

&REPORT_HEADER();

$tdtstream = dt_stream($thordb, TYP_DATATREE);

### TDT loop ###
for ($tdtcount = 0; NULL_OB != ($tdt = dt_next($tdtstream)); ++$tdtcount)
{
  $itemcount = $idcount = $nonidcount = 0;

  ### Collect the TDT itself followed by everything its TYP_DATATREE
  ### stream yields, so that one loop below handles all of them.
  $subtdts = dt_stream($tdt, TYP_DATATREE);
  $subtdtseq = dt_alloc_seq();
  dt_append($subtdtseq, $tdt);
  while (NULL_OB != ($subtdt = dt_next($subtdts)))
  {
    dt_append($subtdtseq, $subtdt);
  }
  dt_dealloc($subtdts);
  dt_reset($subtdtseq);

  ### SubTDT loop ###
  while (NULL_OB != ($subtdt = dt_next($subtdtseq)))
  {
    $items = dt_stream($subtdt, TYP_DATAITEM);

    ### Dataitem loop ###
    while (NULL_OB != ($item = dt_next($items)))
    {
      ++$itemcount;
      if ($itemcount > $max_itemcount) { $max_itemcount = $itemcount; }

      $dtype = dt_datatype($item);
      $tag = dt_tag($dtype);
      if ($tag && !exists($data_count{$tag}))
      {
        &INIT_STRUCT($tag, $dtype, $item);
      }

      ### Tags starting with "$" are counted as identifiers.
      if ($tag =~ /^\$/)
      {
        ++$idcount;
        if ($idcount > $max_idcount) { $max_idcount = $idcount; }
      }
      else
      {
        ++$nonidcount;
        if ($nonidcount > $max_nonidcount) { $max_nonidcount = $nonidcount; }
      }

      $data_in_this_tdt{$tag} = 1;    ### set presence flag

      ### $itemcount was already incremented above, so the first dataitem
      ### seen in this TDT has count 1 (presumably the root identifier).
      if (1 == $itemcount) { ++$data_pri_ids{$tag}; }

      ++$data_count{$tag};
      ++$data_thiscount{$tag};
      if ($data_thiscount{$tag} > $data_maxcount{$tag})
      {
        $data_maxcount{$tag} = $data_thiscount{$tag};
      }

      ### Data size is the summed string length of all fields.
      $fields = dt_stream($item, TYP_DATAFIELD);
      while (NULL_OB != ($field = dt_next($fields)))
      {
        $data_bytes{$tag} += length(dt_stringvalue($field));
      }
      dt_dealloc($fields);
    }
    dt_dealloc($items);
  }
  dt_dealloc($subtdtseq);
  dt_dealloc($tdt);

  $total_idcount += $idcount;
  $total_nonidcount += $nonidcount;

  ### Fold the per-TDT flags into the totals and reset them for the next TDT.
  foreach $tag (keys(%data_in_this_tdt))
  {
    $data_tdts{$tag} += $data_in_this_tdt{$tag};
    $data_in_this_tdt{$tag} = 0;
    $data_thiscount{$tag} = 0;
  }

  next unless ($tdtcount);
  if (!$quiet && !($tdtcount % $MINORCOUNT)) { print STDERR "."; }
  if (!$quiet && !($tdtcount % $MAJORCOUNT)) { print STDERR "$tdtcount TDTs\n"; }
}
dt_dealloc($tdtstream);
if (!$quiet && ($tdtcount % $MAJORCOUNT)) { print STDERR "$tdtcount TDTs\n"; }

&REPORT_CONTENT();

dt_dealloc($thordb);
dt_dealloc($server);

if ($verbose)
{
  print "\n";
  &DU_PRINTERRORS(DX_ERR_NOTE);
}
else
{
  &DU_PRINTERRORS(DX_ERR_ERROR);
}
print STDERR "So long, baby!\n";
exit 0;

################################################################################
## INIT_STRUCT - data for each tag
##
##   $tag    datatype tag, used as key of the %data_* arrays
##   $dtype  datatype object the tag came from
##   $item   first dataitem seen with this tag
##
## Registers the tag in @idtags or @nonidtags, records its name in %vtag
## and sets up its counters.
################################################################################
sub INIT_STRUCT
{
  my($tag, $dtype, $item) = @_;

  if ($tag =~ /^\$/) { push(@idtags, $tag); }
  else               { push(@nonidtags, $tag); }

  $vtag{$tag}             = dt_name($dtype);
  $data_count{$tag}       = 0;
  $data_bytes{$tag}       = 0;
  $data_tdts{$tag}        = 0;
  $data_pri_ids{$tag}     = 0;
  $data_in_this_tdt{$tag} = 1;
  $data_maxcount{$tag}    = 1;
  $data_fields_per{$tag}  = dt_count($item, TYP_DATAFIELD);
}

################################################################################
## REPORT_HEADER - print date, database names and hash-table statistics.
##
## Reads the global $thordb.  Note that the global $dbname is replaced by
## the database's string value with any leading path removed.
################################################################################
sub REPORT_HEADER
{
  my($auxdb, $pri_bytes_all, $xref_bytes_all);

  system("date");
  print "\n";

  ($dbname = dt_stringvalue($thordb)) =~ s/^.*\///;
  printf "Database ............................ %s\n", $dbname;

  $auxdb = dt_thor_getauxillarydb($thordb, DX_THOR_DATATYPESDB);
  printf "Datatypes database .................. %s\n", $auxdb ? $auxdb : "";

  ### Kludge! Values that do not look like the expected name are blanked.
  $auxdb = dt_thor_getauxillarydb($thordb, DX_THOR_INDIRECTDB);
  if ($auxdb !~ /_indirect/ || $auxdb =~ /[\s<>|;]/) { $auxdb = ""; }
  printf "Indirect reference database ......... %s\n", $auxdb ? $auxdb : "";

  ### Kludge! Same filtering for the monomer database name.
  $auxdb = dt_thor_getauxillarydb($thordb, DX_THOR_MONOMERDB);
  if ($auxdb !~ /_monomers/ || $auxdb =~ /[\s<>|;]/) { $auxdb = ""; }
  printf "Monomer definitions database ........ %s\n", $auxdb ? $auxdb : "";

  ### "sizes" is a comma-separated list of eight values.
  ($pri_tdtcount, $pri_bytesused, $pri_bytesfree,
   $xref_tdtcount, $xref_bytesused, $xref_bytesfree,
   $pri_hashsize, $xref_hashsize) = split(/,/, dt_info($thordb, "sizes"));

  printf "Last modified date .................. %s\n",
         dt_info($thordb, "modified");

  printf "Primary hash-table: # of TDTs ....... %d\n", $pri_tdtcount;
  printf "Primary hash-table size: ............ %d\n", $pri_hashsize;
  $pri_bytes_all = $pri_bytesused + $pri_bytesfree;
  printf "Primary hash-table: bytes used ...... %d (%.1f%%)\n",
         $pri_bytesused,
         $pri_bytes_all ? 100*($pri_bytesused/$pri_bytes_all) : 0.0;
  printf "Primary hash-table: bytes free ...... %d (%.1f%%)\n",
         $pri_bytesfree,
         $pri_bytes_all ? 100*($pri_bytesfree/$pri_bytes_all) : 0.0;

  printf "Cross-ref. hash-table size: ......... %d\n", $xref_hashsize;
  printf "Cross-ref. hash-table: # of TDTs .... %d\n", $xref_tdtcount;
  $xref_bytes_all = $xref_bytesused + $xref_bytesfree;
  printf "Cross-ref. hash-table: bytes used ... %d (%.1f%%)\n",
         $xref_bytesused,
         $xref_bytes_all ? 100*($xref_bytesused/$xref_bytes_all) : 0.0;
  printf "Cross-ref. hash-table: bytes free ... %d (%.1f%%)\n",
         $xref_bytesfree,
         $xref_bytes_all ? 100*($xref_bytesfree/$xref_bytes_all) : 0.0;

  printf "Crunch limit ........................ %.2f\n",
         dt_thor_autocrunch_limit($thordb, -9);
  return;
}

################################################################################
## REPORT_GROUP - print one table section (one row per tag plus a total row).
##
##   $label        text for the total row
##   $item_total   value shown in the total row's "#dataitems" column
##   $max_per_tdt  value shown in the total row's "max tdt" column
##   @tags         tags belonging to this section (printed sorted)
##
## Reads the globals $tdtcount, $hr1, $hr2 and the %data_* / %vtag arrays.
## Returns (number of dataitems, number of bytes) summed over the section.
################################################################################
sub REPORT_GROUP
{
  my($label, $item_total, $max_per_tdt, @tags) = @_;
  my($items, $bytes) = (0, 0);
  ### With no TDTs every count is 0, so dividing by 1 prints zeros
  ### instead of dying with a division by zero.
  my $denom = $tdtcount ? $tdtcount : 1;
  my $tag;

  foreach $tag (sort(@tags))
  {
    next if ($tag eq '$_');
    $items += $data_count{$tag};
    $bytes += $data_bytes{$tag};
    printf("%-6s %-16s %11d %10d (%5.1f) %8.1f %8d %6d\n",
           substr($tag, 0, 6), substr($vtag{$tag}, 0, 16),
           $data_count{$tag},
           $data_tdts{$tag}, 100*$data_tdts{$tag}/$denom,
           $data_count{$tag}/$denom,
           $data_maxcount{$tag},
           $data_bytes{$tag}/1000);
  }
  printf("$hr2\n%23s %11s %10s (%5.1f) %8.1f %8d %6d\n$hr1\n",
         $label, $item_total,
         $tdtcount, 100.0,
         $items/$denom,
         $max_per_tdt,
         $bytes/1000);
  return ($items, $bytes);
}

################################################################################
## REPORT_CONTENT - print the per-datatype table and the grand totals.
##
## Uses the counters filled by the TDT loop.  With -v a few database
## properties are printed ahead of the table.
################################################################################
sub REPORT_CONTENT
{
  my($total_dtypes, $total_bytes);
  my($id_items, $id_bytes, $nonid_items, $nonid_bytes);
  my $denom = $tdtcount ? $tdtcount : 1;    ### see REPORT_GROUP

  ### Rulers matching the column widths 23 11 18 8 8 6.
  $hr1 = join(" ", map { "=" x $_ } 23, 11, 18, 8, 8, 6);
  $hr2 = join(" ", map { "-" x $_ } 23, 11, 18, 8, 8, 6);

  if ($verbose)
  {
    printf "Read-only ........................... %s\n",
           dt_thor_readonly($thordb) ? "TRUE" : "FALSE";
    printf "Public (no read-pw) ................. %s\n",
           dt_ispublic($server, $dbname) ? "TRUE" : "FALSE";
    printf "Record locking ...................... %s\n",
           dt_thor_tdtlocking($thordb) ? "ENFORCED" : "DISABLED";
    printf "\"HOLD\" database: .................... %s\n",
           dt_isheld($thordb) ? "TRUE" : "FALSE";
  }

  printf("\n%23s %11s %18s %8s %8s %6s\n$hr1\n",
         "datatypes", "#dataitems", "#tdts (% of total)",
         "avg#/tdt", "max tdt", "sizeKB");

  ### Each section's average is computed from that section's own items.
  ($id_items, $id_bytes) =
    &REPORT_GROUP("total identifiers", $total_idcount, $max_idcount,
                  @idtags);
  ($nonid_items, $nonid_bytes) =
    &REPORT_GROUP("total non-identifiers", $total_nonidcount, $max_nonidcount,
                  @nonidtags);

  $total_dtypes = $id_items + $nonid_items;
  $total_bytes  = $id_bytes + $nonid_bytes;

  printf("%23s %11s %10s (%5.1f) %8.1f %8d %6d\n\n",
         "total all datatypes", $total_dtypes,
         $tdtcount, 100.0,
         $total_dtypes/$denom,
         $max_itemcount,
         $total_bytes/1000);

  printf("Total data bytes .............. %d (%.1fMB)\n",
         $total_bytes, $total_bytes/1000000.0);
  return;
}