#!/opt/local/bin/perl
#
#  Name: get_hsgpct.pl -- Determine the HSG for each STATSGO Mapunit
#
#  Purpose:
#
#       This script uses the HYDGRP and COMPPCT variables from the Comp
#       table of the STATSGO database to determine the percent of the
#       area of each STATSGO mapunit which is in each hydrologic soil
#       group (HSG).  The script also determines the fraction of each
#	mapunit's area which is covered by all-water components.
#
#  Usage:
#	perl get_hsgpct.pl workspace outfile rptfile
#
#  Options and Arguments:
#
#	workspace       Arc/Info Workspace location of INFO tables.
#	outfile		File to receive output table.
#	rptfile		File to receive report of processing statistics,
#			problems, etc.
#
#  External associations:
#	This script is part of a package for processing the STATSGO soil
#	properties database.  In particular, it was written to access
#	the Comp and Mapunit STATSGO tables to determine the percentage
#	contribution of each hydrologic soil group type on a mapunit
#	basis.  Output from this script is in ASCII format.  Script
#	pack_hsgpct.pl may be used to convert it to a format suitable
#	for ingest into an Arc/Info INFO table defined by script
#	def_hsgpct_items.aml.
#
#  Algorithmic details:
#
#	This script reads the HYDGRP and COMPPCT values for each
#	component of each mapunit.  The HYDGRP item in the STATSGO Comp
#	table, which specifies the HSG, has the possible values A, B, C,
#	D, A/D, B/D, C/D.  HYDRGP values A/D, B/D, and C/D are converted
#	to D.  If no HYDGRP value is specified and the component is
#	designated as being all water, it is assigned a HSG code of W;
#	otherwise, it is assigned HSG code D.  For each of the resulting
#	HSG codes (A, B, C, D, and W) the sum of the COMPPCT values for
#	all components of the mapunit having that code is determined,
#	and taken as the percent of that HSG in the mapunit.
#
#	This information is then placed in a table that can be used in
#	conjunction with land use/cover information to determine the
#	runoff curve number (CN's) for each mapunit.  Preserving the
#	percentages of the relevant HSG's in each component of each
#	mapunit allows the production of a "weighted" CN for each
#	mapunit.
#
#	The HSG W percents can also be used independently as a data
#	layer specifying the percent of each mapunit's area which is
#	composed of all-water components.
#
#  External files accessed:
#	<workspace>     Arc/Info Workspace location of INFO tables being
#                       accessed for processing.
#	<outfile>	WRITE to record HSG percentages for each
#			component.  Each record contains data for all
#			components of one mapunit.
#	<rptfile>	WRITE to record processing arguments, statistics
#		        and other problems, etc., encountered during
#                       processing.
#
#  Environment variables referenced:
#	PERLINC		Libraries to be searched by require() ahead of
#			those specified by -I and the default libraries.
#                                  
#  External subroutines included:
#	openinfo.pl	Open an INFO table, returning unpack() template
#			for reading a specified list of items.
#
#  Internal subroutines defined:
#	help		Output help text.
#	output_mu	Generate output values for each component of one
#			mapunit, and reinitialize for next component.
#	usage		Output terse usage text.
#
#  Internal variables:
#	COMP		File handle for reading COMP table listing.
#       IN              Filehandle for Info file being opened by openinfo.
#	OUT		Filehandle for output file.
#	RPT		File handle for output report file.
#       buf             Array of values from COMP table.
#	cmd		Name of this script; used for help text.
#	complist	Filename for listing of COMP table.
#       compname        Component name from COMP table.
#	comppct		Percent of component in map unit.
#	curmuid		ID of mapunit currently being processed.
#	errcnt		Count of severe errors.
#       grppct          Accumulator for percentages of hsg's within a mapunit.
#       hydgrp          Hydrologic soil group type.
#	muid		Mapunit ID.
#	opt		Name of current option.
#	outfile		Name of file to receive relayered output.
#       reclen_comp     Length of comp table record, in bytes.
#	seqnum		Component sequence number.
#	stats		Associative array of statistics for anomalies.
#       template_comp   Template for reading comp table, returned by openinfo.
#	time		Array of values returned by localtime().
#	totpct		Sum of percents for all components in map unit.
#	value		Value of one array element.
#
#  Script history:
#	2-96	Initial version, D. A. Miller
#
############   End of Prolog for get_hsgpct.pl   ############

#  Define usage and help text

@cmd =  (split ('/', $0));
$cmd = pop (@cmd);

unshift (@INC, split (/ /,$ENV{"PERLINC"}));
require "openinfo.pl";

sub usage
    {
    print "\n";
    print "Usage:  perl $cmd workspace outfile rptfile\n"; 
    print "        perl $cmd -u|h\n";

    }		#  End of subroutine usage

sub help
    {
    print "\n";
    print $cmd, " -- Get HSG percents for each STATSGO mapunit\n";
    &usage;
    print "\n";
    print "Options and arguments are\n";
    print "     -h   	Write this help text to stdout and exit.\n";
    print "     -u   	Display terse usage message and exit.\n";   
    print " workspace	Pathname of directory containing info/\n",
	  "		subdirectory for COMP and LAYER tables.\n";
    print " outfile	File to receive output table.\n",
	  " rptfile	File to receive report of processing",
			" statistics,\n",
	  "		problems, etc.\n";

    }		#  End of subroutine help

#  Subroutine to output component data for one mapunit

sub output_mu
    {
    return $muid if ($curmuid eq "");

if($totpct > 0)
    {
    foreach $hydgrp ("A", "B", "C", "D", "W")
        {
        $grppct{$hydgrp} = (defined $grppct{$hydgrp}) ?
             $grppct{$hydgrp} *= 100 / $totpct : 0;
        }
    }
else
    {
    foreach $hydgrp ("A", "B", "C", "D", "W")
        {
        $grppct[$hydgrp] = 0;
        }
    }
printf OUT ("%-8s%4d%4d%4d%4d%4d\n", $curmuid, $grppct{"A"},
  $grppct{"B"}, $grppct{"C"}, $grppct{"D"}, $grppct{"W"});

#  Report problems and anomalies

    print RPT "Total COMPPCT for MUID $curmuid = $totpct\n"
      if ((($totpct < 99) || ($totpct > 101))
	&& (++$stats{"badtotpct"}));   

#  Reinitialize accumulators

    $totpct = 0;
    %grppct = ();

    return $muid;
    }
			#  End of subroutine output_mu

#  Start of main routine
#

#  Get options

while ($ARGV[0] =~ /-(\w+)(\s+|$)/)
    {
    push (@opt, $1);
    shift;
    }	
if ($opt[0] eq 'u')
    {
    &usage;
    exit;
    }
if ($opt[0] eq 'h')
    {
    &help;
    exit;
    }

#  Preprocess arguments

if  ($#ARGV < 2)
    {
    &usage;
    exit 1;
    }

$workspace = shift;
$outfile = shift;
$rptfile = shift;


#  Open  output files

open (OUT, "> $outfile") || die "Error opening output file, $outfile,";
open (RPT, "> $rptfile") || die "Error opening report file, $rptfile,";

#  Create report header

@time = localtime (time);
$time[5] += 1900;
printf RPT "	%s, %02d/%02d/%02d %02d:%02d:%02d\n\n",
  "Summary of get_hsgpct.pl processing", $time[5], $time[4]+1,
  $time[3], $time[2], $time[1];

print RPT "Arguments are:\n";

printf RPT "    %-16s%s\n", "outfile", $outfile;
printf RPT "    %-16s%s\n\n", "rptfile", $rptfile;

#  Call openinfo to open and unpack the INFO formatted COMP table

@itemlist = ("muid", "seqnum", "compname", "comppct", "hydgrp");

(($reclen_comp, $template_comp) = &openinfo ("IN", $workspace,
     "COMP", @itemlist)) || die "Openinfo returned error";

#  Process components of each mapunit in turn

$curmuid = "";
$totpct = 0;

while (read(IN,$buf,$reclen_comp))

    {
    ($muid, $seqnum,$compname,$comppct,$hydgrp) =
      unpack($template_comp,$buf);
    $curmuid = &output_mu if ($muid ne $curmuid);
    $hydgrp = $1 if ($hydgrp =~ /\w\/(\w)/);
    $totpct += $comppct;

    if ($hydgrp =~ /[A-D]/)
	{
	$grppct{$hydgrp} += $comppct;
	}
    elsif ($compname eq "WATER")
	{
	$grppct{"W"} += $comppct;
	}
    else
	{
	$grppct{"D"} += $comppct;
	$stats{"novalue"}++ ;
	}
    }

&output_mu;		# Output results for last mapunit

close IN;
close OUT;

$valuestat = $stats{"novalue"};

#  Add statistics to processing report.

print RPT "\nProcessing statistics\n\n";
print RPT "Number of components without valid hydgrp:",
          "$valuestat\n" if ($valuestat > 0);


close RPT;

exit 0;		# From Script get_hsgpct.pl
