#!/opt/local/bin/perl
#
#  Name: get_ph.pl -- Determine average pH for standard layers
#
#  Purpose:
#	This script determines average pH for each of a set of
#	standard-thickness soil layers for each STATSGO mapunit in a
#	state.	The pH is given by the STATSGO Layer table variables PHH
#	and PHL.
#
#  Usage:
#	perl get_ph.pl layerdeftab textdeftab workspace outfile rptfile
#
#  Options and Arguments:
#	layerdeftab	File specification for table defining
#			thicknesses of standard soil layers.
#	textdeftab	File specification for table assigning standard
#			soil texture class to each texture-plus-fragment
#			class used in STATSGO tables.
#	workspace	Arc/Info Workspace location of INFO tables.
#	outfile		File to receive output table.
#	rptfile		File to receive report of processing statistics,
#			problems, etc.
#
#  External associations:
#	This script is part of a package for processing the STATSGO soil
#	properties database.  Output from this script is in an ASCII
#	format; script pack_ph.pl may be used to convert it to a
#	format suitable for ingest into Arc/Info INFO tables defined by
#	script def_ph_items.aml.
#
#  Algorithmic details:

#	The STATSGO Layer table gives a range of pH values (PHH and PHL)
#	for each layer of each component of each mapunit.  The mean pH
#	for each component layer is computed as the simple mean of the
#	PHH and PHL values.  The mean pH values for each component are
#	then interpolated from the layer depths of the particular
#	component (specified by Layer table variables LAYDEPL and
#	LAYDEPH) to the standard layers specified by the input table of
#	standard layers.  Then the pH values for each standard layer are
#	averaged over all non-water components of the mapunit, weighting
#	the contribution of each component by the COMPPCT variable from
#	the STATSGO Component table, to obtain the mean pH values for
#	the mapunit.
#
#	The region from the bottom of the last component layer to the
#	bottom of the last standard layer, if any, is assumed to be the
#	same as the lowest component layer down to the mean bedrock
#	depth (mean of Component table variables ROCKDEPL and ROCKDEPH).
#	Below this depth, the pH is taken as undefined.  The STATSGO
#	Layer table may also directly specify that the pH for a layer is
#	undefined (for example, bedrock outcroppings), by giving the
#	values of PHH and PHL as 0.0.  The computation of the average ph
#	value for a standard layer ignores component layers for which
#	the pH is undefined.  If pH is undefined for all component
#	layers which contribute to a standard output layer for the
#	mapunit, then a pH value of 0.0 is used to indicate that the pH
#	is undefined for that layer.
#	
#	NOTE that many components specify ROCKDEPL = ROCKDEPH = 60
#	inches to specify that the soil was not examined below this
#	depth.  In most cases, bedrock is not actually present;
#	nonetheless, the value of pH is computed as if bedrock was
#	encountered at this depth.
#
#  External files accessed:
#	<layerdeftab>	READ to get thickness of each standard
#			layer. This file contains one record for each
#			layer, starting at the top, giving its thickness
#			in cm.
#	<textdeftab>	READ to get assignment of each texture-plus-
#			fragment modifier to standard soil texture
#			class.	Each record contains one texture-plus-
#			modifier code (ASCII) followed by the standard
#			abbreviation for the standard texture class.
#	<workspace>	Arc/Info Workspace location of INFO tables being 
#			accessed for processing .
#	<outfile>	WRITE to record PH for each mapunit.  Each
#			record contains the mapunit ID followed by the
#			pH values for each standard layer.
#	<rptfile>	WRITE to record processing arguments, statistics
#			and other problems, etc., encountered during
#			processing.
#
#  Environment variables referenced:
#	PERLINC		Libraries to be searched by require() ahead of
#			those specified by -I and the default libraries.
#				   
#  External subroutines included:
#	openinfo.pl	Open an INFO table, returning unpack() template
#			for reading a specified list of items.
#
#  Internal subroutines defined:
#	bld_layers	Subroutine to build layers for one mapunit
#			component.
#	output_mu	Generate output values for one mapunit, and
#			reinitialize for next component.
#	set_allrock	Subroutine to build single rock or water layer
#			when no soil.
#
#  Internal variables:
#	COMP		File handle for reading Component table.
#	TEXTDEF		File handle for reading texture-definition
#			table.
#	LAYDEF		File handle for reading layer definition table.
#	LAYER		File handle for reading Layer table.
#	RPT		File handle for output report file.
#	UCONV		Conversion factor from STATSGO depth units to
#			standard depth units.
#	bufc		Array of values from COMP table.
#	bufl		Array of values from LAYER table.
#	cl		Current component layer.
#	clbot		Array of component layer bottoms, in cm.
#	cldeph		Bottom of component layer, LAYDEPH item.
#	cldepl		Top of component layer, LAYDEPL item.
#	clph		Array of component layer mean pH values.
#	cltop		Array of component layer tops, in cm.
#	cmd		Name of this script; used for help text.
#	compname	Component name from COMP table.
#	comppct		Percent of component in mapunit from COMP table.
#	comprec		Number of current COMP record.
#	curmuid		ID of mapunit currently being processed.
#	errcnt		Count of severe errors.
#	help		Help text.
#	layerdeftab	Filename for table defining standard layers.
#	layer		Number of current output layer.
#	laynum		Value of LAYERNUM item.
#	layerrec	Number of current LAYER record
#	lmuid		Mapunit ID of current layer.
#	lph		pH of standard layer.
#	lseqnum		Component sequence number of current layer.
#	lthick		Array of thicknesses of standard layers.
#	ltop		Array of depths to tops of standard layers.
#	lweight		Sum of non-zero pH component percents for layer.
#	muid		Mapunit ID.
#	ncl		Number of layers for current component.
#	nextcl		Next component layer.
#	nlayer		Number of standard layers.
#	opt		Name of current option.
#	outfile		Name of file to receive relayered output.
#	overlap		Amount of overlap of component and output layers.
#	phl		Value of PHL for component layer.
#	phh		Value of PHH for component layer.
#	rdeph		Bedrock depth for component, higher abs. value.
#	rdepl		Bedrock depth for component, lower abs. value.
#	rdepm		Bedrock depth for component, mean.
#	reclen_comp	Length of COMP table record, in bytes.
#	reclen_layer	Length of LAYER table record, in bytes.
#	savecl		Array to save data for next component layer.
#	seqnum		Component sequence number.
#	stats		Associative array of statistics for anomalies.
#	template_comp	Template for reading COMP table, returned by
#			openinfo.
#	template_lay	Template for reading LAYER table, returned by
#			openinfo.
#	textdeftab	Filename for texture definition table.
#	textmap		Associative array of texture assignments.
#	texture		Dominant texture code for component layer.
#	time		Array of values returned by localtime().
#	totpct		Sum of percents for all components in map unit.
#	usage		Text of usage message.
#	value		Value of one array element.
#
#  Script history:
#
#	2000-02		Initial version, R. A. White.
#
############   End of Prolog for get_ph.pl   ############

#  Load external library modules, adding $PERLINC to search path

unshift (@INC, split (/ /,$ENV{"PERLINC"}));
require "openinfo.pl";

#  Define usage and help text

@cmd =  (split ('/', $0));
$cmd = pop (@cmd);

$usage = <<"End of Usage";

Usage:  $cmd layerdeftab textdeftab workspace outfile rptfile
        $cmd -u|h
End of Usage

$help = <<"End of Help";

$cmd -- determine average pH for standard layers
$usage
Options and arguments are
     -h   	Write this help text to stdout and exit.
     -u   	Display terse usage message and exit.
  layerdeftab	File specification for table defining thicknesses of
		standard soil layers.
  textdeftab	File specification for table assigning standard soil
		texture class to each texture-plus-fragment class used
		in STATSGO tables. 
   workspace	Arc/Info workspace location of INFO tables.
    outfile	File to receive output table.
    rptfile	File to receive report of processing statistics,
		problems, etc.
End of Help

#
#  Start of main routine
#

#  Get options

while ($ARGV[0] =~ /^\w*-(\w+)($|\s+)/)
    {
    push (@opt, $1);
    shift;
    }	
($opt[0] eq 'u') && (print $usage) && exit;
($opt[0] eq 'h') && (print $help) && exit;

#  Preprocess arguments

(@ARGV != 5) && (print $usage) && exit;
$layerdeftab = shift;
$textdeftab = shift;
$workspace = shift;
$outfile = shift;
$rptfile = shift;

# Set program constants

$UCONV = 2.54;			# Convert inches to centimeters

# Open Layer defintion file;  Get layer depths.

$nlayer = 0;
$ltop[0] = 0;

open (LAYDEF, $layerdeftab)
  || die "Error opening layer table, $layerdeftab,";
while (<LAYDEF>)
    {
    if (/^\s*(\d+)($|\s+)/)
	{
	$lthick[$nlayer++] = $1;
	$ltop[$nlayer] = $ltop[$nlayer - 1] + $1;
	}
    }
close LAYDEF;

#  Build associative array of texture definitions

open (TEXTDEF, $textdeftab)
 || die "Error opening texture definition table, $textdeftab,";
while (<TEXTDEF>)
    {
    $textmap{$1} = $2 if (/^\s*(\w\S*)\s*(\S+)($|\s+)/);
    }
close TEXTDEF;

#  Open	 output files

open (OUT, "> $outfile") || die "Error opening output file, $outfile,";
open (RPT, "> $rptfile") || die "Error opening report file, $rptfile,";

#  Create report header

@time = localtime (time);
$time[5] += 1900;
printf RPT "	%s, %02d/%02d/%02d %02d:%02d:%02d\n\n",
  "Summary of get_ph.pl processing", $time[5], $time[4]+1,
  $time[3], $time[2], $time[1];

print RPT "Arguments are:\n";

printf RPT "	%-16s%s\n", "layerdeftab", $layerdeftab;
printf RPT "	%-16s%s\n", "workspace", $workspace;
printf RPT "	%-16s%s\n", "outfile", $outfile;
printf RPT "	%-16s%s\n\n", "rptfile", $rptfile;

#  Call openinfo to open and unpack the INFO formatted COMP table

(($reclen_comp, $template_comp) = &openinfo ("COMP",$workspace, "COMP",
    "MUID", "SEQNUM", "COMPNAME", "COMPPCT", "ROCKDEPL", "ROCKDEPH"))
  || die "Openinfo returned error";

#  Call openinfo to open and unpack the INFO formatted LAYER table

(($reclen_layer, $template_lay) = &openinfo ("LAYER", $workspace,
    "LAYER", "MUID", "SEQNUM", "LAYERNUM", "LAYDEPL", "LAYDEPH",
    "TEXTURE1", "PHL", "PHH"))
  || die "Openinfo returned error";

#  Clear all ph entries

for ($layer = 0; $layer < $nlayer; $layer++)
    {
    $lph[$layer] = 0.0;
    $lweight[$layer] = 0.0;
    }
   
#  Process components of each mapunit in turn

$curmuid = "";
$totpct = 0;
$nextcl = 0;
$comprec = 0;
$layerrec = 0;

while (read (COMP, $bufc, $reclen_comp))
    {
    $comprec++;
    (($muid, $seqnum, $compname, $comppct, $rdeph, $rdepl)
        = unpack ($template_comp, $bufc))
      || die "Bad COMP record $comprec";
    $curmuid = &output_mu if ($muid ne $curmuid);

 #  Collect layer data for current component

    if (defined $laynum)
	{
	print RPT "Bad LAYERNUM \"$laynum\" for layer 1,",
	    " $lmuid $lseqnum\n"
	  if (($laynum != 1) && ( ! eof (LAYER)));
	if (($lmuid ne $curmuid) || ($lseqnum != $seqnum))
	    {
	    &bld_layers if (&set_allrock);
	    ($cltop[0], $clbot[0], $clph[0]) = @savecl;
	    next;
	    }
	}
    while (read (LAYER, $bufl, $reclen_layer))
	{
	$layerrec++;
	(($lmuid, $lseqnum, $laynum, $cldepl, $cldeph, $texture, $phl,
	  $phh) = unpack ($template_lay, $bufl))
	  || die "Bad LAYER record $layerrec";
	$cltop[$nextcl] = $cldepl * $UCONV;
	$clbot[$nextcl] = $cldeph * $UCONV;

#  Compute mean pH for this component layer

	if (($phl eq "") || $phh eq "")
	    {
	    ($phh ne "") && print RPT
	      "PHL only missing, $lmuid $lseqnum, layer $laynum\n";
	    ($phl ne "") && print RPT
	      "PHH only missing, $lmuid $lseqnum, layer $laynum\n";
	    $phl = $phh = 0.0;
	    $stats{"noval"}++;
	    }
	if ($phl > $phh)
	    {
	    print RPT "PHL > PHH, $lmuid $lseqnum, layer $laynum",
	      " -- layer ignored\n";
	    $stats{"valrev"}++;
	    die "Too many errors -- see report file $rptfile: "
	      if ($errcnt++ > 10);
	    next;
	    }
	if (($phl <= 0.0) || ($phh <= 0.0))
	    {
	    if ($textmap{$texture} !~ /^(ND|OM|W|BR|O)/)
		{
		print RPT "PHL or PHH missing, $lmuid $lseqnum, ",
		  "layer $laynum, TEXTURE1 = $texture\n";
		$stats{"msmiss"} ++;
		}
	    $clph{$nextcl} = 0.0;
	    }
	else
	    {
	    $clph[$nextcl] = ($phl + $phh)/2.0;
	    }

	last if (($lseqnum != $seqnum) || ($lmuid ne $curmuid));
 	print RPT "Bad LAYERNUM \"$laynum\" for layer",
	  " $nextcl, $lmuid $lseqnum\n"
	  if (++$nextcl != $laynum);
	}
    @savecl = ($cltop[$nextcl], $clbot[$nextcl], $clph[$nextcl]);
    $nextcl = &bld_layers;
    ($cltop[0], $clbot[0], $clph[0]) = @savecl;
    }

&output_mu;		# Output results for last mapunit

close COMP;
close LAYER;
close OUT;

#  Add statistics to processing report.

print RPT "\nProcessing statistics\n\n";
$value = 0 unless ($value = $stats{"totalcomp"});
print RPT "    Total components in input compfile:          $value\n"; 
print RPT "    Component layers with PHL/H missing:         $value\n"
  if ($value = $stats{"noval"});
print RPT "    Mineral soil layers with PHL/H missing:      $value\n"
  if ($value = $stats{"msmiss"});
close RPT;

exit 0;		# From Script get_ph.pl

#
#  Subroutine to build layers for one mapunit component
#

sub bld_layers
    {
    $stats{"totalcomp"}++;	# Total components
    $ncl = $nextcl;
    return 1 if (($ncl == 0) && ( ! &set_allrock));
    $totpct += $comppct;

#  Convert bedrock depths to cm

    if (($rdepl eq "") || ($rdeph eq ""))
	{
	print RPT "Mapunit $curmuid, component $seqnum:	no ROCKDEP",
	  " specified\n" unless ($clph[0] <= 0.0);
	$rdepl = 0 if ($rdepl eq "");
	$rdeph = 0 if ($rdeph eq "");
	}
    $rdepl *= $UCONV;
    $rdeph *= $UCONV;
    $rdepm = ($rdepl + $rdeph) / 2.0;

#  Check for layer overlaps, inversion, etc.

    if ($cltop[0] != 0.0)
	{
	print RPT "Layer 1 of $curmuid $seqnum starts at $cltop[0]",
	  " -- changed to 0.0\n";
	$cltop[0] = 0.0;
	$stats{"nonzero"}++;
	}
    for ($cl = 0; $cl < $ncl; $cl++)
	{
	if ($cltop[$cl] > $clbot[$cl])
	    {
	    print RPT "LAYDEPH = $clbot[$cl] less than LAYDEPL =",
	      " $cltop[$cl], $curmuid $seqnum, layer $cl\n";
	    $clbot[$cl] = $cltop[$cl];
	    die "Too many errors -- see report file $rptfile: "
	      if ($errcnt++ > 10);
	    }	    
	if (($cl > 0) && ($cltop[$cl] < $clbot[$cl - 1]))
	    {
	    if (($cltop[$cl] == $cltop[$cl -1])
	      && ($clbot[$cl] == $clbot[$cl - 1]))
		{
		$value = $cl + 1;
		print RPT "Layer $value duplicates $cl, $curmuid",
		  " $seqnum -- using first only\n";
		$cltop[$cl] = $clbot[$cl];
		$stats{"duplayer"}++;
		}
	    elsif ($cltop[$cl] > $cltop[$cl - 1])
		{
		$value = $cl + 1;
		print RPT "Layer $value overlaps layer $cl, $curmuid",
		  " $seqnum -- moving top of layer $value downwards\n";
		$cltop[$cl] = $clbot[$cl - 1];
		$clbot[$cl] = $cltop[$cl]
		  if ($clbot[$cl] < $cltop[$cl]); 
		$stats{"overlap"}++;
		}
	    else
		{
		$value = $cl + 1;
		print RPT "Layer $value starts above $cl, $curmuid",
		  " $seqnum -- moving layer $value downwards\n";
		$cltop[$cl] = $clbot[$cl - 1];
		$clbot[$cl] = $cltop[$cl]
		  if ($clbot[$cl] < $cltop[$cl]); 
		die "Too many errors -- see report file $rptfile: "
		  if ($errcnt++ > 10);
		}
	    }
	elsif (($cl > 0) && ($cltop[$cl] > $clbot[$cl - 1]))
	    {
	    $value = $cl + 1;
	    print RPT "Gap between layer $cl and layer $value,",
	      " $curmuid $seqnum -- extending layer $cl downwards\n";
	    $clbot[$cl - 1] = $cltop[$cl];
	    $stats{"layergap"}++;
	    }
	}

#  Extend component layer coverage to bottom of output layers

    if ($clbot[$ncl - 1] < $ltop[$nlayer])
	{
	$stats{"lextend"}++;	# Component layer extended downwards
	if ($clbot[$ncl - 1] >= $rdepm)
	    {
	    $cltop[$ncl] = $clbot[$ncl - 1];
	    $clbot[$ncl] = $ltop[$nlayer];
	    $clph[$ncl] = 0.0;
	    }
	else
	    {
	    $clbot[$ncl - 1] = $rdepm;
	    if ($ltop[$nlayer] > $rdepm)
		{
		$cltop[$ncl] = $clbot[$ncl - 1];
		$clbot[$ncl] = $ltop[$nlayer];
		$clph[$ncl++] = 0.0;
		}
	    }
	}
    
#  Get pH contributions to output layers

    $cl = 0;
    for ($layer = 0; $layer < $nlayer; $layer++)
	{
	$overlap = $lthick[$layer];
	for ( ; $clbot[$cl] < $ltop[$layer + 1]; $cl++)
	    {
	    $overlap -= ($ltop[$layer + 1] - $clbot[$cl]);
	    $lph[$layer]
	      += $comppct * $clph[$cl] * ($overlap / $lthick[$layer]); 
	    $lweight[$layer] += $comppct * ($overlap / $lthick[$layer])
	      if $clph[$cl] > 0.0; 
	    $overlap = $ltop[$layer + 1] - $clbot[$cl];
	    }
	$lph[$layer]
	  += $comppct * $clph[$cl] * ($overlap / $lthick[$layer]); 
	$lweight[$layer] += $comppct * ($overlap / $lthick[$layer])
	  if $clph[$cl] > 0.0; 
	}
    return 1;
      
    }		# End of subroutine bld_layers

#
#  Subroutine to output layer data for one mapunit
#

sub output_mu
    {
    return $muid if ($curmuid eq "");

#  Report problems and anomalies

    print RPT "Total COMPPCT for MUID $curmuid = $totpct\n"
      if (($totpct < 99) || ($totpct > 101 ));

#  Write record for this mapunit

    printf OUT "%-8s", $curmuid;
    for ($layer = 0; $layer < $nlayer; $layer++)
	{
	$lph = (($lph[$layer] > 0.0) && ($lweight[$layer] > 0.0)) ?
	  $lph[$layer]/$lweight[$layer] : 0.0;
	printf OUT "%5.1f",$lph;
	$lph[$layer] = 0.0;
	$lweight[$layer] = 0.0;
	}
    print OUT "\n";

#  Reinitialize accumlators not done above

    $totpct = 0;
   
    return $muid;
    }			#  End of subroutine output_mu

#
#  Subroutine to build single rock layer when no soil
#

sub set_allrock
    {
    if (($compname =~ /^\s*WATER\s*$/) || ($curmuid =~ /^[A-Z]{2}W/))
	{
	$nextcl = $ncl = 1;
	$cltop[0] = 0;
	$clbot[0] = $ltop[$nlayer];
	$clph[0] = 0.0;
	}
    
    elsif ($rdeph == 0)
	{
	print RPT "No layers for $curmuid $seqnum -- assuming",
	  " solid rock\n";
	$nextcl = $ncl = 1;
	$cltop[0] = 0;
	$clbot[0] = $ltop[$nlayer];
	$clph[0] = 0.0;
	}
    else
	{
	print RPT "MUID/SEQNUM mismatch -- ",
	  "compfile:  $curmuid $seqnum,	 ",
	  "layerfile:  $lmuid $lseqnum\n";
	die "Too many errors -- see report file $rptfile: "
	  if ($errcnt++ > 10);
	return 0;
	}
    return 1;
    }			#  End of subroutine set_allrock

#  End of script get_ph.pl
