#!/usr/bin/perl -w

use Data::Dumper;
use Carp::Assert;
use strict;

my($code, $pod);

use vars qw($RT_Num $RT_Desc %RT_Dict $Year $VERSION $This_Prog $Class
            @RT_Fields @Data_Dict);
$VERSION = '0.02';


$Class = 'Geo::TigerLine';
$This_Prog = $0;
$Year = 2003;


($RT_Num, $RT_Desc) = read_title();

my($outfile) = "../lib/Geo/TigerLine/Record/$RT_Num.pm";
open(CLASS, ">$outfile") || 
  die "Can't open $outfile for class generation:  $!";
select CLASS;

my @header = next_data_line();

%RT_Dict = ();
my @fields;
while(my @row = next_data_line()) {
    assert(@row == @header) if DEBUG;

    my %def = map { (lc $header[$_], $row[$_]) } 0..$#header;
    $def{field} = lc $def{field};
    $def{field} =~ s/\W/_/g;
    push @fields, \%def;
}

# Instead on relying on the input order, we look at where each field
# begins to order them.
@fields = sort { $a->{beg} <=> $b->{beg} } @fields;

# Check to make sure all characters are used and no fields overlap.
assert($fields[0]->{beg} == 1);
for my $fidx (1..$#fields) {
    my($last_field) = $fields[$fidx-1];
    my($field) = $fields[$fidx];

    assert($field->{beg} == $last_field->{end} + 1);
}

# Do some more data sanity checks
foreach my $def (@fields) {
    assert($def->{end} - $def->{beg} + 1 == $def->{len}) if DEBUG;

    assert($def->{bv} =~ /^[YN]/i) if DEBUG;

    assert($def->{type} =~ /^[AN]$/i) if DEBUG;

    assert($def->{fmt} =~ /^[LR]$/i) if DEBUG;

    assert($def->{field} =~ /\S/) if DEBUG;
}


# Generate a hash to represent our record definition.
@RT_Fields = map { lc $_->{field} } @fields;
@RT_Dict{@RT_Fields} = @fields;

for my $fidx (0..$#fields) {
    my $field = $fields[$fidx]->{field};
    $RT_Dict{$field}{fieldnum} = $fidx + 1;
}

# Class initialization code.
printf <<'CODE', $Class, $RT_Num, $Class, $Class, $Class, $VERSION;
package %s::Record::%s;

use strict;

use Carp::Assert;
use base qw(%s::Record::Parser %s::Record::Accessor
            %s::Record Class::Data::Inheritable);

use vars qw($VERSION);
$VERSION = '%s';

CODE


# Generate data fields as class data.
my $data_dict_dump   = Data::Dumper->Dump([\%RT_Dict], ['*Data_Dict']);
my $data_fields_dump = Data::Dumper->Dump([\@RT_Fields], 
                                          ['*Data_Fields']);

printf <<'CODE', $data_dict_dump, $data_fields_dump;

# Auto-generated data dictionary.
my %s

my %s

assert(keys %%Data_Dict == @Data_Fields);

# Turn the data dictionary into class data
__PACKAGE__->mk_classdata('Fields');
__PACKAGE__->mk_classdata('Dict');
__PACKAGE__->mk_classdata('Pack_Tmpl');

__PACKAGE__->Dict(\%%Data_Dict);
__PACKAGE__->Fields(\@Data_Fields);

# Generate a pack template for parsing and turn it into class data.
my $pack_tmpl = join ' ', map { "A$_" } map { $_->{len} } 
                                          @Data_Dict{@Data_Fields};
__PACKAGE__->Pack_Tmpl($pack_tmpl);

# Generate accessors for each data field
foreach my $def (@Data_Dict{@Data_Fields}) {
    __PACKAGE__->mk_accessor($def);
}


CODE


# NAME
printf <<'POD', $Class, $RT_Num, $Year, $RT_Desc, ;
=pod

=head1 NAME

%s::Record::%s - TIGER/Line %d %s

POD


# SYNOPSIS
printf <<'POD', $Class, $RT_Num, $Class, $RT_Num, $Class, $RT_Num, $Class, $RT_Num;
=head1 SYNOPSIS

  use %s::Record::%s;

  @records = %s::Record::%s->parse_file($fh);
  @records = %s::Record::%s->parse_file($fh, \&callback);

  $record = %s::Record::%s->new(\%%fields);

POD


# Accessor SYNOPSIS
foreach my $field (@RT_Dict{@RT_Fields}) {
    printf <<'POD', $field->{field};
  $record->%s();
POD
}


# DESCRIPTION
printf <<'POD', $RT_Num, $Year, $RT_Num, $This_Prog;


=head1 DESCRIPTION

This is a class representing record type %s of the TIGER/Line %d
census geographic database.  Each object is one record.  It also
contains methods to parse TIGER/Line record type %s files and turn them
into objects.

This is intended as an intermediate format between pulling the raw
data out of the simplistic TIGER/Line data files into something more
sophisticated (a process you should only have to do once).  As such,
it's not very fast, but its careful, easy to use and performs some
verifications on the data being read.

As this class is autogenerated by %s, think before you modify this
file.  It's OO, so consider sub-classing instead.


POD


# Accessor doc header.
printf <<'POD', $Year;
=head2 Accessors

These are simple get/set accessors for each field of a record
generated from the TIGER/Line %d data dictionary.  They perform some
data validation.

=over 4

POD


# Individual accessor docs.
my $accessor_POD = <<'POD';
=item B<%s>

    $data = $record->%s();
    $record->%s($data);

%s.  

Expects %s data of no more than %d characters.  $data %s be blank 
and should be %s justified.


POD

#'#
foreach my $field (@RT_Dict{@RT_Fields}) {
    my $name = $field->{field};
    my $type = $field->{type} eq 'A' ? 'alphanumeric' : 'numeric';
    my $can_blank = $field->{bv} eq 'Yes' ? 'can' : 'cannot';
    my $justification = $field->{fmt} eq 'R' ? 'right' : 'left';

    printf $accessor_POD, $name, $name, $name, 
                          $field->{description}, $type, $field->{len},
                          $can_blank, $justification;
}


printf <<'POD';

=back


POD


# Data dictionary docs
printf <<'POD', $Year, join '', map { "    $_" } @Data_Dict;
=head2 Data dictionary

This is the original TIGER/Line %d data dictionary from which this
class was generated.

%s


POD


# POD footer
printf <<'POD', $Class, $This_Prog;
=head1 AUTHOR

Michael G Schwern <schwern@pobox.com>

=head1 SEE ALSO

L<%s>, L<%s>

=cut

POD


# Code footer
printf <<'CODE';

return 'Honey flash!';
CODE



sub next_data_line {
    my $line;

    do { 
        $line = get_line();
        return unless defined $line;
    } until $line =~ /\S/;

    $line =~ s/^\s+//g;
    $line =~ s/\s+$//g;

    return split /\s+/, $line, 8;
}


sub read_title {
    my $title = get_line();
    chomp $title;
    my($num, $desc) = $title =~ /(\S+)\s+-\s+(.*?)\s*$/;

    return ($num, $desc);
}


sub get_line {
    my $line = <>;
    return unless defined $line;

    # Save the original data dictionary.
    push @Data_Dict, $line;

    chomp $line;

    return $line;
}
