#!/usr/bin/perl

use SIL::Shoe::Data;
use SIL::Shoe::Sort::unicode;
use SIL::Shoe::Sort::default;
use SIL::Shoe::Sort::shoe;
use SIL::Shoe::Sort::numeric;
#use Getopt::Std;
use Getopt::Long;
use XML::Parser;
use SIL::Shoe::Backend::html;
use SIL::Shoe::Backend::openoffice;
use Pod::Usage;

$VERSION="0.8";     # MJPH       9-APR-2007     Add <picture>
# $VERSION="0.7.1"    # MJPH         MAR-2007     Add <inline-map> tighten semantics 
# $VERSION="0.7";     # MJPH      11-DEC-2006     Add <map> and <foreach> and $var
# $VERSION="0.6.1";   # MJPH      26-JUN-2006     Fix indexing
# $VERSION="0.6";     # MJPH      21-JUN-2006     Add unique attribute
# $VERSION="0.5";     # MJPH      12-JUN-2006     Bottom to top rewrite with full nodes
# $VERSION="0.2";     #   MJPH    10-JUN-2006     Fix sorting
# $VERSION="0.1";     #   MJPH    25-MAY-2006     Original

#getopts('b:c:dhno:s:');
GetOptions('b=s', 'c=s', 'd', 'h', 'n', 'o=s', 's=s', 'v=s%');

my %inheritable = map {$_ => 1} qw(inline-map);

unless ((defined $ARGV[0] && defined $opt_c) || defined $opt_h)
{
    pod2usage(1);
    exit;
}

if ($opt_h)
{
    if ($opt_b)
    { pod2usage( -verbose => 2, -input => "SIL/Shoe/Backend/$opt_b.pm", -pathlist => \@INC); }
    else
    { pod2usage( -verbose => 2); }
    exit;
}


my (@layouts, %layouts, $currlayout, $groups, $curr, $currback);
my (%world);

$xml = XML::Parser->new(Handlers => {Start => sub {
        my ($xml, $tag, %attrs) = @_;
        
        if ($tag eq 'layout')
        { }
        elsif ($tag eq 'backend')
        {
            $currback = $attrs{'type'};
        }
        elsif ($tag eq 'property')
        {
            $world{'backprops'}{$currback}{$attrs{'name'}} = $attrs{'value'};
        }
        elsif ($tag eq "section")
        {
            $currlayout = SIL::dict::parser::layout->new(\%world, %attrs);
            push (@{$world{'layoutlist'}}, $currlayout);
            if ($attrs{'name'})
            {
                $world{'layouts'}{$attrs{'name'}} = $#{$world{'layoutlist'}};
                $world{'indices'}{"_$attrs{'name'}"} = $currlayout;
            }
            $curr = undef;
        }
        elsif ($tag eq "map")
        {
            $curr = SIL::dict::parser::map->new(\%world, %attrs);
            $world{'maps'}{$attrs{'name'}} = $curr;
        }
        elsif ($tag eq 'replace')
        {
            $xml->xpcroak("<replace> found not under parent <map>") unless ($curr);
            $curr->add_replacement(%attrs);
        }
        elsif ($tag eq 'inline-map')
        {
            $curr = SIL::dict::parser::inline_map->new(\%world, %attrs);
            $world{'inline-maps'}{$attrs{'name'}} = $curr;
        }
        elsif ($tag eq 'inline-style')
        {
            $curr->add_style(%attrs);
        }
        elsif ($tag eq "letter")
        {
            $curr = SIL::dict::parser::letter->new(\%world, %attrs);
            $currlayout->{'letters'}{$attrs{'name'} || 'primary'} = $curr;
        }
        elsif ($tag eq "group")
        {
            $next = SIL::dict::parser::group->new($curr, \%world, \%inheritable, $currlayout, %attrs);
            if ($curr)
            { $curr->add($next); }
            else
            { $currlayout->{'groups'}{$attrs{'name'}} = $next; }
            $curr = $next;
        }
        elsif ($tag eq 'switch')
        {
            my ($next) = SIL::dict::parser::switch->new($curr, \%world, \%inheritable, %attrs);
            $xml->xpcroak("<switch> needs parent, e.g. <group>") unless ($curr);
            $curr->add($next);
            $curr = $next;
        }
        elsif ($tag eq 'marker')
        {
            my ($next) = SIL::dict::parser::marker->new($curr, \%world, \%inheritable, %attrs);
            $xml->xpcroak("<marker> needs parent, e.g. <group>") unless ($curr);
            $curr->add($next);
            $curr = $next;
        }
        elsif ($tag eq 'use-group')
        {
            my ($next) = SIL::dict::parser::use->new($curr, \%world, \%inheritable, $currlayout, %attrs);
            $xml->xpcroak("<use-group> needs parent, e.g. <group>") unless ($curr);
            $curr->add($next);
            $curr = $next;
        }
        elsif ($tag eq 'foreach')
        {
            my ($next) = SIL::dict::parser::foreach->new($curr, \%world, \%inheritable, %attrs);
            $xml->xpcroak("<foreach> needs parent, e.g. <group>") unless ($curr);
            $curr->add($next);
            $curr = $next;
        }
        elsif ($tag eq 'output')
        {
            my ($next) = SIL::dict::parser::output->new($curr, \%world, \%inheritable, %attrs);
            $xml->xpcroak("<output> needs parent, e.g. <group>") unless ($curr);
            $curr->add($next);
            $curr = $next;
        }
        elsif ($tag eq 'index')
        {
            my ($next) = SIL::dict::parser::index->new($curr, \%world, \%inheritable, %attrs);
            $world{'indices'}{$attrs{'name'}} = $next;
            $curr = $next;
        }
        elsif ($tag eq 'debug')
        {
            my ($next) = SIL::dict::parser::debug->new($curr, \%world, \%inheritable, %attrs);
            $xml->xpcroak("<debug> needs parent, e.g. <group>") unless ($curr);
            $curr->add($next);
            $curr = $next;
        }
        elsif ($tag eq 'picture')
        {
            my ($next) = SIL::dict::parser::picture->new($curr, \%world, \%inheritable, %attrs);
            $xml->xpcroak("<picture> needs parent, e.g. <marker>") unless ($curr);
            $curr->add($next);
            $curr = $next;
        }
        else
        { $xml->xpcroak("Unknown marker $tag"); }
        }, End => sub {
        my ($xml, $tag) = @_;
        
        return if ($tag eq 'replace' || $tag eq 'inline-style');
        $curr = $curr->parent if ($curr);
        }});

$xml->parsefile($opt_c) if ($opt_c);

if ($opt_b)
{
    $opt_b = lc($opt_b);
    eval "require SIL::Shoe::Backend::$opt_b";
    die "Can't find backend for $opt_b" if ($@);
    $backend = "SIL::Shoe::Backend::$opt_b"->new($opt_o, $opt_s, $world{'backprops'}{$opt_b});
}
else
{ $backend = SIL::Shoe::Backend::html->new($opt_o, $opt_s, $world{'backprops'}{'html'}); }

%sorts = (
    '' => SIL::Shoe::Sort::default->new()
);

my (%ind_fn);

foreach $index (values %{$world{'indices'}})
{
    push (@{$ind_fn{$index->{'filename'}}}, $index);
    $index->{'index_num'} = $#{$ind_fn{$index->{'filename'}}};
}

foreach $index (keys %ind_fn)
{
    my ($mydb);
    $mydb = SIL::dict::parser::Shoebox->make_indices($index || $ARGV[0],
            map {[$_->get_keys]} @{$ind_fn{$index}});
    foreach (@{$ind_fn{$index}})
    { $_->{'db'} = $mydb; }
}

foreach $lay (@{$world{'layoutlist'}})
{
    my ($oldkey);
    my ($localdb, $newset);
    
    $backend->start_section($lay->{'type'}, $lay->{'name'});
    $lay->iter_keys($lay, sub {
        my ($lay, $nodeset, $key) = @_;

        $nodeset->{'nodes'}[0][4]{'.old'}[0] = $oldkey;
        $nodeset->{'nodes'}[0][4]{'.new'}[0] = $key;
        $oldkey = $key;
        $nodeset->{'vars'} = $opt_v;

        if (defined $lay->{'letters'}{'primary'})
        { $lay->{'letters'}{'primary'}->execute($nodeset, $backend); }

        if ($lay->{'groups'}{'main'})
        { $lay->{'groups'}{'main'}->execute($nodeset, $backend); }
    });
    $backend->end_section($lay->{'type'}, $lay->{'name'});
}

$backend->finish;

sub make_sort
{
    my ($str) = @_;
    my ($type) = split(/\|/, $str);

    if ($str =~ m/\.lng/oi)
    {
        $type = 'shoe';
    }
    eval "require SIL::Shoe::Sort::$type";
    die "Can't create sorting object $type" if ($@);
    return "SIL::Shoe::Sort::$type"->new($str);
}    

sub byMultiLevel
{
    my ($a, $b, @types) = @_;
    my (@x) = split("\000", $a);
    my (@y) = split("\000", $b);
    my ($i, $j, $l, $t, $res);
    
    for ($i = 0, $j = 0; $i < scalar @x; $i++, $j++)
    {
        $t = $types[$j];
        if (!$t)
        { $res = $x[$i] cmp $y[$i]; }
        elsif ($t->isa('SIL::Shoe::Sort::numeric'))
        { $res = $t->cmp($x[$i], $y[$i]); }
        elsif (($l = $t->num_fields) > 1)
        {
            $res = $t->cmp(join("\000", $x[$i .. $i + $l - 1]),
                join("\000", $y[$i .. $i + $l - 1]));
            $i += $l - 1;
        }
        else
        { $res = $t->cmp($x[$i], $y[$i]); }
        return $res if ($res != 0); 
    }
    return -1 if (scalar @y > scalar @x);
    return 0;
}


package SIL::dict::parser::nodeset;

# fields in a nodeset:
#   parent - parent nodeset that created this one
#   dbs    - array of hashes against fields listed in corresponding fields
#   fields - array of lists of fields in record order in dbs
#   nodes  - array of nodes which are [index into dbs & fields, first field,
#            last field + 1, %data an array for each field]
#   pos    - position of this nodeset in its parent (only in marker elements)
#            value is propagated except through index lookups
#   vars   - variables for looking up
sub new
{
    my ($class, @nodes) = @_;
    my ($self) = {nodes => [@nodes]};
    return bless $self, $class;
}

# elements in a node
#   0 - node index in set
#   1 - first field index
#   2 - last field index
#   3 - value of identifying attribute
#   4 - field values as arrays
sub append_node
{
    my ($self, @node) = @_;
    push (@{$self->{'nodes'}}, [@node]);
    return $#{$self->{'nodes'}};
}

sub append_record
{
    my ($self, $db, $fields) = @_;

    push (@{$self->{'dbs'}}, $db);
    push (@{$self->{'fields'}}, $fields);
    return $#{$self->{'dbs'}};
}

# subset a nodelist based on test then sort on keys, no parent reln setup
sub subset
{
    my ($self, $info) = @_;
    my ($res) = {%$self};

    if (defined $info->{'test'})
    {
        my ($i, $n);
        $res->{'nodes'} = [];
        foreach $n (@{$self->{'nodes'}})
        {
            my ($r) = eval("\$node = \$n; \$nset = \$self; \$pos = \$i; $info->{'test'}");
            push (@{$res->{'nodes'}}, $n) if ($r);
            $i++;
        }
        $res->{'count'} = scalar @{$res->{'nodes'}};
    }

    if (defined $info->{'sort'})
    {
        my ($nc, %sorted, @sort_types);
        foreach $n (@{$res->{'nodes'}})
        { push (@{$sorted{$n->[3]}}, $nc++); }

        foreach $k (split(' ', $self->{'sort'}))
        {
            $main::sorts{$k} = main::make_sort($k) unless (defined $main::sorts{$k});
            push (@sort_types, $main::sorts{$k});
        }
        $res->{'nodes'} = [map {@{$res->{'nodes'}}[@{$sorted{$_}}]} sort {main::byMultiLevel($a, $b, @sort_types)} keys %sorted];
    }
    return bless $res, ref $self;
}


sub make_nodeset
{
    my ($self, $info) = @_;
    my ($res);

    if (defined $info->{'index'})
    {
        my (@keys);
        foreach $n (@{$self->{'nodes'}})
        {
            my (@list);
            foreach $k (split(' ', $info->{'keys'}))
            {
                next unless (@{$n->[4]{$k}});
                foreach $v (@{$n->[4]{$k}})
                {
                    next unless ($v ne '' || $info->{'empty'});
                    if (scalar @list)
                    { foreach (@list) { push (@{$_}, $v); } }
                    else
                    { push (@list, [$v]); }
                }
            }
            push (@keys, map {join("\000", @{$_})} @list);
        }
        $res = $info->{'world'}{'indices'}{$info->{'index'}}->make_nodeset($self, $info, $info->{'unique'}, @keys);
        $res->{'count'} = scalar @{$res->{'nodes'}};
    }
    elsif (defined $info->{'tag'})
    {
        my (%sort, $i);

        $res = {%$self};
        $res->{'nodes'} = [];
        bless $res, ref $self;

        if (defined $info->{'keys'})
        { %sort = map {$_ => $i++} split(' ', $info->{'keys'}); }
        else
        { %sort = ($info->{'tag'} => 0); }

        foreach $n (@{$self->{'nodes'}})
        {
            my ($newn, @sortkey);
            for ($i = $n->[1]; $i < $n->[2]; $i++)
            {
                my ($s) = $self->{'fields'}[$n->[0]][$i];
                my ($v) = $self->{'dbs'}[$n->[0]]{$s};
                $s =~ s/\s.*$//o;
                if ($s eq $info->{'tag'} || !$info->{'tag'})
                {
                    if ($newn)
                    {
                        $newn->[2] = $i;
                        $newn->[3] = join("\000", @sortkey);
                        push (@{$res->{'nodes'}}, $newn);
                        undef $newn;
                    }
                    if ($v ne '' || $info->{'empty'} || defined $info->{'keys'})
                    {
                        $newn = [$n->[0], $i];
                        @sortkey = ();
                    }
                }
				elsif (defined $info->{'limit'} && $info->{'limit'} =~ m/(?:^|\s)$s(?:\s|$)/)
				{ last; }
                if (defined $newn)
                {
                    push (@{$newn->[4]{$s}}, $v);
                    $sortkey[$sort{$s}] = $v if (defined $sort{$s});        # takes the last value
                }
            }
            if ($newn)
            {
                $newn->[2] = $i;
                $newn->[3] = join("\000", @sortkey);
                push (@{$res->{'nodes'}}, $newn);
            }
        }
        $res->{'parent'} = $self;
        $res = $res->subset($info);
        $res->{'count'} = scalar @{$res->{'nodes'}};
    }
    elsif ($info->{'keys'})
    {
        $res = bless {%$self}, ref $self;
        $res->{'parent'} = $self;
        foreach $n (@{$res->{'nodes'}})
        { $n->[3] = join("\000", map {$n->[4]{$_}[0]} split(' ', $info->{'keys'})); }
    }
    else
    {
        $res = $self->subset($info);
        $res->{'parent'} = $self;
    }
    return $res;
}

sub expand
{
    my ($self, $str) = @_;

    $str =~ s/\$\{([^}]+)\}/$self->{'vars'}{$1}/oge;
    $str =~ s/\$(\S+)/$self->{'vars'}{$1}/oge;
    $str;
}

# returns (value, node, nodeset)
sub get_nodes
{
    my ($en, $ens, $str) = @_;
    my (@els) = split('/', $str);
    my ($index, $nisa);
    
    for ($i = 0; $i < scalar @els; $i++)
    {
        $e = $els[$i];
        if ($e =~ s/\[(\d*)\]$//o)
        { $index = $1; }
        else
        { $index = undef; }

        if ($e eq '..' && defined $ens)
        {
            $ens = $ens->{'parent'};
            return (undef, undef undef) unless ($ens);
            if (defined $index)
            {
                $en = $ens->{'nodes'}[$index];
                $nisa = 0;
            }
            else
            { 
                $en = $ens->{'nodes'};
                $nisa = 1;
            }
        }
        elsif ($e eq '.')
        { }
        elsif ($e)
        {
            $en = $en->[0] if ($nisa);
            if ($index)
            { return ($en->[4]{$e}[$index] || '', $en, $ens); }
            else
            { return ($en->[4]{$e} || '', $en, $ens); }
        }
    }
    $en = $en->[0] if ($nisa);
    return (undef, $en, $ens);
}

sub value
{
    my ($str, $ind) = @_;
    my ($lv, $ln, $ls) = get_nodes($node, $nset, $str);

    if (ref $lv eq 'ARRAY')
    { return $lv->[$ind]; }
    elsif (defined $lv)
    { return $lv; }
    elsif (ref $ln eq 'ARRAY')
    { return $ln->[3]; }
    elsif ($ln)
    { return $ln; }
    elsif ($ls)
    { return $ls->{'nodes'}[0][3]; }
    return undef;
}

sub count
{
    my ($str) = @_;
    my ($lv, $ln, $ls) = get_nodes($node, $nset, $str);

    return $ls->{'count'};
}

sub position
{
    my ($str) = @_;
    my ($lv, $ln, $ls) = get_nodes($node, $nset, $str);

#    return $pos if ($ls eq $nset);
    return $ls->{'pos'};
}

sub var
{
    return $nset->{'vars'}{$_[0]};
}

sub firstchar
{
    my ($str, $sort, $level) = @_;
    my ($res);
    
    $main::sorts{$sort} = main::make_sort($sort) unless (defined $main::sorts{$sort});
    return $main::sorts{$sort}->firstchar($str, $level);
}
        
sub split
{
    my ($str) = @_;

    return [CORE::split("\000", $str)];
}

sub join
{
    return CORE::join("\000", @_);
}

sub regexp
{
    my ($str, $re) = @_;
    my ($res);
    
    if ($str =~ m/$re/)
    { $res = $1; }
    return $res;
}

sub cmp
{
    my ($a, $b, $sort, $level) = @_;

    $main::sorts{$sort} = main::make_sort($sort) unless (defined $main::sorts{$sort});
    return $main::sorts{$sort}->cmp($a, $b, $level);
}

sub upper
{
    my ($str, $sort) = @_;
    
    $main::sorts{$sort} = main::make_sort($sort) unless (defined $main::sorts{$sort});
    return $main::sorts{$sort}->uppercase($str);
}
    
sub lower
{
    my ($str, $sort) = @_;
    
    $main::sorts{$sort} = main::make_sort($sort) unless (defined $main::sorts{$sort});
    return $main::sorts{$sort}->lowercase($str);
}

package SIL::dict::parser::group;

sub new
{
    my ($class, $parent, $world, $inheritable, $currlayout, %attrs) = @_;
    my ($self) = {parent => $parent, world => $world, %attrs};
    if ($parent)
    {
        foreach (keys %$inheritable)
        { $self->{$_} = $parent->{$_} unless (defined $self->{$_}); }
    }
    else
    {
        foreach (keys %$inheritable)
        { $self->{$_} = $currlayout->{$_} unless (defined $self->{$_}); }
    }
    return bless $self, $class;
}

sub add
{
    my ($self, $child) = @_;
    push (@{$self->{'children'}}, $child);
}

sub parent
{
    my ($self) = @_;
    
    return $self->{'parent'};
}

sub execute
{
    my ($self, $parent, $backend, %opts) = @_;
    my ($jobs) = $parent->make_nodeset($self);
    my ($res);

    return $self->{'neg'} unless (scalar @{$jobs->{'nodes'}});
    if (defined $self->{'paragraph'})
    { $backend->new_para($jobs->expand($self->{'paragraph'})); }

    if (defined $self->{'before'})
    { $backend->output($jobs->expand($self->{'before'})); }
    elsif ($opts{count} > 0 && defined $self->{'between'})
    { $backend->output($self->{'between'}); }
    
    if (defined $self->{'style'} && $self->{'style'} eq '')
    { $backend->output($jobs->{'nodes'}[0][3]); }
    elsif (defined $self->{'style'} && $self->{'style'})        # don't accidentally define it
    { $backend->char_style($jobs->expand($self->{'style'}), $jobs->{'nodes'}[0][3]); }
            
    foreach $c (@{$self->{'children'}})
    { $res |= $c->execute($jobs, $backend, %opts); }

    if (defined $self->{'after'})
    { $backend->output($jobs->expand($self->{'after'})); }
    return ($self->{'neg'} ? !$res : $res);
}

package SIL::dict::parser::switch;

use base 'SIL::dict::parser::group';

sub new
{
    my ($class, $parent, $world, $inheritable, %attrs) = @_;
    my ($self) = {parent => $parent, world => $world, %attrs};

    if ($parent)
    {
        foreach (keys %$inheritable)
        { $self->{$_} = $parent->{$_} unless (defined $self->{$_}); }
    }
    return bless $self, $class;
}

sub execute
{
    my ($self, $parent, $backend, %opts) = @_;
    my ($jobs) = $parent->make_nodeset($self);
    
    if (scalar @{$jobs->{'nodes'}})
    {
        foreach $c (@{$self->{'children'}})
        { return 1 if ($c->execute($jobs, $backend, %opts)); }
    }
    return 0;
}

package SIL::dict::parser::marker;

use base 'SIL::dict::parser::group';

sub new
{
    my ($class, $parent, $world, $inheritable, %attrs) = @_;
    my ($self) = {parent => $parent, world => $world, %attrs};

    if ($parent)
    {
        foreach (keys %$inheritable)
        { $self->{$_} = $parent->{$_} unless (defined $self->{$_}); }
    }
    return bless $self, $class;
}

sub execute
{
    my ($self, $parent, $backend, %opts) = @_;
    my ($jobs) = $parent->make_nodeset($self);
    my ($localjob) = bless {%{$jobs}}, ref $jobs;
    my ($i, $res, $c);

    return $self->{'neg'} unless (@{$jobs->{'nodes'}});

    my ($indent) = $jobs->expand($self->{'indent'});
    if ($indent eq 'tab')
    { $backend->output_tab; }
    elsif ($indent eq 'newline')
    { $backend->output_newline; }
    elsif ($indent eq 'none' || $indent eq '' && !defined $self->{'style'})
    { }
    else
    { $backend->output_space; }

    if (defined $self->{'before'})
    { $backend->output($jobs->expand($self->{'before'})); }

    for ($i = 0; $i < scalar @{$jobs->{'nodes'}}; $i++)
    {
        my ($j) = $jobs->{'nodes'}[$i];

        $localjob->{'nodes'} = [$j];
        $localjob->{'pos'} = $i;
        $res = 1;
        if (defined $self->{'paragraph'})
        { $backend->new_para($jobs->expand($self->{'paragraph'})); }
        
        if ($i > 0 && defined $self->{'between'})
        { $backend->output($jobs->expand($self->{'between'})); }
            
        if (defined $self->{'style'} && defined $self->{'inline-map'})
        {
            my ($map) = $self->{'world'}{'inline-maps'}{$jobs->expand($self->{'inline-map'})};
            $map->output($j->[3], $jobs->expand($self->{'style'}), $backend, $jobs, %opts);
        }
        elsif (defined $self->{'style'})
        { $backend->char_style($jobs->expand($self->{'style'}), $j->[3]); }
            
        foreach $c (@{$self->{'children'}})
        { $c->execute($localjob, $backend, %opts); }
            
    }

    if (defined $self->{'after'})
    { $backend->output($jobs->expand($self->{'after'})); }

    return ($self->{'neg'} ? !$res : $res);
}

package SIL::dict::parser::use;

use base 'SIL::dict::parser::group';

sub new
{
    my ($class, $parent, $world, $inheritable, $currlayout, %attrs) = @_;
    my ($self) = {parent => $parent, world => $world, layout => $currlayout, %attrs};

    if ($parent)
    {
        foreach (keys %$inheritable)
        { $self->{$_} = $parent->{$_} unless (defined $self->{$_}); }
    }
    return bless $self, $class;
}

sub execute
{
    my ($self, $parent, $backend, %opts) = @_;
    my ($jobs) = $parent->make_nodeset($self);
    my ($res);

    return 0 unless (scalar @{$jobs->{'nodes'}});
    return $self->{'layout'}{'groups'}{$self->{'name'}}->execute($jobs, $backend, %opts) if (defined $self->{'layout'}{'groups'}{$self->{'name'}});
    return 0;
}

package SIL::dict::parser::foreach;

use base 'SIL::dict::parser::group';

sub new
{
    my ($class, $parent, $world, $inheritable, %attrs) = @_;
    my ($self) = {parent => $parent, world => $world, %attrs};

    if ($parent)
    {
        foreach (keys %$inheritable)
        { $self->{$_} = $parent->{$_} unless (defined $self->{$_}); }
    }
    return bless $self, $class;
}

sub execute
{
    my ($self, $parent, $backend, %opts) = @_;
    my (@vars) = split(' ', $self->{'over'});
    my ($jobs, $v, $c);

    foreach $v (@vars)
    {
        $jobs = bless {%$parent}, SIL::dict::parser::nodeset;
        $jobs->{'vars'} = {%{$jobs->{'vars'}}};      # copy contents so don't pollute parent
        $jobs->{'vars'}{$self->{'var'}} = $v;

        foreach $c (@{$self->{'children'}})
        { $res |= $c->execute($jobs, $backend, %opts); }
    }
    return $res;
}

package SIL::dict::parser::output;

use base 'SIL::dict::parser::group';

sub new
{
    my ($class, $parent, $world, $inheritable, %attrs) = @_;
    my ($self) = {parent => $parent, world => $world, %attrs};

    if ($parent)
    {
        foreach (keys %$inheritable)
        { $self->{$_} = $parent->{$_} unless (defined $self->{$_}); }
    }
    return bless $self, $class;
}

sub execute
{
    my ($self, $parent, $backend, %opts) = @_;
    my ($mydb, $text);

    if (defined $self->{'test'} && !eval "\$SIL::dict::parser::nodeset::nset = \$parent; \$SIL::dict::parser::nodeset::node = \$parent->{'nodes'}[0]; package SIL::dict::parser::nodeset; $self->{'test'}")
    { return 0; }

    if (defined $self->{'text'})
    { $text = $parent->expand($self->{'text'}); }
    elsif (defined $self->{'value'})
    { $text = eval "\$SIL::dict::parser::nodeset::nset = \$parent; \$SIL::dict::parser::nodeset::node = \$parent->{'nodes'}[0]; package SIL::dict::parser::nodeset; $self->{'value'}"; }

    if (defined $self->{'paragraph'})
    { $backend->new_para($parent->expand($self->{'paragraph'})); }

    if ($opts{'count'} > 0 && defined $self->{'between'})
    { $backend->output($parent->expand($self->{'between'})); }

    my ($indent) = $parent->expand($self->{'indent'});
    if ($indent eq 'tab')
    { $backend->output_tab; }
    elsif ($indent eq 'newline')
    { $backend->output_newline; }
    elsif ($indent eq 'none')
    { }
    else
    { $backend->output_space; }

    if (defined $self->{'before'})
    { $backend->output($parent->expand($self->{'before'})); }

    if (defined $self->{'style'} && $self->{'style'})
    { $backend->char_style($parent->expand($self->{'style'}), $text); }
    elsif ($text)
    { $backend->output($text); }

    if (defined $self->{'after'})
    { $backend->output($parent->expand($self->{'after'})); }
    return 1;
}

package SIL::dict::parser::debug;

use base 'SIL::dict::parser::output';

sub new
{
    my ($class, $parent, $world, $inheritable, %attrs) = @_;
    my ($self) = {parent => $parent, world => $world, %attrs};

    if ($parent)
    {
        foreach (keys %$inheritable)
        { $self->{$_} = $parent->{$_} unless (defined $self->{$_}); }
    }
    return bless $self, $class;
}

sub execute
{
    my ($self, $parent, $backend, %opts) = @_;
    my ($mydb, $text);

    return 0 unless ($main::opt_d);

    if (defined $self->{'test'} && !eval "\$SIL::dict::parser::nodeset::nset = \$parent; \$SIL::dict::parser::nodeset::node = \$parent->{'nodes'}[0]; package SIL::dict::parser::nodeset; $self->{'test'}")
    { return 0; }

    if (defined $self->{'text'})
    { $text = $parent->expand($self->{'text'}); }
    elsif (defined $self->{'value'})
    { $text = eval "\$SIL::dict::parser::nodeset::nset = \$parent; \$SIL::dict::parser::nodeset::node = \$parent->{'nodes'}[0]; package SIL::dict::parser::nodeset; $self->{'value'}"; }

    if ($text)
    { print STDOUT "$text\n"; }
    return 0;
}

package SIL::dict::parser::picture;

use base 'SIL::dict::parser::group';

sub new
{
    my ($class, $parent, $world, $inheritable, %attrs) = @_;
    my ($self) = {parent => $parent, world => $world, %attrs};

    if ($parent)
    {
        foreach (keys %$inheritable)
        { $self->{$_} = $parent->{$_} unless (defined $self->{$_}); }
    }
    return bless $self, $class;
}

sub execute
{
    my ($self, $parent, $backend, %opts) = @_;
    my ($jobs) = $parent->make_nodeset($self);
    my ($res) = 1;

    return $self->{'neg'} unless (@{$jobs->{'nodes'}});

    foreach my $j (@{$jobs->{'nodes'}})
    {
        my ($fname) = $jobs->expand($self->{'path'}).$j->[3];
        if (-f $fname)
        { $backend->picture($jobs->expand($self->{'style'}), $fname, $jobs->expand($self->{'scale'}) || 1); }
        else
        {
            print STDERR "WARNING: Picture file $fname not found\n";
            $res = 0;
        }
    }
    return $self->{'neg'} ? !$res : $res;
}


package SIL::dict::parser::map;

sub new
{
    my ($class, $world, %attrs) = @_;
    my ($self) = {world => $world, %attrs};
    return bless $self, $class;
}

sub parent
{ return undef; }

sub add_replacement
{
    my ($self, %attrs) = @_;
    $self->{'maps'}{$attrs{'in'}} = $attrs{'out'};
}

package SIL::dict::parser::inline_map;

sub new
{
    my ($class, $world, %attrs) = @_;
    my ($self) = {world => $world, %attrs};
    return bless $self, $class;
}

sub parent
{ return undef; }

sub add_style
{
    my ($self, %attrs) = @_;

    $self->{'styles'}{$attrs{'begin'}} = {%attrs};
}

sub output
{
    my ($self, $text, $style, $backend, $parent, %opts) = @_;
    my ($match) = join("|", map {my ($m) = $_; $m =~ s/([[\]*$.?|()])/\\$1/og; "($m)(.*?)(?:$self->{'styles'}{$_}{'end'})"} keys %{$self->{'styles'}});

    while ($text =~ m/\G(.*?)(?:$match|$)/g)
    {
        if ($1)
        { $backend->char_style($style, $1); }

        for ($i = 2; $i < $#-; $i += 2)
        {
            next if ($-[$i] == $+[$i]);
            my ($begin) = substr($text, $-[$i], $+[$i] - $-[$i]);
            my ($val) = substr($text, $-[$i+1], $+[$i+1] - $-[$i+1]);
            my ($mstyle) = $parent->expand($self->{'styles'}{$begin}{'style'});
            $backend->char_style($mstyle, $val);
        }
    }
}

package SIL::dict::parser::letter;

sub new
{
    my ($class, $world, %attrs) = @_;
    my ($self) = {world => $world, %attrs};
    return bless $self, $class;
}

sub add
{
    my ($self, $child) = @_;
    push (@{$self->{'children'}}, $child);
}

sub parent
{
    my ($self) = @_;
    
    return $self->{'parent'};
}

sub execute
{
    my ($self, $nodes, $backend, %opts) = @_;
    my ($mydb, $res, $text);

    if (defined $self->{'test'})
    { $res = eval "\$SIL::dict::parser::nodeset::nset = \$nodes; \$SIL::dict::parser::nodeset::node = \$nodes->{'nodes'}[0]; package SIL::dict::parser::nodeset; $self->{'test'}"; }
    else
    { $res = 1; }

    if ($res)
    {
        $backend->start_letter();

        if (defined $self->{'text'})
        { $text = $nodes->expand($self->{'text'}); }
        elsif (defined $self->{'value'})
        { $text = eval "\$SIL::dict::parser::nodeset::nset = \$nodes; \$SIL::dict::parser::nodeset::node = \$nodes->{'nodes'}[0]; package SIL::dict::parser::nodeset; $self->{'value'}"; }

        if (defined $self->{'paragraph'})
        { $backend->new_para($nodes->expand($self->{'paragraph'})); }
        
        if (defined $self->{'before'})
        { $backend->output($nodes->expand($self->{'before'})); }

        if (defined $self->{'style'} && $self->{'style'})
        { $backend->char_style($nodes->expand($self->{'style'}), $text); }
        elsif ($text)
        { $backend->output($text); }

        foreach $c (@{$self->{'children'}})
        { $c->execute($nodes, $backend); }

        if (defined $self->{'after'})
        { $backend->output($nodes->expand($self->{'after'})); }

        $backend->end_letter();
        return 1;
    }
}

package SIL::dict::parser::index;

sub new
{
    my ($class, $curr, $world, $inheritable, %attrs) = @_;
    my ($self) = {parent => $curr, world => $world, %attrs};

    if ($parent)
    {
        foreach (keys %$inheritable)
        { $self->{$_} = $parent->{$_} unless (defined $self->{$_}); }
    }
    return bless $self, $class;
}

sub parent
{
    my ($self) = @_;
    return $self->{'parent'};
}

sub get_keys
{
    my ($self) = @_;

    return (split(' ', $self->{'keys'}));
}

sub iter_keys
{
    my ($self, $info, $code) = @_;
    my (@sort_types, @sorted, $i, $res);

    foreach $i (split(' ', $info->{'sort'}))
    {
        $main::sorts{$i} = main::make_sort($i) unless (defined $main::sorts{$i});
        push (@sort_types, $main::sorts{$i});
    }
    @sorted = sort {main::byMultiLevel($a, $b, @sort_types)} keys %{$self->{'db'}{'indices'}[$self->{'index_num'}]};
    
    foreach $i (@sorted)
    {
        my ($nodeset) = $self->make_nodeset(undef, $info, $self->{'unique'}, $i);
        next unless (scalar @{$nodeset->{'nodes'}});        # skip if empty
        &{$code}($info, $nodeset, $i);
        $res = 1;
    }
    return $res;
}

# makes a base nodeset that is passed for filtering and ordering
sub make_nodeset
{
    my ($self, $parent, $info, $unique, @keys) = @_;
    my ($nodeset) = SIL::dict::parser::nodeset->new;
    my ($nodecount, $k, $i, $f, $res);

    $nodeset->{'parent'} = $parent;
    $nodeset->{'vars'} = $parent->{'vars'} if ($parent);

    foreach $k (@keys)
    {
        next if (!$info->{'empty'} && $k eq '');
        my (@inds) = @{$self->{'db'}{'indices'}[$self->{'index_num'}]{$k}};
        if ($unique)
        {
            my (%ind_unique) = map {$_ => 1} @inds;
            @inds = sort {$a <=> $b} keys %ind_unique;
        }
        foreach $i (@inds)
        {
            my (%data, %db, $keyval);
            my ($fields) = $self->{'db'}->readrecord($i);

            foreach $f (@$fields)
            {
                my ($t) = $f;
                my ($n) = $1 if ($t =~ s/\s(\d+)$//o);
                my ($dat) = $self->{'db'}->getfield($f);
                $keyval = $dat unless (defined $keyval);
                $t = $self->{'world'}{'maps'}{$self->{'map'}}{'maps'}{$t}
                        if (defined $self->{'map'} && defined $self->{'world'}{'maps'}{$self->{'map'}} && defined $self->{'world'}{'maps'}{$self->{'map'}}{$t});
                $f = ($n ne '') ? "$t $n" : $t;

                push (@{$data{$t}}, $dat);
                $db{$f} = $dat;
            }
            $nodeset->append_record(\%db, $fields);
            $nodeset->append_node($nodecount, 0, scalar @{$fields}, $k, \%data);
            $nodecount++;
        }
    }

    $res = $nodeset->subset($info);
    $res;
}


package SIL::dict::parser::layout;

use base 'SIL::dict::parser::index';

sub new
{
    my ($class, $world, %attrs) = @_;
    my ($self) = {world => $world, %attrs};
    return bless $self, $class;
}

package SIL::dict::parser::Shoebox;

use SIL::Shoe::Data;

sub make_indices
{
    my ($class, $filename, @indices) = @_;
    my ($db) = SIL::Shoe::Data->new($filename, undef, unicode => 1) || die "Can't open database $filename";
    my ($self) = {'db' => $db};

    foreach $i (@indices)       # this is the slow way for now
    {
#        $attrs = shift(@{$i});
        $db->index(@{$i});
#        delete $db->{' index'}{''} if ($attrs->{'noempty'});
        push (@{$self->{'indices'}}, $db->{' index'});
    }
    return bless $self, $class;
}

sub readrecord
{
    my ($self, $offset) = @_;
    my (@flist);

#    return if (defined $offset && $self->{'db'}{' loc'} == $offset);
    $self->{'db'}->readrecord(\@flist, $offset || 0);
    $self->{'db'}{' loc'} = $offset;
    return [@flist];
}

sub getfield
{
    my ($self, $f) = @_;

    return $self->{'db'}{$f};
}


__END__

=head1 TITLE

shlex - typesets a dictionary

=head1 SYNOPSIS

  shlex -c config.xml [-o outfile] [-s style_info] [-b backend] [-n] [-d] infile
  shlex -h

Typesets an sfm dictionary according to a configuration to generate output in
a given format.

=head1 OPTIONS

  -b backend        Which backend module to use [html]
  -c config.xml     Configuration file for controlling the typesetting
  -h                Give full help
  -n                Sort keys by line number of first occurrence (i.e. no sort)
  -o outfile        Where to store the output (or stdout)
  -s style_info     Backend specific configuration
  -v var=val        Assign values

=head1 DESCRIPTION

This documentation is aimed at programmers rather than providing a tutorial.
I'm too close to the code to write the tutorial at this time.

The best way to really control shlex is to have some kind of understanding of
how it works even if the understanding is somewhat high level.

An shlex configuration consists of 2 parts, parameters to be passed to the
backend, and a sequence of sections. Each section corresponds to a major
section in the dictionary be it the main dictionary output or an index of some
kind such as a reverse finder or semantic domain based index. What a section
does is to sort the dictionary according to the sort keys given and then
to process each nodeset of records with a given key by passing it to a group
in the section called C<main>.

shlex is basically a nodeset processor. It thinks in terms of nodesets. Nodesets
are a concept borrowed from XML. For an sfm style database a nodeset can be
thought of as a set of ranges. Each range is a contiguous sequence of fields
within a record, for example the whole record or all of the fields from one
marker up to but not including the next occurrence of a field with the same
marker. A set of ranges can be collected from different records (in fact
very often each range is from a different record). shlex works with such node
or range sets by passing them from parent element to child element according
to the type of element involved.

A C<group> element takes a nodeset and passes it to each of its children in turn.
There are 3 other elements that take nodesets in the way a C<group> element
does. The C<marker> element iterates through its nodeset and passes each node
to each of its child elements in turn. The C<switch> element passes its
nodeset to each of its children in turn and stops when one of the children
returns true. This is useful for alternation.

The nodeset that is passed can be filtered using the C<test> attribute. It can
also be sorted on fields within each node. But this is true for any of the
nodeset generating elements.

The configuration file is an XML file that controls the typesetting of each
dictionary entry. The language consist of 4 key elements.

=over 4

=item group

Each group is a sequence of elements which are processed in order to output
information. Each group is named and processing starts by executing all the
elements in the group named C<main>. A group is executed over a range of
markers, which starts out being all the markers in the record.

=item use-group

Executes another group using the same range of markers that is active for the
use-group element.

=item marker

A marker is used to match all occurrences of the given sfm (in the tag attribute)
within the parent range of markers. For each marker that matches an optional test
is run and if the test passes then if a paragraph attribute is given a new
paragraph is started with the given style. Then any before text is output in the
default style of the paragraph. Then if a style attribute exists, then the contents
of the field are output using that character style. Then any children are output
as if the marker were a group. Then the after text is output using the default
paragraph style. Finally C<use-group> passes its nodeset to another named group in
the section.

An element may assemble its nodeset from 3 sources. It can use the nodeset it has
been passed, in which case it can filter it using the C<test> attribute and sort it
on any fields in each node (the last field occurring is used). It can make a set
of subnodes by taking a marker in each node and assembling a nodeset for that node
which is the field with that marker up to the next field with the same marker
which starts a new subnode. Finally all the subnodes from all the nodes in the
set are joined into one big nodeset which again can be filtered and sorted.
Finally, a nodeset may be assembled by looking up records against a particular
set of fields in the incoming nodeset. Each node is searched for the first occurrence
of each field in the C<keys> attribute and a search key is created. This is
looked up in an index and the resulting nodes added to the output nodeset.
The output nodeset can be filtered and sorted.

=item picture

Outputs a picture whose filename (relative or absolute) is given by the value of
the node. One picture is output for each node in the nodeset.

=item output

Outputs the given C<text> attribute or the calculated C<value> attribute if the
optional C<test> attribute is passed. Returns true if the test is passed.

=item debug

This is much like an output element but its output is sent to STDOUT for
debugging purposes, when the option C<-d> is used on the command line.

=item inline-map

It is possible for some fields to contain style markup, for example:

 \nt Rarely does |fv{fruble} interact with this word
 
The inline-map element contains a list of such style markup elements
and then can be applied to a particular field as it is output.

=item inline-style

This element occurs within an inline-map and maps a particular style
markup to an output style.

=back

=head2 Indices

Indices are named globally regardless of where they are defined. Each section also
creates an index with the name of the section prepended with an underscore C<_>.
An index may be used to access other files than the input file. A section works
by iterating over the keys in the index creating a nodeset from all the records
that match that key. It then passes that nodeset to the C<letter>
element named C<primary> and then on to the C<group> element named C<main>.

The C<letter> element is used to control the section headings used for character
change headings and groupings.

=head2 Expression functions

The test attribute is a perl expression that can be used to further constrain the
marker used. The following functions are provided. Remember that since this is
a perl expression even node paths will need to be marked as strings,

=over 8

=item value(nodeset|field, [index])

This takes a string and an optional index. Each nodeset knows about its parent
so it is possible to track back through the nodesets to one corresponding
to a particular ancestral element in the configuration. In addition, each
node in a nodeset has a string associated with it which is considered the
value of the node. A nodeset is considered as an array of nodes and by default
the first node in a nodeset is used when a string is required.

In addition it is possible to query the fields in the node being processed
or filtered. Note it is not possible to query the fields in any other
nodes. Each field is considered to be an array of all fields with that name.
Again the default index is 0.

=item count(nodeset|field)

Returns the number of nodes or fields in the referenced array of nodes or
fields.

=item position(nodeset)

The C<marker> element iterates over its output nodeset. It is possible for a
child to query the iteration number it is being executed within a C<marker>'s
nodeset. All other elements just take on the position of their parent.

=item firstchar(string, sort, level, ignore)

This takes a string and finds the first character in it. It uses sort principles
of including characters at a given level and ignoring or including characters
of a lower level. Thus if a is a primary character, ' a secondary one and * a tertiary one

    firstchar("a*'", "unicode", 0, 0) -> "a"   ("*", "'")
    firstchar("a*'", "unicode", 1, 0) -> "a"   ("*", "'")
    firstchar("a*'", "unicode", 0, 1) -> "a"   ()
    firstchar("a*'", "unicode", 1, 1) -> "a'"  ()

The sort parameter specifies a sort type to use be it C<numeric>, C<unicode>,
nothing or some other. See the section on sorting.

=item split(string)

This returns an array. Keys made from multiple fields are joined using a null
character. This function splits them up.

=item join(string, [string, ...])

Joins elements together to form a splittable string.

=item regexp(string, regexp)

This runs the regexp over the string returning the first group ($1)

=item cmp(string, string, sort, level)

Compares two strings using a given sort order at a given level. Returns
1 if the first string is 'greater' than the second string; 0 if they are
equal and -1 otherwise.

=item lower(string)

Returns the lowercase form of string

=item upper(string)

Returns the uppercase form of string

=back

=head2 Attributes

Here we list all the key attributes and what they do and mean

=over 8

=item after

The contents of this attribute are output once after all iterations over the
nodeset.

=item before

The contents of this attribute are output once before the nodeset is processed.

=item begin

Occurs in an inline-style element and gives the identifying string that starts
a run in this particular style.

=item between

This is output only before the C<before> and C<indent> attributes if this is
not the first node being processed.

=item empty

By default if the contents of the field that defines the start of a node is empty
then no node is generated, unless this attribute is non-zero. If an empty nodeset
is being processed, then no action occurs. This is for skipping empty fields.

=item end

Occurs in an inline-style element and gives the identifying string that closes
a run in this particular style. Runs may be nested and the end attribute does
not have to be unique. The end attribute may not occur as an initial substring
of any begin attribute in any inline-style in the parent inline-map.

=item filename

Indexes (including sections) may draw their index from another file than the
input file. The filename is the file to use in the index and all access to
that index will be drawn from that file.

=item indent

Specifies how much indent to insert before any C<before> or output. No indent
occurs at the beginning of a paragraph. Values supported are:

=over 8

=item newline

Inserts a line break (not a paragraph break).

=item none

Don't output any space

=item space

Output a space. This is the default

=item tab

Output a tab.

=back

Note that if there is no output (i.e. C<style> isn't defined) then the default
behaviour for C<indent> becomes C<none>.

=item index

Assembles the output nodeset by assembling lookup C<keys> from the input nodeset
and then using these to create an output nodeset according to C<sort>.

=item inline-map

This attribute indicates that the given inline-map should be used to map style
markup to character styles for any field data processed by this element. Notice
that the inline-map attribute is inherited by all this elements children.

=item keys

Keys have two uses in shlex.

They specify the fields to use when creating an index key. In an C<index> element
it specifies the fields to use in a record, and all combinations are indexed. 

In a processing element it specifies the fields to use from the input nodeset when
constructing the value of a node. Node values are used for sorting the nodes in
a nodeset. Note that only the first occurence of each field is used.

=item limit

If we consider a record might contain subrecords, a field may occur both in the
record and the subrecords and it is difficult to not pick up the subrecord fields
when processing the main record. This attribute allows a tag selection to stop
collecting elements in a node when a particular marker is hit. The attribute is
a space separated list of markers to stop processing at.

=item map

Used in conjunction with an index lookup. This specifies a conversion map to map
from input markers to markers to be used inside shlex.

=item name

Used to name elements, for example groups and indexes, so that they can be
referenced again in use-group elements for example.

=item neg

Negates the result of a processing element. A processing element is considered to
be true if it creates a non-empty nodeset and that the processing of any
children according to that nodeset results in at least one true result. This
attribute is useful in a C<switch> element to say: if the C<test> is true then
continue processing the other children, otherwise stop.

=item paragraph

Specifies that this element starts a new paragraph of the given style. Only one
paragraph is started for one element.

=item path

This value is inserted before a picture filename to locate the actual picture
file. A warning is given if the file is not found. Since the value is simply
prepended onto the picture filename, a trailing / is required (or whatever is
appropriate for your OS, note on Windows that / are easier to use).

=item scale

This gives a floating point multiplier for the size of an image inserted into
the document from a picture element. The size of the image is taken as its
pixel size in pts (i.e. presuming 72dpi). Use scale to linearly scale to account
for a difference between this and what the real resolution is.

=item sort

Specifies a list of sorting algorithms one for each of the fields specified in
the C<keys> attribute. This attribute is not needed in an C<index> element, since
sorting is only done on nodes in a nodeset not on indexes which are merely for
looking things up. See the section on sorting for more information.

=item style

Specifies a character style to use to output the C<value()> of a particular node
in a nodeset. In the case of C<group> this is the first node of the nodeset. For
C<marker>s each node in the set causes some output. C<style> may be empty in
which case the text is output using the underlying paragraph style.

=item tag

Specifies a field marker to find within each node from which a new sub node is
created. If C<tag> is defined, but empty, the match will occur on any field marker.
This results in a nodeset consisting of nodes containing only one field.

=item test

This is a perl expression that is used to filter a nodeset. It makes use of
the expression functions and is tested for non-zero being considered as the
test passing.

=item text

Specifies text to be output. The text is not evaluated and is output unchanged.

=item unique

When processing an index, only add a record to a nodeset once. I.e. don't have
the same record in the nodeset more than once.

=item value

Specifies an expression to be evaluated and output as text.

=back

=head2 Sorting

One of the most complex issues when creating a dictionary is to ensure that the
dictionary is sorted correctly in all the different areas where sorting occurs.
For this reason shlex supports a relatively powerful array of sorting options.
Each of the different support sorting algorithms are listed here.

Sort algorithms are also used for tokenizing particularly in the C<firstchar>
function.

=over 8

=item default

This is the default sorting option and uses perl's implicit binary cmp function.
The firstchar is simply the first character of the string.

=item numeric

Does a numeric comparison treating the value as numbers rather than strings. It
will also handle strings of the form: x.y.z... as an array of numbers to be
compared. firstchar returns the whole string up to the first .

=item unicode

This uses L<Unicode::Collate> to compare strings and comparison can be done
at different sorting levels. For this the level should be specified after the
unicode as in C<unicode|2>. firstchar takes sorting levels into account.

=item language.lng|order

This sort method uses a Shoebox .lng file and an optional specific order
within that langauge file. If no order is specified then the default sort order
is used.

=back


=head2 Configuration DTD

A DTD for the configuration file is:

  <!ELEMENT layout (backend*, map*, index*, inline-map* section+)>

  <!ELEMENT backend (property*)>
  <!ATTLIST backend
        type        CDATA #REQUIRED>
        
  <!ELEMENT inline-map (inline-style+)>
  <!ATTLIST inline-map
        name        CDATA #REQUIRED>
        
  <!ELEMENT inline-style>
  <!ATTLIST inline-style
        begin       CDATA #REQUIRED
        end         CDATA #REQUIRED
        style       CDATA #REQUIRED>

  <!ELEMENT property>
  <!ATTLIST property
        name        CDATA #REQUIRED
        value       CDATA #REQUIRED>

  <!ELEMENT section ((index | letter)*, group+)>
  <!ATTLIST section
        type        CDATA #REQUIRED
        name        CDATA #IMPLIED
        keys        CDATA #REQUIRED
        sort        CDATA #IMPLIED
        unique      CDATA #IMPLIED
        test        CDATA #IMPLIED>

  <!ELEMENT letter (output | group | marker | switch | use-group | debug | foreach)*>
  <!ATTLIST letter
        name        CDATA #REQUIRED
        test        CDATA #REQUIRED
        value       CDATA #IMPLIED
        text        CDATA #IMPLIED
        paragraph   CDATA #IMPLIED
        before      CDATA #IMPLIED
        style       CDATA #IMPLIED
        after       CDATA #IMPLIED>
        
  <!ELEMENT group (group | use-group | marker | switch | switch | debug | foreach)+>
  <!ATTLIST group
        name        CDATA #REQUIRED
        paragraph   CDATA #IMPLIED
        before      CDATA #IMPLIED
        between     CDATA #IMPLIED
        style       CDATA #IMPLIED
        after       CDATA #IMPLIED
        index       CDATA #IMPLIED
        keys        CDATA #IMPLIED
        sort        CDATA #IMPLIED
        unique      CDATA #IMPLIED
        test        CDATA #IMPLIED
        inline-map  CDATA #IMPLIED
        tag         CDATA #IMPLIED>

  <!ELEMENT use-group>
  <!ATTLIST use-group
        name        CDATA #REQUIRED
        text        CDATA #IMPLIED
        index       CDATA #IMPLIED
        keys        CDATA #IMPLIED
        sort        CDATA #IMPLIED
        unique      CDATA #IMPLIED
        test        CDATA #IMPLIED
        inline-map  CDATA #IMPLIED
        tag         CDATA #IMPLIED>

  <!ELEMENT marker (use-group | marker | switch | group | output | debug | foreach)*>
  <!ATTLIST marker
        tag         CDATA #REQUIRED
        test        CDATA #IMPLIED
        paragraph   CDATA #IMPLIED
        indent      CDATA #IMPLIED
        before      CDATA #IMPLIED
        between     CDATA #IMPLIED
        style       CDATA #IMPLIED
        after       CDATA #IMPLIED
        index       CDATA #IMPLIED
        keys        CDATA #IMPLIED
        sort        CDATA #IMPLIED
        unique      CDATA #IMPLIED
        inline-map  CDATA #IMPLIED
        neg         CDATA #IMPLIED>

  <!ELEMENT switch (switch | marker | group | output | debug | use-group | foreach)+>
  <!ATTLIST switch
        tag         CDATA #REQUIRED
        test        CDATA #IMPLIED
        index       CDATA #IMPLIED
        keys        CDATA #IMPLIED
        sort        CDATA #IMPLIED
        unique      CDATA #IMPLIED
        inline-map  CDATA #IMPLIED
        neg         CDATA #IMPLIED>
  
  <!ATTLIST foreach (switch | marker | group | output | debug | use-group | foreach)+>
  <!ATTLIST foreach
        var         CDATA #REQUIRED
        inline-map  CDATA #IMPLIED
        over        CDATA #REQUIRED>

  <!ELEMENT index>
  <!ATTLIST index
        filename    CDATA #IMPLIED
        name        CDATA #REQUIRED
        keys        CDATA #REQUIRED>

  <!ELEMENT map (replace*)>
  <!ATTLIST map
        name        CDATA #REQUIRED>

  <!ELEMENT replace>
  <!ATTLIST replace
        in          CDATA #REQUIRED
        out         CDATA #REQUIRED>

  <!ELEMENT output>
  <!ATTLIST
        style       CDATA #IMPLIED
        test        CDATA #IMPLIED
        text        CDATA #IMPLIED
        value       CDATA #IMPLIED
        paragraph   CDATA #IMPLIED
        style       CDATA #IMPLIED
        before      CDATA #IMPLIED
        after       CDATA #IMPLIED
        inline-map  CDATA #IMPLIED
        indent      CDATA #IMPLIED>

  <!ELEMENT debug>
  <!ATTLIST
        test        CDATA #IMPLIED
        text        CDATA #IMPLIED
        value       CDATA #IMPLIED>
  
  <!ELEMENT picture>
  <!ATTLIST
        path        CDATA #IMPLIED
        scale       CDATA #IMPLIED
        tag         CDATA #IMPLIED
        test        CDATA #IMPLIED
        index       CDATA #IMPLIED
        keys        CDATA #IMPLIED
        sort        CDATA #IMPLIED
        unique      CDATA #IMPLIED
        neg         CDATA #IMPLIED>
        
=head2 Limitations

Here are some things that this program won't do.

=over 8

=item .

Add or change any fields in the database. If you need to munge your data before
processing, e.g. splitting up fields, then this should be done before running
shlex.

=item .

This program presumes the data it is processing is in Unicode even if it isn't.
I.e. if you want to work with legacy encoded data then convert it to Unicode say
as codepage 1252 and then work with it in that way.

=back

=head1 TODO

=over 8

=item .

Add system locale based sorting and configuration

=item .

Add chinese sorting (that takes two fields) when I get a chinese cmp module

=item .

The documentation is poor and rushed.

=back

=begin comment

Some thoughts on integrating XML based databases into this system.

Rather than forcing the XML into a particular element naming scheme, one
approach would be to map the fieldname queries to XML values using xpath.
So there would need to be a description in another file to map from
the fieldnames used above into xpath in the XML file. Each xpath would be
relative to the record node.

Probably the XML file would not be read into memory in its entirety, so
each record would be read in as needed (with the record being held completely
under one node, which would be indexed). It may be that we could read the
whole database into memory in which case a more relational record structure
would be possible with non-local nodes appearing in the record. But for
simplicity a record would still have to map to an element rather than an
attribute say.

This all would require some major restructuring of the code to account for the
fact that a tag test is simply an xpath lookup to get a nodeset and so on.

=end comment
