#!/usr/bin/env perl
# Implements xgettext for Log::Report only, using
# Log::Report::Extract::PerlPPI for Perl sources and
# Log::Report::Template::Extract for templates.
# The options resemble those of GNU's xgettext.

use warnings;
use strict;

use Log::Report 'log-report';

use Getopt::Long qw/:config no_ignore_case bundling/;
use File::Find   qw/find/;
use Pod::Usage   qw/pod2usage/;

# Settings filled in from the command-line.  Only the ones with a sensible
# fixed default are initialized here.
my ($from, $output, $fn_match, $template, $default_domain, $mode);
my %configs;
my $lang     = 'perl';
my $char_in  = 'utf-8';
my $char_out = 'utf-8';
my $cleanup  = 1;
my $help     = 0;
my $version  = 0;

# --mode, --verbose and the stackable -v all write into the same $mode.
# NOTE(review): as declared, each --config seems to accept a single
# domain=filename pair; the manual also shows two pairs behind one
# --config -- confirm that form really works.
GetOptions(
    'cleanup!'        => \$cleanup,         # --no-cleanup keeps records of unseen files
    'config|c=s'      => \%configs,         # domain=filename configurations
    'domain|d=s'      => \$default_domain,  # text-domain for templates
    'files-from|f=s'  => \$from,            # file with filenames (MANIFEST?) or '-'
    'files-match|m=s' => \$fn_match,        # regex selecting the filenames to scan
    'from-code=s'     => \$char_in,
    'help|h'          => \$help,
    'language|L=s'    => \$lang,
    'mode=s'          => \$mode,
    'output-dir|p=s'  => \$output,
    'template|t=s'    => \$template,        # pattern in ::Template
    'to-code=s'       => \$char_out,        # missing in xgettext?
    'verbose=i'       => \$mode,
    'version|V'       => \$version,
    'v+'              => \$mode,
) or pod2usage(1);

# --version: print the Log::Report release number and stop.
if($version)
{   print "Log::Report $Log::Report::VERSION\n";
    exit 0;
}

# --help: let Pod::Usage print the usage message and exit with status 0.
# The imported function is pod2usage(); the former call to podusage() died
# with "Undefined subroutine" as soon as --help was given.
pod2usage(0) if $help;

# Domain configurations (which may, for instance, define context_rules)
# are registered globally inside Log::Report, so loading them here is all
# that is needed.  They are passed as
#    --config domain1=filename domain2=filename
#    --config domain1=filename --config domain2=filename

for my $domain (keys %configs)
{   my $fn = $configs{$domain};
    trace "configuring domain $domain from $fn";
    textdomain $domain, config => $fn;
}

# Send all messages to stderr, in the mode selected on the command-line.
# The format callback returns its first argument unchanged.
my $passthrough = sub {shift};
dispatcher FILE => 'stderr', to => \*STDERR
  , mode => $mode, format => $passthrough;

# the dispatcher named 'default' is replaced by the one above
dispatcher close => 'default';

# Check the options before any work is done.

# Without --template, only Perl sources can be scanned.
unless($template || $lang eq 'perl')
{   error __x"programming language {lang} not supported", lang => $lang;
}

unless(defined $output)
{   error __"explicit output directory (-p) required";
}

# Create the output directory when it does not exist yet.
unless(-d $output or mkdir $output)
{   fault __x"cannot create output directory {dir}", dir => $output;
}

# Collect the names of the files to be scanned: either from a list
# (--files-from, where '-' means STDIN), or by walking the files and
# directories named on the command-line.  Combining both is refused.
my @filenames;
if(defined $from)
{   !@ARGV
        or error __x"do not combine command-line filenames with --files-from";

    if($from eq '-')
    {   @filenames = <STDIN>;
    }
    else
    {   # A lexical handle, so no global FILENAMES symbol is created.
        open my $list, '<:raw', $from
            or fault __x"cannot read filename list from {fn}", fn => $from;

        @filenames = <$list>;
        close $list;
    }
    chomp(@filenames);
}
elsif(@ARGV)
{   # only plain files are collected, with their full path
    find sub { push @filenames, $File::Find::name if -f }, @ARGV;
}
else
{   # marked translatable, like all other messages of this script
    error __"give --files-from or directories to be processed";
}

# Select the extractor: the template extractor when --template is given,
# otherwise the Perl (PPI based) extractor.  The extractor module is only
# loaded when it is needed.  A module which cannot be loaded is reported
# as an 'error' in both cases: it is an installation problem, not an
# internal bug of this script.
my $extr;
my %processed;    # names of the files which were really scanned

if($template)
{   # process from template toolkit
    eval { require Log::Report::Template::Extract };
    error $@ if $@;

    $default_domain
        or error __x"specify a text-domain (-d) for the templates";

    $extr = Log::Report::Template::Extract->new
      ( lexicon => $output
      , charset => $char_out
      , domain  => $default_domain
      , pattern => $template
      );

    $fn_match ||= qr/\.tt2?$/i;
}
else
{   # process the pm files
    eval { require Log::Report::Extract::PerlPPI };
    error $@ if $@;

    $extr = Log::Report::Extract::PerlPPI->new
      ( lexicon => $output
      , charset => $char_out
      );

    $fn_match ||= qr/\.p[lm]$/i;
}

# One scan loop for both extractors; only the skip message differs.  The
# msgids stay literal strings, so they can still be extracted themselves.
foreach my $filename (@filenames)
{   unless($filename =~ $fn_match)
    {   if($template)
        {   info __x"skipping (not a template) {fn}", fn => $filename;
        }
        else
        {   info __x"skipping (not perl) {fn}", fn => $filename;
        }
        next;
    }

    $extr->process($filename, charset => $char_in);
    $processed{$filename}++;
}

# Remove the PO records of files which were not scanned in this run,
# unless --no-cleanup was given.  The setting is reported as a trace
# message, where it used to be an unconditional debugging warn().
trace "cleanup of unprocessed files: ". ($cleanup ? 'enabled' : 'disabled');
$extr->cleanup(keep => \%processed)
    if $cleanup;

$extr->showStats;
$extr->write;

__END__

=head1 NAME

xgettext-perl - extract translatable strings

=head1 SYNOPSIS

  xgettext-perl [GENERIC OPTIONS][SCRIPT   OPTIONS] directories
  xgettext-perl [GENERIC OPTIONS][TEMPLATE OPTIONS] directories

  GENERIC OPTIONS
  --config      -c %config    domain configuration
  --files-from  -f $filename  source of filenames to be processed
  --from-code      $charset   used by input files (default utf-8)
  --help        -h            show this usage summary
  --mode           $mode      Log::Report dispatcher mode
  --no-cleanup                keep unprocessed files in po table
  --output-dir  -p $directory location of lexicons (required)
  --to-code        $charset   charset of po files (default utf-8)
  --version     -V            show version of this script
  --verbose=3 -v -vv -vvv     debug mode

  TEMPLATE OPTIONS
  --domain      -d $domain    domain to be used
  --template    -t $notation  how to recognize the strings to be taken
  --files-match -m $regex     filter filenames, default .tt and .tt2

  SCRIPT OPTIONS
  --language    -L $proglang  programming language syntax (now only perl)
  --files-match -m $regex     filter filenames, default .pm and .pl

=head1 DESCRIPTION

This script will maintain PO-files: translation files.  At the moment, the
number of supported syntaxes is quite limited (see below).
There is no restriction on the syntaxes which can be supported: there just
was no practical need to implement more yet.

=head2 Complex options

=over 4

=item --config %config

Log::Report translations support complex additional features, like
context sensitive translations, which require a configuration file.
See Log::Report::Context.

Say, your scripts and templates use textdomain name-spaces C<domain1>
and C<domain2> (please use better names), then you can pass their
respective configuration files as:

  --config domain1=filename domain2=filename    # or
  --config domain1=filename --config domain2=filename

=item --cleanup --no-cleanup

You should scan all script or template files in one go, because PO records
from files which are not mentioned will get removed.  That's the clean-up.
However, when you need more than one scan for a full update, you need to
pass C<--no-cleanup>.  This also implies possible pollution of your
translation tables.

=back

=head2 Extracting from Perl with Log::Report syntax

When no --template notation is given, the provided file-names are expected
to contain program text.  Only Perl5 programs using the Log::Report msgid
notation (with leading '__' to mean gettext) are supported.

=head2 Extracting from Template::Toolkit

See L<Log::Report::Template::Extract>

=cut
