#!/usr/bin/env perl

use strict;
use warnings FATAL => 'uninitialized';
use utf8;
use experimental 'signatures';

use Cwd 'abs_path';

my ($mydir, $myname);

BEGIN {
    my $location = (-l $0) ? abs_path($0) : $0;
    $location =~ /(.*?)([^\/]+)\z/s or die "?";
    ($mydir, $myname) = ($1, $2);
}

use lib "$mydir/../lib";

use Chj::xtmpfile;
use Chj::xopen 'xopen_read';
use Chj::TEST;
use List::Util 'pairkeys';

(my $email = 'ch%christianjaeger,ch') =~ tr/%,/@./;

sub usage {
    print STDERR map {"$_\n"} @_ if @_;
    my $ops = join("", map {"    $_\n"} pairkeys ops());
    print "Usage: $myname --op opname [--op opname ...] file...

  opname can be any of:
$ops
  (Christian Jaeger <$email>)
";
    exit(@_ ? 1 : 0);
}

use Getopt::Long;
my $verbose = 0;
my @opt_op;
my $opt_test;
my $opt_repl;
my $opt_run;

#our $opt_dry;
GetOptions(
    "verbose" => \$verbose,
    "help"    => sub {usage},
    "op=s"    => sub {
        my (undef, $val) = @_;
        push @opt_op, $val;
    },
    "test"  => \$opt_test,
    "repl"  => \$opt_repl,
    "run=s" => \$opt_run,

    #"dry-run"=> \$opt_dry,
) or exit 1;

sub protos_to_arity ($str) {
    my @p     = grep { length $_ } split /\s*/, $str;
    my $s     = join('', @p);
    my @parts = split /;/, $s;
    my $l0    = length($parts[0]);
    if (@parts == 1) {
        [$l0]
    } elsif (@parts == 2) {
        my $l1 = length($parts[1]);
        [$l0, $l0 + $l1]
    } elsif (@parts == 0) {
        [0]
    } else {
        die "invalid prototype decl: '$s'"
    }
}

TEST { protos_to_arity '$' } [1];
TEST { protos_to_arity ' $  $$' } [3];
TEST { protos_to_arity '$$ ; $' } [2, 3];
TEST { protos_to_arity '&$' } [2];
TEST { protos_to_arity '@$' } [2];
TEST { protos_to_arity '' } [0];

sub checkcode_for_arity ($arity) {
    my ($min, $maybe_max) = @$arity;
    if (!defined $maybe_max) {
        "\@_ == $min or fp_croak_arity $min;\n"
    } else {
        "\@_ >= $min and \@_ <= $maybe_max or fp_croak_arity \"$min-$maybe_max\";\n"
    }
}

my $compare_re = qr(<|>|<=|>=|==);

my %rising  = map { $_ => 1 } qw(> >=);
my %falling = map { $_ => 1 } qw(< <=);
my %equal   = map { $_ => 1 } qw(==);

sub compare_range {
    my ($compare1, $n1, $maybe_andor, $maybe_compare2, $maybe_n2) = @_;
    if (defined $maybe_andor) {
        if ($maybe_andor eq '&&' or $maybe_andor eq 'and') {
            if ($rising{$compare1} and $falling{$maybe_compare2}) {
                "$n1-$maybe_n2"
            } else {
                die "don't know how to handle '$compare1 and $maybe_compare2'"
            }
        } elsif ($maybe_andor eq '||' or $maybe_andor eq 'or') {
            if ($equal{$compare1} and $equal{$maybe_compare2}) {
                "$n1 or $maybe_n2"
            } else {

                # XX could simply say "$compare1 $n1 or $maybe_compare2
                # $maybe_n2" or 'optimize' it in the cases where it's
                # ==
                die "don't know how to handle '$compare1 and $maybe_compare2'"
            }
        } else {
            die "invalid andor: $maybe_andor"
        }
    } else {
        $compare1 eq "==" ? $n1 : "$compare1 $n1"
    }
}

TEST { compare_range qw(> 5) } '> 5';
TEST { compare_range qw(<= 5) } '<= 5';
TEST { compare_range qw(== 5) } '5';
TEST_EXCEPTION { compare_range qw(== 5 and == 6) }
'don\'t know how to handle \'== and ==\'';
TEST { compare_range qw(== 5 or == 6) } '5 or 6';
TEST_EXCEPTION { compare_range qw(== 5 or > 6) }
'don\'t know how to handle \'== and >\'';

my $identifier_re = qr/[_a-zA-Z]\w*(?:::[_a-zA-Z]\w*)*/;
my $globref_re    = qr/\{[A-Z]+\}/;

my %pre_identifiers_that_need_glob = map { $_ => 1 } (
    'local', 'undef',

    # '=item * Use of member names that are also used as subroutine names, perl'
    # (ugly since should check for the '='?...)
    'item'
);

sub glob2functionref ($str, $is_comment) {
    my $perhapsspace = $is_comment ? "" : qr/ ?/;
    $str =~ s{(
        ([\(\[\{=>#,]|$identifier_re) # pre
        (\s*) # spacing
        \* $perhapsspace ($identifier_re) )}{
        my ($all, $pre, $spacing, $identifier)= ($1, $2, $3, $4);
        my $post = substr($str, pos($str)+length($all), 10);
        my $prepre = substr($str, 0, pos($str)); # XX performance?
        # use FP::Repl;repl;
        if ($identifier =~ m{^STD}
            #or $post =~ m{^\*}
            # including ' # possibly *map contents* of inline code or code sections':
            or $post =~ m{^(?:[^*]{0,20}\w)?\*}
            # *Foo{CODE} and such:
            or $post =~ m{^$globref_re}
            # 'local (*F);' etc.:
            or $pre_identifiers_that_need_glob{$pre}
            or ($prepre =~ /\b($identifier_re)\s*\z/s
                and $pre_identifiers_that_need_glob{$1})
            # 'our @EXPORT_OK = qw(*trace_failures ..)':
            or $prepre =~ /\@EXPORT\w*\s*=.{0,70}\z/s
            # 'use FP::Failure qw(*use_failure *fails);':
            or $prepre =~ /\bqw\s*(?:[<\(\"\'\[](?:\s*[*&$@]$identifier_re)*\s*)?\z/s
            # 'tequals *foo, *FP::Equal::t::foo':
            or $prepre =~ /\btequals\b.{0,70}\z/s
            # '$code =~ s/\buse\s*Sub::Call::Tail\b.*?;//s;':
            or $prepre =~ /=~\s*.{0,70}\z/s
            # 'our $get = $db->prepare("select * from, *examples/csv_to_xml-example.csv");':
            or $prepre =~ /[^\\]\"[^\"]{0,70}\z/s
            # '$n * fact($n - 1)'
            or $prepre =~ /[\$]\z/s
            # 'our @EXPORT* variables yet. Drop me a note if you would like to have that.)'
            # and
            # '|($PACKAGE)\s*->\s*new)' (regex rule)
            or $prepre =~ /[\@\$\*\\]\s*\z/s
            # 'to prevent redefinitions of subroutines *which are NOT in the namespace'
            or ($pre =~ /^$identifier_re\z/
                and $prepre =~ /^\s*(?:\s*\w+,?)*\s*\z/s)
        ) {
            $all
        } else {
            "$pre$spacing\\\&$identifier"
        }
    }sgex;
    $str
}

TEST {
    glob2functionref
        "; *bar = *foo1; # * foo2 is fine, *bar is fine, *not* unexpected.", 0
}
'; *bar = \\&foo1; # \\&foo2 is fine, \&bar is fine, *not* unexpected.';
TEST {
    glob2functionref
        "*bar = *foo; # * foo is fine, *bar is fine, *not* unexpected.", 1
}
'*bar = \\&foo; # * foo is fine, \&bar is fine, *not* unexpected.';
TEST {
    glob2functionref 'local (*F);', 0
}
'local (*F);';
TEST { glob2functionref 'BEGIN { undef(*array) }', 0 }
'BEGIN { undef(*array) }';
TEST { glob2functionref 'BEGIN { undef *array }', 0 }
'BEGIN { undef *array }';
TEST { glob2functionref ' *fm = memoizing *f; # memoize in process memory', 0 }
' *fm = memoizing \&f; # memoize in process memory';
TEST {
    glob2functionref
        'our @EXPORT_OK = qw(*trace_failures *use_failure *fails);', 0
}
'our @EXPORT_OK = qw(*trace_failures *use_failure *fails);';
TEST { glob2functionref 'use FP::Failure qw(*use_failure *fails);', 0 }
'use FP::Failure qw(*use_failure *fails);';
TEST {
    glob2functionref 'TEST { tequals((*foo::bar) . "", "*foo::bar") } 1;
TEST { tequals *foo, *FP::Equal::t::foo } 1;', 0
}
'TEST { tequals((*foo::bar) . "", "*foo::bar") } 1;
TEST { tequals *foo, *FP::Equal::t::foo } 1;';

sub deglob ($line) {

    # In comments, only convert globs if they appear without space
    # after the star (with a space, it's probably a bullet point).
    if (my ($pre, $post) = $line =~ /^([^#]*)#(.*)/s) {
        glob2functionref($pre, 0) . "#" . glob2functionref($post, 1)
    } else {
        glob2functionref($line, 0)
    }
}

TEST { deglob 'cmp => on(*string_lc, *string_cmp),' }
'cmp => on(\&string_lc, \&string_cmp),';

TEST {
    deglob
        '$self->if_suffix_md_to_html($path, $for_title, *identity, *identity)'
}
'$self->if_suffix_md_to_html($path, $for_title, \&identity, \&identity)';

TEST { deglob '$code =~ s/\buse\s*Sub::Call::Tail\b.*?;//s;' }
'$code =~ s/\buse\s*Sub::Call::Tail\b.*?;//s;';
TEST {
    deglob
        'our $get = $db->prepare("select * from, *examples/csv_to_xml-example.csv");'
}
'our $get = $db->prepare("select * from, *examples/csv_to_xml-example.csv");';

TEST { deglob 'local *add = \&counting_add;' } 'local *add = \&counting_add;';
TEST { deglob '*is_AlsoSee = is_heading_of(sub($s) { $s =~ /also *see/i });' }
'*is_AlsoSee = is_heading_of(sub($s) { $s =~ /also *see/i });';
TEST { deglob ' # possibly *map contents* of inline code or code sections' }
' # possibly *map contents* of inline code or code sections';
TEST { deglob '$perl++ if $str =~ /(?:^|\n)\s*use\s+\w+/;' }
'$perl++ if $str =~ /(?:^|\n)\s*use\s+\w+/;';

# yes I know, it's a complete hack, this *is to get things done*.
TEST { deglob '$perl += 1 if $str =~ /(?:^|\n|;)\s*push\s+\@\w+\s*,\s*/;' }
'$perl += 1 if $str =~ /(?:^|\n|;)\s*push\s+\@\w+\s*,\s*/;';
TEST { deglob '$n * fact($n - 1)' }
'$n * fact($n - 1)';
TEST {
    deglob
        'our @EXPORT* variables yet. Drop me a note if you would like to have that.)'
}
'our @EXPORT* variables yet. Drop me a note if you would like to have that.)';
TEST {
    deglob
        '=item * Use of member names that are also used as subroutine names, perl'
}
'=item * Use of member names that are also used as subroutine names, perl';
TEST {
    deglob
        'to prevent redefinitions of subroutines *which are NOT in the namespace'
}
'to prevent redefinitions of subroutines *which are NOT in the namespace';
TEST { deglob '|($PACKAGE)\s*->\s*new)' } '|($PACKAGE)\s*->\s*new)';
TEST {
    deglob
        '        [*is_procedure,                                            "nav_bar"],'
}
'        [\&is_procedure,                                            "nav_bar"],';

# Ah this is not converted in real case since it's in bench/globs!
TEST { deglob 'time_this { t *inc } " *";' } 'time_this { t \&inc } " *";';

our $current_file;

sub ops () {
    (

        # [ needs_whole_file, proc ]
        opspaces => [
            0,
            sub {
                if (/http|href/) {
                    $_
                } else {
                    s{ ([^/>=~<!|+*-]) (=|=>|==|=~|/=|//=|>=|<=|<<|>>|!=|\|\||\|\|=|\+=|-=|\*=) ([^/>=~<!|]) }{
                        my ($a,$b,$c)=($1,$2,$3);
                        my $all = "$a$b$c";
                        my $pre= substr($_, 0, pos($_)+1);
                        my $is_perl = 0;
                        if ($b eq "=>") {
                            $is_perl = 1
                        } elsif (not substr($pre, length($pre)-1, 1)=~ /\w/) {
                            $is_perl = 1
                        } elsif (my ($sigil) = $pre =~ /([^\w])[A-Za-z_]\w*\s*$/) {
                            $is_perl= $sigil =~ /[\$*&@%]/
                        }
                        #use FP::Repl;repl;
                        if ($is_perl) {
                            ($a eq " " ? $a : "$a ").$b.($c eq " " ? $c : " $c")
                        } else {
                            $all
                        }
                    }sgex and s/[ \t]*$//;
                    $_
                }
            }
        ],

        functionparameters2signatures => [
            1,
            sub {
                s{
                     \b(method|fun)(\s+\w+)
                     (?:
                         (\s*\(\s*)
                         ([^()]*?)
                         (\s*\))
                     )?
                     (\s*\{)
                }{
                    my ($which,$name,$a,$b,$c,$end)=($1,$2,$3,$4,$5,$6);
                    "sub$name"
                      . ($which eq "method" ? 
                         (defined($b) ? $a.(length($b) ? q{$self, }.$b : q{$self}).$c
                          : q{($self)})
                         : "$a$b$c")
                      . $end
                }sgex;
                $_
            }
        ],

        excise_prototypes => [
            1,
            sub {
                return $_
                    if /use +experimental [^\n;]*signatures[^\n;]*;/
                    ;    # since `()` is ambiguous
                s{
                     ( \bsub\b \ *)(\w+)?( \ * )
                     \( ([\@\$; ]*) \)  # do *not* include & here, as those are needed
                     ( \s* \{ \s* )
                     ( (?:[^\n]*\n){0,2} )
                }{
                    my ($_pre, $maybe_name, $_post, $protos, $post, $bodystart)
                      = ($1,$2,$3,$4,$5,$6);
                    my $pre = $_pre . ($maybe_name // "") . $_post;

                    if (defined $maybe_name and $maybe_name =~ /^[A-Z0-9_]+\z/
                        and $protos =~ /^\s*\z/ and $bodystart =~ /^\s*\d+\}/) {
                        # constant, leave as is
                        "$pre($protos)$post$bodystart"
                    } else {
                        # make sure bodystart doesn't slurp over a subsequent definition
                        die "accidentally slurping up subsequent definition"
                          if $bodystart =~ /(\bsub\b \ *(?:\w+)?) \ * (?:\([@$&;]*\))? (\s* \{)/;

                        my $checkcode = do {
                            if ($bodystart =~ /\@_ *(?:==|<=|>=|<|>) *\d+/) {
                                ""
                            } else {
                                checkcode_for_arity(protos_to_arity($protos))
                            }
                        };
                        # Make sure there's no empty line before the checkcode
                        my $post_and_checkcode = "$post$checkcode";
                        $post_and_checkcode =~ s/^(\s*\{)[ \t]*\n\s*/$1\n    /s;
                        "$pre$post_and_checkcode    $bodystart"
                    }
                }sgex;
                $_
            }
        ],

        move_to_fp_croak_arity => [
            1,
            sub {
                my $replacements = s{
                 ( ; | \{ \s* )
                 ( \(? \s* \@_ \s* ($compare_re) \s* (\d+) \s*
                     (?: ( and | && | or | \|\| ) \s* \@_ \s* ($compare_re) \s* (\d+) )? \s* \)?  \s* )
                 ( or \s+ (?:die|(?:\w+::)*croak) \s*
                     (?: "[^"]*wrong\ number\ of\ arguments[^"]*" |
                         "[^"]*(?:expecting|expects|needs?) \s+ \d+
                            (?: \ * (?:-|to) \ * \d+)? \s+ (?:parameter|argument)s?[^"]*" |
                         "[^"]*not\ enough\ arguments[^"]*"
                     ) \s* ;)
                }{
                    my ($pre, $compare, $compare1, $n1, $maybe_andor, $maybe_compare2, $maybe_n2, $or_part)
                      = ($1,$2,$3,$4,$5,$6,$7,$8);
                    my $range = compare_range($compare1, $n1, $maybe_andor, $maybe_compare2, $maybe_n2);
                    unless ($range =~ /^\d+$/s) {
                        $range = "\"$range\"";
                    }
                    "$pre$compare or fp_croak_arity $range;"
                }sgex;
                if ($replacements) {
                    unless (/use FP::Carp/) {
                        do {
                            if (/[\@\%]EXPORT/) {

                                # force it after 'EXPORT'
                                s{(.*[\@\%]EXPORT.*\nuse [A-Z][^;\n]+;[^\n]*\n)}{${1}use FP::Carp;\n}s
                            } else {
                                s{(.*\nuse [A-Z][^;\n]+;[^\n]*\n)}{${1}use FP::Carp;\n}s
                            }
                            }
                            or warn
                            "could not insert FP::Carp into $current_file\n";
                    }
                }
                $_
            }
        ],
        deglob => [
            0,
            sub {
                return $_
                    if (
                    # this file uses globs for function passing on
                    # purpose:
                    $current_file eq 'bench/globs' or

                    # leave tests etc. alone:
                    $current_file eq 'meta/code-reformat'
                    );
                deglob $_
            }
        ]
    )
}

my %ops = ops;

sub run {
    @opt_op or usage "no op given, use the --op option";

    my @op = map { $ops{$_} // die "unknown op '$_'" } @opt_op;

    my %needs_whole_file = map { $_->[0] ? (1 => undef) : (0 => undef) } @op;

    (keys %needs_whole_file) == 1
        or die
        "can't satisfy ops of different needs_whole_file requirement at the same time";

    my ($needs_whole_file) = keys %needs_whole_file;

    for my $file (@ARGV) {
        local $current_file = $file;
        my $f     = xopen_read $file;
        my @lines = do {
            local $/ = $needs_whole_file ? undef : $/;
            $f->xreadline;
        };
        $f->xclose;

        my $t = xtmpfile $file;
        $t->xprint(
            map {
                for my $op (@op) {
                    my (undef, $proc) = @$op;
                    $_ = &$proc($_);
                }
                $_
            } @lines
        );
        $t->xclose;
        $t->xputback;
    }
}

if ($opt_test) {
    my $failures = Chj::TEST::run_tests(__PACKAGE__)->{failures};
    exit($failures ? 1 : 0)
} elsif ($opt_repl) {
    require FP::Repl;
    FP::Repl->import("repl");
    repl();
} elsif (defined $opt_run) {
    require FP::Show;
    my $res;
    eval "\$res= do { $opt_run }; 1" or die $@;
    print FP::Show::show($res), "\n" or die $!;
} else {
    run
}

