#!/usr/bin/perl
#
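# Reads mass-check result lines and writes the messages they refer to as an
# mbox stream on standard output.  Very handy for e.g.: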
#
#   grep SUBJECT_FREQ spam.log | ./mass-check-results-to-mbox | grep Subject:

# Command-line switches.  Every argument is consumed here, so the <> loop
# further down always reads standard input.
#   -grep VALUE    stores VALUE in $grep (the following argument is consumed)
#   -noannotate    clears $annotate, which gates the extra annotation lines
# Any other argument is silently discarded.
my $grep     = undef;
my $annotate = 1;
while (@ARGV) {
  my $switch = shift @ARGV;
  if ($switch eq '-grep') {
    $grep = shift @ARGV;
  } elsif ($switch eq '-noannotate') {
    $annotate = 0;
  }
}

# Main loop.  Each input line is expected to look like a mass-check result
# line: one character, whitespace, a (possibly negative) integer, whitespace,
# then the message location followed by a space.  The location is either a
# plain file path (optionally .gz / .bz2 compressed) or "FOLDER:<message-id>"
# for a message stored inside an mbox folder.
while (<>) {
  s/^[^\s:]+://;  # strip a "filename:" prefix, as added by "grep foo *"

  next if /^#/;
  /^.\s+-?\d+\s+(\S+) / or next;
  my $mail = $1;

  if ($mail =~ /^(\S+):</) {
    # message inside an mbox folder
    my $msgp = find_in_mailbox ($mail);
    if (defined $msgp) {
      $annotate and unshift (@$msgp, "X-Mass-Check-Id: $mail\n");
      handle ($msgp);
    } else {
      mywarn ("failed to find message for $mail\n");
    }

  } else {
    # message in a file of its own.  The path comes straight from the log, so
    # use 3-arg / list-form open: no shell is involved and the path cannot be
    # mistaken for an open mode.
    my $in;
    my $opened;
    if ($mail =~ /\.gz$/) {
      $opened = open ($in, '-|', 'gunzip', '-cd', $mail)
          or mywarn ("gunzip $mail failed: $!");
    } elsif ($mail =~ /\.bz2$/) {
      $opened = open ($in, '-|', 'bzip2', '-cd', $mail)
          or mywarn ("bunzip2 $mail failed: $!");
    } else {
      $opened = open ($in, '<', $mail)
          or mywarn ("open $mail failed: $!");
    }

    # nothing to read: the warning above is the only output for this entry
    next unless $opened;

    my @msg = <$in>;
    close $in;

    # NOTE(review): handle() prints a "From " separator of its own, so an
    # annotated message ends up preceded by two such lines -- confirm that
    # this is intended.
    $annotate and unshift (@msg,
        "From nobody\@example.com  Wed Sep 18 00:00:00 1972\n",
        "X-Mass-Check-Id: $mail\n");
    handle (\@msg);
  }
}

###########################################################################

# Locate one message inside an mbox folder.
#
#   $mail - location of the form "FOLDER:<message-id>"; FOLDER may end in
#           .gz or .bz2, in which case it is read through gunzip / bzip2.
#
# Returns a reference to the array of lines of the message, or undef (in
# scalar context) when the folder cannot be opened or holds no such message.
#
# State shared with mbox_search(): the global filehandle MBOX and
# $CURRENT_MBOX_OPEN, the name of the folder MBOX is currently reading.  When
# the wanted folder is already open the search first continues from the
# current read position; only if that fails is the folder reopened and
# scanned from the top.
sub find_in_mailbox {
  my ($mail) = @_;
  $mail =~ /^(\S+):</ or return;
  my $folder = $1;

  if (defined $CURRENT_MBOX_OPEN && $folder eq $CURRENT_MBOX_OPEN) {
    # try from current position first
    my $msgp = mbox_search($mail, $folder);
    if (defined $msgp && defined $msgp->[0]) { return $msgp; }
  }

  # failed. have to (re-|)open.  The folder name comes from the log, so use
  # 3-arg / list-form open: no shell is involved and the name cannot be
  # mistaken for an open mode.
  my $opened;
  if ($folder =~ /\.gz$/) {
    $opened = open (MBOX, '-|', 'gunzip', '-cd', $folder)
        or mywarn ("gunzip $folder failed: $!");
  } elsif ($folder =~ /\.bz2$/) {
    $opened = open (MBOX, '-|', 'bzip2', '-cd', $folder)
        or mywarn ("bunzip2 $folder failed: $!");
  } else {
    $opened = open (MBOX, '<', $folder)
        or mywarn ("open $folder failed: $!");
  }

  if (!$opened) {
    # nothing is open any more, so do not claim that a folder is
    $CURRENT_MBOX_OPEN = undef;
    return;
  }

  $CURRENT_MBOX_OPEN = $folder;

  # consume everything up to and including the first "From " separator so
  # that mbox_search() starts on the first header line of the first message
  while (defined (my $line = <MBOX>)) {
    last if $line =~ /^From \S+ +... ... /;
  }

  my $msgp = mbox_search($mail, $folder);
  return $msgp;
}

# Scan forward through the global filehandle MBOX, starting at its current
# read position, for the message identified by $mail.
#
#   $mail   - wanted id, of the form "FOLDER:<message-id>"
#   $folder - folder name, used as the prefix of every candidate id
#
# Messages are delimited by lines matching /^From \S+ +... ... /; the
# delimiter that ends a message is consumed and is not part of the message.
# A message without a Message-Id header gets a synthetic one built from a
# message counter and the host name; it is also inserted into the returned
# lines as the last header.  The counter restarts at 1 on every call, so a
# synthetic id is relative to the position this call started from.
#
# Returns a reference to the array of lines of the matching message.  If the
# end of MBOX is reached first, MBOX is closed, $CURRENT_MBOX_OPEN is cleared
# and undef (in scalar context) is returned.  The caller's $_ is left alone.
sub mbox_search {
  my ($mail, $folder) = @_;
  my $wantid = $mail;

  my $count = 0;
  # NOTE(review): the `hostname` output keeps its trailing newline and the
  # domain below is spelled "spamassasin"; presumably both must stay as they
  # are to reproduce the ids found in the logs -- confirm before changing.
  my $host  = $ENV{'HOSTNAME'} || $ENV{'HOST'} || `hostname` || 'localhost';

  while (!eof MBOX) {
    my @msg = ();
    my $msgid = undef;
    my $in_header = 1;
    $count++;

    # a lexical line variable keeps the global $_ of the caller intact
    while (defined (my $line = <MBOX>)) {
      if ($in_header && $line =~ /^$/) {
        # blank line: end of the header
        $in_header = 0;

        if (!defined ($msgid)) {
          $msgid = sprintf('<no-msgid-in-msg-%06d@%s.masses.spamassasin.org>', $count, $host);
          push (@msg, "Message-Id: $msgid\n");
        }
      }
      if ($in_header && $line =~ /^Message-Id: (.*)\s*$/i) {
        $msgid = $1;
      }

      last if $line =~ /^From \S+ +... ... /;
      push (@msg, $line);
    }

    # so we can find it again; a message that ended before any id was seen
    # contributes an empty id
    $msgid = "$folder:" . (defined $msgid ? $msgid : '');
    $msgid =~ s/\s/_/gs;	# make safe

    if ($wantid ne $msgid) { next; }
    return \@msg;
  }

  # ran off the end without a match
  close MBOX;
  $CURRENT_MBOX_OPEN = undef;
  return;
}

###########################################################################

# Write one message to STDOUT in mbox form: a fixed "From " separator line,
# the message lines exactly as given, then one blank line.
#   $msgp - reference to an array holding the lines of the message
sub handle {
  my ($lines) = @_;
  my $separator = "From nobody\@nowhere  Wed Aug 21 12:41:07 2002\n";
  print STDOUT $separator, @{$lines}, "\n";
}

# Report a problem: always through warn, and additionally -- when $annotate
# is true -- as an "X-Mass-Check-Warning:" line on the currently selected
# output handle.  All arguments are concatenated to form the message.
sub mywarn {
  my @text = @_;
  warn @text;
  print "X-Mass-Check-Warning: " . join ('', @text) . "\n" if $annotate;
}
