package Bio::DOOP::Util::Run::Fuzznuc;

use strict;
use warnings;
use Carp qw(cluck carp verbose);

=head1 NAME

  Bio::DOOP::Util::Run::Fuzznuc - Fuzznuc runner module.

=head1 VERSION

  Version 0.5

=cut

our $VERSION = '0.5';

=head1 SYNOPSIS

#!/usr/bin/perl -w

use Bio::DOOP::DOOP;
$db     = Bio::DOOP::DBSQL->connect("user","pass","doop-plant-1_5","localhost");

@list   = ("81001020","81001110","81001200","81001225","81001230","81001290","81001470","81001580","81001610","81001620","81001680","81001680");

$fuzznuc = Bio::DOOP::Util::Run::Fuzznuc->new($db,'500','M',\@list,"/data/DOOP/dummy.txt");

print $fuzznuc->get_tmp_file_name,"\n";

$error = $fuzznuc->run('TTGGGC' , 1 , 0);

if ($error == -1){
   die "No results or error!\n";
}

@res = @{$fuzznuc->get_results};
for $result (@res){
  print $$result[0]->get_id,"| ",$$result[1]," ",$$result[2]," ",$$result[3]," ",$$result[4],"\n";
}

=head1 DESCRIPTION

  This module is a wrapper for the Emboss program fuzznuc. You can search
  patterns in the promoter sequences with it.

=head1 AUTHORS

  Tibor Nagy, Godollo, Hungary and Endre Sebestyen, Martonvasar, Hungary

=head1 SUBROUTINES

=head2 new

  $fuzznuc = Bio::DOOP::Util::Run::Fuzznuc->new($db,500,'M',\@list,'/tmp/tmpfile');

  Create new Fuzznuc object.
  Arguments: Bio::DOOP::DBSQL object, promoter type (500, 1000, 3000), subset type (depends on reference species),
  listref of clusters, temp file name (default: /tmp/fuzznuc_run.txt).

=cut

sub new {
  # Constructor. Writes the sequences of the requested subset of every
  # listed cluster into a FASTA-style temporary file and records the
  # output of `embossversion`.
  #
  # Arguments: class name (supplied by the method call), Bio::DOOP::DBSQL
  #            object, promoter type, subset type, listref of cluster ids,
  #            temp file name (optional, default: /tmp/fuzznuc_run.txt).
  # Returns:   the new object. A temp file that cannot be written is
  #            reported with cluck(); the object is still returned.
  my $class                = shift;
  my $db                   = shift;
  my $promo_type           = shift;
  my $subset_type          = shift;
  my $cluster_id_list      = shift;
  my $tmp_filename         = shift;
  my $self                 = {};

  if (!$tmp_filename) { $tmp_filename = "/tmp/fuzznuc_run.txt" }

  # Three-argument open with a lexical handle: the file name can no longer
  # be interpreted as an open mode, and a failure is reported instead of
  # silently printing to a closed handle.
  if (open(my $tmp_fh, '>', $tmp_filename)) {
     for my $cl_id (@{$cluster_id_list}){
        my $cl = Bio::DOOP::Cluster->new($db,$cl_id,$promo_type);
        # The failure sentinel used by this code base is -1; anything that
        # is not a reference cannot be used as an object, so skip it.
        if (!ref($cl)){ next }
        my $subset = $cl->get_subset_by_type($subset_type);
        if (!ref($subset)){ next }
        for my $seq (@{$subset->get_all_seqs}){
           print {$tmp_fh} ">",$seq->get_id,"\n";
           print {$tmp_fh} $seq->get_raw_seq,"\n\n";
        }
     }
     # Buffered write errors only surface at close time.
     close($tmp_fh) or cluck("Can't write to the tmp file!\n");
  }
  else {
     cluck("Can't write to the tmp file!\n");
  }

  #Get the Emboss version
  my $ver = `embossversion -stdout -auto`;
  # The backticks yield undef when the program cannot be started.
  chomp($ver) if defined $ver;
  $self->{EMBOSSVER}       = $ver;
  $self->{DB}              = $db;
  $self->{CLLIST}          = $cluster_id_list;
  $self->{TMP_FILE}        = $tmp_filename;

  # Two-argument bless so that subclasses get objects of their own class.
  bless $self, (ref($class) || $class || __PACKAGE__);
  return($self);
}

=head2 new_by_file

  Creates a new Fuzznuc object from a query file.
  Arguments: Bio::DOOP::DBSQL object, promoter type (500, 1000, 3000), subset type (depends on reference species),
  name of a file that contains the cluster ids (one per line), temp file name (default: /tmp/fuzznuc_run.txt).

=cut

sub new_by_file {
  # Constructor. Reads cluster ids (one per line) from a file, writes the
  # sequences of the requested subset of every cluster into a FASTA-style
  # temporary file and records the output of `embossversion`.
  #
  # Arguments: class name (supplied by the method call), Bio::DOOP::DBSQL
  #            object, promoter type, subset type, name of the cluster id
  #            file, temp file name (optional, default:
  #            /tmp/fuzznuc_run.txt).
  # Returns:   the new object. Files that cannot be opened are reported
  #            with cluck(); the object is still returned.
  my $class                = shift;
  my $db                   = shift;
  my $promo_type           = shift;
  my $subset_type          = shift;
  my $filename             = shift;
  my $tmp_filename         = shift;
  my $self                 = {};
  my @cluster_id_list;

  if (!$tmp_filename) { $tmp_filename = "/tmp/fuzznuc_run.txt" }

  # Three-argument opens with lexical handles; the id file is opened first,
  # as before. The defined() guard keeps an undefined name a plain failure.
  my $ids_ok = defined($filename) && open(my $ids_fh, '<', $filename);
  cluck("No such file or directory!\n") unless $ids_ok;
  my $tmp_ok = open(my $tmp_fh, '>', $tmp_filename);
  cluck("Can't write to the tmp file!\n") unless $tmp_ok;

  if ($ids_ok) {
     # A lexical loop variable keeps the caller's $_ untouched.
     while (defined(my $cl_id = <$ids_fh>)){
        chomp $cl_id;
        push @cluster_id_list,$cl_id;
        # Without a writable temp file there is nothing to dump, but the
        # id list is still collected.
        next unless $tmp_ok;
        my $cl = Bio::DOOP::Cluster->new($db,$cl_id,$promo_type);
        # new() skips clusters for which the constructor returned the -1
        # sentinel; do the same here instead of calling a method on -1.
        if (!ref($cl)) { next }
        my $subset = $cl->get_subset_by_type($subset_type);
        if (!ref($subset)) { next }
        for my $seq (@{$subset->get_all_seqs}){
           print {$tmp_fh} ">",$seq->get_id,"\n";
           print {$tmp_fh} $seq->get_raw_seq,"\n\n";
        }
     }
     close $ids_fh;
  }
  if ($tmp_ok) {
     # Buffered write errors only surface at close time.
     close($tmp_fh) or cluck("Can't write to the tmp file!\n");
  }

  #Get the Emboss version
  my $ver = `embossversion -stdout -auto`;
  # The backticks yield undef when the program cannot be started.
  chomp($ver) if defined $ver;
  $self->{EMBOSSVER}       = $ver;

  $self->{DB}              = $db;
  $self->{CLLIST}          = \@cluster_id_list;
  $self->{TMP_FILE}        = $tmp_filename;

  # Two-argument bless so that subclasses get objects of their own class.
  bless $self, (ref($class) || $class || __PACKAGE__);
  return($self);
}

=head2 new_by_tmp

  Creates new Fuzznuc object from existing tmp file. It is good for
  speeding up the search with a previously created tmp file.
  Arguments: DBSQL object, tmp filename.

=cut

sub new_by_tmp {
  # Constructor that reuses an already written temporary sequence file
  # instead of building one; only stores its arguments and records the
  # output of `embossversion`.
  #
  # Arguments: class name (supplied by the method call), DBSQL object,
  #            tmp file name.
  # Returns:   the new object.
  my $class                = shift;
  my $db                   = shift;
  my $tmp_filename         = shift;
  my $self                 = {};

  $self->{DB}              = $db;
  $self->{TMP_FILE}        = $tmp_filename;
  #Get the Emboss version
  my $ver = `embossversion -stdout -auto`;
  # The backticks yield undef when the program cannot be started.
  chomp($ver) if defined $ver;
  $self->{EMBOSSVER}       = $ver;

  # Two-argument bless so that subclasses get objects of their own class.
  bless $self, (ref($class) || $class || __PACKAGE__);
  return($self);
}

=head2 get_tmp_file_name

  Get the temporary file name.
  Return type: string

=cut

sub get_tmp_file_name {
  # Accessor: hands back the name stored in the object's TMP_FILE slot.
  my ($self) = @_;
  my $tmp_file_name = $self->{TMP_FILE};
  return $tmp_file_name;
}

=head2 get_emboss_version

  $fuzznuc->get_emboss_version

  Get the installed emboss version number

=cut

sub get_emboss_version {
  # Accessor: hands back the version string stored in the EMBOSSVER slot
  # by the constructors.
  my ($self) = @_;
  my $emboss_version = $self->{EMBOSSVER};
  return $emboss_version;
}

=head2 run

  Run fuzznuc on the temporary file containing the sequences.
  Arguments: query pattern, mismatch, complement (0 or 1).
  Return type: 0 -> success, -1 -> no result or error

=cut

sub run {
  # Runs fuzznuc on the object's temporary sequence file, waits for it and
  # stores the parsed hits in the RESULT slot as strings of the form
  # "seq_id start end mismatch hitseq".
  #
  # Arguments: query pattern, mismatch (default 0), complement (0 or 1,
  #            default 0).
  # Returns:   0 on success, -1 when fuzznuc could not be started or
  #            printed nothing.
  my $self                 = shift;
  my $pattern              = shift;
  my $mismatch             = shift;
  my $complement           = shift;

  # An omitted option would otherwise produce a malformed "-opt=" argument.
  $mismatch   = 0 unless defined $mismatch;
  $complement = 0 unless defined $complement;

  my $file    = $self->{TMP_FILE};
  my $mismopt = "-mismatch";
  my $ver     = 0;

  # Versions above 4 get -pmismatch instead of -mismatch. An unknown
  # version (embossversion missing) keeps the old option name.
  if (defined $self->{EMBOSSVER} && $self->{EMBOSSVER} =~ /^([0-9]+\.[0-9]+)/) { $ver = $1 }
  if ($ver > 4){ $mismopt = "-pmismatch" }

  # List-form pipe open: the arguments reach fuzznuc verbatim, without a
  # shell, so quotes, spaces or shell metacharacters in the pattern or in
  # the file name can neither break nor extend the command.
  my @cmd = ('fuzznuc', $file, "-pattern=$pattern", "$mismopt=$mismatch",
             "-complement=$complement", '-stdout', '-auto');
  open(my $fuzznuc_out, '-|', @cmd) or return(-1);
  my @result = <$fuzznuc_out>;
  close $fuzznuc_out;

  if (!@result){ return(-1) }

  my $seq_id;
  my @parsed;

  for my $line (@result){
     # Header line of a report section: remember which sequence the
     # following hit lines belong to.
     if ($line =~ / Sequence: (\S+)/){
        $seq_id = $1;
     }
     if ($line =~ /\s+(\d+)\s+(\d+)\s+([1234567890.]+) (.+)/){
     #TODO : watch for different EMBOSS versions
        my ($start,$end,$mism,$hitseq) = ($1,$2,$3,$4);
        # A "." in the mismatch column is stored as 0.
        $mism =~ s/\./0/;
        push @parsed,"$seq_id $start $end $mism $hitseq";
     }
  }

  $self->{RESULT} = \@parsed;
  return(0);
}

=head2 run_background

  Run fuzznuc, but do not wait for the search to finish.
  Arguments: query pattern, mismatch, complement, output file name
  Return type: the process id

=cut

sub run_background {
  # Starts fuzznuc in a child process and returns immediately; the report
  # is written by fuzznuc to the given output file.
  #
  # Arguments: query pattern, mismatch (default 0), complement (0 or 1,
  #            default 0), output file name.
  # Returns:   the process id of the child, or undef (after a cluck) when
  #            fork failed. Reaping the child is left to the caller.
  my $self                 = shift;
  my $pattern              = shift;
  my $mismatch             = shift;
  my $complement           = shift;
  my $outfile              = shift;
  my $file = $self->{TMP_FILE};

  # An omitted option would otherwise produce a malformed "-opt=" argument.
  $mismatch   = 0 unless defined $mismatch;
  $complement = 0 unless defined $complement;

  my $ver     = 0;
  my $mismopt = "-mismatch";

  # Versions above 4 get -pmismatch instead of -mismatch. An unknown
  # version (embossversion missing) keeps the old option name.
  if (defined $self->{EMBOSSVER} && $self->{EMBOSSVER} =~ /^([0-9]+\.[0-9]+)/) { $ver = $1 }
  if ($ver > 4){ $mismopt = "-pmismatch" }

  my @cmd = ('fuzznuc', $file, "-pattern=$pattern", "$mismopt=$mismatch",
             "-complement=$complement", "-outfile=$outfile");

  # Loaded before forking so the child can leave through _exit.
  require POSIX;

  my $pid = fork;
  if (!defined $pid){
     cluck("Can't fork to run fuzznuc!\n");
     return($pid);
  }

  if ($pid == 0){
     # Child. The command used to run in backticks, which swallowed its
     # standard output; keep that output away from the caller's stdout.
     open(STDOUT, '>', '/dev/null');
     # exec replaces the child with fuzznuc (list form, no shell), so the
     # child can never return into the caller's code and run the rest of
     # the program a second time. If exec itself fails, leave with _exit
     # so that no END blocks or destructors of the parent run in the child.
     exec { $cmd[0] } @cmd or POSIX::_exit(127);
  }

  return($pid);
}

=head2 get_results

  Returns an arrayref of arrays; each inner array holds a Bio::DOOP::Sequence
  object, the start, the end, the mismatch count and the hit sequence.

=cut

sub get_results {
  # Converts the hit strings kept in the RESULT slot
  # ("seq_id start end mismatch hitseq") into records of the form
  # [Bio::DOOP::Sequence object, start, end, mismatch, hitseq].
  #
  # Returns: reference to the list of records (empty when nothing is
  #          stored).
  my ($self) = @_;

  my @records;
  foreach my $hit (@{ $self->{RESULT} || [] }) {
     my ($id, $from, $to, $mismatches, $matched) = split(/ /, $hit);
     #TODO : use cluster objects, not Sequence
     my $sequence = Bio::DOOP::Sequence->new($self->{DB}, $id);
     push @records, [ $sequence, $from, $to, $mismatches, $matched ];
  }

  return \@records;
}


=head2 get_results_from_file

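  Parses a fuzznuc output file. Returns an arrayref of arrays (sequence object,
  start, end, mismatch count, hit sequence) or -1 in case of error.
  This is a very unique method because it does not depend on a search run by the
  object. So you can fetch the results of different fuzznuc runs.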

=cut

sub get_results_from_file {
  # Parses a fuzznuc report file (for example one written through
  # run_background) and builds one record per hit line.
  #
  # Arguments: name of the fuzznuc output file.
  # Returns:   reference to a list of
  #            [Bio::DOOP::Sequence object, start, end, mismatch, hitseq]
  #            records, or -1 when the file cannot be opened.
  # Side effect: the hits are also stored in the RESULT slot, as strings in
  #            the format run() uses, so that get_results keeps working
  #            after this call.
  my $self                 = shift;
  my $filename             = shift;

  my $seq_id;
  my @records;   # returned to the caller
  my @parsed;    # "seq_id start end mismatch hitseq" strings for RESULT

  # Three-argument open with a lexical handle; the defined() guard keeps an
  # undefined file name a plain failure.
  (defined($filename) && open(my $report_fh, '<', $filename)) or return(-1);
  # A lexical loop variable keeps the caller's $_ untouched.
  while (defined(my $line = <$report_fh>)){
     chomp $line;
     # Header line of a report section: remember which sequence the
     # following hit lines belong to.
     if ($line =~ / Sequence: (\S+)/){
        $seq_id = $1;
     }
     if ($line =~ /\s+(\d+)\s+(\d+) pattern1\s+([1234567890.]+) (.+)/){
        my ($start,$end,$mism,$hitseq) = ($1,$2,$3,$4);
        # A "." in the mismatch column is stored as 0.
        $mism =~ s/\./0/;
        #TODO : use cluster objects, not Sequence
        my $cl = Bio::DOOP::Sequence->new($self->{DB},$seq_id);
        push @records,[$cl,$start,$end,$mism,$hitseq];
        push @parsed,"$seq_id $start $end $mism $hitseq";
     }
  }
  close $report_fh;

  $self->{RESULT} = \@parsed;
  return(\@records);
}

1;
