#!/usr/bin/perl

# 02.05.00
# Jonathan Flint
# anova_test.pl
 
#Deal with options

use Getopt::Std ;
getopts("hug:p:r:a:v") ;   
&usage if $opt_h; 
$size = 0;
######################################
#work out where we are
#####################################
#$uname = `uname -a`;
#($machine) = $uname =~ /(\S+)\s+.*/;

if ($opt_a) {
$analysis_programme = $opt_a;
}
else {
$analysis_programme = "./anova.Linux";
}

#$analysis_programme = $analysis_programme . "." . $machine;

if ($opt_p) {
	
	open (PHENO, $opt_p ) || die " I can't find the phenotype file $opt_p\n";
	while (<PHENO>) {
	($id1, $pheno) = /(\S+)\s+(\S+)/; 
	unless ($pheno eq "ND") {
	$phenotypes{$id1}= $pheno;
		}
		}
	close (PHENO);
	}

else {
	print "you must give a phenotype file with the -p option\n";
		&usage;
		exit 1;
		}
if ($opt_g) {
	
	open (GENO, $opt_g) || die " I can't find the genotype file $opt_g\n";
	while (<GENO>) {
	($id1, $allele1, $allele2) = /(\S+)\s+(\S+)\s+(\S+)/;
	unless ($allele1 eq "ND" || $allele2 eq "ND" ) { 
	$alleles = $allele1 . "\t" . $allele2;
	$genotypes{$id1}= $alleles;
	}
		}
	close (GENO);
	}

else {
	print "you must give a genotype file with the -g option\n";
		&usage;
		exit 1;
		}
		


	
foreach $line (keys %genotypes ) {
	++$size;
	open (OUT, ">>.work") ;
	if ($phenotypes{$line}) {
		print OUT "$line\t$genotypes{$line}\t$phenotypes{$line}\n";
		}
		}




	close (OUT);


if ($opt_r ) {
    $command = "$analysis_programme -i $opt_r -n $size .work";
$_ = `$analysis_programme -i $opt_r -n $size .work`;

#@Perm = spawnProcess($command);
#print "@Perm\n";
	if (($df1, $df2, $p, $log)= /.*df1\s+(\S+),\s+df2\s+(\S+),\s+P\s+(\S+)\s+-logP\s+(\S+)/) {
print "ANOVA on $opt_g for $opt_p: DF1 $df1 DF2 $df2 P $p\tLOGP $log\n";
			}
	if (($pc) = /(.*pc:.*)/) {
	print "$pc\n";
		}
#exit;
}


else  {
    $command = "$analysis_programme -n $size .work";
if ($opt_v) {
warn "$command\n";
}
$_ = `$analysis_programme -n $size .work`;
	if (($df1, $df2, $p, $log)= /.*df1\s+(\S+),\s+df2\s+(\S+),\s+P\s+(\S+)\s+-logP\s+(\S+)/) {
print "ANOVA on $opt_g for $opt_p: DF1 $df1 DF2 $df2 P $p\tLOGP $log\n";
}
}
unless ($opt_u) {
unlink (".work");
}
#-------------------------------------------------------------------------------
sub usage {
    die "      
Usage:	anova_test.pl <options> -g <genotype file> -p <phenotype file>
	
	phenotype should be in two columns - ID PHENOTYPE
	genotypes in three columns         - ID ALLELE1 ALLELE2
	
	*****Set  missing data to ND before running the program******
	
	-h		usage
	-r <integer>	permute phenotypes
	-a <exectuable file> analysis programme (default is ~/bin/anova)
	-u 		don't delete the .work file
";
}
sub spawnProcess {

    local($command)=@_;

    if ($opt_v) {
      print "$command\n";
    }

   open(PROCESS, "$command 2>&1 |")
        || warn "$prog: spawn couldn't open \"$command\": $!\n"
            && return;

    my(@output)=<PROCESS>;

    close (PROCESS);
    $status=($? >> 8);

    if ( $status != 0 ) {
        warn "$prog: spawn \"$command\" returned non-zero exit status: @output\n";
    }

    return (@output);

}

