/*permtest.c - permutation test on a group of GnuroScan averaged EEG data files.
  Copyright (c) 1996 Matthew Belmonte

  This program is free software; you can redistribute it and/or
  modify it under the terms of the GNU General Public License
  as published by the Free Software Foundation; either version 2
  of the License, or (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

  If you find this program useful, please send mail to Matthew Belmonte.
  <mkb4@Cornell.edu>.  If you base a publication on data processed by this
  program, please notify Matthew Belmonte and include the following citation
  in your publication:

	Matthew Belmonte, `A Software System for Analysis of
	Steady-State Evoked Potentials', Association for Computing
	Machinery SIGBIO Newsletter 17:1:9-14 (April 1997).
*/

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <math.h>
#include <errno.h>
#include "gnuro.h"
#include "intel.h"
#include "twiddle.h"

/*Largest number of combinations that is enumerated exhaustively.
  huge_number_of_combos() compares n-choose-m against this value, and the
  Monte Carlo loop in main() uses it as its iteration bound.*/
#define MAX_NUM_COMBOS 10000

/*Group sizes are derived from the command line in main(); the dimensions are
  read from a file header by get_dimensions().*/
int	num_experimentals,	/*# of subjects in the experimental group*/
	num_controls,		/*# of subjects in the control group*/
	num_channels,		/*# of channels in each average*/
	num_samples;		/*# of samples in each average*/
/*The two sums are indexed [channel][sample]; comparison is indexed
  [subject][channel][sample] and holds num_experimentals subjects.*/
double	**experimental_sum,	/*channel-by-sample sum of experimental avgs.*/
	**comparison_sum,	/*comparison sum of the same # of subjects*/
	***comparison;	/*avgs for each individual in current comparison group*/
/*comparison_count is indexed [channel][sample].  main() adds 1 for each
  comparison sum below the experimental sum and subtracts 1 for each one above
  it; write_probabilities() uses only the absolute value, so the sign
  convention does not affect the output.  p is allocated with
  num_controls+num_experimentals+2 elements.*/
int	**comparison_count, /*# of comparison sums < exp sum minus # > exp sum*/
	*p;			/*working storage for twiddle()*/
/*One entry per channel, overwritten by every call to read_file().*/
float *calibration;		/*temporary storage for calibration factors*/

/*Read the number of samples and the number of channels from the header of the
  named file into the globals num_samples and num_channels.
  name: path of the averaged data file to inspect.
  Terminates the program with a diagnostic if the file cannot be opened, if the
  header cannot be read, or if either dimension is smaller than 1.  Every later
  allocation and loop is sized by these two values, and the loops terminate on
  `!=', so a negative dimension would make them run away.*/
void get_dimensions(name)
char *name;
  {
  FILE *avg;
  int saved_errno, header_ok;
  if((avg = fopen(name,
#ifdef MSDOS
    "rb"
#else
    "r"
#endif
    )) == NULL)
    {
  /*perror() may itself change errno, so copy it first for the exit status*/
    saved_errno = errno;
    perror(name);
    exit(saved_errno != 0? saved_errno: 1);
    }
/*The stream indicators are tested after each read because a successful
  fseek() clears the end-of-file indicator again.*/
  header_ok = (fseek(avg, (long)S_pnts_offset, SEEK_SET) == 0);
  if(header_ok)
    {
    num_samples = read_Intel_short(avg);
    header_ok = !ferror(avg) && !feof(avg);
    }
  if(header_ok)
    header_ok = (fseek(avg, (long)S_nchans_offset, SEEK_SET) == 0);
  if(header_ok)
    {
    num_channels = read_Intel_short(avg);
    header_ok = !ferror(avg) && !feof(avg);
    }
  fclose(avg);
  if(!header_ok)
    {
    fprintf(stderr, "cannot read header of %s\n", name);
    exit(1);
    }
  if((num_samples < 1) || (num_channels < 1))
    {
    fprintf(stderr, "bad dimensions (%d channels, %d samples) in %s\n", num_channels, num_samples, name);
    exit(1);
    }
  }

/*calloc() that terminates the program instead of handing back NULL.  Unlike
  the rest of this file it is declared with a prototype, so that the int
  counts passed by alloc_storage() are converted to size_t at the call site.
  A request for zero elements is not treated as a failure even if calloc()
  answers it with NULL.*/
static void *alloc_storage_calloc(size_t count, size_t size)
  {
  void *block = calloc(count, size);
  if((block == NULL) && (count != 0) && (size != 0))
    {
    fprintf(stderr, "out of memory\n");
    exit(1);
    }
  return(block);
  }

/*Allocate all dynamic storage, zero-filled:
    experimental_sum, comparison_sum, comparison_count
					[num_channels][num_samples]
    comparison		[num_experimentals][num_channels][num_samples]
    calibration		[num_channels]
    p			[num_controls+num_experimentals+2]
  The four size globals must already be set.  Terminates the program if any
  allocation fails.  release_storage() undoes this.*/
void alloc_storage()
  {
  register int channel, subject;
  experimental_sum = alloc_storage_calloc(num_channels, sizeof *experimental_sum);
  comparison_sum = alloc_storage_calloc(num_channels, sizeof *comparison_sum);
  comparison_count = alloc_storage_calloc(num_channels, sizeof *comparison_count);
  for(channel = 0; channel != num_channels; channel++)
    {
    experimental_sum[channel] = alloc_storage_calloc(num_samples, sizeof **experimental_sum);
    comparison_sum[channel] = alloc_storage_calloc(num_samples, sizeof **comparison_sum);
    comparison_count[channel] = alloc_storage_calloc(num_samples, sizeof **comparison_count);
    }
  comparison = alloc_storage_calloc(num_experimentals, sizeof *comparison);
  for(subject = 0; subject != num_experimentals; subject++)
    {
    comparison[subject] = alloc_storage_calloc(num_channels, sizeof **comparison);
    for(channel = 0; channel != num_channels; channel++)
      comparison[subject][channel] = alloc_storage_calloc(num_samples, sizeof ***comparison);
    }
  calibration = alloc_storage_calloc(num_channels, sizeof *calibration);
  p = alloc_storage_calloc(num_controls+num_experimentals+2, sizeof *p);
  }

/*Release everything obtained by alloc_storage(): the per-channel and
  per-subject rows and also the top-level pointer arrays that hold them.
  Each global is reset to NULL afterwards, so that a second call is harmless
  and a stale pointer cannot be reused by accident.  num_experimentals and
  num_channels must still hold the values they had at allocation time.*/
void release_storage()
  {
  register int channel, subject;
  free(p);
  p = NULL;
  free(calibration);
  calibration = NULL;
  if(comparison != NULL)
    {
    for(subject = num_experimentals-1; subject >= 0; subject--)
      {
      if(comparison[subject] != NULL)
	for(channel = num_channels-1; channel >= 0; channel--)
	  free(comparison[subject][channel]);
      free(comparison[subject]);
      }
    free(comparison);
    comparison = NULL;
    }
  for(channel = num_channels-1; channel >= 0; channel--)
    {
    if(comparison_count != NULL)
      free(comparison_count[channel]);
    if(comparison_sum != NULL)
      free(comparison_sum[channel]);
    if(experimental_sum != NULL)
      free(experimental_sum[channel]);
    }
  free(comparison_count);
  comparison_count = NULL;
  free(comparison_sum);
  comparison_sum = NULL;
  free(experimental_sum);
  experimental_sum = NULL;
  }

/*Return nonzero iff n-choose-m exceeds MAX_NUM_COMBOS, zero otherwise.
  The binomial coefficient is built up in floating point as the product of
  (n-k+1)/k for k counting down from min(m, n-m) to 1, and the multiplication
  stops as soon as the running product has passed the limit.*/
int huge_number_of_combos(m, n)
int m, n;
  {
  double total = (double)n;
  double product = 1.0;
  double factor;
  int smaller = (m > n-m)? n-m: m;
  factor = (double)smaller;
  while(factor >= 1.0)
    {
  /*already past the limit; the remaining factors are not needed*/
    if(!(product <= MAX_NUM_COMBOS))
      break;
    product *= (total-factor+1.0)/factor;
    factor -= 1.0;
    }
  return(product > MAX_NUM_COMBOS);
  }

/*fseek() that terminates the program on failure.  name is the file name
  shown in the diagnostic.  Declared with a prototype so that the offset
  argument is converted to long at the call site.*/
static void read_file_seek(FILE *stream, long offset, int whence, char *name)
  {
  int saved_errno;
  if(fseek(stream, offset, whence) != 0)
    {
    saved_errno = errno;
    perror(name);
    exit(saved_errno != 0? saved_errno: 1);
    }
  }

/*Open the named file and verify that its dimensions match the previously
  established num_samples and num_channels.  Replace the values of
  comparison[0..num_channels-1][0..num_samples-1] with the data from the file,
  keeping comparison_sum[][] up to date: each old value of comparison[][] is
  subtracted from comparison_sum[][] and each new value is added to it.
  The per-channel calibration factors are stored in the global calibration[].
  Terminates the program with a diagnostic if the file cannot be opened or
  positioned, if its dimensions differ, or if the data end prematurely.
  NOTE(review): calibration[] is filled here, but nothing visible in this file
  applies it to the data - confirm that the raw values are meant to be summed.*/
void read_file(name, comparison, comparison_sum)
char *name;
double **comparison, **comparison_sum;
  {
  register int sample, channel;
  int saved_errno;
  FILE *avg;
  if((avg = fopen(name,
#ifdef MSDOS
    "rb"
#else
    "r"
#endif
    )) == NULL)
    {
  /*perror() may itself change errno, so copy it first for the exit status*/
    saved_errno = errno;
    perror(name);
    exit(saved_errno != 0? saved_errno: 1);
    }
  read_file_seek(avg, (long)S_pnts_offset, SEEK_SET, name);
  if(read_Intel_short(avg) != num_samples)
    {
    fclose(avg);
    fprintf(stderr, "wrong number of samples in %s\n", name);
    exit(1);
    }
  read_file_seek(avg, (long)S_nchans_offset, SEEK_SET, name);
  if(read_Intel_short(avg) != num_channels)
    {
    fclose(avg);
    fprintf(stderr, "wrong number of channels in %s\n", name);
    exit(1);
    }
/*one calibration factor from each per-channel header record*/
  read_file_seek(avg, (long)(packed_sizeof_SETUP+EL_calib_offset), SEEK_SET, name);
  for(channel = 0; channel != num_channels; channel++)
    {
    calibration[channel] = read_Intel_float(avg);
    read_file_seek(avg, (long)(packed_sizeof_ELECTLOC-sizeof(float)), SEEK_CUR, name);
    }
/*the data follow the last per-channel header record*/
  read_file_seek(avg, (long)(packed_sizeof_SETUP+num_channels*packed_sizeof_ELECTLOC), SEEK_SET, name);
  for(channel = 0; channel != num_channels; channel++)
    {
  /*skip the 5 bytes that precede each channel's vector of floats*/
    read_file_seek(avg, 5L, SEEK_CUR, name);
    for(sample = 0; sample != num_samples; sample++)
      {
      comparison_sum[channel][sample] -= comparison[channel][sample];
      comparison[channel][sample] = read_Intel_float(avg);
      comparison_sum[channel][sample] += comparison[channel][sample];
      }
    }
/*The last sample read lies furthest into the file and no seek follows it, so
  a file truncated anywhere in the data leaves an indicator set here.*/
  if(ferror(avg) || feof(avg))
    {
    fclose(avg);
    fprintf(stderr, "unexpected end of data in %s\n", name);
    exit(1);
    }
  fclose(avg);
  }

/*For each sample of each channel, write to the named file the probability of
  incorrectly rejecting the null hypothesis.  Copy the new file's headers from
  the given header file, but clear the variance flag and set all calibrations
  to 1.  The probability written is 1 - |comparison_count|/num_combinations.
  name: path of the output file, created or overwritten.
  header_file: path of an existing file whose headers serve as the template.
  comparison_count: signed tallies, indexed [channel][sample].
  num_combinations: divisor for the tallies; must not be zero.
  Terminates the program with a diagnostic if either file cannot be opened, if
  the template ends before its headers do, or if the output cannot be written.
  The template is opened first, so that an unusable template does not cost the
  caller an existing output file.*/
void write_probabilities(name, header_file, comparison_count, num_combinations)
char *name, *header_file;
int **comparison_count, num_combinations;
  {
  register int sample, channel;
  int saved_errno, header_ok;
  double n;
  FILE *hdr, *avg;
/*open the header template file*/
  if((hdr = fopen(header_file,
#ifdef MSDOS
    "rb"
#else
    "r"
#endif
    )) == NULL)
    {
  /*perror() may itself change errno, so copy it first for the exit status*/
    saved_errno = errno;
    perror(header_file);
    exit(saved_errno != 0? saved_errno: 1);
    }
/*open the output file*/
  if((avg = fopen(name,
#ifdef MSDOS
    "wb"
#else
    "w"
#endif
    )) == NULL)
    {
    saved_errno = errno;
    perror(name);
    exit(saved_errno != 0? saved_errno: 1);
    }
/*copy headers, replacing the byte at S_variance_offset with 0*/
  for(sample = 0; sample != S_variance_offset; sample++)
    putc(getc(hdr), avg);
  getc(hdr);
  putc(0, avg);
  for(sample = 1+S_variance_offset; sample != packed_sizeof_SETUP; sample++)
    putc(getc(hdr), avg);
  header_ok = 1;
  for(channel = 0; channel != num_channels; channel++)
    {
    for(sample = 0; sample != EL_calib_offset; sample++)
      putc(getc(hdr), avg);
  /*a successful fseek() clears the end-of-file indicator, so look at it now*/
    if(feof(hdr) || ferror(hdr))
      header_ok = 0;
  /*skip the template's calibration factor and write 1 in its place*/
    fseek(hdr, (long)sizeof(float), SEEK_CUR);
    write_Intel_float(1.0, avg);
    for(sample = EL_calib_offset+sizeof(float); sample != packed_sizeof_ELECTLOC; sample++)
      putc(getc(hdr), avg);
    }
  if(feof(hdr) || ferror(hdr))
    header_ok = 0;
  fclose(hdr);
  if(!header_ok)
    {
    fclose(avg);
    fprintf(stderr, "incomplete header in %s\n", header_file);
    exit(1);
    }
  n = (double)num_combinations;
/*write probabilities*/
  for(channel = 0; channel != num_channels; channel++)
    {
  /*5-byte dummy sweep header*/
    for(sample = 0; sample != 5; sample++)
      putc(0, avg);
  /*vector of floats*/
    for(sample = 0; sample != num_samples; sample++)
      write_Intel_float(1.0-abs(comparison_count[channel][sample])/n, avg);
    }
/*fclose() flushes whatever is still buffered, so its result matters as well*/
  if(ferror(avg))
    {
    fclose(avg);
    fprintf(stderr, "error writing %s\n", name);
    exit(1);
    }
  if(fclose(avg) != 0)
    {
    saved_errno = errno;
    perror(name);
    exit(saved_errno != 0? saved_errno: 1);
    }
  }

/*Fold the current comparison group into the tally: for every sample of every
  channel, add 1 to comparison_count[][] if comparison_sum[][] is below
  experimental_sum[][], subtract 1 if it is above, and leave it alone if the
  two are equal.*/
static void main_tally_comparison()
  {
  register int sample, channel;
  for(channel = 0; channel != num_channels; channel++)
    for(sample = 0; sample != num_samples; sample++)
      comparison_count[channel][sample] += (comparison_sum[channel][sample] < experimental_sum[channel][sample]) - (comparison_sum[channel][sample] > experimental_sum[channel][sample]);
  }

/*usage: permtest <cntrl.avg> ... - <expmtl.avg> ... <probabilities.avg>
  The control files and the experimental files are separated by an argument
  consisting of a single `-'; the last argument names the output file.  The
  sum of the experimental averages is compared with the sum of every group of
  num_experimentals subjects drawn from both groups together - exhaustively if
  there are at most MAX_NUM_COMBOS such groups, by random draws otherwise - and
  the resulting probabilities are written with headers copied from the first
  control file.  Returns 0 on success; every failure exits with nonzero status
  from inside the function that detected it.*/
int main(argc, argv)
int argc;
char **argv;
  {
  register int sample, channel;
  int num_combinations, subject, x, dummy;
  printf("Copyright (c) 1996 Matthew Belmonte <mkb4@Cornell.edu>.  Please cite.\n");
/*process arguments: everything before the lone `-' is a control file*/
  for(subject = 1; (subject != argc) && ((argv[subject][0] != '-')||(argv[subject][1] != '\0')); subject++)
    ;
  num_controls = subject-1;
/*argc also counts the program name, the `-' and the output file*/
  num_experimentals = argc-num_controls-3;
  if((num_controls < 1) || (num_experimentals < 1))
    {
    fprintf(stderr, "usage: %s <cntrl.avg> ... - <expmtl.avg> ... <probabilities.avg>\n", *argv);
    exit(1);
    }
/*get number of channels and number of samples from the first experimental file*/
  get_dimensions(argv[argc-1-num_experimentals]);
/*initialise storage*/
  alloc_storage();
  for(channel = 0; channel != num_channels; channel++)
    for(sample = 0; sample != num_samples; sample++)
      comparison_sum[channel][sample] = 0.0;
/*the first comparison group is the experimental group itself*/
  for(subject = 0; subject != num_experimentals; subject++)
    {
    for(channel = 0; channel != num_channels; channel++)
      for(sample = 0; sample != num_samples; sample++)
	comparison[subject][channel][sample] = 0.0;
    read_file(argv[argc-1-num_experimentals+subject], comparison[subject], comparison_sum);
    }
  for(channel = 0; channel != num_channels; channel++)
    for(sample = 0; sample != num_samples; sample++)
      {
      experimental_sum[channel][sample] = comparison_sum[channel][sample];
      comparison_count[channel][sample] = 0;
      }
  inittwiddle(num_experimentals, num_experimentals+num_controls, p);
/*process all possible combinations; the count starts at 1 because the
  experimental group itself is one of them*/
  num_combinations = 1;
  if(huge_number_of_combos(num_experimentals, num_experimentals+num_controls))
    {
  /*Monte Carlo permutation test (based on the code for the exhaustive version;
    otherwise could be implemented slightly more efficiently)
    NOTE(review): random() is not seeded anywhere in the code visible here, and
    each slot is drawn independently, so one file can occupy several slots of
    the same group - confirm that both are intended.*/
    printf("Monte Carlo mode, %d iterations\n", MAX_NUM_COMBOS);
    while(num_combinations != MAX_NUM_COMBOS)
      {
      for(subject = 0; subject != num_experimentals; subject++)
	{
	x = (int)(random()%(num_controls+num_experimentals));
      /*subject x is argv[x+1], or argv[x+2] once x is past the `-'*/
	read_file(argv[x+1+(x>=num_controls)], comparison[subject], comparison_sum);
	}
      main_tally_comparison();
      num_combinations++;
      }
    }
  else
  /*exhaustive permutation test: each step of twiddle() names one file, x, to
    be read into one slot, subject, of the comparison group*/
    while(!twiddle(&x, &dummy, &subject, p))
      {
      read_file(argv[x+1+(x>=num_controls)], comparison[subject], comparison_sum);
      main_tally_comparison();
      num_combinations++;
      }
/*write output, taking the headers from the first control file*/
  write_probabilities(argv[argc-1], argv[1], comparison_count, num_combinations);
/*clean up*/
  release_storage();
  return(0);
  }
