#! /usr/bin/perl -w
use strict;
use Statistics::Distributions qw(chisqrprob);

print <<EOT;
This program does an interactive G-test A/B calculation.  For your use,
it is probably better to rewrite this to take its arguments on the
command-line.  For your users, you probably want to put a web interface
in front of it.

Note that the G-test is very, very similar to the chi-square.

Let's get started.

EOT

print "How many versions to test: ";
my $n = read_non_negative_integer();
if ($n < 2) {
  die "Not enough versions\n";
}

my @tests;
my $test_name = "A";
for (1..$n) {
  print "How many in $test_name did what you want? ";
  my $yes = read_non_negative_integer();

  print "How many in $test_name did not? ";
  my $no = read_non_negative_integer();

  if ($yes + $no < 1) {
    die "$test_name does not have enough samples to test\n";
  }

  push @tests, {name => $test_name, yes => $yes, no => $no};
  $test_name++;
}

if (2 == $n) {
  my $g_test = calculate_g_test( @tests );
  print "G-test: $g_test\n";

  my $p = chisqrprob(1, $g_test);
  print "p: $p\n";
}
else {
  for (@tests) {
    $_->{ratio} = $_->{yes} / ($_->{yes} + $_->{no});
  }
  @tests = sort { $a->{ratio} <=> $b->{ratio} } @tests;

  print "Best test: $tests[-1]{name}\n";
  print "Worst test: $tests[0]{name}\n";

  my $g_test = calculate_g_test( @tests[0,-1] );
  print "G-test: $g_test\n";

  my $fudge = $n * ($n-1) / 2;

  my $p = chisqrprob(1, $g_test) * $fudge;
  print "Overestimate of p: $p\n";
}

sub calculate_g_test {
  # We expect the names to be A and B, but they might not be.
  my $a_name = $_[0]{name};
  my $a_yes  = $_[0]{yes};
  my $a_no   = $_[0]{no};

  my $b_name = $_[1]{name};
  my $b_yes  = $_[1]{yes};
  my $b_no   = $_[1]{no};

  my $a     = $a_yes + $a_no;
  my $b     = $b_yes + $b_no;
  my $yes   = $a_yes + $b_yes;
  my $no    = $a_no  + $b_no;
  my $total = $a + $b;

  # Sanity checks.
  if (0 == $yes) {
    die "Cannot calculate with no yeses\n";
  }

  if (0 == $no) {
    die "Cannot calculate with no nos\n";
  }

  my $e_a_yes = $a * $yes / $total;
  my $e_a_no  = $a * $no  / $total;
  my $e_b_yes = $b * $yes / $total;
  my $e_b_no  = $b * $no  / $total;

  # Sanity warnings.
  if ($e_a_yes < 10) {
    print "WARNING: expected yeses for $a_name is $e_a_yes < 10\n";
  }

  if ($e_a_no < 10) {
    print "WARNING: expected nos for $a_name is $e_a_yes < 10\n";
  }

  if ($e_b_yes < 10) {
    print "WARNING: expected yeses for $b_name is $e_b_yes < 10\n";
  }

  if ($e_b_no < 10) {
    print "WARNING: expected nos for $b_name is $e_b_yes < 10\n";
  }

  return
    2 * (
        $a_yes * log($a_yes / $e_a_yes)
      + $a_no  * log($a_no  / $e_a_no)
      + $b_yes * log($b_yes / $e_b_yes)
      + $b_no  * log($b_no  / $e_b_no)
    );
}

sub read_non_negative_integer {
  my $i = <>;
  chomp($i);

  if ($i eq "0") {
    return $i;
  }
  elsif ($i =~ /^[1-9]\d*\z/) {
    return $i;
  }
  else {
    print "'$i' is not a non-negative integer\n";
    print "Please try again: ";
    goto &read_non_negative_integer;
  }
}
