#! /usr/bin/perl -w
use strict;
use Statistics::Distributions qw(chisqrprob);

print <<EOT;
This program does an interactive chi-square A/B test calculation.  For
your use, it is probably better to rewrite this to take its arguments
on the command-line.  For your users, you probably want to put a web
interface in front of it.

Let's get started.

EOT

print "How many versions to test: ";
my $n = read_non_negative_integer();
if ($n < 2) {
  die "Not enough versions\n";
}

my @tests;
my $test_name = "A";
for (1..$n) {
  print "How many in $test_name did what you want? ";
  my $yes = read_non_negative_integer();

  print "How many in $test_name did not? ";
  my $no = read_non_negative_integer();

  if ($yes + $no < 1) {
    die "$test_name does not have enough samples to test\n";
  }

  push @tests, {name => $test_name, yes => $yes, no => $no};
  $test_name++;
}

if (2 == $n) {
  my $chi_square = calculate_chi_square( @tests );
  print "Chi-square: $chi_square\n";

  my $p = chisqrprob(1, $chi_square);
  print "p: $p\n";
}
else {
  for (@tests) {
    $_->{ratio} = $_->{yes} / ($_->{yes} + $_->{no});
  }
  @tests = sort { $a->{ratio} <=> $b->{ratio} } @tests;

  print "Best test: $tests[-1]{name}\n";
  print "Worst test: $tests[0]{name}\n";

  my $chi_square = calculate_chi_square( @tests[0,-1] );
  print "Chi-square: $chi_square\n";

  my $fudge = $n * ($n-1) / 2;

  my $p = chisqrprob(1, $chi_square) * $fudge;
  print "Overestimate of p: $p\n";
}

sub calculate_chi_square {
  # We expect the names to be A and B, but they might not be.
  my $a_name = $_[0]{name};
  my $a_yes  = $_[0]{yes};
  my $a_no   = $_[0]{no};

  my $b_name = $_[1]{name};
  my $b_yes  = $_[1]{yes};
  my $b_no   = $_[1]{no};

  my $a     = $a_yes + $a_no;
  my $b     = $b_yes + $b_no;
  my $yes   = $a_yes + $b_yes;
  my $no    = $a_no  + $b_no;
  my $total = $a + $b;

  # Sanity checks.
  if (0 == $yes) {
    die "Cannot calculate with no yeses\n";
  }

  if (0 == $no) {
    die "Cannot calculate with no nos\n";
  }

  my $e_a_yes = $a * $yes / $total;
  my $e_a_no  = $a * $no  / $total;
  my $e_b_yes = $b * $yes / $total;
  my $e_b_no  = $b * $no  / $total;

  # Sanity warnings.
  if ($e_a_yes < 10) {
    print "WARNING: expected yeses for $a_name is $e_a_yes < 10\n";
  }

  if ($e_a_no < 10) {
    print "WARNING: expected nos for $a_name is $e_a_yes < 10\n";
  }

  if ($e_b_yes < 10) {
    print "WARNING: expected yeses for $b_name is $e_b_yes < 10\n";
  }

  if ($e_b_no < 10) {
    print "WARNING: expected nos for $b_name is $e_b_yes < 10\n";
  }

  return
      ($a_yes - $e_a_yes)**2 / $e_a_yes
    + ($a_no  - $e_a_no )**2 / $e_a_no
    + ($b_yes - $e_b_yes)**2 / $e_b_yes
    + ($b_no  - $e_b_no )**2 / $e_b_no;
}

sub read_non_negative_integer {
  my $i = <>;
  chomp($i);

  if ($i eq "0") {
    return $i;
  }
  elsif ($i =~ /^[1-9]\d*\z/) {
    return $i;
  }
  else {
    print "'$i' is not a non-negative integer\n";
    print "Please try again: ";
    goto &read_non_negative_integer;
  }
}
