#! /usr/bin/perl
use strict;
use warnings;
use Statistics::Distributions qw(chisqrprob);

# Greet the user with a short description of what the script computes.
print <<"BANNER";
This does a G-test calculation for any size table, which can tell
you whether there are any differences in a number of tests.  Note that
you probably DO NOT want to use this for A/B testing because knowing
that there IS a difference does not tell you WHAT the difference is.

Let's begin.

BANNER

# Ask for the table dimensions.  The script refuses to continue with
# fewer than two rows or fewer than two columns.
print "How many rows do you want? ";
my $n = read_non_negative_integer();
die "Must have at least 2 rows\n" unless $n >= 2;

print "How many columns do you want? ";
my $m = read_non_negative_integer();
die "Must have at least 2 columns\n" unless $m >= 2;

# Read the table one row at a time into @rows (an array of array refs).
# Every row must hold exactly $m entries; a row with the wrong number of
# entries is reported and the same row is prompted for again.
my @rows;
print "Now start entering in your rows, separating entries with non-digits\n";
for my $row (0..($n-1)) {
  print "Row $row: ";
  my $data = <>;

  # At end of input <> yields undef.  Without this check the entry count
  # below would be 0 and the redo would spin on the same row forever.
  if (!defined $data) {
    die "\nUnexpected end of input while reading row $row\n";
  }

  # Collect every run of digits instead of splitting on non-digits:
  # split produces a bogus empty first entry whenever the line starts
  # with a separator (leading whitespace, for instance), which made a
  # perfectly good row fail the count check.
  my @entries = $data =~ /\d+/g;

  if ($m != @entries) {
    print "Row $row has " . @entries . " entries.  It needs $m\n";
    print "Please try again\n";
    redo;
  }

  push @rows, \@entries;
}

# Marginal totals.  A zero margin would make an expected count of zero
# further down, so the script stops as soon as it meets one.

# Row totals: add up the entries of each row, in row order.
my @row_total;
foreach my $i (0 .. $n - 1) {
  my $across = sum(@{ $rows[$i] });
  die "Cannot calculate: row $i sums to 0\n" if $across == 0;
  $row_total[$i] = $across;
}

# Column totals: add up entry $j of every row, in column order.
my @column_total;
foreach my $j (0 .. $m - 1) {
  my @column = map { $_->[$j] } @rows;
  my $down   = sum(@column);
  die "Cannot calculate: column $j sums to 0\n" if $down == 0;
  $column_total[$j] = $down;
}

# Grand total of the whole table.
my $total = sum(@row_total);

# Compute the expected count of every cell from the marginal totals and
# accumulate the G statistic, G = 2 * sum( observed * ln(observed/expected) ),
# in the same pass.  Cells whose expected count is below 10 are reported
# with a warning line.
my $g_test = 0;
for my $i (0..($n-1)) {
  for my $j (0..($m-1)) {
    my $observed = $rows[$i][$j];
    my $expected = $row_total[$i] * $column_total[$j] / $total;

    if ($expected < 10) {
      print "WARNING: expected[$i][$j] = $expected < 10\n";
    }

    # An empty cell contributes nothing to G (x * ln(x) tends to 0 as x
    # tends to 0), and Perl's log() dies when handed 0, so skip it.
    next if $observed == 0;

    $g_test += 2 * $observed * log($observed / $expected);
  }
}

print "G-test: $g_test\n";

# The statistic is handed to chisqrprob together with (rows-1)*(columns-1)
# as the degrees of freedom.
print "p: " . chisqrprob( ($n-1) * ($m-1), $g_test) . "\n";

exit();

# Read lines from the input (<>) until one holds a non-negative integer
# in plain decimal form ("0", or digits without a leading zero) and
# return that number.  Whitespace around the number is ignored.  Every
# rejected line is reported and followed by a fresh prompt.
#
# Dies if the input ends before a valid number has been entered; the
# previous version re-entered itself forever in that situation.
sub read_non_negative_integer {
  while (defined(my $line = <>)) {
    chomp($line);

    # Tolerate stray blanks or a trailing carriage return around the number.
    (my $candidate = $line) =~ s/^\s+|\s+$//g;

    if ($candidate =~ /^(?:0|[1-9]\d*)\z/) {
      return $candidate;
    }

    print "'$candidate' is not a non-negative integer\n";
    print "Please try again: ";
  }

  die "\nUnexpected end of input while waiting for a non-negative integer\n";
}

# Return the numeric sum of all arguments.
# An empty argument list yields 0 (rather than undef), so the result can
# always be compared or used in arithmetic without an "uninitialized"
# warning, and a single argument comes back as a number.
sub sum {
  my $total = 0;
  $total += $_ for @_;
  return $total;
}
