#! /usr/bin/perl -w
use strict;
use Statistics::Distributions;
use Getopt::Long;

my $step = 0;

my $count_by_a_yes_by_b_yes = {};
my $next_count_by_a_yes_by_b_yes;
my $correct = 0;
my $incorrect = 0;

my $outstanding = 100;
my $stop_at_portion_outstanding = 0;
my $p_a_yes = 0.51;
my $p_b_yes = 0.5;
my $threshold_prob = 0.01;

# You can set all of the parameters on the command line.
# For the talk, I set total to 100000, and a_success,
# b_success and cutoff to whatever was appropriate for
# the simulation.
#
# It outputs a header, then the simulation results.  The
# fields in those results are:
#
#   o Steps:  The number of people in the test is actually
#     double this because we add one person to A and one
#     person to B at each step.
#   o Outstanding:  How many tests are still running.  Because
#     of the way I avoid tracking individual tests as long as I
#     can, you can have a fractional number outstanding.
#   o Correct:  How many tests finished with B better than A.
#   o Incorrect:  How many tests finished with A better than B.
#
# Be warned, some of the simulations took me over a day to run.
#
GetOptions(
  "a_success=f" => \$p_a_yes,
  "b_success=f" => \$p_b_yes,
  "cutoff=f" => \$threshold_prob,
  "total=i" => \$outstanding,
  "end_at=f" => \$stop_at_portion_outstanding,
) or die "Cannot process options";

my $threshold_chi_square
  = Statistics::Distributions::chisqrdistr(1, $threshold_prob);
my $end_at = $outstanding * $stop_at_portion_outstanding;

my $old = {
  "0:0:0:0" => $outstanding,
};

print "trials: $outstanding\n";
printf "prob(a): %.02f\n", $p_a_yes;
printf "prob(b): %.02f\n", $p_b_yes;
printf "threshold: %.03f\n", $threshold_prob;
printf "threshold chisqr: %.03f\n", $threshold_chi_square;
print "  Step  Outstanding  Correct  Incorrect\n";

while ($outstanding > $end_at) {
  $step++;
  my $new = {};
  my $changed = 0;
  my ($a, $b);
  while (my ($key, $count) = each %$old) {
    my ($a_yes, $b_yes, $a_no, $b_no) = split /:/, $key;
    if (1 == $count) {
      if (rand(1) < $p_a_yes) {
        $a_yes++;
      }
      else {
        $a_no++;
      }

      if (rand(1) < $p_b_yes) {
        $b_yes++;
      }
      else {
        $b_no++;
      }

      $new->{"$a_yes:$b_yes:$a_no:$b_no"} += $count;
    }
    else {
      my ($count_both, $count_a, $count_b, $count_neither)
        = quantize(
              $count * $p_a_yes * $p_b_yes
            , $count * $p_a_yes * (1 - $p_b_yes)
            , $count * (1 - $p_a_yes) * $p_b_yes
            , $count * (1 - $p_a_yes) * (1 - $p_b_yes)
          );
  
      $a_yes++;
      $b_yes++;
      $new->{"$a_yes:$b_yes:$a_no:$b_no"} += $count_both if $count_both;
  
      $a_yes--;
      $a_no++;
      $new->{"$a_yes:$b_yes:$a_no:$b_no"} += $count_b if $count_b;
  
      $b_yes--;
      $b_no++;
      $new->{"$a_yes:$b_yes:$a_no:$b_no"} += $count_neither if $count_neither;
  
      $a_yes++;
      $a_no--;
      $new->{"$a_yes:$b_yes:$a_no:$b_no"} += $count_a if $count_a;
    }
  }

  for my $key (keys %$new) {
    my ($a_yes, $b_yes, $a_no, $b_no) = split /:/, $key;

    next if grep {$_ < 10} $a_yes, $b_yes, $a_no, $b_no;

    my $a     = $a_yes + $a_no;
    my $b     = $b_yes + $b_no;
    my $yes   = $a_yes + $b_yes;
    my $no    = $a_no  + $b_no;
    my $total = $a + $b;
  
    my $e_a_yes = $a * $yes / $total;
    my $e_a_no  = $a * $no  / $total;
    my $e_b_yes = $b * $yes / $total;
    my $e_b_no  = $b * $no  / $total;
  
    my $g_test =
      2 * (
          $a_yes * log($a_yes / $e_a_yes)
        + $a_no  * log($a_no  / $e_a_no)
        + $b_yes * log($b_yes / $e_b_yes)
        + $b_no  * log($b_no  / $e_b_no)
      );

    if ($g_test > $threshold_chi_square) {
      $changed++;
      my $count = delete $new->{$key};
      $outstanding -= $count;
      if ($a_yes > $b_yes) {
        $correct += $count;
      }
      else {
        $incorrect += $count;
      }
    }
  }

  $old = $new;
  if ($changed) {
    printf("% 6d% 13d% 9d% 11d\n"
      , $step, $outstanding, $correct, $incorrect
    );
  }

  use Data::Dumper;
  $Data::Dumper::Indent = 1;
  #print Dumper($new);
#  <STDIN>;
}

# Takes several fractional numbers, rounds them off to integers, with the
# random noise proportionately distributed.  (Note, 2 can go to one, but
# this will happen in a way that makes the overall proportions correct.)
sub quantize {
  my $subtotal = 0;
  my $rand = rand(1);
  my @result;
  for my $i (0..$#_) {
    my $integer = int($_[$i]);
    my $fraction = $_[$i] - $integer;
    if ($subtotal <= $rand and $rand < $fraction + $subtotal) {
      $integer++;
    }
    $subtotal += $fraction;
    if (1 < $subtotal) {
      $subtotal -= 1;
      $rand = rand(1);
      if ($rand < $subtotal) {
        $integer++;
      }
    }
    push @result, $integer;
  }
  return @result;
}

