#! /usr/bin/env perl
use strict;
use warnings;

use Data::Dumper;
$Data::Dumper::Indent = 1;
use Getopt::Long;
use List::Util qw(sum);
use Pod::Usage qw(pod2usage);

my $cutoff_ratio = 1e-9;
my $help;
my $output;
my $rules;
my $win;

my $options_processed = GetOptions(
  'cutoff-ratio=f'  => \$cutoff_ratio,
  'help'            => \$help,
  'output=s'        => \$output,
  'rules=s'         => \$rules,
  'win=f'           => \$win,
);

# Obvious error checking.
if ($help or not $options_processed) {
  pod2usage(-verbose => 2);
}

open(RULES, "<", $rules) or die "Can't read --rules file '$rules': $!";
our ($cutoff_conversions, $cutoff_difference);
get_next_rule();

my $total_error = 0;
my $cumulative_errors = 0;
my $cumulative_right = 0;
my $p_b = (1 + $win)/(2 + $win);
my $p_a = 1 - $p_b;
# We keep the distribution as an array from the left edge.
#
# The width of the distribution is $#distribution, and every other value is
# filled in.  (That is we start with a value at 0.  Then values at -1, 1.
# Then values at -2, 0, 2.  Then -3, -1, 1, 3.  etc.)
#
# Therefore $distribution[0] is the probability of having a difference of
# -$#distribution, and $distribution[-1] is the probability of having a
# difference of $#distribution.
my @distribution = (1);
my $conversions = 0;
my $fix_ratio = 1;

while ($cutoff_conversions) {
  $conversions++;
  # Figure out the new distribution
  my @new_distribution;
  for my $i (0..$#distribution) {
    my $prob = $distribution[$i] * $fix_ratio;
    $new_distribution[$i + 1] = $prob * $p_b;
    $new_distribution[$i] += $prob * $p_a;
  }

  # Do we need to cut things off?
  while ($cutoff_conversions and $cutoff_conversions <= $conversions) {
    # We track this error so we know when to bail out early.
    my $this_error = 0;
    while ($cutoff_difference <= $#new_distribution and @new_distribution) {
      $this_error += shift @new_distribution;
      if (@new_distribution) {
        $cumulative_right += pop @new_distribution;
      }
    }

    $cumulative_errors += $this_error;
    get_next_rule();
    if ($this_error < $cumulative_errors * $cutoff_ratio) {
      # EXIT HERE (cumulative remaining errors are assumed not material)
      last;
    }
  }

  @distribution = @new_distribution;
  my $total_running = sum(@distribution);
  my $should_run = 1 - $cumulative_errors - $cumulative_right;
  if ($total_running) {
    $fix_ratio = $should_run / $total_running;
  }
}

if ($output) {
  open(OUT, ">", $output) or die "Can't write to '$output': $!";
}
else {
  *OUT = *STDOUT;
}

print OUT "$cumulative_errors\n";

sub get_next_rule {
  my $line = <RULES> || "";
  chomp($line);
  ($cutoff_conversions, $cutoff_difference) = split /\t/, $line;
}

__END__

=head1 NAME

simulate-test-error-probability - How often will a ruleset call a test wrong?

=head1 SYNOPSIS

  ./simulate-test-error-probability --win 0.02 --rules FILE --output FILE

=head1 DESCRIPTION

Sets up a numerical simulation of the possible outcomes of an A/B test given
a specific stopping rule.

=head1 OPTIONS

=head2 --cutoff-ratio FLOAT

When the error at the current cutoff is less than this times the accumulated
error, stop the simulation.

=head2 --help

Display this documentation.

=head2 --output FILE

The file to write the output to.

=head2 --rules FILE

The file with the rules in it.  The file is expected to be simple tab
delimited, with the first column being the number of conversions you could
stop at, and the second being the threshold at which you will stop.

=head2 --output FILE

The file to write output to.  Output is a single number - the probability of
coming to the wrong conclusion.

=head2 --win FLOAT

How soundly version B beats version A.  The conversion rate for version B is
1 + $win times better.

=head1 BUGS

If the win is 0, the reported errors are off by a factor of two.

If the win is negative, the reported figure is not errors, but the fraction
of the time that the test was correctly called.  This is actually a feature
because it lets this program answer questions that it couldn't otherwise.

=head1 AUTHOR

Ben Tilly <btilly@gmail.com>

=head1 COPYRIGHT AND LICENSE

This software is copyright 2013 by btilly@gmail.com.

This is free software, you can redistribute it and/or modify it under the
same terms as the Perl 5 programming language itself.
