#! /usr/bin/env perl
use strict;
use warnings;

use Data::Dumper;
use Getopt::Long;
use Pod::Usage qw(pod2usage);
use Statistics::Distributions qw(uprob);

my $bias = 0;
my $decision_limit = 'n/(n + 10000)';
my $help;
my $n = 1000000;
my $p_limit = 0.05;
my $header;

my $options_processed = GetOptions(
  'bias=f'          => \$bias,
  'decision-limit=s'=> \$decision_limit,
  'help'            => \$help,
  'n=i'             => \$n,
  'p-limit=f'       => \$p_limit,
  'header'          => \$header,
);

if ($help or not $options_processed) {
  pod2usage(-verbose => 2);
}

my $boundary = 0;
my $portion = {0 => 1};
my $decided = 0;
my $prob_increase = (1 + $bias) / (2 + $bias);
my $finished = 0;
my $fix_ratio = 1; # Hack for addressing roundoff errors.

$decision_limit =~ s/n/\$j/g;
my $decision_threshold_fn = sub {
  my $j = shift;
  my $answer = eval $decision_limit;
  die $@ if $@;
  return $p_limit * $answer;
};

if ($header) {
  print "conversions\tdifference\tfinished\tstandard deviations\tp-value\n";
}

for my $j (1..$n) {
  my $old_portion = $portion;
  $portion = {};
  my $i = - $boundary;
  my $total_running = 0;
  while ($i <= $boundary) {
    # This hack corrects for currently known roundoff errors.
    $old_portion->{$i} *= $fix_ratio;
    $total_running += $old_portion->{$i};

    $portion->{$i - 1} += (1 - $prob_increase) * $old_portion->{$i};
    $portion->{$i + 1} += $prob_increase * $old_portion->{$i};
    $i += 2;
  }

  $fix_ratio = (1 - $finished)/$total_running;

  $boundary++;
  my $allowed_decisions = $decision_threshold_fn->($j);
  my $this_decision = 0;
  if (0 <= $bias) {
    $this_decision += $portion->{-$boundary};
  }
  if ($bias <= 0) {
    $this_decision += $portion->{$boundary};
  }
  if ($this_decision + $decided <= $allowed_decisions) {
    # We're going to stop the test here.
    $decided += $this_decision;
    $finished += delete $portion->{$boundary};
    $finished += delete $portion->{-$boundary};
    my $standard_deviations = $boundary/sqrt($j);
    my $p = 2 * uprob($standard_deviations);
    print "$j\t$boundary\t$finished\t$standard_deviations\t$p\n";
    # We only populate every other, so the boundary jumped back 2.
    $boundary -= 2;
    # There is a possibility that we decide all cases...
    last if $boundary < 0;
  }
#  print Dumper($portion);
#  <STDIN>;
}

__END__

=head1 NAME

conversion-simulation - Simulate probabilities of conversion scenarios.

=head1 SYNOPSIS

    ./conversion-simulation --p-limit 0.01 > output.txt

=head1 DESCRIPTION

When run with particular parameters, does a numerical simulation of the
likelihood of observing different conversion results, and uses those to
output information about the decision thresholds at which an experiment
could be stopped.

The output is a tab-delimited file with the following columns:

=head2 conversions

The number of conversions this cutoff threshold is reached at.

=head2 difference

The difference between the number of A and B conversions at which the
test is stopped.

=head2 finished

What fraction of the simulation is now finished running.

=head2 standard deviations

How many standard deviations out this cutoff is.  One standard deviation is
the square root of the number of conversions.

=head2 p-limit

The portion of the standard normal within this cutoff.

=head1 OPTIONS

=head2 --bias B

The underlying bias that conversions have.  For instance if this is
0.05, then version A is assumed to convert 5% better than version B.
Default 0.

=head2 --decision-limit FORMULA

The formula to use to figure out what portion of eventual decisions you
are willing to have made by the n'th conversion.  Default n/(n + 10000).
It is assumed that this formula varies from 0 to 1 as n goes from 0 to
infinity, but this is not enforced.

=head2 --header

Show a header line telling you what the columns mean.

=head2 --help

Display this documentation.

=head2 --n N

How many conversions to run the simulation for.  Default 1_000_000.

=head2 --p-limit P

In the long-run, what portion of the time you are willing to come to an
incorrect decision in the simulation.  Default 0.05, which would be a
95% confidence interval.

=head1 BUGS

None reported.

=head1 AUTHOR

Ben Tilly <btilly@gmail.com>

=head1 COPYRIGHT AND LICENSE

This software is copyright 2012 by btilly@gmail.com.

This is free software, you can redistribute it and/or modify it under the
same terms as the Perl 5 programming language itself.
