#! /usr/bin/env perl use strict; use warnings; use Data::Dumper; $Data::Dumper::Indent = 1; use Getopt::Long; use List::Util qw(sum); use Pod::Usage qw(pod2usage); my $cutoff_ratio = 1e-9; my $help; my $output; my $rules; my $win; my $options_processed = GetOptions( 'cutoff-ratio=f' => \$cutoff_ratio, 'help' => \$help, 'output=s' => \$output, 'rules=s' => \$rules, 'win=f' => \$win, ); # Obvious error checking. if ($help or not $options_processed) { pod2usage(-verbose => 2); } open(RULES, "<", $rules) or die "Can't read --rules file '$rules': $!"; our ($cutoff_conversions, $cutoff_difference); get_next_rule(); my $total_error = 0; my $cumulative_errors = 0; my $cumulative_right = 0; my $p_b = (1 + $win)/(2 + $win); my $p_a = 1 - $p_b; # We keep the distribution as an array from the left edge. # # The width of the distribution is $#distribution, and every other value is # filled in. (That is we start with a value at 0. Then values at -1, 1. # Then values at -2, 0, 2. Then -3, -1, 1, 3. etc.) # # Therefore $distribution[0] is the probability of having a difference of # -$#distribution, and $distribution[-1] is the probability of having a # difference of $#distribution. my @distribution = (1); my $conversions = 0; my $fix_ratio = 1; while ($cutoff_conversions) { $conversions++; # Figure out the new distribution my @new_distribution; for my $i (0..$#distribution) { my $prob = $distribution[$i] * $fix_ratio; $new_distribution[$i + 1] = $prob * $p_b; $new_distribution[$i] += $prob * $p_a; } # Do we need to cut things off? while ($cutoff_conversions and $cutoff_conversions <= $conversions) { # We track this error so we know when to bail out early. my $this_error = 0; while ($cutoff_difference <= $#new_distribution and @new_distribution) { $this_error += shift @new_distribution; if (@new_distribution) { $cumulative_right += pop @new_distribution; } } $cumulative_errors += $this_error; get_next_rule(); if ($this_error < $cumulative_errors * $cutoff_ratio) { # EXIT HERE (cumulative remaining errors are assumed not material) last; } } @distribution = @new_distribution; my $total_running = sum(@distribution); my $should_run = 1 - $cumulative_errors - $cumulative_right; if ($total_running) { $fix_ratio = $should_run / $total_running; } } if ($output) { open(OUT, ">", $output) or die "Can't write to '$output': $!"; } else { *OUT = *STDOUT; } print OUT "$cumulative_errors\n"; sub get_next_rule { my $line = || ""; chomp($line); ($cutoff_conversions, $cutoff_difference) = split /\t/, $line; } __END__ =head1 NAME simulate-test-error-probability - How often will a ruleset call a test wrong? =head1 SYNOPSIS ./simulate-test-error-probability --win 0.02 --rules FILE --output FILE =head1 DESCRIPTION Sets up a numerical simulation of the possible outcomes of an A/B test given a specific stopping rule. =head1 OPTIONS =head2 --cutoff-ratio FLOAT When the error at the current cutoff is less than this times the accumulated error, stop the simulation. =head2 --help Display this documentation. =head2 --output FILE The file to write the output to. =head2 --rules FILE The file with the rules in it. The file is expected to be simple tab delimited, with the first column being the number of conversions you could stop at, and the second being the threshold at which you will stop. =head2 --output FILE The file to write output to. Output is a single number - the probability of coming to the wrong conclusion. =head2 --win FLOAT How soundly version B beats version A. The conversion rate for version B is 1 + $win times better. =head1 BUGS If the win is 0, the reported errors are off by a factor of two. If the win is negative, the reported figure is not errors, but the fraction of the time that the test was correctly called. This is actually a feature because it lets this program answer questions that it couldn't otherwise. =head1 AUTHOR Ben Tilly =head1 COPYRIGHT AND LICENSE This software is copyright 2013 by btilly@gmail.com. This is free software, you can redistribute it and/or modify it under the same terms as the Perl 5 programming language itself.