#! /usr/bin/perl -w use strict; use Statistics::Distributions; use Getopt::Long; my $step = 0; my $count_by_a_yes_by_b_yes = {}; my $next_count_by_a_yes_by_b_yes; my $correct = 0; my $incorrect = 0; my $outstanding = 100; my $stop_at_portion_outstanding = 0.1; my $p_a_yes = 0.5; my $p_b_yes = 0.51; my $threshold_prob = 0.01; # You can set all of the parameters on the command line. # For the talk, I set total to 100000, and a_success, # b_success and cutoff to whatever was appropriate for # the simulation. # # It outputs a header, then the simulation results. The # fields in those results are: # # o Steps: The number of people in the test is actually # double this because we add one person to A and one # person to B at each step. # o Outstanding: How many tests are still running. Because # of the way I avoid tracking individual tests as long as I # can, you can have a fractional number outstanding. # o Correct: How many tests finished with B better than A. # o Incorrect: How many tests finished with A better than B. # # Be warned, some of the simulations took me over a day to run. # GetOptions( "a_success=f" => \$p_a_yes, "b_success=f" => \$p_b_yes, "cutoff=f" => \$threshold_prob, "total=i" => \$outstanding, "end_at=f" => \$stop_at_portion_outstanding, ) or die "Cannot process options"; my $threshold_g_test = Statistics::Distributions::chisqrdistr(1, $threshold_prob); $count_by_a_yes_by_b_yes->{0}{0} = $outstanding; my $stop_at_x_outstanding = $outstanding * $stop_at_portion_outstanding; my $last_outstanding = 0; #print "threshold: $threshold_g_test\n"; print "trials: $outstanding\n"; printf "prob(a): %.02f\n", $p_a_yes; printf "prob(b): %.02f\n", $p_b_yes; printf "threshold: %.03f\n", $threshold_prob; print " Step Outstanding Correct Incorrect\n"; while ($outstanding > $stop_at_x_outstanding) { $next_count_by_a_yes_by_b_yes = {}; # Distribute them to the next. for my $b_yes (keys %$count_by_a_yes_by_b_yes) { my $count_by_a_yes = $count_by_a_yes_by_b_yes->{$b_yes}; my @a_yeses = keys %$count_by_a_yes; for my $a_yes (keys %$count_by_a_yes) { my $count = $count_by_a_yes->{$a_yes}; my $a_success = $count * $p_a_yes; my $a_fail = $count - $a_success; my $both_success = $a_success * $p_b_yes; my $a_only_success = $a_success - $both_success; my $b_only_success = $a_fail * $p_b_yes; my $no_success = $a_fail - $b_only_success; my $both_n = int($both_success); my $both_frac = $both_success - $both_n; my $a_only_n = int($a_only_success); my $a_only_frac = $a_only_success - $a_only_n; my $b_only_n = int($b_only_success); my $b_only_frac = $b_only_success - $b_only_n; my $no_n = int($no_success); my $no_frac = $no_success - $no_n; my $to_distribute = $both_frac + $a_only_frac + $b_only_frac + $no_frac; # We really need no roundoff errors here. $to_distribute = int($to_distribute + 0.5); # print ("$both_success: $a_only_success: $b_only_success; $no_success: $to_distribute\n"); # print ("$both_n: $a_only_n; $b_only_n: $no_n\n"); # print ("$both_frac: $a_only_frac; $b_only_frac: $no_frac\n"); # ; # Distribute each one randomly and fairly. for (1..$to_distribute) { my $choice = rand(1); $choice -= $both_frac/$to_distribute; if ($choice < 0) { $both_n++; next; } $choice -= $a_only_frac/$to_distribute; if ($choice < 0) { $a_only_n++; next; } $choice -= $b_only_frac/$to_distribute; if ($choice < 0) { $b_only_n++; next; } $no_n++; } $next_count_by_a_yes_by_b_yes->{$b_yes}{$a_yes} += $no_n if $no_n; $next_count_by_a_yes_by_b_yes->{$b_yes + 1}{$a_yes} += $b_only_n if $b_only_n; $next_count_by_a_yes_by_b_yes->{$b_yes}{$a_yes + 1} += $a_only_n if $a_only_n; $next_count_by_a_yes_by_b_yes->{$b_yes + 1}{$a_yes + 1} += $both_n if $both_n; } } $step++; # Now pass through, figure out who we've decided on my @b_yeses = sort {$a <=> $b} keys %$next_count_by_a_yes_by_b_yes; for my $b_yes (@b_yeses) { my $count_by_a_yes = $next_count_by_a_yes_by_b_yes->{$b_yes}; my @a_yeses = sort {$a <=> $b} keys %$count_by_a_yes; filter_out_bottom_decisions( $b_yes, $count_by_a_yes, @a_yeses, ); # And reversing filters at the top. :-) filter_out_bottom_decisions( $b_yes, $count_by_a_yes, reverse @a_yeses, ); } $count_by_a_yes_by_b_yes = $next_count_by_a_yes_by_b_yes; if ($outstanding != $last_outstanding) { printf("% 6d% 13d% 9d% 11d\n" , $step, $outstanding, $correct, $incorrect ); } $last_outstanding = $outstanding; } sub calculate_g_test { my ($a_yes, $a_no, $b_yes, $b_no) = @_; my $a = $a_yes + $a_no; my $b = $b_yes + $b_no; my $yes = $a_yes + $b_yes; my $no = $a_no + $b_no; my $total = $a + $b; my $e_a_yes = $a * $yes / $total; my $e_a_no = $a * $no / $total; my $e_b_yes = $b * $yes / $total; my $e_b_no = $b * $no / $total; if ( $a_yes < 10 or $a_no < 10 or $b_yes < 10 or $b_no < 10 ) { # print "@_ n/a\n"; return "n/a"; } my $g_test = 2*$a_yes*log($a_yes/$e_a_yes) + 2*$a_no*log($a_no/$e_a_no) + 2*$b_yes*log($b_yes/$e_b_yes) + 2*$b_no*log($b_no/$e_b_no); # print "@_ $g_test\n"; } sub filter_out_bottom_decisions { my $b_yes = shift; my $count_by_a_yes = shift; my @a_yeses = @_; for my $a_yes (@a_yeses) { my $g_test = calculate_g_test( $a_yes, $step - $a_yes, $b_yes, $step - $b_yes, ); next if $g_test eq "n/a"; if ($g_test > $threshold_g_test) { my $count = delete $count_by_a_yes->{$a_yes} || 0; $outstanding -= $count; if ($a_yes > $b_yes) { $incorrect += $count; } else { $correct += $count; } } else { # We're into insignificant territory now return; } } }