*******************************************************************************************************************; *Program Name : CI_Single_Proportion.sas *; *Programmer Name : Jiangtang Hu *; * Jiangtanghu@gmail.com *; * Jiangtanghu.com/blog *; * *; *Purpose : Compute two-sided confidence intervals for single proportion with 11 methods: *; * 1. Simple asymptotic, Without CC | Wald *; * 2. Simple asymptotic, With CC *; * 3. Score method, Without CC | Wilson *; * 4. Score method, With CC *; * 5. Binomial-based, 'Exact' | Clopper-Pearson *; * 6. Binomial-based, Mid-p *; * 7. Likelihood-based *; * 8. Jeffreys *; * 9. Agresti-Coull, pseudo frequency, z^2/2 successes| psi = z^2/2 *; * 10. Agresti-Coull, pseudo frequency, 2 successes and 2 fail| psi = 2 *; * 11. Agresti-Coull, pseudo frequency, psi = 1 *; * 12. Agresti-Coull, pseudo frequency, psi = 3 *; * 13. Logit *; * 14. Blaker *; * *; *Input : r - the number of interested responses *; * n - total observations, 0 =< r <= n *; * alpha - 0.05 by default *; *Output : confidence intervals using 14 methods *; *Usage : %CI_Single_Proportion(r=81,n=263) *; * *; *References : Newcombe R.G., Two-sided confidence intervals for the single proportion: *; * comparison of seven methods, Statistics in Medicine, (1998) 17, 857-872 *; * *; * http://www2.jura.uni-hamburg.de/instkrim/kriminologie/Mitarbeiter/Enzmann/Software/prop.CI.r *; * *; *License : public domain, ABSOLUTELY NO WARRANTY *; *Platform : tested in SAS/Base 9.4 (TS1M2) *; *Version : V1.0 *; *Date : 21May2015 *; *******************************************************************************************************************; %macro CI_Single_Proportion(r=,n=,alpha=0.05); proc fcmp outlib=work.func.CI; function acceptbin(r, n, p) label = "computes the Blaker acceptability of p when x is observed and X is bin(n, p)"; p1 = 1 - CDF('BINOMIAL', r - 1,p,n); p2 = CDF('BINOMIAL', r,p,n); a1 = p1 + CDF('BINOMIAL', quantile('BINOM', p1, p, n)-1, p, n) ; a2 = p2 + 1 - CDF('BINOMIAL', quantile('BINOM', 1-p2, p, n), p, n) ; return (min(a1,a2)); endsub; run; options cmplib=work.func; data param; do i=1 to 14; r = &r; n = &n; alpha = α p = r/n; q = 1 - p; z = probit (1-alpha/2); output; end; run; /*method 1-5,8-14;*/ data CI5; length method $75.; set param(where=(i not in (6 7))); if i=1 then do; Method = "1. Simple asymptotic, Without CC | Wald"; se = (sqrt(&n*p*(1-p)))/n; *standard error; p_CI_low = p - z * se; p_CI_up = p + z * se; end; if i=2 then do; Method = "2. Simple asymptotic, With CC"; se = (sqrt(&n*p*(1-p)))/n; *standard error; cc = 1/(2*&n); *continuity correction; p_CI_low = p - (z * se + cc); p_CI_up = p + (z * se + cc); /* if r=0 then p_CI_low=0;*/ /* if r=n then p_CI_up =1;*/ end; if i=3 then do; Method = "3. Score method, Without CC | Wilson"; *n1=2*r+z**2; *n2=z*sqrt(z**2+4*r*q); *d=2*(n+z**2); *p_CI_low = (n1 - n2)/d; *p_CI_up = (n1 + n2)/d; p_CI_low = ( 2*r+z**2 - (z*sqrt(z**2+4*r*q)) ) / (2*(n+z**2)); p_CI_up = ( 2*r+z**2 + (z*sqrt(z**2+4*r*q)) ) / (2*(n+z**2)); end; if i=4 then do; Method = "4. Score method, With CC"; *n1=2*r+z**2; *n12=z*sqrt(z**2 - 2- 1/n + 4*p*(n*q+1)); *n22=z*sqrt(z**2 + 2- 1/n + 4*p*(n*q-1)); *d=2*(n+z**2); *p_CI_low = ( n1 -1 - n12) / d; *p_CI_up = ( n1 +1 + n22) / d; p_CI_low = ( 2*r+z**2 -1 - z*sqrt(z**2 - 2- 1/n + 4*p*(n*q+1))) / (2*(n+z**2)); p_CI_up = ( 2*r+z**2 +1 + z*sqrt(z**2 + 2- 1/n + 4*p*(n*q-1))) / (2*(n+z**2)); /* if r=0 then p_CI_low=0;*/ /* if r=n then p_CI_up =1; */ end; if i=5 then do; Method = "5. Binomial-based, 'Exact' | Clopper-Pearson"; p_CI_low =1 - betainv(1 - alpha/2,n-r+1,r); p_CI_up = betainv(1 - alpha/2,r+1 ,n-r); /* if r=0 then p_CI_low=0;*/ /* if r=n then p_CI_up =1;*/ end; if i=8 then do; Method = "8. Jeffreys"; p_CI_low = betainv( alpha/2, r+0.5,n-r+0.5); p_CI_up = betainv(1-alpha/2, r+0.5,n-r+0.5); end; if i=9 then do; Method = "9. Agresti-Coull, pseudo frequency, z^2/2 successes| psi = z^2/2"; psi = z**2/2; p2=(r+psi)/(n+2*psi); p_CI_low =p2 - z*(sqrt(p2*(1-p2)/(n+2*psi))); p_CI_up =p2 + z*(sqrt(p2*(1-p2)/(n+2*psi))); if p_CI_low<0 then p_CI_low=0; if p_CI_up>1 then p_CI_up =1; end; if i=10 then do; Method = "10. Agresti-Coull, pseudo frequency, 2 successes and 2 failures| psi = 2"; psi = 2; p2=(r+psi)/(n+2*psi); p_CI_low =p2 - z*(sqrt(p2*(1-p2)/(n+2*psi))); p_CI_up =p2 + z*(sqrt(p2*(1-p2)/(n+2*psi))); if p_CI_low<0 then p_CI_low=0; if p_CI_up>1 then p_CI_up =1; end; if i=11 then do; Method = "11. Agresti-Coull, pseudo frequency, psi = 1"; psi = 1; p2=(r+psi)/(n+2*psi); p_CI_low =p2 - z*(sqrt(p2*(1-p2)/(n+2*psi))); p_CI_up =p2 + z*(sqrt(p2*(1-p2)/(n+2*psi))); if p_CI_low<0 then p_CI_low=0; if p_CI_up>1 then p_CI_up =1; end; if i=12 then do; Method = "12. Agresti-Coull, pseudo frequency, psi = 3"; psi = 3; p2=(r+psi)/(n+2*psi); p_CI_low =p2 - z*(sqrt(p2*(1-p2)/(n+2*psi))); p_CI_up =p2 + z*(sqrt(p2*(1-p2)/(n+2*psi))); if p_CI_low<0 then p_CI_low=0; if p_CI_up>1 then p_CI_up =1; end; if i=13 then do; Method = "13. Logit"; p_CI_low=exp(log(p/(1-p)) - z*sqrt(n/(r*(n-r))))/(1+exp(log(p/(1-p)) - z*sqrt(n/(r*(n-r))))); p_CI_up =exp(log(p/(1-p)) + z*sqrt(n/(r*(n-r))))/(1+exp(log(p/(1-p)) + z*sqrt(n/(r*(n-r))))); end; if i=14 then do; Method = "14. Blaker"; tolerance=1e-05; lower = 0; upper = 1; if r ^= 0 then do; lower = quantile('BETA',alpha/2, r, n-r+1); do while (acceptbin(r, n, lower + tolerance) < (alpha)); lower = lower + tolerance; end; end; if r ^= n then do; upper = quantile('BETA',1 - alpha/2, r+1, n-r); do while (acceptbin(r, n, upper - tolerance) < (alpha)); upper = upper - tolerance; end; end; p_CI_low=lower; p_CI_up =upper; end; run; /*method 6;*/ data param6; set param(where=(i=6)); max_idx=alpha/2; min_idx=1-alpha/2; do j=0.000001 to 0.999999 by 0.00001; if (r>0 and r max_idx and r>0 and r0 and rk ; create table param7_2 as select distinct * from param where i=7 ; quit; data CI7; merge param7_2 max2; Method = "7. Likelihood-based"; if r=0 then p_CI_low=0; if r=n then p_CI_up =1; run; /*put together,1-12;*/ data CI_SP; set CI5 CI6 CI7; p_CI=compress(catx("","[",put(round(p_CI_low,0.0001),6.4),",",put(round(p_CI_up,0.0001),6.4),"]")); run; proc sort; by i;run; proc print data=CI_SP; var r n p method p_ci; run; %mend CI_Single_Proportion; /*test; filename CI url 'https://raw.github.com/Jiangtang/Programming-SAS/master/CI_Single_Proportion.sas'; %include CI; %CI_Single_Proportion(r=81,n=263); %CI_Single_Proportion(r=15,n=148); %CI_Single_Proportion(r=0, n=20 ); %CI_Single_Proportion(r=1, n=29 ); %CI_Single_Proportion(r=29,n=29 ); check with SAS data test; input grp outcome $ count; datalines; 1 f 81 1 u 182 ; proc freq data=test; tables outcome / binomial; weight Count; run; ods select BinomialCLs; proc freq data=test; tables outcome / binomial (CL=ALL); weight Count; run; ods select BinomialCLs; proc freq data=test; tables outcome / binomial (CL= WALD WILSON CLOPPERPEARSON MIDP LIKELIHOODRATIO JEFFREYS AGRESTICOULL LOGIT BLAKER ); weight Count; run; ods select BinomialCLs; proc freq data=test; tables outcome / binomial (CL = WILSON(CORRECT) WALD(CORRECT) ); weight Count; run; */