#!/usr/bin/env python
'''
Power of Cochran-Armitage trend test for case/ctrl genetic association studies
Gao Wang (*) 2012 distributed under GNU GPL
References
----------
Hum Hered 2002;53:146-152 (DOI: 10.1159/000064976)
Genet Epi 2003 (DOI: 10.1002/gepi.10245) for matrix notation for sigma under H1
Bash Example
------------
maf="0.1 0.2 0.3"
OR="1.1 1.2 1.3 1.4"
N="200 500 800"
f0=0.01
moi='additive'
alpha=0.05
for i in $maf; do
for j in $OR; do
for k in $N; do
power=`catt -b $f0 -r $j -p $i -R $k -S $k -m $moi -a $alpha`
echo $power $i $j $k $f0 $alpha $moi
done
done
done
ChangeLog
---------
2012-December-23
* Initial implementation
'''
import sys
try:
import numpy as np
from scipy.stats import norm
except ImportError as e:
sys.exit(e)
try:
from argparse import ArgumentParser
except ImportError:
sys.exit('argparse module is required (available in Python 2.7.2+ or Python 3.2.1+)')
class CATT:
def __init__(self, odr, f0, maf, n1, n2, model = 'additive'):
base_odds = f0 / (1.0 - f0)
if model == 'recessive':
# odds assuming recessive model
self.odds = np.array([base_odds, base_odds, base_odds * odr])
# genotype coding
self.X = np.array([0,0,1], dtype=float)
elif model == 'dominant':
# odds assuming dominant model
self.odds = np.array([base_odds, base_odds * odr, base_odds * odr])
# genotype coding
self.X = np.array([0,1,1], dtype=float)
elif model == 'multiplicative':
# odds assuming dominant model
self.odds = np.array([base_odds, base_odds * odr, base_odds * odr * odr])
# genotype coding
self.X = np.array([0,1,2], dtype=float)
else:
# odds assuming additive model
self.odds = np.array([base_odds, base_odds * odr, max(base_odds * (2 * odr - 1.0), 0.0)])
# genotype coding
self.X = np.array([0,1,2], dtype=float)
# sample size
self.N1 = float(n1)
self.N2 = float(n2)
self.N = float(n1 + n2)
# genotype frequency assuming HWE
self.gf = np.array([(1-maf)**2, 2*maf*(1-maf), maf**2])
# genotype frequency in cases
self.p = self.conditionalGF('cases')
# genotype frequency in ctrls
self.q = self.conditionalGF('ctrls')
def conditionalGF(self, condition = 'cases'):
'''genotype frequency conditional on disease status,
assuming complete LD between marker and disease causal variant'''
if condition == 'ctrls':
f = 1.0 - self.odds / (1.0 + self.odds)
else:
f = self.odds / (1.0 + self.odds)
P_status_and_genotype = f * self.gf
self.prevalence = np.sum(P_status_and_genotype)
if condition == 'cases':
sys.stderr.write('(disease prevalence = {0})\n'.format(np.around(self.prevalence,4)))
return P_status_and_genotype / self.prevalence
def getSigma0(self, sigma1, mu1):
'''see proof in Appendix A of Freidlin et al 2002'''
return np.sqrt(sigma1 ** 2 + mu1 ** 2)
def getMu1(self, X, N1, N2, N, p, q):
'''Equation 4 of Freidlin et al 2002'''
return np.sum(X * (p - q) * N1 * N2 / N ** 2)
def getSigma1(self, X, N1, N2, N, p, q):
'''Equation 4 of Freidlin et al 2002.
Matrix notation, Pfeiffer and Gail 2003'''
Sp = self.correlationMatrix(p)
Sq = self.correlationMatrix(q)
sigma1 = (N1 * N2 / N ** 3) * \
(N1 * np.dot(np.dot(X.T, Sp), X) + N2 * np.dot(np.dot(X.T, Sq), X))
# equivalent non-matrix notation, easier to compute but less intuitive
# sigma1 = (N1 * N2 / N ** 3) * \
# ( (N1 * (np.sum(X ** 2 * p) - np.sum(X * p) ** 2)) + \
# (N2 * (np.sum(X ** 2 * q) - np.sum(X * q) ** 2)) )
return np.sqrt(sigma1)
def power(self, alpha, alternative = 'two-sided'):
'''Equation 5 of Freidlin et al 2002'''
sigma1 = self.getSigma1(self.X, self.N1, self.N2, self.N, self.p, self.q)
mu1 = self.getMu1(self.X, self.N1, self.N2, self.N, self.p, self.q)
sigma0 = self.getSigma0(sigma1, mu1)
z = norm.ppf(1 - alpha / 2.0)
power = 1 - norm.cdf((z * sigma0 - np.sqrt(self.N) * mu1) / sigma1) + \
norm.cdf((-1.0 * z * sigma0 - np.sqrt(self.N) * mu1) / sigma1)
return power
### ###
def correlationMatrix(self, p):
'''correlation matrix where m_ii = pi * (1-pi) and m_ik = -pi*pk'''
m = np.empty((len(p),len(p),))
for i in range(len(p)):
for k in range(len(p)):
if k == i:
m[i,k] = p[i] * (1 - p[i])
else:
m[i,k] = p[i] * p[k] * -1.0
return m
if __name__ == '__main__':
parser = ArgumentParser(description='Cochran-Armitage trend test (CATT) power calculation for case/ctrl genetic association studies, implementing Freidlin et al, Hum Hered 2002;53:146-152')
parser.add_argument('-b', '--baseline_penetrance', metavar = 'f0', type = float, default = 0.01, help = 'wildtype genotype penetrance, default to 0.01')
parser.add_argument('-r', '--odds_ratio', metavar = 'gamma', type = float, default = 1.0, help = 'odds ratio, default to 1.0')
parser.add_argument('-p', '--maf', type = float, default = 0.1, help = 'minor allele frequency, default to 0.1')
parser.add_argument('-R', '--num_cases', metavar = 'N', type = int, default = 500, help = 'number of cases, default to 500')
parser.add_argument('-S', '--num_ctrls', metavar = 'N', type = int, default = 500, help = 'number of ctrls, default to 500')
parser.add_argument('-m', '--moi', default = 'additive', choices = ['additive', 'dominant', 'recessive', 'multiplicative'], help = 'mode of inheritance, default to "additive"')
parser.add_argument('-a', '--alpha', type = float, default = 0.05, help = 'significance level (test size), default to 0.05')
parser.add_argument('--alternative', default = 'two-sided', choices = ['two-sided'], help = 'alternative hypothesis, default to two-sided')
args = parser.parse_args()
#
try:
t = CATT(args.odds_ratio, args.baseline_penetrance,
args.maf, args.num_cases, args.num_ctrls, model = args.moi)
power = t.power(args.alpha, args.alternative)
print(np.around(power, 4))
except Exception as e:
sys.exit("ERROR: {0}".format(e))