#include #include #include #include #include #include /* CUE: Calculating Undetected Errors Joshua Tauberer, Derek Gordon, Jurg Ott To find the proportion of errors that go unnoticed in n-tuples of genomic data of nuclear families. Note that allele combinations are stored as codes. For example: 1=1/1, 2=1/2, and 3=2/2 for a two-allele locus. Please refer to the CUE website for more information: http://linkage.rockefeller.edu/derek/cue */ #define MIN(a,b) ((a>b) ? b : a) #define MAX(a,b) ((a>b) ? a : b) #define MINMAXIDX(a,b) [(a>b) ? b : a][((a>b) ? a : b)] /* STRUCTS and GLOBALS and THE LIKE */ enum Consistency { consistent, inconsistent }; #define MAX_PEOPLE 15 // The max. number of people in an ntuple #define MAX_COMBINATIONS 110 // The max. number of allele combinations #define MAX_ALLELES 16 // The max. number of alleles in the polymorphism struct NTuple { char people[MAX_PEOPLE]; // Stores allele combiation code double probability; int likenessCount; double PrN0[MAX_PEOPLE*2+1]; // Buffers Pr(N0|M,i) }; // An AlleleCrossing stores the (coded) possible outcomes for a cross of two // allele combinations (those that are consistent) and their probabilities. struct HWProbability { int coeficient; int powers[MAX_ALLELES]; }; struct AlleleCrossing { int outcomes[MAX_COMBINATIONS+1]; // This stores 4* the probability. struct HWProbability probability; }; double alpha = .5; int n = 3; double alleleProbabilities[MAX_ALLELES] = {0.2,0.8}; // This will store the NTuples- dynamically allocated. struct NTuple *ntuples; // The number of ntuples in the array that ntuple will point to. long ntupleCount = 0; // The number of alleles in the polymorphism. int alleleCount = 2; int alleleCombinationCount = 3; char AlleleCombinations[MAX_COMBINATIONS+1][2]; struct AlleleCrossing AlleleCrossings[MAX_COMBINATIONS+1][MAX_COMBINATIONS+1]; int debug = 1; /* PROTOTYPES */ void DefineAlleleCombinations(int debug); double Combination(int a, int b); double Factorial(int a); double PrIErrors(int i, int pedSize); enum Consistency IsConsistent(struct NTuple ntuple); double power(double x, int y); void DefineNTuples(int debug); double ProbabilityOfConsistency(char *alleles, int i, int r, int s); struct NTuple AllelesToNTuple(char *alleles, struct NTuple *ntuple); char *NTupleToAlleles(struct NTuple ntuple, char *alleles); void PrintAlleles(char *alleles); void PrintNTuple(struct NTuple ntuple); void PrintNTupleAsAlleles(struct NTuple ntuple); int main(int argc, char *argv[]) { char *displayMode; int displayHelp = 0; int i, pedSize = 0; double sum = 0; double beta = 0; double PrN0MiPrMSum; double p, PrI = 0; double apparentErrorRate = 0, alphaIncrement = .00005; double pedErrorRate = 0; int ntuple; char alleles[MAX_PEOPLE*2]; char *bp; time_t now; long statusCtr, statusMax; int readArg = 1; time(&now); printf("\nCalculating Undetected Errors\n"); if (argc > 6) { displayMode = argv[readArg++]; n = (int)strtol(argv[readArg++], &bp, 10); pedSize = n; if (n > MAX_PEOPLE) { printf("You may only specify up to %i members in the family.\n", MAX_PEOPLE); displayHelp = 1; } else if (n < 3) { printf("There must be at least three members in a family (2 parents, at least 1 child).\n"); displayHelp = 1; } if (strcmp(argv[readArg], "true") == 0) { alpha = strtod(argv[readArg+1], &bp); apparentErrorRate = 0.0; if (alpha == 0.0) { printf("You cannot have an error rate of 0.\n\n"); displayHelp = 1; } } else if (strcmp(argv[readArg], "observed") == 0) { apparentErrorRate = strtod(argv[readArg+1], &bp); alpha = 0.0; if (apparentErrorRate == 0.0) { printf("You cannot have an error rate of 0.\n\n"); displayHelp = 1; } } else { printf("You did not specify a correct error type: 'true' or 'observed'.\n\n"); displayHelp = 1; } readArg+=2; alleleCount = (int)strtol(argv[readArg++], &bp, 10); if (alleleCount > MAX_ALLELES-1) { printf("You may only specify up to %i alleles. (You specified %i.)\n\n", MAX_ALLELES-1, alleleCount); displayHelp = 1; } else if (alleleCount <= 1) { printf("You did not provide a proper number of alleles. Valid values are between 2 and %i.\n\n", MAX_ALLELES); displayHelp = 1; } if (argc < alleleCount + readArg - 1) { printf("You did not provide enough arguments.\n\n"); displayHelp = 1; } else { sum = 0; for (i=0; i= apparentErrorRate) { printf(" Results: true error rate, alpha = %.4f error detection rate, 1-beta = %.4f proportion of pedigrees with errors = %.4f ", alpha, 1.0 - sum, pedErrorRate); break; } p = sum; } } // Free up the dynamic memory to store ntuples. free(ntuples); // Print citation/help information. printf(" When using these results, please cite the following: Gordon D, Heath SC, Ott J (1999) True pedigree errors more frequent than apparent errors for single nucleotide polymorphisms. Hum Hered 49:65-70 Gordon D, Leal SM, Heath SC, Ott J (2000) An analytic solution to single nucleotide polymorphism error-detection rates in nuclear families: implications for study design. In Pacific Symposium on Biocomputing 5 (Altman, Dunker, Hunter, Lauderdale, Klein, eds.). Honolulu, Hawaii. Questions should be directed to tauberer@for.net. "); // If not in web mode, display finish time. /*if (strcmp(argv[1], "web")) { time(&now); printf("\nProgram End: %s\n", asctime(localtime(&now))); }*/ // Done! return 0; } int AllelesToCombination(int a, int b) { int c; for (c=1; c MAX_COMBINATIONS) { printf("\nFATAL: This requires too many combinations of alleles. The max. is %d.\n", MAX_COMBINATIONS); exit(1); } } } if (debug) printf("%i entries.\n", alleleCombinationCount); // For each crossing of the two alleles... for (a=1; ab+1): (floats are used to avoid conversions later) */ double Combination(int a, int b) { double d; if (a < b) { printf("\nFATAL: Combination called a (%i) < b (%i)\n", a , b); exit(1); } else if (a == b || b == 0) { return 1; } else if ((a-1 == b) || b == 1) { return a; } d = Factorial(a) / (Factorial(b) * Factorial(a-b)); return d; } double Factorial(int a) { double b = 1; int c; if (a <= 0) { printf("\nFATAL: Factorial called with a (%i) <= 0.\n", a); exit(1); } if (a > 60) printf("\nNON-FATAL: Factorial called with a (%i) > 60. I sure wouldn't bet on results being accurate.", a); for (c=1; c<=a; c++) b *= c; return b; } // PrIErrors = Pr(i errors in tuple) is now: double PrIErrors(int i, int pedSize) { double numerator = Combination(2*pedSize, i) * power(alpha, i) * power(1.0-alpha, (2*pedSize) - i); double denominator = 1.0 - power(1.0-alpha, 2*pedSize); if (denominator == 0) { printf("\nFATAL: PrIErrors(n=%i,alpha=%g,i=%i) called returning undefined as denominator is zero.\n", n, alpha, i); exit(1); } return (numerator/denominator); } double power(double x, int y) { int i; double result = 1.0; if (y < 0) { printf("\nFATAL: pow2 called with y < 0.\n"); exit(1); } for (i=0; i0; a--) ntuples[ntupleCount].people[a-1] = counting[a]; ntuples[ntupleCount].likenessCount = 1; // If this is a consistent ntuple... if (IsConsistent(ntuples[ntupleCount]) == consistent) { // See if there's a previos ntuple that is this one in a // different order. f = 0; if (ntupleCount < 7500) { for (ct=1; ct0; a--) { if (counting[a] == (alleleCombinationCount)) { // should never be greater than this counting[a] = 1; counting[a-1]++; } } } // Multiply the likenessCount by the calculated probs. for (ct=0; ct"); printf(" %li/%li\n", ConsistentCount, PermutationCount); } return ((double)ConsistentCount)/((double)PermutationCount); } } else { return 0; } } struct NTuple AllelesToNTuple(char *alleles, struct NTuple *ntuple) { int p, a, f; for (p=0; ppeople[p] = a; break; } } if (f == 0) { printf("\nFATAL: AllelesToNTuple found improper allele combination: p=%i=>(%i, %i).", p, alleles[2*p], alleles[2*p +1]); exit(1); } } return *ntuple; } char *NTupleToAlleles(struct NTuple ntuple, char *alleles) { int p; for (p=0; p