#!/usr/local/bin/perl # # This script is used to simplify pedigrees. # Idea : remove the individuals without genotypes # which are not necessary for the pedigree structure. # pedin & pedout format : LINKAGE(ped, ind, father, mother, sex, DNA, ...). # nbGeno = min genotype number for an ind to be conserve. # # Usage : red_map.pl pedin=file1 pedout=resultfile nbGenu=number # Help : red_map.pl -h # Contact : Department of human Genetics CNRS UPRESA8090 # Institute of Biologie of Lille FRANCE # gaget@mail-good.pasteur-lille.fr do main(); sub help { print("\n$0 [pedin=file1(pedin.dat)] [pedout=file2(pedout.dat)] [nbGeno=number(1)] [-v] [-h]\n"); print "\t-v : verbose, -h : help\n\n"; exit; } sub main { parseArgs(); lect($pedin); famille(); calcul(); ecrire($pedout); } sub parseArgs { while ($arg = shift(@ARGV)) { $pedin = $1 if $arg =~ /pedin=(.*)/; $pedout = $1 if $arg =~ /pedout=(.*)/; $nbGeno = $1 if $arg =~ /nbGeno=(.*)/; $verbose = "-v" if $arg =~ /-v/; $help = "Y" if $arg =~ /-h/; } help() if ($help); $pedin="pedin.dat" unless ($pedin); $pedout="pedout.dat" unless ($pedout); $nbGeno=1 unless ($nbGeno); die("$0 : cannot find file $pedin\n") unless (-f $pedin); } sub lect { local($file) = @_; local($fh) = 'FH'; local($cptr) = 0; print "read $file\n" if ($verbose); open($fh, $file) || dir("$0 : cannot open $file : $!"); while(<$fh>) { $cptr++; chop; push @inList, $_; } close($fh); print "$cptr lines read from $file\n" if ($verbose); } sub famille { local($cptr) = 0; local(@ici); local($mec, $ped, $compt); local($all1, $all2); for $line (@inList) { $cptr++; $old_nb_ind++; @ici=(); @ici = split(/\t/,$line); $ped = $ici[0]; $mec = $ici[1]; if ($ped > $maxfam) {$maxfam=$ped;} if ($ped < $minfam) {$minfam=$ped;} #put ped number in pedList $tmp = grep ($ped eq $_, @pedList); push @pedList, $ped unless ($tmp); #put ind number in his pedigree indList $indList{$ped} .= "$mec "; if ($mec > $maxind[$ped]) {$maxind[$ped]=$mec;} # keep father informations $fat{$ped, $mec} = $ici[2]; $nbenf{$ped, $fat{$ped, $mec}}++; # keep mother informations $mot{$ped, $mec} = $ici[3]; $nbenf{$ped, $mot{$ped, $mec}}++; # Verification de la presence d'au moins un genotype connu dans famille #check if there is at less one genotype $compt=6; # $compt=5; $somme{$ped,$mec}=0; while (defined $ici[$compt]) { ($all1,$all2)=split(/ /,$ici[$compt]); $all[$compt,1]=$all1;$all[$compt,2]=$all2; if (($all[$compt,1]>0) && ($all[$compt,2]>0)) { $somme{$ped,$mec}++; $ped_som{$ped}++; } $compt++; } } print "$cptr lines compute from $inFile\n" if ($verbose); } sub calcul { local($ped, $ind); local(@ind); for $ped (@pedList) { @ind=(); (@ind) = split(/\s/, $indList{$ped}); if ( $ped_som{$ped} == 0 ) { for $ind (@ind) { $existe{$ped, $ind}=1; print "out :\t$ped\t$ind\n" if ($verbose); } } else { for $ind (@ind) { test($ped, $ind); } } } } sub test { local($PED, $IND) = @_; if ($nbenf{$PED, $IND} == 0) { if ( ($fat{$PED, $IND}==0) && ($mot{$PED, $IND}==0) ) { $existe{$PED, $IND}=1; print "out :\t$PED\t$IND\t\n" if ($verbose); } else { if ( $somme{$PED, $IND} < $nbGeno ) { $existe{$PED, $IND}=1; print "out :\t$PED\t$IND\n" if ($verbose); $nbenf{$PED, $mot{$PED, $IND}}--; test($PED, $mot{$PED, $IND}); $nbenf{$PED, $fat{$PED, $IND}}--; test($PED, $fat{$PED, $IND}); } } } } sub ecrire { local($file) = @_; local($fh) = 'FH'; local($cptr) = 0; local($line, $ped, $mec); local($tut); print "creat $file\n" if ($verbose); open($fh, "> $file") || dir("$0 : cannot open $file : $!"); for $line (@inList) { ($ped, $mec, $tut) = split(/\t/, $line); if ( $existe{$ped, $mec} != 1 ) { print $fh "$line\n"; $cptr++; } } close(fh); print "$cptr lines written on $file\n" if ($verbose); }