#!/Perl/bin/perl ################################################################################# # # # Script name: Bracket_batch.plx # # Author: Anneleen Van Geystelen # # Version: v1.0 # # Created: 30/05/2011 # # Last Modified: 02/03/2011 # # # # Description: This Perl script will create a consensus of the DNA # # markers of 1 kit or 2 different kits which can have run # # twice in case of samples and only once in case of # # references. # # # # The following rules were applied when making the consensus: # # 1. Markers of samples that were analysed once will be copied. An # # exception are the markers that were analysed by both kits. # # Parentheses are always copied except when the allel in the other # # kit has no parentheses for that marker. # # # # 2. A hyphen will be ignored except when this hyphen is present in # # all PCRs for 1 marker. Then de hyphen is copied into the consensus. # # # # 3. Alleles will be copied into the consensus without soft parentheses # # when the alleles appears at least 2 and once without parentheses. # # # # 4. Alleles will be copied into the consensus with soft parentheses # # when when the alleles appears at least 2 without parentheses. # # # # 5. Alleles will be placed in hard parentheses with one star if # # marker doesn't have an allele in common between the different PCRs. # # When only 1 PCR has a result for a marker, then the alleles # # will be places in hard parentheses with two stars. # # # # 6. Alleles that appear only once with or without soft parentheses # # will be place in hard parentheses with two stars. # # # # !!! Use forward slashes in the paths when running under Windows !!! # # INPUT: # # variable: path of directory that contains the tab separated files # # case number # # name kit #1 # # path to file of kit #1 that contains name of all markers# # name kit #2 # # path to file of kit #2 that contains name of all markers# # path of output directory # # fixed: # # # # OUTPUT: output file # # # # PERL MODULES: # # # ################################################################################# ################################################################################# # VARIABLES # ################################################################################# # Get path of directory that contains the tab separated files. my $directory = $ARGV[0]; # e.g. C:/Users/Anneleen/Desktop/haakjessysteem/files/ if (substr($directory, -1) ne "/") { $directory = $directory."/"; } # Get case number. my $case = $ARGV[1]; # e.g. 6758 # Get name of first kit. my $kit1 = $ARGV[2]; # e.g. MP9-G5 # Get path of file of first kit. my $pathKit1 = $ARGV[3]; # e.g. C:/Users/Anneleen/Desktop/haakjessysteem/MP9-G5.txt # Get name of second kit. my $kit2 = $ARGV[4]; # e.g. PESI16 or '-' # Get path of file of second kit. my $pathKit2 = $ARGV[5]; # e.g. C:/Users/Anneleen/Desktop/haakjessysteem/PESI16.txt or '-' # Get path of output directory. my $outdir = $ARGV[6]; # e.g. C:/Users/Anneleen/Desktop/haakjessysteem/ if (substr($outdir, -1) ne "/") { $outdir = $outdir."/"; } # Get threshold for determining the soft brackets. my $end = $#ARGV; my @thresholds = @ARGV[7..$end]; # e.g. AMEL 0.5 D1S1656 0.75 # perl C:\Users\ANL\Desktop\Bracket_Script\Bracket_batch.pl C:\Users\ANL\Desktop\VWA\vwa\ 1584 NGMSE C:\Users\ANL\Desktop\VWA\NGMSE.txt PPESI17 C:\Users\ANL\Desktop\VWA\PPESI17.txt C:\Users\ANL\Desktop\VWA\ VWA 0.6 ################################################################################# # END OF VARIABLES # ################################################################################# ################################################################################# # SUBROUTINES # ################################################################################# # Sort cell regarding less if the elements of the array have soft brackets or not. sub sortCellBrackets { my @cell = @_; my @newCell; foreach $element (@cell) { if (substr($element, 0, 1) eq '(') { $element = substr($element, 1, -1); } push(@newCell, $element); } return sort {$a <=> $b} @newCell; } # Make an array unique. sub makeUnique { my @array = @_; my %seen; my @ret; foreach $element (sort {$a <=> $b} @array) { unless ($seen{$element}) { push(@ret, $element); $seen{$element} =1; } } return @ret; } # Remove soft brackets from string. sub trimSoftBrackets { my ($string) = @_; $string =~ s/\(//; $string =~ s/\)//; return $string; } # Remove hard brackets from string. sub trimHardBrackets { my ($string) = @_; $string =~ s/\[//; $string =~ s/\]//; return $string; } # Sort the list of samples. sub sortSamples { my (@list) = @_; my @ret; my %list; foreach $sample (sort {$a <=> $b} @list) { my ($case, $number) = split(/-/, $sample); $list{$number} = $case; } foreach $key (sort {$a <=> $b} keys %list) { $value = $list{$key}; my $element = $value."-".$key; push(@ret, $element); } return @ret; } # Remove hard brackets from string. sub trimStars { my ($string) = @_; while ($string =~ m/\*/) { $string =~ s/\*//; } return $string; } # Remove space at beginning and end of string. sub trimSpace { my ($string) = @_; while ($string =~ m/^ /) { $string =~ s/^ //; } while ($string =~ m/ $/) { $string =~ s/ $//; } return $string; } # Check parentheses of a cell. It returns 2 lists: one with all elements of the cell and one with only the elements that are surrounded by parentheses. sub checkParentheses { my (@cell) = @_; my $listParentheses; my $listAll; foreach $element (sort {$a <=> $b} @cell) { if (substr($element, 0, 1) eq "(") { $listParentheses = $listParentheses." ".substr($element, 1, -1); $listAll = $listAll." ".substr($element, 1, -1); } else { $listAll = $listAll." ".$element; } } $listParentheses = trimSpace($listParentheses); $listAll = trimSpace($listAll); my @ret = ($listParentheses, $listAll); return @ret; } # Sort elements of a cell and create 2 groups: one with and one without parentheses. sub sortCell { my ($cell) = @_; my $newCell; if ($cell eq '-') { $newCell = $cell; } else { my @cell = split(/ /, $cell); my ($listParentheses, $listAll) = checkParentheses(@cell); my @listAll = split(/ /, $listAll); my %listAll; foreach $element (sort {$a <=> $b} @listAll) { $listAll{$element} = 1; } my @listParentheses = split(/ /, $listParentheses); my %listParentheses; foreach $element (sort {$a <=> $b} @listParentheses) { $listParentheses{$element} = 1; } my @listNonParentheses; foreach $element (sort {$a <=> $b} @listAll) { if ($listParentheses{$element} != 1) { push(@listNonParentheses, $element); } } ## part of Parentheses my $newCellParentheses; if ((scalar @listParentheses) != 0 ) { foreach $element (sort {$a <=> $b} @listParentheses) { if ($element ne "") { $newCellParentheses = $newCellParentheses."$element\-"; } } while (substr($newCellParentheses, -1) eq "-") { $newCellParentheses = substr($newCellParentheses, 0, -1); } $newCellParentheses = "(".$newCellParentheses.")"; } ## part of non Parentheses my $newCellNonParentheses; if ((scalar @listNonParentheses) != 0 ) { foreach $element (sort {$a <=> $b} @listNonParentheses) { if ($element ne "") { $newCellNonParentheses = $newCellNonParentheses."$element\-"; } } while (substr($newCellNonParentheses, -1) eq "-") { $newCellNonParentheses = substr($newCellNonParentheses, 0, -1); } } ## combine 2 parts if (($newCellNonParentheses ne "") && ($newCellParentheses ne "")) { $newCell = $newCellNonParentheses." ".$newCellParentheses; } elsif ($newCellNonParentheses eq "") { $newCell = $newCellParentheses; } elsif ($newCellParentheses eq "") { $newCell = $newCellNonParentheses; } return $newCell } } # Compare 2 cells and return an intermediate consensus cell. sub compare2Cells { my ($cell1, $cell2) = @_; my $ret; ## possibility 1: cells are equal if ($cell1 eq $cell2) { $ret = sortCell($cell1); ## possibility 2: one cell is empty or contains "-" } elsif (($cell1 eq "" || $cell1 eq "-") && ($cell2 eq "" || $cell2 eq "-")) { $ret = "-"; } elsif (($cell1 eq "" || $cell1 eq "-") && ($cell2 ne "" || $cell2 ne "-")) { $ret = sortCell($cell2); } elsif (($cell2 eq "" || $cell2 eq "-") && ($cell1 ne "" || $cell1 ne "-")) { $ret = sortCell($cell1); ## possibility 2: both alleles are different } else { my @cell1 = split(/ /, $cell1); my @cell2 = split(/ /, $cell2); my ($listParentheses1, $listAll1) = checkParentheses(@cell1); my ($listParentheses2, $listAll2) = checkParentheses(@cell2); ### Create list with all alleles in first PCR. my @listAll1 = split(/ /, $listAll1); my %listAll1; foreach $element (sort {$a <=> $b} @listAll1) { $listAll1{$element} = 1; } ### Create list with alleles with parentheses in first PCR. my @listParentheses1 = split(/ /, $listParentheses1); my %listParentheses1; foreach $element (sort {$a <=> $b} @listParentheses1) { $listParentheses1{$element} = 1; } ### Create list with all alleles in second PCR. my @listAll2 = split(/ /, $listAll2); my %listAll2; foreach $element (sort {$a <=> $b} @listAll2) { $listAll2{$element} = 1; } ### Create list with alleles with parentheses in second PCR. my @listParentheses2 = split(/ /, $listParentheses2); my %listParentheses2; foreach $element (sort {$a <=> $b} @listParentheses2) { $listParentheses2{$element} = 1; } ### Create list with all alleles from first and second PCR. my %listUnion; foreach $element (sort {$a <=> $b} @listAll1) { $listUnion{$element} = 1; } foreach $element (sort {$a <=> $b} @listAll2) { $listUnion{$element} = 1; } my @listUnion; foreach $key (sort {$a <=> $b} keys %listUnion) { if ($key ne '') { push(@listUnion, $key); } } ### Create new cell. my $newCell; foreach $element (sort {$a <=> $b} @listUnion) { if (($listAll1{$element} == 1) && ($listAll2{$element} == 1)) { if (($listParentheses1{$element} == 1) && ($listParentheses2{$element} == 1)) { $newCell = $newCell."(".$element.")-"; } else { $newCell = $newCell.$element."-"; } } else { if (($listParentheses1{$element} == 1) && ($listParentheses2{$element} == 1)) { $newCell = $newCell."(".$element.")-"; } else { $newCell = $newCell.$element."-"; } } } if (substr($newCell, -1) eq '-') { $newCell = substr($newCell, 0, -1); } $newCell = checkStars($newCell); while ($newCell =~ m/ /) { $newCell =~ s/ / /; } $newCell = trimSpace($newCell); $ret = $newCell; } return $ret; } # Place hard parentheses with one star around the elements. sub oneStar { my (@cell) = @_; my $ret; my @listElements; foreach $allele (sort {$a <=> $b} @cell) { $allele = trimSoftBrackets($allele); $allele = trimHardBrackets($allele); push(@listElements, $allele); } my $oneStar; foreach $allele (sort {$a <=> $b} @listElements) { $oneStar = $oneStar.$allele."\-"; } $oneStar = "[".substr($oneStar, 0, -1)."]*"; $ret = $oneStar; return $ret; } # Places hard parentheses with two stars around the elements. sub twoStar { my (@cell) = @_; my $ret; my @listElements; foreach $allele (sort {$a <=> $b} @cell) { $allele = trimSoftBrackets($allele); $allele = trimHardBrackets($allele); push(@listElements, $allele); } my $twoStar; foreach $allele (sort {$a <=> $b} @listElements) { $twoStar = $twoStar.$allele."\-"; } $twoStar = "[".substr($twoStar, 0, -1)."]**"; $ret = $twoStar; return $ret; } # This procedure places soft parentheses around the elements. sub softParentheses { my (@cell) = @_; my $ret; my @listElements; foreach $allele (sort {$a <=> $b} @cell) { $allele = trimSoftBrackets($allele); $allele = trimHardBrackets($allele); push(@listElements, $allele); } my $soft; foreach $allele (sort {$a <=> $b} @listElements) { $soft = $soft.$allele."\-"; } $soft = "(".substr($soft, 0, -1).")"; $ret = $soft; return $ret; } # Determine which elements of an intermediate consensus cell are of which kind (one star/ two star / soft parentheses / no parentheses) and groups them. # Return an ordered consensus cell sub checkStars { my ($cell) = @_; my $ret; my @cellSplit = split(/\-/, $cell); my @listsoftParentheses; my @listoneStar; my @listtwoStar; my @listNone; foreach $allele (sort {$a <=> $b} @cellSplit) { my $firstChar = substr($allele, 0, 1); if ($firstChar eq "\(") { $allele = trimSoftBrackets($allele); push(@listsoftParentheses, $allele); } elsif ($firstChar eq "\[") { my $beforeLastChar = substr($allele, -2,1); $allele = trimHardBrackets($allele); $allele = trimStars($allele); if ($beforeLastChar eq "*") { push(@listtwoStar, $allele); } else { push(@listoneStar, $allele); } } else { push(@listNone, $allele); } } my $OrderedParentheses = softParentheses(@listsoftParentheses); my $OrderedoneStar = oneStar(@listoneStar); my $OrderedtwoStar = twoStar(@listtwoStar); my $newCell; foreach $element (sort {$a <=> $b} @listNone) { $newCell = $newCell."$element\-"; } $newCell= substr($newCell, 0, -1); if (length($OrderedParentheses) > 2) {$newCell = $newCell." $OrderedParentheses"} else {$newCell = $newCell." "} if (length($OrderedoneStar) > 3) {$newCell = $newCell." $OrderedoneStar"} else {$newCell = $newCell." "} if (length($OrderedtwoStar) > 4) {$newCell = $newCell." $OrderedtwoStar"} else {$newCell = $newCell." "} return $newCell } # Create consensus of 3 cells that aren't empty or equal to '-'. sub consensus2Cells { my ($cell1, $cell2,) = @_; my ($listParentheses1, $listAll1) = checkParentheses(split(/ /, $cell1)); my @listAll1 = split(/ /, $listAll1); my %listAll1; foreach $element (sort {$a <=> $b} @listAll1) { $listAll1{$element} = 1; } my @listParentheses1 = split(/ /, $listParentheses1); my %listParentheses1; foreach $element (sort {$a <=> $b} @listParentheses1) { $listParentheses1{$element} = 1; } my ($listParentheses2, $listAll2) = checkParentheses(split(/ /, $cell2)); my @listAll2 = split(/ /, $listAll2); my %listAll2; foreach $element (sort {$a <=> $b} @listAll2) { $listAll2{$element} = 1; } my @listParentheses2 = split(/ /, $listParentheses2); my %listParentheses2; foreach $element (sort {$a <=> $b} @listParentheses2) { $listParentheses2{$element} = 1; } my @listUnion = (@listAll1, @listAll2); @listUnion = makeUnique(@listUnion); # Remove empty elements. my @listUnionCopy; foreach $element (sort {$a <=> $b} @listUnion) { push (@listUnionCopy, $element); } @listUnion = (); foreach $element (sort {$a <=> $b} @listUnionCopy) { if ($element ne "" && $element ne "-") { push(@listUnion, $element); } } my $newCell; if (scalar(@listUnion) == (scalar(@listAll1) + scalar(@listAll2))) { foreach $element (sort {$s <=> $b} @listUnion) { $newCell = $newCell."$element-"; } $newCell = "[".substr($newCell, 0, -1)."]*"; } else { foreach $element (sort {$a <=> $b} @listUnion) { # allele appears once ## in 1 if ($listAll1{$element} == 1 && $listAll2{$element} != 1 && $listAll3{$element} != 1) { $newCell = $newCell."\[$element\]**-"; ## in 2 } elsif ($listAll1{$element} != 1 && $listAll2{$element} == 1 && $listAll3{$element} != 1) { $newCell = $newCell."\[$element\]**-"; # allele appears twice ## in 1 & 2 } elsif ($listAll1{$element} == 1 && $listAll2{$element} == 1 && $listAll3{$element} != 1 ) { if ($listParentheses1{$element} == 1 && $listParentheses2{$element} == 1) { $newCell = $newCell."\($element\)-"; } else { $newCell = $newCell."$element-"; } } } $newCell = checkStars(substr($newCell, 0, -1)); } return $newCell; } # Create consensus of 3 cells that aren't empty or equal to '-'. sub consensus3Cells { my ($cell1, $cell2, $cell3) = @_; my ($listParentheses1, $listAll1) = checkParentheses(split(/ /, $cell1)); my @listAll1 = split(/ /, $listAll1); my %listAll1; foreach $element (sort {$a <=> $b} @listAll1) { $listAll1{$element} = 1; } my @listParentheses1 = split(/ /, $listParentheses1); my %listParentheses1; foreach $element (sort {$a <=> $b} @listParentheses1) { $listParentheses1{$element} = 1; } my ($listParentheses2, $listAll2) = checkParentheses(split(/ /, $cell2)); my @listAll2 = split(/ /, $listAll2); my %listAll2; foreach $element (sort {$a <=> $b} @listAll2) { $listAll2{$element} = 1; } my @listParentheses2 = split(/ /, $listParentheses2); my %listParentheses2; foreach $element (sort {$a <=> $b} @listParentheses2) { $listParentheses2{$element} = 1; } my ($listParentheses3, $listAll3) = checkParentheses(split(/ /, $cell3)); my @listAll3 = split(/ /, $listAll3); my %listAll3; foreach $element (sort {$a <=> $b} @listAll3) { $listAll3{$element} = 1; } my @listParentheses3 = split(/ /, $listParentheses3); my %listParentheses3; foreach $element (sort {$a <=> $b} @listParentheses3) { $listParentheses3{$element} = 1; } my @listUnion = (@listAll1, @listAll2,@listAll3); @listUnion = makeUnique(@listUnion); # Remove empty elements. my @listUnionCopy; foreach $element (sort {$a <=> $b} @listUnion) { push (@listUnionCopy, $element); } @listUnion = (); foreach $element (sort {$a <=> $b} @listUnionCopy) { if ($element ne "" && $element ne "-") { push(@listUnion, $element); } } my $newCell; if (scalar(@listUnion) == (scalar(@listAll1) + scalar(@listAll2) + scalar(@listAll3))) { foreach $element (sort {$a <=> $b} @listUnion) { $newCell = $newCell."$element-"; } $newCell = "[".substr($newCell, 0, -1)."]*"; } else { foreach $element (sort {$a <=> $b} @listUnion) { # allele appears once ## in 1 if ($listAll1{$element} == 1 && $listAll2{$element} != 1 && $listAll3{$element} != 1) { $newCell = $newCell."\[$element\]**-"; ## in 2 } elsif ($listAll1{$element} != 1 && $listAll2{$element} == 1 && $listAll3{$element} != 1) { $newCell = $newCell."\[$element\]**-"; ## in 3 } elsif ($listAll1{$element} != 1 && $listAll2{$element} != 1 && $listAll3{$element} == 1) { $newCell = $newCell."\[$element\]**-"; # allele appears twice ## in 1 & 2 } elsif ($listAll1{$element} == 1 && $listAll2{$element} == 1 && $listAll3{$element} != 1 ) { if ($listParentheses1{$element} == 1 && $listParentheses2{$element} == 1) { $newCell = $newCell."\($element\)-"; } else { $newCell = $newCell."$element-"; } ## in 1 & 3 } elsif ( $listAll1{$element} == 1 && $listAll2{$element} != 1 && $listAll3{$element} == 1) { if ($listParentheses1{$element} == 1 && $listParentheses3{$element} == 1 ) { $newCell = $newCell."\($element\)-"; } else { $newCell = $newCell."$element\-"; } ## in 2 & 3 } elsif ($listAll1{$element} != 1 && $listAll2{$element} == 1 && $listAll3{$element} == 1) { if ($listParentheses2{$element} == 1 && $listParentheses3{$element} == 1) { $newCell = $newCell."\($element\)-"; } else { $newCell = $newCell."$element\-"; } # allele appears 3 times ## in 1 & 2 & 3 } elsif ($listAll1{$element} == 1 && $listAll2{$element} == 1 && $listAll3{$element} == 1) { if ($listParentheses1{$element} == 1 && $listParentheses2{$element} == 1 && $listParentheses3{$element} == 1) { $newCell = $newCell."\($element\)-"; } else { $newCell = $newCell."$element\-"; } } } $newCell = checkStars(substr($newCell, 0, -1)); } return $newCell; } # Create consensus of 4 cells that aren't empty or equal to '-'. sub consensus4Cells { my ($cell1, $cell2, $cell3, $cell4) = @_; my ($listParentheses1, $listAll1) = checkParentheses(split(/ /, $cell1)); my @listAll1 = split(/ /, $listAll1); my %listAll1; foreach $element (sort {$a <=> $b} @listAll1) { $listAll1{$element} = 1; } my @listParentheses1 = split(/ /, $listParentheses1); my %listParentheses1; foreach $element (sort {$a <=> $b} @listParentheses1) { $listParentheses1{$element} = 1; } my ($listParentheses2, $listAll2) = checkParentheses(split(/ /, $cell2)); my @listAll2 = split(/ /, $listAll2); my %listAll2; foreach $element (sort {$a <=> $b} @listAll2) { $listAll2{$element} = 1; } my @listParentheses2 = split(/ /, $listParentheses2); my %listParentheses2; foreach $element (sort {$a <=> $b} @listParentheses2) { $listParentheses2{$element} = 1; } my ($listParentheses3, $listAll3) = checkParentheses(split(/ /, $cell3)); my @listAll3 = split(/ /, $listAll3); my %listAll3; foreach $element (sort {$a <=> $b} @listAll3) { $listAll3{$element} = 1; } my @listParentheses3 = split(/ /, $listParentheses3); my %listParentheses3; foreach $element (sort {$a <=> $b} @listParentheses3) { $listParentheses3{$element} = 1; } my ($listParentheses4, $listAll4) = checkParentheses(split(/ /, $cell4)); my @listAll4 = split(/ /, $listAll4); my %listAll4; foreach $element (sort {$a <=> $b} @listAll4) { $listAll4{$element} = 1; } my @listParentheses4 = split(/ /, $listParentheses4); my %listParentheses4; foreach $element (sort {$a <=> $b} @listParentheses4) { $listParentheses4{$element} = 1; } my @listUnion = (@listAll1, @listAll2,@listAll3, @listAll4); @listUnion = makeUnique(@listUnion); # Remove empty elements. my @listUnionCopy; foreach $element (sort {$a <=> $b} @listUnion) { push (@listUnionCopy, $element); } @listUnion = (); foreach $element (sort {$a <=> $b} @listUnionCopy) { if ($element ne "" && $element ne "-") { push(@listUnion, $element); } } my $newCell; if (scalar(@listUnion) == (scalar(@listAll1) + scalar(@listAll2) + scalar(@listAll3) + scalar(@listAll4))) { foreach $element (sort {$a <=> $b} @listUnion) { $newCell = $newCell."$element-"; } $newCell = "[".substr($newCell, 0, -1)."]*"; } else { foreach $element (sort {$a <=> $b} @listUnion) { # allele appears once if (($listAll1{$element} == 1 && $listAll2{$element} != 1 && $listAll3{$element} != 1 && $listAll4{$element} != 1) || ($listAll1{$element} != 1 && $listAll2{$element} == 1 && $listAll3{$element} != 1 && $listAll4{$element} != 1) || ($listAll1{$element} != 1 && $listAll2{$element} != 1 && $listAll3{$element} == 1 && $listAll4{$element} != 1) || ($listAll1{$element} != 1 && $listAll2{$element} != 1 && $listAll3{$element} != 1 && $listAll4{$element} == 1)) { $newCell = $newCell."\[$element\]**-"; # allele appears twice ## in 1 & 2 } elsif (($listAll1{$element} == 1 && $listAll2{$element} == 1 && $listAll3{$element} != 1 && $listAll4{$element} != 1)) { if ($listParentheses1{$element} == 1 && $listParentheses2{$element} == 1) { $newCell = $newCell."\($element\)-"; } else { $newCell = $newCell."$element-"; } ## in 3 & 4 } elsif (($listAll1{$element} != 1 && $listAll2{$element} != 1 && $listAll3{$element} == 1 && $listAll4{$element} == 1)) { if ($listParentheses3{$element} == 1 && $listParentheses4{$element} == 1) { $newCell = $newCell."\($element\)-"; } else { $newCell = $newCell."$element\-"; } ## in 1 & 3 } elsif (( $listAll1{$element} == 1 && $listAll2{$element} != 1 && $listAll3{$element} == 1 && $listAll4{$element} != 1)) { if ($listParentheses1{$element} == 1 && $listParentheses3{$element} == 1 ) { $newCell = $newCell."\($element\)-"; } else { $newCell = $newCell."$element\-"; } ## in 2 & 4 } elsif (($listAll1{$element} != 1 && $listAll2{$element} == 1 && $listAll3{$element} != 1 && $listAll4{$element} == 1)) { if ($listParentheses2{$element} == 1 && $listParentheses4{$element} == 1) { $newCell = $newCell."\($element\)-"; } else { $newCell = $newCell."$element\-"; } ## in 1 & 4 } elsif (($listAll1{$element} == 1 && $listAll2{$element} != 1 && $listAll3{$element} != 1 && $listAll4{$element} == 1)) { if ($listParentheses1{$element} == 1 && $listParentheses4{$element} == 1) { $newCell = $newCell."\($element\)-"; } else { $newCell = $newCell."$element\-"; } ## in 2 & 3 } elsif (($listAll1{$element} != 1 && $listAll2{$element} == 1 && $listAll3{$element} == 1 && $listAll4{$element} != 1)) { if ($listParentheses2{$element} == 1 && $listParentheses3{$element} == 1) { $newCell = $newCell."\($element\)-"; } else { $newCell = $newCell."$element\-"; } # allele appears 3 times ## in 1 & 2 & 3 } elsif (($listAll1{$element} == 1 && $listAll2{$element} == 1 && $listAll3{$element} == 1 && $listAll4{$element} != 1)) { if ($listParentheses1{$element} == 1 && $listParentheses2{$element} == 1 && $listParentheses3{$element} == 1) { $newCell = $newCell."\($element\)-"; } else { $newCell = $newCell."$element\-"; } ## in 1 & 2 & 4 } elsif (($listAll1{$element} == 1 && $listAll2{$element} == 1 && $listAll3{$element} != 1 && $listAll4{$element} == 1)) { if ($listParentheses1{$element} == 1 && $listParentheses2{$element} == 1 && $listParentheses4{$element} == 1 ) { $newCell = $newCell."\($element\)-"; } else { $newCell = $newCell."$element\-"; } ## in 1 & 3 & 4 } elsif (($listAll1{$element} == 1 && $listAll2{$element} != 1 && $listAll3{$element} == 1 && $listAll4{$element} == 1)) { if ($listParentheses1{$element} == 1 && $listParentheses3{$element} == 1 && $listParentheses4{$element} == 1 ) { $newCell = $newCell."\($element\)-"; } else { $newCell = $newCell."$element\-"; } ## in 2 & 3 & 4 } elsif (($listAll1{$element} != 1 && $listAll2{$element} == 1 && $listAll3{$element} == 1 && $listAll4{$element} == 1)) { if ($listParentheses2{$element} == 1 && $listParentheses3{$element} == 1 && $listParentheses4{$element} == 1) { $newCell = $newCell."\($element\)-"; } else { $newCell = $newCell."$element\-"; } # allele appears 4 times } else { if ($listParentheses1{$element} == 1 && $listParentheses2{$element} == 1 && $listParentheses3{$element} == 1 && $listParentheses4{$element} == 1) { $newCell = $newCell."\($element\)-"; } else { $newCell = $newCell."$element\-"; } } } $newCell = checkStars(substr($newCell, 0, -1)); } return $newCell; } # Compare 4 cells and return an intermediate consensus cell. sub compare4Cells { my ($cell1, $cell2, $cell3, $cell4) = @_; $cell1 = trimSpace($cell1); $cell2 = trimSpace($cell2); $cell3 = trimSpace($cell3); $cell4 = trimSpace($cell4); my $ret; ## possibility 1: alleles are tested in both kits and these 4 cells are equal if (($cell1 eq $cell2 ) && ($cell2 eq $cell3 ) && ($cell3 eq $cell4 )) { if ($cell1 eq '-') { $ret = '-'; } else { $ret = sortCell($cell1); } ## possibility 2: alleles are tested in only one kit and these cells are equal } elsif (($cell1 eq $cell2 ) && ($cell1 ne "" ) && ($cell3 eq $cell4) && ($cell3 eq "")) { if ($cell1 eq '-') { $ret = '-'; } else { $ret = sortCell($cell1); } } elsif (($cell1 eq $cell2 ) && ($cell1 eq "" ) && ($cell3 eq $cell4) && ($cell3 ne "")) { if ($cell3 eq '-') { $ret = '-'; } else { $ret = sortCell($cell3); } ## possibility 3: alleles are tested in only one kit and these cells are not equal } elsif (($cell1 ne $cell2 ) && ($cell1 ne "" ) && ($cell2 ne "" ) && ($cell3 eq $cell4) && ($cell3 eq "")) { if ($cell1 eq '-') { my @cell2 = split(/ /, $cell2); my $newCell; @cell2 = sortCellBrackets(@cell2); foreach $element (@cell2) { $element = trimSpace($element); if (substr($element, 0, 1) eq '(') { $element = substr($element, 1, -1); } $newCell = $newCell.$element."-"; } $newCell = "[".substr($newCell, 0, -1)."]**"; $ret = $newCell; } elsif ($cell2 eq '-') { my @cell1 = split(/ /, $cell1); my $newCell; @cell1 = sortCellBrackets(@cell1); foreach $element (@cell1) { $element = trimSpace($element); if (substr($element, 0, 1) eq '(') { $element = substr($element, 1, -1); } $newCell = $newCell.$element."-"; } $newCell = "[".substr($newCell, 0, -1)."]**"; $ret = $newCell; } else { my ($listParentheses1, $listAll1) = checkParentheses(split(/\ /, $cell1)); my @listAll1 = split(/ /, $listAll1); my %listAll1; foreach $element (sort {$a <=> $b} @listAll1) { $listAll1{$element} = 1; } my @listParentheses1 = split(/ /, $listParentheses1); my %listParentheses1; foreach $element (sort {$a <=> $b} @listParentheses1) { $listParentheses1{$element} = 1; } my ($listParentheses2, $listAll2) = checkParentheses(split(/\ /, $cell2)); my @listAll2 = split(/ /, $listAll2); my %listAll2; foreach $element (sort {$a <=> $b} @listAll2) { $listAll2{$element} = 1; } my @listParentheses2 = split(/ /, $listParentheses2); my %listParentheses2; foreach $element (sort {$a <=> $b} @listParentheses2) { $listParentheses2{$element} = 1; } my @listUnion = (@listAll1, @listAll2); @listUnion = makeUnique(@listUnion); # Remove empty elements. my @listUnionCopy; foreach $element (sort {$a <=> $b} @listUnion) { push (@listUnionCopy, $element); } @listUnion = (); foreach $element (sort {$a <=> $b} @listUnionCopy) { if ($element ne "" && $element ne "-") { push(@listUnion, $element); } } my $newCell; if (scalar(@listUnion) == (scalar(@listAll1) + scalar(@listAll2))) { foreach $element (sort {$a <=> $b} @listUnion) { $newCell = $newCell."$element-"; } $ret = "[".substr($newCell, 0, -1)."]*"; } else { foreach $element (sort {$a <=> $b} @listUnion) { if (($listAll1{$element} == 1) && ($listAll2{$element} == 1)) { if (($listParentheses1{$element} == 1) && ($listParentheses2{$element} == 1)){ $newCell = $newCell."\($element\)-"; } else { $newCell = $newCell."$element\-"; } } else { $newCell = $newCell."\[$element\]**-"; } } $newCell = substr($newCell, 0, -1); $ret = checkStars($newCell); } } } elsif (($cell1 eq $cell2 ) && ($cell1 eq "" ) && ($cell3 ne $cell4) && ($cell3 ne "") && ($cell4 ne "" )) { if ($cell3 eq '-') { my @cell4 = split(/ /, $cell4); my $newCell; @cell4 = sortCellBrackets(@cell4); foreach $element (@cell4) { $element = trimSpace($element); if (substr($element, 0, 1) eq '(') { $element = substr($element, 1, -1); } $newCell = $newCell.$element."-"; } $newCell = "[".substr($newCell, 0, -1)."]**"; $ret = $newCell; } elsif ($cell4 eq '-') { my @cell3 = split(/ /, $cell3); my $newCell; @cell3 = sortCellBrackets(@cell3); foreach $element (@cell3) { $element = trimSpace($element); if (substr($element, 0, 1) eq '(') { $element = substr($element, 1, -1); } $newCell = $newCell.$element."-"; } $newCell = "[".substr($newCell, 0, -1)."]**"; $ret = $newCell; } else { my ($listParentheses3, $listAll3) = checkParentheses(split(/\ /, $cell3)); my @listAll3 = split(/ /, $listAll3); my %listAll3; foreach $element (sort {$a <=> $b} @listAll3) { $listAll3{$element} = 1; } my @listParentheses3 = split(/ /, $listParentheses3); my %listParentheses3; foreach $element (sort {$a <=> $b} @listParentheses3) { $listParentheses3{$element} = 1; } my ($listParentheses4, $listAll4) = checkParentheses(split(/\ /, $cell4)); my @listAll4 = split(/ /, $listAll4); my %listAll4; foreach $element (sort {$a <=> $b} @listAll4) { $listAll4{$element} = 1; } my @listParentheses4 = split(/ /, $listParentheses4); my %listParentheses4; foreach $element (sort {$a <=> $b} @listParentheses4) { $listParentheses4{$element} = 1; } my @listUnion = (@listAll3, @listAll4); @listUnion = makeUnique(@listUnion); # Remove empty elements. my @listUnionCopy; foreach $element (sort {$a <=> $b} @listUnion) { push (@listUnionCopy, $element); } @listUnion = (); foreach $element (sort {$a <=> $b} @listUnionCopy) { if ($element ne "" && $element ne "-") { push(@listUnion, $element); } } my $newCell; if (scalar(@listUnion) == (scalar(@listAll3) + scalar(@listAll4))) { foreach $element (sort {$a <=> $b} @listUnion) { $newCell = $newCell."$element-"; } $ret = "[".substr($newCell, 0, -1)."]*"; } else { foreach $element (sort {$a <=> $b} @listUnion) { if (($listAll3{$element} == 1) && ($listAll4{$element} == 1)) { if (($listParentheses3{$element} == 1) && ($listParentheses4{$element} == 1)) { $newCell = $newCell."\($element\)-"; } else { $newCell = $newCell."$element\-"; } } else { $newCell = $newCell."\[$element\]**-"; } } $newCell = substr($newCell,0, -1); $ret = checkStars($newCell); } } ## possibility 4: alleles are tested in both kits and these 4 cells are not equal } else { ### possibility 4.1: 3 cells are "-" #### 2 & 3 & 4 if ($cell2 eq '-' && $cell3 eq '-' && $cell4 eq '-') { my @cell1 = split(/ /, $cell1); my $newCell; foreach $element (sort {$a <=> $b} @cell1) { $element = trimSpace($element); if (substr($element, 0, 1) eq '(') { $element = substr($element, 1, -1); } $newCell = $newCell.$element."-"; } $newCell = "[".substr($newCell, 0, -1)."]**"; $ret = $newCell; #### 1 & 3 & 4 } elsif ($cell1 eq '-' && $cell3 eq '-' && $cell4 eq '-') { my @cell2 = split(/ /, $cell2); my $newCell; foreach $element (sort {$a <=> $b} @cell2) { $element = trimSpace($element); if (substr($element, 0, 1) eq '(') { $element = substr($element, 1, -1); } $newCell = $newCell.$element."-"; } $newCell = "[".substr($newCell, 0, -1)."]**"; $ret = $newCell; #### 1 & 2 & 4 } elsif ($cell1 eq '-' && $cell2 eq '-' && $cell4 eq '-') { my @cell3 = split(/ /, $cell3); my $newCell; foreach $element (sort {$a <=> $b} @cell3) { $element = trimSpace($element); if (substr($element, 0, 1) eq '(') { $element = substr($element, 1, -1); } $newCell = $newCell.$element."-"; } $newCell = "[".substr($newCell, 0, -1)."]**"; $ret = $newCell; #### 1 & 2 & 3 } elsif ($cell1 eq '-' && $cell2 eq '-' && $cell3 eq '-') { my @cell4 = split(/ /, $cell4); my $newCell; foreach $element (sort {$a <=> $b} @cell4) { $element = trimSpace($element); if (substr($element, 0, 1) eq '(') { $element = substr($element, 1, -1); } $newCell = $newCell.$element."-"; } $newCell = "[".substr($newCell, 0, -1)."]**"; $ret = $newCell; ### possibility 4.2: 2 cells are "-" #### 1 & 2 } elsif ($cell1 eq '-' && $cell2 eq '-') { $ret = consensus2Cells($cell3, $cell4); #### 3 & 4 } elsif ($cell3 eq '-' && $cell4 eq '-') { $ret = consensus2Cells($cell1, $cell2); #### 1 & 3 } elsif ($cell1 eq '-' && $cell3 eq '-') { $ret = consensus2Cells($cell2, $cell4); #### 2 & 4 } elsif ($cell2 eq '-' && $cell4 eq '-') { $ret = consensus2Cells($cell1, $cell3); #### 1 & 4 } elsif ($cell1 eq '-' && $cell4 eq '-') { $ret = consensus2Cells($cell2, $cell3); #### 2 & 3 } elsif ($cell2 eq '-' && $cell3 eq '-') { $ret = consensus2Cells($cell1, $cell4); ### possibility 4.3: 1 cell is "-" #### 1 } elsif ($cell1 eq '-') { $ret = consensus3Cells($cell2, $cell3, $cell4); #### 2 } elsif ($cell2 eq '-') { $ret = consensus3Cells($cell1, $cell3, $cell4); #### 3 } elsif ($cell3 eq '-') { $ret = consensus3Cells($cell1, $cell2, $cell4); #### 4 } elsif ($cell4 eq '-') { $ret = consensus3Cells($cell1, $cell2, $cell3); } else { $ret = consensus4Cells($cell1, $cell2, $cell3, $cell4); } } return $ret; } ################################################################################# # END OF SUBROUTINES # ################################################################################# ####################### Check validity of arguments ############################# # Check validity of number of arguments. if (scalar @ARGV < 8) { print "Please give all arguments.\n"; die; # Check validity of input directory. } elsif (!(-d $directory)) { print "Please give a valid input folder.\n";die; # Check validity of case number. } elsif ($case <= 0) { print "Please give a valid case number.\n";die; # Check validity of file of kit #1. } elsif (!(-f $pathKit1)) { print "Please give a valid path to file of kit #1.\n";die; # Check validity of file of kit #2. } elsif (!(-f $pathKit2) && $pathKit2 ne '-') { print "Please give a valid path to file of kit #2.\n";die; # Check validity of output directory. } elsif (!(-d $outdir)) { print "Please give a valid path to output folder.\n";die; } # Check validity of threshold. my $odd = 0; foreach $threshold (@thresholds) { if ($odd == 1) { if ($threshold > 1 || $threshold < 0) { print "Please give valid thresholds.\n";die; } $odd = 0; } else { $odd = 1; } } ########################### Create hash of threshold ############################ %thresholds; my $odd = 0; my $key; my $value; foreach $threshold (@thresholds) { if ($odd == 1) { $value = $threshold; $thresholds{$key} = $value; $odd = 0; } else { $key = uc $threshold; $odd = 1; } } ######################## Create hashes for each marker ########################## # Declare array that will contain all markers. my @markers; # Declare array that will contail all markers of kit #1. my @markersKit1; # Declare array that will contail all markers of kit #2. my @markersKit2; # Put all markers of kit #1 in array. open (KIT1, "< $pathKit1"); while () { my $line = $_; chomp($line); push(@markers, (uc $line)); push(@markersKit1, (uc $line)); } close KIT1; # Put all markers of kit #2 in array. if ($pathKit2 ne '-') { open (KIT2, "< $pathKit2"); while () { my $line = $_; chomp($line); push(@markers, (uc $line)); push(@markersKit2, (uc $line)); } close KIT2; } # Remove duplicated markers. @markers = makeUnique(@markers); # Create the hashes for each marker. foreach my $marker (sort {$a <=> $b} @markers) { $marker = uc $marker; %{$marker}; } ####################### Get all files in folder. ################################ opendir(DIR, "$directory"); my @files = readdir(DIR); closedir(DIR); # Remove file "." shift(@files); # Remove file ".." shift(@files); ############################### Read all files. ################################# # Create array that will contain the name of all samples. my @samples; # Create array with all names of sample - marker - kit - pcr combinations. my @cells; # Create hash with all name of sample - kit - pcr combinations. my %cells; # Read in each file. foreach $file (sort {$a <=> $b} @files) { $file = $directory.$file; open (IN, "< $file") || die " Could not open input file: $file\n"; while () { my $line = $_; my @splittedLine = split(/\t/, $line); my @splittedSampleName = split(/\_/, $splittedLine[1]); my @splittedSampleNumber = split(/\-/, $splittedSampleName[2]); if ($splittedSampleNumber[0] eq $case) { my $kit = $splittedSampleName[5]; my $pcr = $splittedSampleName[6]; my $sample = $splittedSampleName[2]; my $marker = $splittedLine[2]; $marker = uc $marker; if ($marker ne "") { my $allele = $splittedLine[3]; if ($allele eq "") { $allele = "-"; } my $height = $splittedLine[5]; my $info = $sample."_".$marker."_".$kit."_".$pcr; $$info{$allele} = $height; push(@samples, $sample); push(@cells, $info); my $info2 = $sample."_".$kit."_".$pcr; $cells{$info2} = 1; } } # end of line } # end of file } # end of array # Remove duplicated sample names. @samples = makeUnique(@samples); @samples = sortSamples(@samples); # Remove duplicated sample names. @cells = makeUnique(@cells); @cells = sort {$a <=> $b} @cells; ################## Get max of height per marker and per sample ################## ########################### and add soft brackets. ############################# foreach $marker (@markersKit1) { foreach $sample (@samples) { my $info1 = $sample."_".$kit1."_PCR-2"; my $info2 = $sample."_".$kit2."_PCR-2"; my @pcr; if ($cells{$info1} != 1 && $cells{$info2} != 1) { @pcr = ('PCR-1'); } else { @pcr = ('PCR-1', 'PCR-2'); } foreach $pcr (@pcr) { my $info = $sample."_".$kit1."_".$pcr; if ($$marker{$info} eq '') { $$marker{$info} = '-'; } } } } foreach $marker (@markersKit2) { foreach $sample (@samples) { my $info1 = $sample."_".$kit1."_PCR-2"; my $info2 = $sample."_".$kit2."_PCR-2"; my @pcr; if ($cells{$info1} != 1 && $cells{$info2} != 1) { @pcr = ('PCR-1'); } else { @pcr = ('PCR-1', 'PCR-2'); } foreach $pcr (@pcr) { my $info = $sample."_".$kit2."_".$pcr; if ($$marker{$info} eq '') { $$marker{$info} = '-'; } } } } foreach $cell (@cells) { # Get max of height. my $max = 0; foreach $allele (sort {$a <=> $b} keys %{$cell}) { my $value = $$cell{$allele}; if ($value > $max) { $max = $value; } } my %temp = %{$cell}; %{$cell} = (); # Add soft brackets. foreach $key (sort {$a <=> $b} keys %temp) { my $value = $temp{$key}; my @cell = split(/\_/, $cell); my $keySample = $cell[0]; my $keyMarker = $cell[1]; my $keyKit = $cell[2]; my $keyPCR = $cell[3]; my $allele = $key; my $threshold = $thresholds{$keyMarker}; if ($value < ($max*$threshold)) { $allele = "(".$allele.")"; } my $info = $keySample."_".$keyKit."_".$keyPCR; my $current = $$keyMarker{$info}; if ($current eq '-') { $current = ''; } $current = $current." ".$allele; $$keyMarker{$info} = $current; } # end of foreach key of %temp } # end of foreach marker # Open output file. my $oufile = $outdir."$case\_consensus.txt"; open (OUT, "> $oufile"); # Create log. @months = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec); @weekDays = qw(Sun Mon Tue Wed Thu Fri Sat Sun); ($second, $minute, $hour, $dayOfMonth, $month, $yearOffset, $dayOfWeek, $dayOfYear, $daylightSavings) = localtime(); $year = 1900 + $yearOffset; $theTime = "$hour:$minute:$second, $weekDays[$dayOfWeek] $months[$month] $dayOfMonth, $year"; my $param = "Input Folder:\t$directory Case Number:\t$case Kit #1 Name: \t$kit1 File: \t$pathKit1 Kit #2 Name: \t$kit2 File \t$pathKit2 Output Folder:\t$outdir Thresholds:\n"; my $maxlen = 0; foreach $marker (keys %thresholds) { my $len = length($marker); if ($len > $maxlen) {$maxlen = $len} } $maxlen++; foreach $marker (sort keys %thresholds) { my $th = $thresholds{$marker}; chomp($marker); $marker = " ".$marker.":"; while (length($marker) < $maxlen) { $marker = $marker." "; } $param = $param."$marker\t$th\n"; } # Print log to output file. print OUT "Bracket\n$theTime\n"; print OUT "---------------------------------------------------------------------------------------------------------------------------\nPARAMETERS\n\n"; print OUT $param; print OUT "---------------------------------------------------------------------------------------------------------------------------\n\n"; # Print header to output file. my $header = "extrait ADN\t"; foreach $marker (sort {$a <=> $b} @markers) { $marker =~ s/ //; $header = $header.$marker."\t"; } if (substr($header, -1) eq "\t") { $header = substr($header, 0, -1)."\n"; } print OUT $header; ################### Apply rules for each sample ################################# foreach $sample (@samples) { my $line = $sample; foreach $marker (sort {$a <=> $b} @markers) { my $info = $sample."_".$kit1."_PCR-1"; my $pcr1kit1 = $$marker{$info}; my $info = $sample."_".$kit1."_PCR-2"; my $pcr2kit1 = $$marker{$info}; my $info = $sample."_".$kit2."_PCR-1"; my $pcr1kit2 = $$marker{$info}; my $info = $sample."_".$kit2."_PCR-2"; my $pcr2kit2 = $$marker{$info}; if ($pcr2kit1 eq '' && $pcr2kit2 eq '') { my $consensusCell = trimSpace(compare2Cells($pcr1kit1, $pcr1kit2)); $line = $line."\t".$consensusCell; } else { my $consensusCell = trimSpace(compare4Cells($pcr1kit1, $pcr2kit1, $pcr1kit2, $pcr2kit2)); while ($consensusCell =~ m/ /) { $consensusCell =~ s/ / /; } $line = $line."\t".$consensusCell; } } print OUT $line."\n"; } close OUT;