#!/usr/bin/perl -w
# -----------------------------------------------------------------------
# repeatTable 1.0
#
# This script extracts the genoStart and the genoEnd value from a repeat
# list file (one record per line, columns separated by tabs) and writes
# them into an output file.
# Every value pair is written on its own line and surrounded by '{}';
# the start value is written as genoStart + 1.
# A line of the repeat list is extracted only if all conditions passed
# on the command line are true for it. A condition can be passed for
# every column of the repeat list; values are compared as strings.
# Every extracted line is additionally copied to "log.txt" in the
# current directory.
#
# For example:
# You want to extract the start and end positions where the following
# conditions are true:
#  - Repeat Family is L1
#  - Strand is +
#  - Repeat Class is LINE
#  - Repeat Name is L1Md_A
#
# You have to use the following command line:
# ./repeatTable input.txt output.txt -repFamily L1 -strand + -repClass LINE -repName L1Md_A
#
# Copyright by Joern Hameister (2005)
# -----------------------------------------------------------------------

use strict;
use warnings;

# Column names of the repeat list, in file order. The position of a name
# in this list is the index of the tab-separated field it filters on.
my @COLUMN_NAMES = qw(
    bin       swScore  milliDiv  milliDel  milliIns
    genoName  genoStart genoEnd  genoLeft  strand
    repName   repClass repFamily repStart  repEnd
    repLeft   id
);

# Lookup table: column name => field index.
my %COLUMN_OF = map { $COLUMN_NAMES[$_] => $_ } 0 .. $#COLUMN_NAMES;

# Print the usage text to STDOUT.
sub printHelp {
    print " repeatTable 1.0\n";
    print " \n";
    print " This script extracts genoStart and the genoEnd value from a repeat \n";
    print " list file. The values are extracted and written into an output file.\n";
    print " Every value pair is in a new line and surrounded by '{}'.\n";
    print " The values of the line from the repeat list are extracted if the \n";
    print " conditions of the passed parameters are true.\n";
    print " For every value in the repeat list can be passed a condition.\n";
    print "\n";
    print " For example:\n";
    print " You want to extract the start and end positions where the following\n";
    print " conditions are true:\n";
    print " - Repeat Family is L1\n";
    print " - Strand is +\n";
    print " - Repeat Class is LINE\n";
    print " - Repeat Name is L1Md_A\n";
    print " \n";
    print " You have to use the following command line:\n";
    print " ./repeatTable input.txt output.txt -repFamily L1 -strand + -repClass LINE -repName L1Md_A \n";
    print " \n";
    print " \n";
    print "./repeatTable INPUTFILE OUTPUTFILE -[OPTIONS]\n";
    print "\n";
    print "-bin:\t\t 607 \n";
    print "-swScore:\t 27368\n";
    print "-milliDiv:\t 25\n";
    print "-milliDel:\t 1\n";
    print "-milliIns:\t 0\n";
    print "-genoName:\t chr1\n";
    print "-genoStart:\t 3000000\n";
    print "-genoEnd:\t 3003389\n";
    print "-genoLeft:\t -192106223\n";
    print "-strand:\t +\n";
    print "-repName:\t L1Md_A\n";
    print "-repClass:\t LINE\n";
    print "-repFamily:\t L1\n";
    print "-repStart:\t 2193\n";
    print "-repEnd:\t 5586\n";
    print "-repLeft:\t -297\n";
    print "-id:\t\t 1\n";
    return;
}

# Turn the "-name value" pairs that follow the two file names into a
# filter table.
#
# Parameters: the option arguments, as a flat list of name/value pairs.
# Returns:    a hash mapping field index => required field value.
#
# Unknown option names and options without a value are reported on
# STDERR and skipped. When an option is given twice the last value wins.
sub parse_filters {
    my @options = @_;
    my %wanted_at;

    while (@options) {
        my ($flag, $value) = splice @options, 0, 2;

        my $name = $flag;
        $name =~ s/\A-//;

        # Earlier versions only understood "-milliDivValue" although the
        # help text documents "-milliDiv"; accept both spellings.
        $name = 'milliDiv' if $name eq 'milliDivValue';

        if ($flag !~ /\A-/ || !exists $COLUMN_OF{$name}) {
            warn "Ignoring unknown option '$flag'\n";
            next;
        }
        if (!defined $value) {
            warn "Ignoring option '$flag' without a value\n";
            next;
        }

        # Echo kept from the original script, which reports only -bin.
        print "Set value: $value\n" if $name eq 'bin';

        $wanted_at{ $COLUMN_OF{$name} } = $value;
    }

    return %wanted_at;
}

# Check one record against the filter table.
#
# Parameters: $fields    - array ref with the tab-separated fields of a line
#             $wanted_at - hash ref, field index => required value
# Returns:    1 if every filtered field is present and string-equal to
#             the required value, 0 otherwise.
sub row_matches {
    my ($fields, $wanted_at) = @_;

    for my $column (keys %{$wanted_at}) {
        my $actual = $fields->[$column];
        return 0 if !defined $actual;
        return 0 if $actual ne $wanted_at->{$column};
    }

    return 1;
}

# Program entry point.
#
# Parameters: the command line arguments (INPUTFILE OUTPUTFILE [-name value]...).
# Returns:    the process exit status: 0 on success or when help was
#             requested, -1 on a malformed command line.
# Dies when one of the files cannot be opened or written.
sub main {
    my @args = @_;

    # Handle -h before any file is touched, so asking for help never
    # truncates an output file.
    if (grep { $_ eq '-h' } @args) {
        printHelp();
        return 0;
    }

    # Two file names followed by complete name/value pairs are required.
    if (@args < 2 || @args % 2 == 1) {
        print "Wrong arguments!\n";
        printHelp();
        return -1;
    }

    my ($input_path, $output_path, @options) = @args;
    my %wanted_at = parse_filters(@options);

    my $start_column = $COLUMN_OF{genoStart};
    my $end_column   = $COLUMN_OF{genoEnd};

    open(my $input, '<', $input_path)
        or die "Cannot open input file '$input_path': $!\n";
    open(my $output, '>', $output_path)
        or die "Cannot open output file '$output_path': $!\n";
    open(my $log, '>', 'log.txt')
        or die "Cannot open log file 'log.txt': $!\n";

    while (defined(my $line = <$input>)) {
        chomp $line;

        # Use tabs as separator
        my @fields = split /\t/, $line;

        # Blank or truncated lines carry no start/end pair to extract.
        next if @fields <= $end_column;
        next if !row_matches(\@fields, \%wanted_at);

        print {$log} "$line\n";

        my $start_pos = $fields[$start_column] + 1;
        print {$output} '{' . $start_pos . ', ' . $fields[$end_column] . "}\n";
    }

    close($input);
    # Buffered write errors only surface when the handle is closed.
    close($output) or die "Cannot write output file '$output_path': $!\n";
    close($log)    or die "Cannot write log file 'log.txt': $!\n";

    return 0;
}

# Run only when executed as a script, not when loaded with require/do.
exit main(@ARGV) unless caller;