#!/usr/bin/perl -w
# -----------------------------------------------------------------------
# simpleRepeatStatistics.pl 1.0
#
# This script creates statistics for the simple repeats found in a
# repeat table file.
#
# Usage: simpleRepeatStatistics.pl INPUTFILE OUTPUT
#
#   INPUTFILE  Tab-separated repeat table. A line whose first field is
#              "#bin" is treated as the header and skipped.
#              NOTE(review): the column indices used below (6 = start,
#              7 = end, 10 = repeat name, 11 = repeat class) look like the
#              UCSC RepeatMasker (rmsk) table layout -- confirm.
#   OUTPUT     Receives one row per simple repeat followed by a per-pattern
#              summary.
#
# Side effects: writes "log.txt" (every input line counted as a simple
# repeat) and "logrest.txt" (every other non-header line) into the current
# working directory.
#
# Copyright by Joern Hameister (2006)
# -----------------------------------------------------------------------

use strict;
use warnings;

# Print a short usage message to STDOUT.
sub printHelp {
    print "This script creates a statistic for simple repeat in a repeat table file.\n";
    print "\n";
    print "Example: simpleRepeatStatistics.pl INPUTFILE OUTPUT\n";
    print "INPUTFILE: Repeat table\n";
    print "OUTPUT: Output file with statistics.\n";
    return;
}

if (@ARGV < 2) {
    # Guard against an empty @ARGV so that the comparison never sees undef.
    if (!@ARGV || $ARGV[0] ne '-h') {
        print "Wrong arguments!\n";
    }
    printHelp();
    exit(-1);
}

my ($inputFile, $outputFile) = @ARGV;

# Open all files up front and fail loudly; a silent failure here would
# otherwise produce empty or missing result files.
open(my $input, '<', $inputFile)
    or die "Cannot open input file '$inputFile': $!\n";
open(my $output, '>', $outputFile)
    or die "Cannot open output file '$outputFile': $!\n";
open(my $log, '>', 'log.txt')
    or die "Cannot open 'log.txt': $!\n";
open(my $logRest, '>', 'logrest.txt')
    or die "Cannot open 'logrest.txt': $!\n";

# Only rows whose class column equals this value are evaluated.
my $repClass = "Simple_repeat";

my %patterns;          # pattern => number of rows with that pattern
my %patternsLength;    # pattern => sum of (end - start) over those rows

print {$output} "start\tend\tlength\trawPattern\tpattern\tpatternlength\n";

while (defined(my $line = <$input>)) {
    chomp($line);

    # Use tabs as separator
    my @fields = split /\t/, $line;

    # Ignore the header line
    next if defined $fields[0] && $fields[0] eq "#bin";

    # Lines that are too short to carry a class column count as "rest".
    my $isSimpleRepeat = defined $fields[11] && $fields[11] eq $repClass;

    # The pattern is the first run of nucleotide letters in the raw name.
    # Only trust the capture when the match actually succeeded; otherwise
    # an empty/stale pattern would end up in the statistics and the
    # summary below would divide by a pattern length of zero.
    my $pattern;
    if ($isSimpleRepeat) {
        if ($fields[10] =~ /([ACGT]+)/) {
            $pattern = $1;
        }
        else {
            warn "Line $.: no A/C/G/T pattern in '$fields[10]', "
               . "treating line as non-simple repeat\n";
        }
    }

    # Rest: everything that is not a usable simple repeat
    if (!defined $pattern) {
        print {$logRest} "$line\n";
        next;
    }

    my $startPos      = $fields[6];
    my $endPos        = $fields[7];
    my $length        = $endPos - $startPos;
    my $patternRaw    = $fields[10];
    my $patternLength = length $pattern;

    # Missing hash entries autovivify, so no explicit first-time branch
    # is needed.
    $patterns{$pattern}++;
    $patternsLength{$pattern} += $length;

    print {$output} "$startPos\t$endPos\t$length\t$patternRaw\t$pattern\t$patternLength\n";
    print {$log} "$line\n";
}

print {$output} "---------------------------------------------------------------------------\n";
print {$output} "Number of occurence of a pattern in the Repeat table file:\n\n";
print {$output} "Pattern: The pattern\n";
print {$output} "Occurences: Number of occurrences of the pattern in the repeat table\n";
print {$output} "CompleteLength: Sum of all (endpos-startpos) of the repeate table\n";
print {$output} "tAbsoluteOccurence: CompleteLength divided by pattern length\n\n";
print {$output} "Pattern\tOccurences\tCompleteLength\tAbsoluteOccurence\n";

# Summary, sorted alphabetically by pattern. Every key is a non-empty
# match of [ACGT]+, so the division below cannot hit a zero length.
for my $keyValue (sort keys %patterns) {
    my $count       = $patterns{$keyValue};
    my $countLength = $patternsLength{$keyValue};
    my $counter     = $countLength / length($keyValue);
    print {$output} "($keyValue)n = $count\t$countLength\t$counter\n";
}

close($input);

# Buffered write errors (e.g. a full disk) only surface at close time.
close($output)  or die "Cannot close output file '$outputFile': $!\n";
close($log)     or die "Cannot close 'log.txt': $!\n";
close($logRest) or die "Cannot close 'logrest.txt': $!\n";