samtools <command> [options]
Program: samtools (Tools for alignments in the SAM format) Version: 0.1.18 (r982:295) Usage: samtools <command> [options] Command: view SAM<->BAM conversion sort sort alignment file mpileup multi-way pileup depth compute the depth faidx index/extract FASTA tview text alignment viewer index index alignment idxstats BAM index stats (r595 or later) fixmate fix mate information flagstat simple stats calmd recalculate MD/NM tags and '=' bases merge merge sorted alignments rmdup remove PCR duplicates reheader replace BAM header cat concatenate BAMs targetcut cut fosmid regions (for fosmid pool only) phase phase heterozygotes
head test.sam
samtools view -bT reference.fa test.sam > test.bam
samtools view -bS test.sam > test.bam
samtools sort test.bam test_sorted
samtools view -bS file.sam | samtools sort - file_sorted
samtools index test_sorted.bam test_sorted.bai
samtools view -h NA06984.chrom16.ILLUMINA.bwa.CEU.low_coverage.20100517.bam > NA06984.chrom16.ILLUMINA.bwa.CEU.low_coverage.20100517.sam
samtools view -h -F 4 blah.bam > blah_only_mapped.sam OR samtools view -h -F 4 -b blah.bam > blah_only_mapped.bam
samtools view -L test.bed test.bam | awk '$2 != 4 {print}' OR samtools view -S -L test.bed test.sam | awk '$2 != 4 {print}' OR samtools view -F 4 -S -L test.bed test.sam
intersectBed -abam file.bam -b test.bed
samtools view -h -f 0x0040 test.bam > test_first_pair.sam
samtools flagstat NA06984.chrom20.ILLUMINA.bwa.CEU.low_coverage.20111114.bam
samtools view NA06984.chrom20.ILLUMINA.bwa.CEU.low_coverage.20111114.bam | wc 6874858 #same as line 1 samtools view -F 4 NA06984.chrom20.ILLUMINA.bwa.CEU.low_coverage.20111114.bam | wc 6683299 #same as line 3 samtools view -f 0x0040 NA06984.chrom20.ILLUMINA.bwa.CEU.low_coverage.20111114.bam | wc 3408650 #same as line 5 samtools view -f 0x0080 NA06984.chrom20.ILLUMINA.bwa.CEU.low_coverage.20111114.bam | wc 3407433 #same as line 6 and so on...
#!/usr/bin/perl use strict; use warnings; my $usage = "Usage: $0 <bam_flag>\n"; my $bam_flag = shift or die $usage; my $binary = dec2bin($bam_flag); $binary = reverse($binary); #163 -> 10100011 my @desc = ( 'The read is paired in sequencing', 'The read is mapped in a proper pair', 'The query sequence itself is unmapped', 'The mate is unmapped', 'strand of the query', 'strand of the mate', 'The read is the first read in a pair', 'The read is the second read in a pair', 'The alignment is not primery (a read having split hits may have multiple primary alignment records)', 'The read fails quality checks', 'The read is either a PCR duplicate or an optical duplicate', ); my $query_mapped = '0'; my $mate_mapped = '0'; my $proper_pair = '0'; print "$binary\n"; for (my $i=0; $i< length($binary); ++$i){ my $flag = substr($binary,$i,1); #print "\$i = $i and \$flag = $flag\n"; if ($i == 1){ if ($flag == 1){ $proper_pair = '1'; } } if ($i == 2){ if ($flag == 0){ $query_mapped = '1'; } } if ($i == 3){ if ($flag == 0){ $mate_mapped = '1'; } } if ($i == 4){ next if $query_mapped == 0; if ($flag == 0){ print "The read is mapped on the forward strand\n"; } else { print "The read is mapped on the reverse strand\n"; } } elsif ($i == 5){ next if $mate_mapped == 0; next if $proper_pair == 0; if ($flag == 0){ print "The mate is mapped on the forward strand\n"; } else { print "The mate is mapped on the reverse strand\n"; } } else { if ($flag == 1){ print "$desc[$i]\n"; } } } exit(0); #from The Perl Cookbook sub dec2bin { my $str = unpack("B32", pack("N", shift)); $str =~ s/^0+(?=\d)//; # otherwise you'll get leading zeros return $str; } __END__
#random BAM file mapped with BWA samtools view blah.bam blah 0 chr1 948827 37 1M2D17M1I8M * 0 0 GGTGTGTGCCTCAGGCTTAATAATAGG ggcgde`a`egfbfgghdfa_cfea^_ XT:A:U NM:i:3 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:1^AC25 #the -e changes identical bases between the read and reference into ='s samtools view -b blah.bam | samtools fillmd -e - ref.fa blah 0 chr1 948827 37 1M2D17M1I8M * 0 0 ==================A======== ggcgde`a`egfbfgghdfa_cfea^_ XT:A:U NM:i:3 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:1^AC25 The original read sequence is: blah G--GTGTGTGCCTCAGGCTTAATAATAGG | |||||||||||||||||| ||||||| genome GACGTGTGTGCCTCAGGCTTA-TAATAGG The CIGAR string (1M2D17M1I8M) is shown with respect to the read, i.e. a deletion means a deletion in the read and an insertion is an insertion in the read, as you can see above. SAMTools fillmd shows the insertions (as above) and mismatches (not in the example above) but not deletions.
bam2fastq -o blah_unaligned.fastq --no-aligned blah.bam
bam2fastq -o blah_aligned.fastq --no-unaligned blah.bam
No comments:
Post a Comment