Skip to content

Instantly share code, notes, and snippets.

@ryanlayer
Created May 3, 2015 16:32
Show Gist options
  • Select an option

  • Save ryanlayer/64cf24af7d3fccac2be9 to your computer and use it in GitHub Desktop.

Select an option

Save ryanlayer/64cf24af7d3fccac2be9 to your computer and use it in GitHub Desktop.
Hack a BAM into LUMPY parts
BAM=your.bam
OUT_DIR=what_lumpy_needs
LUMPY_HOME=$HOME/src/lumpy-sv
SAMBAMBA=$HOME/bin/sambamba
SAMBLASTER=$HOME/bin/samblaster
SAMTOBAM="$SAMBAMBA view -S -f bam -l 0"
SAMSORT="$SAMBAMBA sort -m 1G --tmpdir "
PAIREND_DISTRO=$LUMPY_HOME/scripts/pairend_distro.py
BAMGROUPREADS=$LUMPY_HOME/scripts/bamkit/bamgroupreads.py
MAX_SPLIT_COUNT=2
MIN_NON_OVERLAP=20
BAM_BASE=`basename $BAM .bam`
OUTPUT=`basename $BAM`.vcf
OUTBASE=`basename "$OUTPUT"`
# make pipes
TEMP_DIR=$OUT_DIR/$BAM_BASE
mkdir -p $TEMP_DIR/spl $TEMP_DIR/disc
mkfifo $TEMP_DIR/spl_pipe
mkfifo $TEMP_DIR/disc_pipe
READ_LENGTH=`$SAMBAMBA view $BAM | head -n 10000 | awk 'BEGIN { MAX_LEN=0 } { LEN=length($10); if (LEN>MAX_LEN) MAX_LEN=LEN } END { print MAX_LEN }'`
$PYTHON $BAMGROUPREADS \
--fix_flags \
-i $BAM \
| $SAMBLASTER \
--acceptDupMarks \
--excludeDups \
--addMateTags \
--maxSplitCount $MAX_SPLIT_COUNT \
--minNonOverlap $MIN_NON_OVERLAP \
--splitterFile $TEMP_DIR/spl_pipe \
--discordantFile $TEMP_DIR/disc_pipe \
| $SAMBAMBA view -S /dev/stdin \
| awk '{if (NR<=1000000)
print > "/dev/stdout";
else
print > "/dev/null" }' \
| $PYTHON $PAIREND_DISTRO \
-r $READ_LENGTH \
-X 4 \
-N 1000000 \
-o ${TEMP_DIR}/$OUTBASE.sample$(($i+1)).lib$(($j+1)).x4.histo \
> ${TEMP_DIR}/$OUTBASE.sample$(($i+1)).lib$(($j+1)).insert.stats &
$SAMTOBAM $TEMP_DIR/spl_pipe \
| $SAMSORT $TEMP_DIR/spl \
-o $TEMP_DIR/$OUTBASE.sample$(($i+1)).lib$(($j+1)).splitters.bam \
/dev/stdin &
$SAMTOBAM $TEMP_DIR/disc_pipe \
| $SAMSORT $TEMP_DIR/disc \
-o $TEMP_DIR/$OUTBASE.sample$(($i+1)).lib$(($j+1)).discordants.bam \
/dev/stdin
wait
rm $TEMP_DIR/spl_pipe
rm $TEMP_DIR/disc_pipe
rmdir $TEMP_DIR/spl
rmdir $TEMP_DIR/disc
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment