Skip to content

Commit

Permalink
Merge pull request #19 from TheJacksonLaboratory/ddradseq_handling (L…
Browse files Browse the repository at this point in the history
…PWHR-9)

Add process to enable ddradseq sequence data upstream processing
  • Loading branch information
sam-widmayer authored Nov 2, 2023
2 parents d0951c2 + 8936329 commit 418cf54
Show file tree
Hide file tree
Showing 9 changed files with 158 additions and 44 deletions.
2 changes: 2 additions & 0 deletions bin/log/stitch.nf
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,12 @@ ______________________________________________________
--nFounders ${params.nFounders}
--downsample_to_cov ${params.downsample_to_cov}
--run_name ${params.run_name}
--library_type ${params.library_type}
--read_type ${params.read_type}
--sample_folder ${params.sample_folder}
--pattern ${params.pattern}
--extension ${params.extension}
--library_type ${params.library_type}
--concat_lanes ${params.concat_lanes}
-w ${workDir}
-c ${params.config}
Expand Down
1 change: 0 additions & 1 deletion config/stitch.config
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ manifest {
params {
// Shared params
gen_org = 'mouse'
extension='.fastq.gz'
pattern="*_R{1,2}*"
read_type='PE'
concat_lanes=false
Expand Down
1 change: 1 addition & 0 deletions env/stacks.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ LABEL Sam Widmayer <[email protected]>

RUN apt-get --allow-releaseinfo-change update \
&& apt-get install -y g++ \
&& apt-get install -y procps \
automake \
autoconf \
libpcre3-dev \
Expand Down
35 changes: 35 additions & 0 deletions modules/bwa/bwa_mem_ddradseq.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
process BWA_MEM_DDRADSEQ {
tag "$sampleID"

cpus 8
memory {60.GB * task.attempt}
time {30.hour * task.attempt}
errorStrategy 'retry'
maxRetries 1

container 'quay.io/biocontainers/bwakit:0.7.17.dev1--hdfd78af_1'

//publishDir "${params.pubdir}/${ params.organize_by=='sample' ? sampleID : 'bwa_mem' }", pattern: "*.sam", mode:'copy', enabled: params.keep_intermediate

input:
tuple val(sampleID), file(fq_1), file(fq_2), file(read_groups)

output:
tuple val(sampleID), file("*.sam"), emit: sam

script:
log.info "----- BWA-MEM Alignment Running on: ${sampleID} -----"

if (params.read_type == "SE"){
inputfq="${fq_1}"
}
if (params.read_type == "PE"){
inputfq="${fq_1} ${fq_2}"
}

"""
rg=\$(cat $read_groups)
bwa mem -R \${rg} \
-t $task.cpus ${params.mismatch_penalty} ${params.ref_fa_indices} $inputfq > ${sampleID}.sam
"""
}
2 changes: 1 addition & 1 deletion modules/quilt/genoprobs.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ process GENOPROBS {
memory {200.GB * task.attempt}
time {12.hour * task.attempt}
errorStrategy 'retry'
maxRetries 2
maxRetries 3


container 'docker://sjwidmay/lcgbs_hr:qtl2_et_al'
Expand Down
32 changes: 32 additions & 0 deletions modules/stacks/clone_filter.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
process CLONE_FILTER {

tag "$sampleID"

cpus 1
memory 100.GB
time '02:00:00'

container 'docker://sjwidmay/stitch_nf:stacks'

//publishDir "${params.sample_folder}/fastp", pattern:"*_fastp_report.html", mode:'copy'

input:
tuple val(sampleID), file(fq_reads)

output:
tuple val(sampleID), file("*1.1.fq.gz*"), file("*2.2.fq.gz"), emit: clone_filtered

script:
log.info "----- Stacks Clone Filtering: ${sampleID} -----"


"""
/stacks-2.64/clone_filter -1 ${fq_reads[0]} \\
-2 ${fq_reads[1]} \\
-i gzfastq \\
-y gzfastq \\
-D \\
--oligo_len_1 8 \\
--inline_null
"""
}
23 changes: 23 additions & 0 deletions modules/utility_modules/fastqc_ddradseq.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
process FASTQC_DDRADSEQ {

tag "$sampleID"

cpus 1
memory 5.GB
time '01:00:00'

container 'docker://biocontainers/fastqc:v0.11.9_cv8'

input:
tuple val(sampleID), file(fq_1), file(fq_2)

output:
tuple file("*_fastqc.html"), file("*_fastqc.zip"), emit: to_multiqc

script:
log.info "----- FASTQC Running on Sample: ${sampleID} -----"

"""
fastqc ${fq_1} ${fq_2}
"""
}
2 changes: 1 addition & 1 deletion rename_fastqs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@

# Run from within sample directory with fastqs
for f in *.R1.fastq.gz; do mv -v "$f" "${f/.R1/_R1}"; done;
for f in *.R2.fastq.gz; do mv -v "$f" "${f/.R2/_R2}"; done;
for f in *.R2.fastq.gz; do mv -v "$f" "${f/.R2/_R2}"; done;
104 changes: 63 additions & 41 deletions workflows/stitch.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,13 @@ include {getLibraryId} from "${projectDir}/bin/shared/getLibraryId.nf"
include {CONCATENATE_READS_PE} from "${projectDir}/modules/utility_modules/concatenate_reads_PE"
include {CONCATENATE_READS_SE} from "${projectDir}/modules/utility_modules/concatenate_reads_SE"
include {FASTQC} from "${projectDir}/modules/utility_modules/fastqc"
include {FASTQC_DDRADSEQ} from "${projectDir}/modules/utility_modules/fastqc_ddradseq"
include {MULTIQC} from "${projectDir}/modules/utility_modules/multiqc"
include {FASTP} from "${projectDir}/modules/utility_modules/fastp"
include {CLONE_FILTER} from "${projectDir}/modules/stacks/clone_filter"
include {READ_GROUPS} from "${projectDir}/modules/utility_modules/read_groups"
include {BWA_MEM} from "${projectDir}/modules/bwa/bwa_mem"
include {BWA_MEM_DDRADSEQ} from "${projectDir}/modules/bwa/bwa_mem_ddradseq"
include {PICARD_SORTSAM} from "${projectDir}/modules/picard/picard_sortsam"
include {PICARD_MARKDUPLICATES} from "${projectDir}/modules/picard/picard_markduplicates"
include {PICARD_COLLECTALIGNMENTSUMMARYMETRICS} from "${projectDir}/modules/picard/picard_collectalignmentsummarymetrics"
Expand Down Expand Up @@ -100,45 +103,69 @@ chrs = Channel.of(1..19,"X")
// main workflow
workflow QUILT {

// Step 0: Concatenate Fastq files if required.
if (params.concat_lanes){
if (params.read_type == 'PE'){
CONCATENATE_READS_PE(read_ch)
read_ch = CONCATENATE_READS_PE.out.concat_fastq
} else if (params.read_type == 'SE'){
CONCATENATE_READS_SE(read_ch)
read_ch = CONCATENATE_READS_SE.out.concat_fastq
}
}
// Step 0: Concatenate Fastq files if required.
if (params.concat_lanes){
if (params.read_type == 'PE'){
CONCATENATE_READS_PE(read_ch)
read_ch = CONCATENATE_READS_PE.out.concat_fastq
} else if (params.read_type == 'SE'){
CONCATENATE_READS_SE(read_ch)
read_ch = CONCATENATE_READS_SE.out.concat_fastq
}
}

// Run trimmomatic
//TRIMMOMATIC_PE(read_ch)
//read_ch.view()

// Calculate quality statistics for sequencing
FASTP(read_ch)
//QUALITY_STATISTICS(read_ch)

// Run fastqc on adapter trimmed reads
FASTQC(FASTP.out.fastp_filtered)

// Run multiqc
fastqc_reports = FASTQC.out.to_multiqc.flatten().collect()
MULTIQC(fastqc_reports)

// Generate read groups
READ_GROUPS(FASTP.out.fastp_filtered, "gatk")
mapping = FASTP.out.fastp_filtered.join(READ_GROUPS.out.read_groups)
if (params.library_type == 'ddRADseq'){
CLONE_FILTER(read_ch)
//QUALITY_STATISTICS(read_ch)
// Run fastqc on adapter trimmed reads
FASTQC_DDRADSEQ(CLONE_FILTER.out.clone_filtered)

// Run multiqc
fastqc_reports = FASTQC_DDRADSEQ.out.to_multiqc.flatten().collect()
MULTIQC(fastqc_reports)

// Generate read groups
READ_GROUPS(CLONE_FILTER.out.clone_filtered, "gatk")
mapping = CLONE_FILTER.out.clone_filtered.join(READ_GROUPS.out.read_groups)

// Alignment
BWA_MEM_DDRADSEQ(mapping)

// Sort SAM files
PICARD_SORTSAM(BWA_MEM_DDRADSEQ.out.sam)
data = PICARD_SORTSAM.out.bam.join(PICARD_SORTSAM.out.bai)
data.view()

} else {
FASTP(read_ch)
//QUALITY_STATISTICS(read_ch)
// Run fastqc on adapter trimmed reads
FASTQC(FASTP.out.fastp_filtered)

// Run multiqc
fastqc_reports = FASTQC.out.to_multiqc.flatten().collect()
MULTIQC(fastqc_reports)

// Generate read groups
READ_GROUPS(FASTP.out.fastp_filtered, "gatk")
mapping = FASTP.out.fastp_filtered.join(READ_GROUPS.out.read_groups)

// Alignment
BWA_MEM(mapping)

// Sort SAM files
PICARD_SORTSAM(BWA_MEM.out.sam)

// Mark duplicates
PICARD_MARKDUPLICATES(PICARD_SORTSAM.out.bam)
data = PICARD_MARKDUPLICATES.out.dedup_bam.join(PICARD_MARKDUPLICATES.out.dedup_bai)
}

// Alignment
BWA_MEM(mapping)
// BOWTIE2(mapping)

// Sort SAM files
PICARD_SORTSAM(BWA_MEM.out.sam)

// Mark duplicates
PICARD_MARKDUPLICATES(PICARD_SORTSAM.out.bam)
data = PICARD_MARKDUPLICATES.out.dedup_bam.join(PICARD_MARKDUPLICATES.out.dedup_bai)

// Calculate pileups
PICARD_COLLECTALIGNMENTSUMMARYMETRICS(data)
Expand All @@ -147,7 +174,7 @@ workflow QUILT {

// Downsample bams to specified coverage if the full coverage allows
coverageFilesChannel = SAMPLE_COVERAGE.out.depth_out.map {
tuple -> [tuple[0], tuple[1].splitText()[0].replaceAll("\\n", "").toFloat()]
tuple -> [tuple[0], tuple[1].splitText()[0].replaceAll("\\n", "").toFloat()]
}

// downsample bam files
Expand All @@ -156,18 +183,13 @@ workflow QUILT {
// Collect downsampled .bam filenames in its own list
bams = DOWNSAMPLE_BAM.out.downsampled_bam.collect()
CREATE_BAMLIST(bams)

// (No downsampling of bams)
// Collect .bam filenames in its own list
//bams = PICARD_MARKDUPLICATES.out.dedup_bam.collect()
//CREATE_BAMLIST(bams)

// Run QUILT
quilt_inputs = CREATE_BAMLIST.out.bam_list.combine(chrs)
RUN_QUILT(quilt_inputs)

// Perform LD pruning on QUILT output
//QUILT_LD_PRUNING(RUN_QUILT.out.quilt_vcf)
QUILT_LD_PRUNING(RUN_QUILT.out.quilt_vcf)

// Convert QUILT outputs to qtl2 files
quilt_for_qtl2 = RUN_QUILT.out.quilt_vcf
Expand All @@ -176,5 +198,5 @@ workflow QUILT {
// Reconstruct haplotypes with qtl2
GENOPROBS(QUILT_TO_QTL2.out.qtl2files)

}
}

0 comments on commit 418cf54

Please sign in to comment.