-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnf-m6anet.nf
187 lines (160 loc) · 7.03 KB
/
nf-m6anet.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
#!/usr/bin/env nextflow
/*
========================================================================================
MaestSi/nf-m6anet
========================================================================================
MaestSi/nf-m6anet analysis pipeline.
#### Homepage / Documentation
https://github.com/MaestSi/nf-m6anet
----------------------------------------------------------------------------------------
*/
def helpMessage() {
log.info"""
Usage:
nextflow -c nf-m6anet.conf run nf-m6anet.nf --samples="/path/to/samples.txt" --resultsDir="/path/to/resultsDir" -profile docker
Mandatory argument:
-profile Configuration profile to use. Available: docker, singularity
Other mandatory arguments which may be specified in the nf-m6anet.conf file
--samples Path to the tab-separated sample file including sample name, condition, path to fast5 folder and path to fastq file
--resultsDir Path to a folder where to store results
--transcriptome_fasta Path to the transcriptome fasta file
--gtf Path to genome annotation gtf file
--min_mapq Minimum mapping quality
--prob_mod_thr Probability modification threshold for calling a site as m6A+
--optArgs_f5c Optional arguments for f5c, for example "--kmer-model /path/to/rna004.nucleotide.5mer.model"
--optArgs_m6anet Optional arguments for m6Anet, for example "--pretrained_model HEK293T_RNA004" or "--pretrained_model arabidopsis_RNA002"
--postprocessingScript Path to Transcript_to_genome.R script
--bulkLevelScript Path to Calculate_m6anet_bulk.R script
""".stripIndent()
}
// Show help message
if (params.help) {
helpMessage()
exit 0
}
// Input of transcriptome fasta.
Channel
.fromPath(params.transcriptome_fasta, checkIfExists:true)
.set{ reference_fasta_ch }
// Input of sample names, conditions, FAST5s path and FASTQ.
Channel
.fromPath( params.samples )
.splitCsv(header: true, sep:'\t')
.map{ row-> tuple(row.sample, row.condition, file(row.fast5_dir), file(row.fastq)) }
.set{ samples_minimap2 }
// Transcriptome alignment.
process minimap2 {
input:
tuple val(sample), val(condition), val(fast5_dir), val(fastq)
each file('transcriptome.fa')
output:
tuple val(sample), val(condition), val(fast5_dir), val(fastq)
script:
if(params.minimap2)
"""
mkdir -p ${params.resultsDir}/${condition}/${sample}/transcriptomeAlignment/
minimap2 -x map-ont -k14 -t ${task.cpus} -a transcriptome.fa ${fastq} | samtools view -hSb | samtools sort -@ ${task.cpus} -o ${params.resultsDir}/${condition}/${sample}/transcriptomeAlignment/minimapT.bam
samtools view ${params.resultsDir}/${condition}/${sample}/transcriptomeAlignment/minimapT.bam -bh -F 2324 -q ${params.min_mapq} | samtools sort -@ ${task.cpus} -o ${params.resultsDir}/${condition}/${sample}/transcriptomeAlignment/minimap.filt.sortT.bam
samtools index -@ ${task.cpus} ${params.resultsDir}/${condition}/${sample}/transcriptomeAlignment/minimap.filt.sortT.bam
ln -s ${params.resultsDir}/${condition}/${sample}/transcriptomeAlignment/minimap.filt.sort.bam ./minimap.filt.sortT.bam
ln -s ${params.resultsDir}/${condition}/${sample}/transcriptomeAlignment/minimap.filt.sort.bam.bai ./minimap.filt.sortT.bam.bai
"""
else
"""
ln -s ${params.resultsDir}/${condition}/${sample}/transcriptomeAlignment/minimap.filt.sort.bam ./minimap.filt.sortT.bam
ln -s ${params.resultsDir}/${condition}/${sample}/transcriptomeAlignment/minimap.filt.sort.bam.bai ./minimap.filt.sortT.bam.bai
"""
}
// Resquigling with nanopolish for each condition
process nanopolish {
input:
tuple val(sample), val(condition), val(fast5_dir), val(fastq)
each file('transcriptome.fa')
output:
tuple val(condition), val(sample)
script:
if(params.nanopolish)
"""
mkdir -p ${params.resultsDir}/${condition}/${sample}/nanopolish/
seqsum_file=\$(dirname ${fast5_dir})/sequencing_summary.txt
if [[ -f "\$seqsum_file" ]]; then
f5c index -d ${fast5_dir} ${fastq} -s \$seqsum_file
else
f5c index -d ${fast5_dir} ${fastq}
fi
f5c eventalign --rna -r ${fastq} -b ${params.resultsDir}/${condition}/${sample}/transcriptomeAlignment/minimap.filt.sortT.bam -g transcriptome.fa --signal-index --scale-events --summary ${params.resultsDir}/${condition}/${sample}/nanopolish/summary.txt -t ${task.cpus} --min-mapq ${params.min_mapq} ${params.optArgs_f5c} > ${params.resultsDir}/${condition}/${sample}/nanopolish/eventalign_readIndex.txt
"""
else
"""
echo "Skipped"
"""
}
// Data formatting for m6anet for each sample
process m6anet1 {
input:
tuple val(condition), val(sample)
output:
tuple val(condition), val(sample)
script:
if(params.m6anet1)
"""
mkdir -p ${params.resultsDir}/${condition}/${sample}/m6anet/
m6anet dataprep --eventalign ${params.resultsDir}/${condition}/${sample}/nanopolish/eventalign_readIndex.txt \
--out_dir ${params.resultsDir}/${condition}/${sample}/m6anet --n_processes ${task.cpus}
"""
else
"""
ln -sf ${params.resultsDir}/${condition}/${sample}/m6anet m6anet
"""
}
// RNA modifications detection with m6anet
process m6anet2 {
input:
tuple val(condition), val(sample)
output:
val(condition)
script:
if(params.m6anet2)
"""
mkdir -p ${params.resultsDir}/${condition}/m6anet
preprocessing_dirs=\$(find ${params.resultsDir}/${condition} -maxdepth 2 -mindepth 2 -type d | grep "m6anet\$")
m6anet inference --input_dir \$preprocessing_dirs --out_dir ${params.resultsDir}/${condition}/m6anet --n_processes ${task.cpus} ${params.optArgs_m6anet}
"""
else
"""
echo "Skipped"
"""
}
// Processing of each output to obtain bed files
process postprocessing {
input:
val(condition)
output:
script:
if(params.postprocessing)
"""
mkdir -p ${params.resultsDir}/${condition}/m6anet_postprocessing
Rscript ${params.postprocessingScript} \
input_file=${params.resultsDir}/${condition}/m6anet/data.site_proba.csv \
output_file=${params.resultsDir}/${condition}/m6anet_postprocessing/data.site_proba.thr${params.prob_mod_thr}.tsv \
output_file_genome=${params.resultsDir}/${condition}/m6anet_postprocessing/data.site_proba.genome.thr${params.prob_mod_thr}.tsv \
genome_gtf=${params.gtf} \
mccores=${task.cpus} \
prob_mod_thr=${params.prob_mod_thr}
Rscript ${params.bulkLevelScript} \
m6anet_output_file=${params.resultsDir}/${condition}/m6anet/data.site_proba.csv \
report_file=${params.resultsDir}/${condition}/m6anet_postprocessing/m6A_bulk_level_estimate.txt \
prob_mod_thr=${params.prob_mod_thr}
"""
else
"""
echo "Skipped"
"""
}
workflow {
minimap2(samples_minimap2, reference_fasta_ch)
nanopolish(minimap2.out, reference_fasta_ch)
m6anet1(nanopolish.out)
m6anet2(m6anet1.out.groupTuple(by:0))
postprocessing(m6anet2.out)
}