Skip to content

Commit

Permalink
[internal] safe_read to auto-handle gzipping (#187)
Browse files Browse the repository at this point in the history
* rm gzip text

* add auto-gzip reading

* set 0-100 bounds for plot
  • Loading branch information
pdimens authored Jan 29, 2025
1 parent 25e6a03 commit 1beb8e1
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 21 deletions.
9 changes: 9 additions & 0 deletions harpy/_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,3 +108,12 @@ def gzip_file(infile):
with open(infile, 'rb') as f_in, gzip.open(infile + '.gz', 'wb', 6) as f_out:
shutil.copyfileobj(f_in, f_out)
os.remove(infile)

def safe_read(file_path):
    """
    Open file_path for text reading, transparently handling gzip compression.

    The file is probed by decompressing a single byte in binary mode, so
    format detection never depends on text decoding. If the gzip header is
    rejected, the file is treated as plain text.

    Returns an already-open text-mode file object (not an opener function);
    the caller is responsible for closing it, e.g. `with safe_read(p) as f:`.
    Errors unrelated to the gzip format (such as a missing file) propagate.
    """
    try:
        # binary probe: only the gzip header needs to be validated here
        with gzip.open(file_path, 'rb') as probe:
            probe.read(1)
    except gzip.BadGzipFile:
        # not a gzip stream -> fall back to a regular text handle
        return open(file_path, 'r')
    return gzip.open(file_path, 'rt')
23 changes: 5 additions & 18 deletions harpy/_validations.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from rich.table import Table
import rich_click as click
from ._printing import print_error, print_notice, print_solution, print_solution_with_culprits
from ._misc import harpy_progressbar
from ._misc import harpy_progressbar, safe_read
from concurrent.futures import ThreadPoolExecutor, as_completed

# logic to properly refresh progress bar for jupyter sessions
Expand Down Expand Up @@ -303,9 +303,7 @@ def validate_regions(regioninput, genome):
# check if the region is in the genome

contigs = {}
opener = gzip.open if is_gzip(genome) else open
mode = "rt" if is_gzip(genome) else "r"
with opener(genome, mode) as fopen:
with safe_read(genome) as fopen:
for line in fopen:
if line.startswith(">"):
cn = line.rstrip("\n").lstrip(">").split()[0]
Expand Down Expand Up @@ -355,17 +353,11 @@ def validate_regions(regioninput, genome):
def check_fasta(genofile):
"""perform validations on fasta file for extensions and file contents"""
# validate fasta file contents
try:
opener = gzip.open if is_gzip(genofile) else open
except:
print_error("incorrect file format", f"The file must be plain-text or b/gzipped, but failed to be recognized as either. Please check that [blue]{genofile}[/blue] is indeed a fasta file.")
sys.exit(1)
mode = "rt" if is_gzip(genofile) else "r"
line_num = 0
seq_id = 0
seq = 0
last_header = False
with opener(genofile, mode) as fasta:
with safe_read(genofile) as fasta:
for line in fasta:
line_num += 1
if line.startswith(">"):
Expand Down Expand Up @@ -406,8 +398,7 @@ def check_fasta(genofile):
def fasta_contig_match(contigs, fasta):
"""Checks whether a list of contigs are present in a fasta file"""
valid_contigs = []
opener = gzip.open if is_gzip(fasta) else open
with opener(fasta, "rt") as gen_open:
with safe_read(fasta) as gen_open:
for line in gen_open:
if not line.startswith(">"):
continue
Expand All @@ -431,11 +422,7 @@ def validate_fastq_bx(fastq_list, threads, quiet):
def validate(fastq):
BX = False
BC = False
if is_gzip(fastq):
fq = gzip.open(fastq, "rt")
else:
fq = open(fastq, "r")
with fq:
with safe_read(fastq) as fq:
for line in fq:
if not line.startswith("@"):
continue
Expand Down
4 changes: 2 additions & 2 deletions harpy/bin/inline_to_haplotag.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
)
parser.add_argument("-p", "--prefix", required = True, type = str, help = "Prefix for outfile files (e.g. <prefix>.R1.fq.gz)")
parser.add_argument("-b", "--barcodes", required = True, type=str, help="Barcode conversion key file with format: ATCG<tab>ACBD")
parser.add_argument("forward", type = str, help = "Forward reads of paired-end FASTQ file pair (gzipped)")
parser.add_argument("reverse", type = str, help = "Reverse reads of paired-end FASTQ file pair (gzipped)")
parser.add_argument("forward", type = str, help = "Forward reads of paired-end FASTQ file pair")
parser.add_argument("reverse", type = str, help = "Reverse reads of paired-end FASTQ file pair")
if len(sys.argv) == 1:
parser.print_help(sys.stderr)
sys.exit(1)
Expand Down
2 changes: 1 addition & 1 deletion harpy/reports/bx_count.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ highchart() |>
hc_colors(c("#4a9fea", "#5a8c84", "#ffd75f", "#df487f")) |>
hc_title(text = "Percent Invalid Beadtag Segments") |>
hc_xAxis(type = "category", title = list(text = ""), categories = unique(invalids_long$Sample)) |>
hc_yAxis(title = list(text = "percent invalid")) |>
hc_yAxis(title = list(text = "percent invalid"), min = 0, max = 100) |>
hc_tooltip(crosshairs = TRUE, animation = FALSE,
formatter = JS("function () {return '<b>' + this.x + '</b><br><b>' + this.series.name + '</b><br/>Percent: <b>' + this.y + '</b>';}")
) |>
Expand Down

0 comments on commit 1beb8e1

Please sign in to comment.