From 9c2c06655986bb6ac8b5c3e043b683df5b02e41f Mon Sep 17 00:00:00 2001
From: Seung-been Lee
Date: Thu, 6 Jun 2024 20:18:12 +0900
Subject: [PATCH] Update `pyvcf.has_chr_prefix`:

* Update :meth:`pyvcf.has_chr_prefix` method to ignore the HLA contigs for GRCh38.
---
 CHANGELOG.rst    | 2 ++
 fuc/api/pyvcf.py | 4 ++++
 2 files changed, 6 insertions(+)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index cde6191..360488c 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -4,6 +4,8 @@ Changelog
 0.38.0 (in development)
 -----------------------
 
+* Update :meth:`pyvcf.has_chr_prefix` method to ignore the HLA contigs for GRCh38.
+
 0.37.0 (2023-09-09)
 -------------------
 
diff --git a/fuc/api/pyvcf.py b/fuc/api/pyvcf.py
index 8211b5b..bb714f7 100644
--- a/fuc/api/pyvcf.py
+++ b/fuc/api/pyvcf.py
@@ -763,6 +763,8 @@ def has_chr_prefix(file, size=1000):
     Return True if all of the sampled contigs from a VCF file have the
     (annoying) 'chr' string.
 
+    For GRCh38, the HLA contigs will be ignored.
+
     Parameters
     ----------
     file : str
@@ -779,6 +781,8 @@ def has_chr_prefix(file, size=1000):
     vcf = VariantFile(file)
     for record in vcf.fetch():
         n += 1
+        if record.chrom.startswith('HLA'):
+            continue
         if 'chr' not in record.chrom:
             return False
         if n > size: