From c672be7ca859f44690af921df113e25c94fc2589 Mon Sep 17 00:00:00 2001
From: Benjamin Wingfield <bwingfield@ebi.ac.uk>
Date: Mon, 19 Feb 2024 13:35:09 +0000
Subject: [PATCH] v0.5.0 (#78)

* draft streaming with generators

* set up effect types

* profiling improvements

* fix output

* check for duplicates

* add liftover

* update dependencies and set up pre-commit

* complain when linting fails

* fix linting

* support wide files

* add log

* fix tests and liftover

* fix test

* sqlite support and add log data

* fix tests

* fix tests

* fixes to make old and new output consistent

* update tests

* drop parallel gzip and --threads

* create ScoreVariant and EffectType classes

* review comments

* add type hints

* remove coordinates from mandatory fields

* fix old scoring files

* check effect alleles and complex scoring files

* don't access __annotations__ directly

* remove logger

* warn about complex files and variant mismatch

* refactor scorevariant from userdict to class with __slots__

* fix __repr__ and type hints

* add pyarrow support

* add license data to log

* add custom exceptions

* add custom exit code

* move class definitions

* rename

* update effect allele class

* tidy up docstring

* add docstrings to pytest

* fix pyproject

* Make sure that IID isn't converted to numeric during aggregation

Signed-off-by: smlmbrt <sam.a.lambert@gmail.com>

* bump minor version

* dynamically set is_snp

* remove samplesheet package

* delete samplesheet tests

* fix liftover

* set up local venv

* fix liftover test

* improve comment

---------

Signed-off-by: smlmbrt <sam.a.lambert@gmail.com>
Co-authored-by: smlmbrt <sam.a.lambert@gmail.com>
---
 .gitignore                                    |   3 +-
 .pre-commit-config.yaml                       |   8 +
 conftest.py                                   | 221 +++--
 .../aggregate/aggregate_scores.py             |   2 +-
 pgscatalog_utils/download/GenomeBuild.py      |  25 +-
 pgscatalog_utils/pgsexceptions.py             | 127 +++
 pgscatalog_utils/samplesheet/Config.py        |   7 -
 pgscatalog_utils/samplesheet/check.py         | 370 --------
 .../scorefile/combine_scorefiles.py           | 272 +++---
 pgscatalog_utils/scorefile/config.py          |  16 +
 pgscatalog_utils/scorefile/effect_type.py     |  34 -
 pgscatalog_utils/scorefile/effect_weight.py   |  49 -
 pgscatalog_utils/scorefile/effectallele.py    |  60 ++
 pgscatalog_utils/scorefile/effecttype.py      |  14 +
 pgscatalog_utils/scorefile/genome_build.py    |  24 -
 pgscatalog_utils/scorefile/harmonised.py      |  30 -
 pgscatalog_utils/scorefile/liftover.py        | 138 ++-
 pgscatalog_utils/scorefile/qc.py              | 294 ++++--
 pgscatalog_utils/scorefile/read.py            |  79 --
 pgscatalog_utils/scorefile/scorevariant.py    | 137 +++
 pgscatalog_utils/scorefile/scoringfile.py     | 188 ++++
 .../scorefile/scoringfileheader.py            |  92 ++
 pgscatalog_utils/scorefile/write.py           | 212 ++++-
 poetry.lock                                   | 158 +++-
 poetry.toml                                   |   3 +
 pyproject.toml                                |   7 +-
 tests/data/combine/PGS001229_22.txt           | 850 ++++++++++++++++++
 tests/data/combine/scorefile.txt              | 838 +++++++++++++++++
 .../scorefile_dominant_and_recessive.txt      | 838 +++++++++++++++++
 tests/test_combine.py                         | 145 ++-
 tests/test_liftover.py                        |  40 +-
 tests/test_samplesheet.py                     | 101 ---
 32 files changed, 4200 insertions(+), 1182 deletions(-)
 create mode 100644 .pre-commit-config.yaml
 create mode 100644 pgscatalog_utils/pgsexceptions.py
 delete mode 100644 pgscatalog_utils/samplesheet/Config.py
 delete mode 100755 pgscatalog_utils/samplesheet/check.py
 create mode 100644 pgscatalog_utils/scorefile/config.py
 delete mode 100644 pgscatalog_utils/scorefile/effect_type.py
 delete mode 100644 pgscatalog_utils/scorefile/effect_weight.py
 create mode 100644 pgscatalog_utils/scorefile/effectallele.py
 create mode 100644 pgscatalog_utils/scorefile/effecttype.py
 delete mode 100644 pgscatalog_utils/scorefile/genome_build.py
 delete mode 100644 pgscatalog_utils/scorefile/harmonised.py
 delete mode 100644 pgscatalog_utils/scorefile/read.py
 create mode 100644 pgscatalog_utils/scorefile/scorevariant.py
 create mode 100644 pgscatalog_utils/scorefile/scoringfile.py
 create mode 100644 pgscatalog_utils/scorefile/scoringfileheader.py
 create mode 100644 poetry.toml
 create mode 100644 tests/data/combine/PGS001229_22.txt
 create mode 100644 tests/data/combine/scorefile.txt
 create mode 100644 tests/data/combine/scorefile_dominant_and_recessive.txt
 delete mode 100644 tests/test_samplesheet.py

diff --git a/.gitignore b/.gitignore
index b0b6f3a..5ee9a36 100644
--- a/.gitignore
+++ b/.gitignore
@@ -157,4 +157,5 @@ cython_debug/
 #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
-.idea/
\ No newline at end of file
+.idea/
+.DS_Store
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..f7d0c74
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,8 @@
+repos:
+- repo: https://github.com/astral-sh/ruff-pre-commit
+  # Ruff version.
+  rev: v0.1.3
+  hooks:
+    - id: ruff
+      args: [--fix, --exit-non-zero-on-fix]  
+    - id: ruff-format
diff --git a/conftest.py b/conftest.py
index a4a55c6..e01a719 100644
--- a/conftest.py
+++ b/conftest.py
@@ -1,11 +1,9 @@
 import glob
 import importlib.resources
 import os
-import pathlib
 import shutil
 from unittest.mock import patch
 
-import pandas as pd
 import polars as pl
 import pytest
 import requests as req
@@ -13,21 +11,53 @@
 from pgscatalog_utils.download.download_scorefile import download_scorefile
 from pgscatalog_utils.match.preprocess import complement_valid_alleles
 from pgscatalog_utils.scorefile.combine_scorefiles import combine_scorefiles
+from pgscatalog_utils.scorefile.scorevariant import ScoreVariant
+
+from tests.data import combine
 
 pl.toggle_string_cache(True)
 
 
 @pytest.fixture(scope="session")
 def pgs_accessions():
-    return ['PGS001229', 'PGS000922']
+    return ["PGS001229", "PGS000922"]
+
+
+@pytest.fixture(scope="session")
+def mini_score_path(tmp_path_factory):
+    path = importlib.resources.files(combine) / "PGS001229_22.txt"
+    return str(path)
+
+
+@pytest.fixture(scope="session")
+def mini_scorefile(mini_score_path, tmp_path_factory):
+    # The mini scorefile overlaps well with cineca synthetic subset
+    out_path = tmp_path_factory.mktemp("scores") / "mini_score.txt"
+    args: list[str] = (
+        ["combine_scorefiles", "-t", "GRCh37", "-s"]
+        + [mini_score_path]
+        + ["-o", str(out_path.resolve())]
+    )
+
+    with patch("sys.argv", args):
+        combine_scorefiles()
+
+    return str(out_path.resolve())
 
 
 @pytest.fixture(scope="session")
 def scorefiles(tmp_path_factory, pgs_accessions):
     fn = tmp_path_factory.mktemp("scorefiles")
-    args: list[str] = ['download_scorefiles', '-b', 'GRCh37', '-o', str(fn.resolve()), '-i'] + pgs_accessions
-
-    with patch('sys.argv', args):
+    args: list[str] = [
+        "download_scorefiles",
+        "-b",
+        "GRCh37",
+        "-o",
+        str(fn.resolve()),
+        "-i",
+    ] + pgs_accessions
+
+    with patch("sys.argv", args):
         download_scorefile()
 
     return glob.glob(os.path.join(fn.resolve(), "*.txt.gz"))
@@ -37,8 +67,9 @@ def scorefiles(tmp_path_factory, pgs_accessions):
 def target_path(tmp_path_factory):
     try:
         bim = req.get(
-            'https://gitlab.ebi.ac.uk/nebfield/test-datasets/-/raw/master/pgsc_calc/cineca_synthetic_subset.bim',
-            timeout=5)
+            "https://gitlab.ebi.ac.uk/nebfield/test-datasets/-/raw/master/pgsc_calc/cineca_synthetic_subset.bim",
+            timeout=5,
+        )
     except (req.exceptions.ConnectionError, req.Timeout):
         bim = []
 
@@ -46,129 +77,125 @@ def target_path(tmp_path_factory):
         pytest.skip("Couldn't get test data from network")
     else:
         fn = tmp_path_factory.mktemp("target") / "data.bim"
-        with open(fn, 'wb') as f:
+        with open(fn, "wb") as f:
             f.write(bim.content)
 
         return str(fn.resolve())
 
 
-@pytest.fixture(scope="session")
-def mini_score_path(tmp_path_factory):
-    try:
-        score = req.get('https://gitlab.ebi.ac.uk/nebfield/test-datasets/-/raw/master/pgsc_calc/PGS001229_22.txt',
-                        timeout=5)
-    except (req.exceptions.ConnectionError, req.Timeout):
-        score = []
-
-    if not score:
-        pytest.skip("Couldn't get test data from network")
-    else:
-        fn = tmp_path_factory.mktemp("score") / "PGS001229_22.txt"
-        with open(fn, 'wb') as f:
-            f.write(score.content)
-
-        return str(fn.resolve())
-
-
-@pytest.fixture(scope="session")
-def mini_scorefile(mini_score_path, tmp_path_factory):
-    # The mini scorefile overlaps well with cineca synthetic subset
-    out_path = tmp_path_factory.mktemp("scores") / "mini_score.txt"
-    args: list[str] = ['combine_scorefiles', '-t', 'GRCh37', '-s'] + [mini_score_path] + ['-o', str(out_path.resolve())]
-
-    with patch('sys.argv', args):
-        combine_scorefiles()
-
-    return str(out_path.resolve())
-
-
-@pytest.fixture(scope="session")
-def combined_scorefile(scorefiles, tmp_path_factory):
-    # The combined scorefile overlaps poorly with cineca synthetic subset
-    out_path = tmp_path_factory.mktemp("scores") / "combined.txt"
-    args: list[str] = ['combine_scorefiles', '-t', 'GRCh37', '-s'] + scorefiles + ['-o', str(out_path.resolve())]
-
-    with patch('sys.argv', args):
-        combine_scorefiles()
-
-    return str(out_path.resolve())
-
-
 @pytest.fixture(scope="session")
 def chain_files(tmp_path_factory):
-    chain_dir = tmp_path_factory.mktemp('chain_dir')
+    chain_dir = tmp_path_factory.mktemp("chain_dir")
 
     shutil.copy2("tests/data/hg19ToHg38.over.chain.gz", chain_dir)
     shutil.copy2("tests/data/hg38ToHg19.over.chain.gz", chain_dir)
-    
-    return str(chain_dir.resolve())
-
-
-@pytest.fixture(scope="session")
-def lifted_scorefiles(mini_score_path, chain_files, tmp_path_factory):
-    out_path = tmp_path_factory.mktemp("scores") / "lifted.txt"
-    args: list[str] = ['combine_scorefiles', '-s'] + [mini_score_path] + ['--liftover', '-c', chain_files, '-t',
-                                                                          'GRCh38',
-                                                                          '-m', '0.8'] + ['-o', str(out_path.resolve())]
-
-    with patch('sys.argv', args):
-        combine_scorefiles()
 
-    return str(out_path.resolve())
+    return str(chain_dir.resolve())
 
 
 @pytest.fixture(scope="session")
 def hg38_coords():
-    d = {'rsid': ['rs11903757', 'rs6061231'], 'chr_name': ['2', '20'], 'chr_position': [191722478, 62381861]}
-    df = pd.DataFrame(d)
-    df['accession'] = 'dummy'
-    df['genome_build'] = 'GRCh38'
-    return df
+    rs11903757 = ScoreVariant(
+        **{
+            "rsid": "rs11903757",
+            "chr_name": "2",
+            "chr_position": 191722478,
+            "row_nr": 0,
+            "effect_weight": 1,
+            "accession": "test",
+            "effect_allele": "A",
+        }
+    )
+    rs6061231 = ScoreVariant(
+        **{
+            "rsid": "rs6061231",
+            "chr_name": "20",
+            "chr_position": 62381861,
+            "row_nr": 1,
+            "effect_weight": 1,
+            "accession": "test",
+            "effect_allele": "A",
+        }
+    )
+    return (x for x in [rs11903757, rs6061231])
 
 
 @pytest.fixture(scope="session")
-def hg19_coords(hg38_coords):
+def hg19_coords():
     # hg38_coords in GRCh37, from dbSNP
-    d = {'lifted_chr': ['2', '20'], 'lifted_pos': [192587204, 60956917], 'liftover': [True, True]}
-    return pd.DataFrame(d)
+    rs11903757 = ScoreVariant(
+        **{
+            "rsid": "rs11903757",
+            "chr_name": "2",
+            "chr_position": 192587204,
+            "row_nr": 0,
+            "effect_weight": 1,
+            "accession": "test",
+            "effect_allele": "A",
+        }
+    )
+    rs6061231 = ScoreVariant(
+        **{
+            "rsid": "rs6061231",
+            "chr_name": "20",
+            "chr_position": 60956917,
+            "row_nr": 1,
+            "effect_weight": 1,
+            "accession": "test",
+            "effect_allele": "A",
+        }
+    )
+    return (x for x in [rs11903757, rs6061231])
 
 
-@pytest.fixture(scope='session')
+@pytest.fixture(scope="session")
 def small_flipped_scorefile(small_scorefile):
     # simulate a scorefile on the wrong strand
-    return (complement_valid_alleles(small_scorefile, ['effect_allele', 'other_allele'])
-            .drop(['effect_allele', 'other_allele'])
-            .rename({'effect_allele_FLIP': 'effect_allele', 'other_allele_FLIP': 'other_allele'})
-            .pipe(complement_valid_alleles, ['effect_allele', 'other_allele']))
+    return (
+        complement_valid_alleles(small_scorefile, ["effect_allele", "other_allele"])
+        .drop(["effect_allele", "other_allele"])
+        .rename(
+            {"effect_allele_FLIP": "effect_allele", "other_allele_FLIP": "other_allele"}
+        )
+        .pipe(complement_valid_alleles, ["effect_allele", "other_allele"])
+    )
 
 
-@pytest.fixture(scope='session')
+@pytest.fixture(scope="session")
 def small_target():
-    return pl.DataFrame({"#CHROM": [1, 2, 3],
-                         "POS": [1, 2, 3],
-                         "REF": ["A", "T", "T"],
-                         "ALT": ["C", "A", "G"],
-                         "ID": ["1:1:A:C", "2:2:T:A", "3:3:T:G"],
-                         "is_multiallelic": [False, False, False]})
+    return pl.DataFrame(
+        {
+            "#CHROM": [1, 2, 3],
+            "POS": [1, 2, 3],
+            "REF": ["A", "T", "T"],
+            "ALT": ["C", "A", "G"],
+            "ID": ["1:1:A:C", "2:2:T:A", "3:3:T:G"],
+            "is_multiallelic": [False, False, False],
+        }
+    )
 
 
-@pytest.fixture(scope='session')
+@pytest.fixture(scope="session")
 def small_scorefile():
-    df = pl.DataFrame({"accession": ["test", "test", "test"],
-                       "row_nr": [1, 2, 3],
-                       "chr_name": [1, 2, 3],
-                       "chr_position": [1, 2, 3],
-                       "effect_allele": ["A", "A", "G"],
-                       "other_allele": ["C", "T", "T"],
-                       "effect_weight": [1, 2, 3],
-                       "effect_type": ["additive", "additive", "additive"]})
+    df = pl.DataFrame(
+        {
+            "accession": ["test", "test", "test"],
+            "row_nr": [1, 2, 3],
+            "chr_name": [1, 2, 3],
+            "chr_position": [1, 2, 3],
+            "effect_allele": ["A", "A", "G"],
+            "other_allele": ["C", "T", "T"],
+            "effect_weight": [1, 2, 3],
+            "effect_type": ["additive", "additive", "additive"],
+        }
+    )
 
     return complement_valid_alleles(df, ["effect_allele", "other_allele"])
 
 
-@pytest.fixture(scope='session')
+@pytest.fixture(scope="session")
 def small_scorefile_no_oa(small_scorefile):
-    return small_scorefile.with_column(pl.lit(None).alias('other_allele'))
+    return small_scorefile.with_column(pl.lit(None).alias("other_allele"))
 
 
 def _get_timeout(url):
diff --git a/pgscatalog_utils/aggregate/aggregate_scores.py b/pgscatalog_utils/aggregate/aggregate_scores.py
index aa1de89..d57943d 100644
--- a/pgscatalog_utils/aggregate/aggregate_scores.py
+++ b/pgscatalog_utils/aggregate/aggregate_scores.py
@@ -33,7 +33,7 @@ def aggregate(scorefiles: list[str]):
     for i, path in enumerate(scorefiles):
         logger.debug(f"Reading {path}")
         # pandas can automatically detect zst compression, neat!
-        df = (pd.read_table(path)
+        df = (pd.read_table(path, converters={"#IID": str}, header=0)
               .assign(sampleset=path.split('_')[0])
               .set_index(['sampleset', '#IID']))
 
diff --git a/pgscatalog_utils/download/GenomeBuild.py b/pgscatalog_utils/download/GenomeBuild.py
index 419c3f2..69fd8ab 100644
--- a/pgscatalog_utils/download/GenomeBuild.py
+++ b/pgscatalog_utils/download/GenomeBuild.py
@@ -1,6 +1,25 @@
-from enum import Enum, auto
+from enum import Enum
 
 
 class GenomeBuild(Enum):
-    GRCh37 = auto()
-    GRCh38 = auto()
+    GRCh37 = "GRCh37"
+    GRCh38 = "GRCh38"
+    # just included to handle older files, incompatible unless harmonised:
+    NCBI36 = "NCBI36"  # ew
+
+    def __str__(self):
+        return str(self.value)
+
+    @classmethod
+    def from_string(cls, build):
+        match build:
+            case "GRCh37" | "hg19":
+                return cls(GenomeBuild.GRCh37)
+            case "GRCh38" | "hg38":
+                return cls(GenomeBuild.GRCh38)
+            case "NR":
+                return None
+            case "NCBI36" | "hg18":
+                return cls(GenomeBuild.NCBI36)
+            case _:
+                raise Exception(f"Can't match {build=}")
diff --git a/pgscatalog_utils/pgsexceptions.py b/pgscatalog_utils/pgsexceptions.py
new file mode 100644
index 0000000..224025e
--- /dev/null
+++ b/pgscatalog_utils/pgsexceptions.py
@@ -0,0 +1,127 @@
+""" This module defines a custom PGS exception hierarchy. There's a lot of exceptions for specific failure states,
+which can be a bad approach and too complex. However, we did this anyway for a few reasons:
+
+1. There are only a few types of common errors (around a dozen, with 3-4 very common)
+2. We want to exit the program with custom exit codes to simplify communicating program
+state with external processes (e.g. PGS Catalog Calculator, web platforms) without doing
+complicated things like logging to an external location
+3. This approach should make maintaining exit codes simple
+
+So the plan is to override sys.excepthook, intercept errors defined here, and map them
+to custom exit codes defined below
+"""
+import sys
+from types import MappingProxyType
+
+
+class BasePGSException(Exception):
+    """The base class from which all PGS errors must inherit.
+    The purpose of this class is to simplify finding PGS exceptions and exiting python
+    with a matching custom exit code."""
+
+
+class MatchError(BasePGSException):
+    """The base class for errors that are raised during variant matching"""
+
+
+class DuplicateMatchError(MatchError):
+    """Raised when a matched variant has been duplicated, so that a variant with the same ID
+    would be split across two rows in an output scoring file.
+    """
+
+
+class MatchRateError(MatchError):
+    """Raised when match rate is below match threshold for one or more scoring files"""
+
+
+class ZeroMatchesError(MatchError):
+    """Raised when zero matches are found for one or more scoring files.
+
+    Distinct from MatchRateError because it's very common, and caused by bad input data or parameters."""
+
+
+class MatchValueError(MatchError):
+    """Raised when a match function receives inappropriate values.
+
+    e.g., Multiple chromosomes detected in variant data but data is split per-chromosome"""
+
+
+class CombineError(BasePGSException):
+    """The base class for errors that are raised when combining scorefiles"""
+
+
+class BuildError(CombineError):
+    """Raised when there's a problem with a scoring file genome build."""
+
+
+class ScoreFormatError(CombineError):
+    """Raised when there's a problem with a scoring file."""
+
+
+class CatalogError(BasePGSException):
+    """The base class for errors when querying or downloading from the PGS Catalog"""
+
+
+class ScoreDownloadError(CatalogError):
+    """Raised when a scoring file can't be downloaded"""
+
+
+class ScoreChecksumError(CatalogError):
+    """Raised when a scoring file fails checksum validation"""
+
+
+class QueryError(CatalogError):
+    """Raised when the Catalog API doesn't return a valid response"""
+
+
+class InvalidAccessionError(CatalogError):
+    """Raised when an invalid term is used to query the Catalog"""
+
+
+class SamplesheetError(BasePGSException):
+    """The base class for errors related to samplesheet parsing"""
+
+
+class GenomesNotFound(SamplesheetError):
+    """Raised when FileNotFound"""
+
+
+class SamplesheetFormatError(SamplesheetError):
+    """Raised when a samplesheet is badly formatted"""
+
+
+class ExceptionExitCodeMap:
+    """A read only map to get exit codes for custom exceptions"""
+
+    # https://unix.stackexchange.com/a/604262
+    _mapping = {
+        ScoreDownloadError: 8,
+        ScoreFormatError: 9,
+        ScoreChecksumError: 10,
+        QueryError: 11,
+        InvalidAccessionError: 12,
+        DuplicateMatchError: 13,
+        MatchRateError: 14,
+        ZeroMatchesError: 15,
+        MatchValueError: 16,
+        BuildError: 17,
+        GenomesNotFound: 19,
+        SamplesheetFormatError: 20,
+    }
+
+    code_map = MappingProxyType(_mapping)
+
+    def __getitem__(self, exception_type):
+        # if an exception can't be found in the map, still return a non-zero error code:
+        # default to 255, the maximum possible exit status
+        return self.code_map.get(exception_type, 255)
+
+
+def handle_uncaught_exception(exctype, value, trace):
+    code_map = ExceptionExitCodeMap()
+    oldHook(exctype, value, trace)
+    if isinstance(value, BasePGSException):
+        sys.exit(code_map[exctype])
+
+
+sys.excepthook, oldHook = handle_uncaught_exception, sys.excepthook
diff --git a/pgscatalog_utils/samplesheet/Config.py b/pgscatalog_utils/samplesheet/Config.py
deleted file mode 100644
index 1f4bddb..0000000
--- a/pgscatalog_utils/samplesheet/Config.py
+++ /dev/null
@@ -1,7 +0,0 @@
-from dataclasses import dataclass
-
-
-@dataclass
-class Config:
-    input_path: str
-    output_path: str
diff --git a/pgscatalog_utils/samplesheet/check.py b/pgscatalog_utils/samplesheet/check.py
deleted file mode 100755
index b1ff16b..0000000
--- a/pgscatalog_utils/samplesheet/check.py
+++ /dev/null
@@ -1,370 +0,0 @@
-import argparse
-import logging
-import math
-import pathlib
-from pathlib import Path
-
-import pandas as pd
-
-from pgscatalog_utils import config
-from pgscatalog_utils.samplesheet.Config import Config
-
-logger = logging.getLogger(__name__)
-
-
-def _parse_args(args=None) -> argparse.Namespace:
-    d: (
-        str
-    ) = "Convert pgscatalog/pgsc_calc samplesheet file to JSON and check its contents."
-    e: str = "Example usage: python check.py <FILE_IN> <FILE_OUT>"
-
-    parser: argparse.ArgumentParser = argparse.ArgumentParser(description=d, epilog=e)
-    parser.add_argument("FILE_IN", help="Input samplesheet file.")
-    parser.add_argument(
-        "-v",
-        "--verbose",
-        dest="verbose",
-        action="store_true",
-        help="<Optional> Extra logging information",
-    )
-    parser.add_argument("FILE_OUT", help="Output file.")
-    return parser.parse_args(args)
-
-
-def _truncate_chrom(chrom):
-    match chrom:
-        case _ if chrom.isdigit():
-            return int(chrom)
-        case _ if chrom.startswith("chr"):
-            logger.critical("Please remove chr prefix from samplesheet chromosome column e.g. chr1 -> 1, chrX -> X")
-            raise ValueError("chr prefix detected")
-        case _:
-            return chrom
-
-
-def _check_colnames(df: pd.DataFrame):
-    mandatory: list[str] = ["sampleset", "path_prefix", "chrom", "format"]
-    optional: list[str] = ["vcf_genotype_field"]
-
-    if not set(mandatory) == set(df.columns):
-        if set(mandatory + optional) == set(df.columns):
-            # this is fine
-            return
-        else:
-            logger.critical("Samplesheet has invalid header row")
-            logger.critical(f"Column names must only include: {mandatory}")
-            [
-                logger.critical(f"Invalid column name: {col}")
-                for col in df
-                if col not in mandatory
-            ]
-            raise Exception
-
-
-def _check_unique_paths(df: pd.DataFrame):
-    """Each row in a samplesheet should have a unique path"""
-    duplicated: pd.Series = df["path_prefix"].duplicated()
-    for idx, duplicate in duplicated.items():
-        if duplicate:
-            bad_record = df.iloc[:idx]
-            logger.critical(f"Duplicated path found in samplesheet:\n{bad_record}")
-
-
-def _check_empty_paths(df: pd.DataFrame):
-    """Paths are mandatory"""
-    empty_paths: pd.Series = df["path_prefix"].isnull()
-    for idx, empty in empty_paths.items():
-        if empty:
-            logger.critical(f"Empty path found in samplesheet:\n {df.iloc[[idx]]}")
-            raise Exception
-
-
-def _read_samplesheet(path: str) -> pd.DataFrame:
-    csv: pd.DataFrame = pd.read_csv(path, sep=",", header=0, converters={"chrom": str})
-    csv["chrom"] = csv["chrom"].apply(_truncate_chrom)
-    return csv
-
-
-def _check_paths(df: pd.DataFrame) -> None:
-    _check_empty_paths(df)
-    _check_unique_paths(df)
-
-
-def _get_chrom_list(df: pd.DataFrame) -> dict[str, list[str | None]]:
-    chrom_dict = {}
-    for idx, row in df.iterrows():
-        key = row["sampleset"]
-        value = row["chrom"]
-        try:
-            if math.isnan(value):
-                value = None
-        except TypeError:
-            pass
-        chroms = chrom_dict.get(key, [])
-        chroms.append(value)
-        chrom_dict.update({key: chroms})
-
-    return chrom_dict
-
-
-def _check_chrom_duplicates(sampleset: str, chrom_list: dict) -> None:
-    seen = set()
-    duplicate_chromosomes: list[str] = [
-        str(x) for x in chrom_list if x in seen or seen.add(x)
-    ]
-    if duplicate_chromosomes:
-        logger.critical(f"Duplicate chromosomes detected in sampleset {sampleset}")
-        logger.critical(f"Duplicate chromosomes: {duplicate_chromosomes}")
-        raise Exception
-
-
-def _check_multiple_missing_chrom(sampleset: str, chrom_list: dict) -> None:
-    for chrom in chrom_list:
-        if chrom is None and len(chrom_list) != 1:
-            logger.critical(
-                f"Sampleset {sampleset} has rows with multiple missing chromosomes"
-            )
-            logger.critical(
-                "If you have file with multiple chromosomes, delete the duplicate rows"
-            )
-            logger.critical(
-                "If your data are split per chromosome, then chromosomes must be set for all rows"
-            )
-            raise Exception
-
-
-def _check_chrom(df: pd.DataFrame) -> None:
-    # get a list of chroms per sampleset and check them for some basic errors
-    chroms: dict = _get_chrom_list(df)
-
-    for sampleset, chrom_list in chroms.items():
-        _check_chrom_duplicates(sampleset, chrom_list)
-        _check_multiple_missing_chrom(sampleset, chrom_list)
-
-
-def _check_format(df: pd.DataFrame):
-    """Make sure the file format is a valid choice"""
-    for idx, row in df.iterrows():
-        valid_formats: list[str] = ["vcf", "pfile", "bfile"]
-        if row["format"] not in valid_formats:
-            logger.critical(
-                f"Invalid format: {row['format']} must be one of {valid_formats}"
-            )
-            logger.critical(f"\n{df.iloc[[idx]]}")
-            raise Exception
-
-
-def _setup_paths(df: pd.DataFrame) -> pd.DataFrame:
-    """Add suffix to path prefixes depending on file format / type"""
-    paths: list[pd.Series] = []
-    for idx, row in df.iterrows():
-        suffix: list[str]
-        match row["format"]:
-            case "vcf":
-                logger.info("Setting VCF input")
-                suffix = [".vcf.gz"]
-            case "bfile":
-                logger.info("Setting plink1 binary fileset (bfile) input")
-                suffix = [".bed", ".bim", ".fam"]
-            case "pfile":
-                logger.info("Setting plink2 binary fileset (pfile) input")
-                suffix = [".pgen", ".pvar", ".psam"]
-            case _:
-                raise Exception
-
-        resolved_paths: list[str] = _resolve_paths(
-            [row["path_prefix"] + x for x in suffix], row["format"]
-        )
-        paths.append(pd.Series(data=[resolved_paths], index=[idx]))
-
-    df["path"] = pd.concat(paths)
-    return df
-
-
-def _resolve_compressed_variant_path(path: str) -> pathlib.Path:
-    # .bim.zst | .bim -> OK
-    # .pvar.zst | .pvar -> OK
-    # anything else not OK
-    zstd_ext: str = ".zst"
-    compressed_path: pathlib.Path = pathlib.Path(path + zstd_ext).resolve()
-    uncompressed_path: pathlib.Path = pathlib.Path(path).resolve()
-
-    # prefer compressed data
-    if compressed_path.exists():
-        logger.info(f"Found compressed variant information file {compressed_path.name}")
-        return compressed_path
-    elif uncompressed_path.exists():
-        logger.info(
-            f"Couldn't find compressed variant information file, trying {uncompressed_path.name}"
-        )
-        return uncompressed_path
-    else:
-        logger.critical(f"{compressed_path} doesn't exist")
-        logger.critical(f"{uncompressed_path} doesn't exist")
-        logger.critical(
-            "Couldn't find variant information files, please check samplesheet path_prefix and try again"
-        )
-        raise Exception
-
-
-def _resolve_paths(path_list: list[str], filetype: str) -> list[str]:
-    resolved_list: list[str] = []
-
-    # always resolve the input samplesheet
-    base_dir: Path = Path(Config.input_path).resolve().parent
-    if (path := Path(Config.input_path)).is_symlink():
-        logger.info(
-            f"Input file {path} is symlinked, resolving to absolute path {path.resolve()}"
-        )
-
-    for path in path_list:
-        if path.startswith("https://") | path.startswith("s3://"):
-            logger.info("Remote path detected, skipping resolve")
-            resolved_list.append(str(path))
-            continue
-        elif path.startswith("http://"):
-            logger.critical("HTTP download is insecure! Did you mean https:// ?")
-            raise Exception("Insecure path detected")
-        else:
-            p: Path = Path(path)
-            if not p.is_absolute():
-                logger.warning(
-                    "Relative path detected in samplesheet. Set absolute paths to silence this warning."
-                )
-                logger.warning(
-                    "Assuming input samplesheet is a symlinked file in a nextflow working directory"
-                )
-                logger.warning(
-                    "Following symlink and attempting to resolve path relative to input file"
-                )
-                logger.warning(f"Resolving paths relative to: {base_dir}")
-                resolved = _resolve_filetypes(
-                    path=str(base_dir.joinpath(path)), filetype=filetype
-                )
-            else:
-                logger.info("Absolute path detected")
-                resolved = _resolve_filetypes(filetype=filetype, path=str(p))
-
-            if resolved.exists():
-                logger.info(f"{resolved} exists")
-                resolved_list.append(str(resolved))
-            else:
-                logger.critical(
-                    f"{resolved} doesn't exist, please check samplesheet path_prefix and try again"
-                )
-                logger.critical(
-                    "If you're 100% sure this file exists and you're confused by this error, please check https://pgsc-calc.readthedocs.io/en/latest/how-to/mount.html"
-                )
-                raise FileNotFoundError
-
-    return resolved_list
-
-
-def _resolve_filetypes(filetype: str, path: str) -> Path:
-    match filetype:
-        case "pfile" | "bfile":
-            if path.endswith(".bim") or path.endswith(".pvar"):
-                resolved = _resolve_compressed_variant_path(path)
-            else:
-                # bed / pgen | fam / psam
-                resolved = pathlib.Path(path).resolve()
-        case "vcf":
-            resolved = pathlib.Path(path).resolve()
-        case _:
-            logger.critical(f"Unsupported filetype {filetype}")
-            raise Exception
-
-    return resolved
-
-
-def _check_genotype_field(df: pd.DataFrame) -> pd.DataFrame:
-    df["vcf_import_dosage"] = False  # (dosage off by default)
-    if "vcf_genotype_field" in df.columns:
-        logger.debug("vcf_genotype_field detected")
-        for index, row in df.iterrows():
-            if row["vcf_genotype_field"] not in ["GT", "DS"]:
-                missing: bool  # missing dosage is OK
-                try:
-                    missing = math.isnan(row["vcf_genotype_field"])
-                except TypeError:
-                    missing = False
-
-                if not missing:
-                    logger.critical(
-                        f"Invalid entry in vcf_genotype_field: {row['vcf_genotype_field']}"
-                    )
-                    logger.critical(f"\n {row}")
-                    raise Exception
-
-        df.loc[df["vcf_genotype_field"] == "DS", "vcf_import_dosage"] = True
-    else:
-        logger.info("no vcf_genotype_field detected")
-
-    return df
-
-
-def _check_reserved_names(df: pd.DataFrame):
-    if any(df["sampleset"] == "reference"):
-        logger.critical(
-            "Samplesets must not be named 'reference', please rename in the sample sheet"
-        )
-        raise Exception
-
-    # Check whether reference contains reserved tokens from nextflow channels
-    badnames = [x for x in df["sampleset"] if ("." in x or "_" in x)]
-    if len(badnames) > 0:
-        logger.critical(
-            "Samplesets must not contain any reserved characters ( '_' , '.'), "
-            "please rename the following samples in the sample sheet: {}".format(
-                badnames
-            )
-        )
-        raise Exception
-
-
-def _check_one_sampleset(df: pd.DataFrame):
-    samplesets = set(df["sampleset"].to_list())
-    if len(samplesets) > 1:
-        logger.critical(f"Multiple samplesets defined in the samplesheet {samplesets}")
-        sampleset_error = """ Only one sampleset per samplesheet is supported
-        Your genomic data should _only_ be split by chromosome
-        pgsc_calc works best with cohorts
-        Individual VCFs should be merged into a multi-sample VCF
-        If you want to process multiple cohorts, please run pgsc_calc multiple times with different samplesheets. """
-        [logger.critical(x.strip()) for x in sampleset_error.split("\n")]
-        raise Exception("Multiple samplesets")
-
-
-def check_samplesheet() -> None:
-    """
-    This function checks that the samplesheet follows the following structure:
-    sampleset,vcf_path,bfile_path,chrom,chunk
-    cineca_synthetic_subset,cineca_synthetic_subset.vcf.gz,,22,
-    """
-    args = _parse_args()
-    config.set_logging_level(args.verbose)
-
-    Config.input_path = args.FILE_IN
-    Config.output_path = args.FILE_OUT
-
-    df = _read_samplesheet(Config.input_path)
-
-    # check df for errors
-    _check_one_sampleset(df)
-    _check_reserved_names(df)
-    _check_colnames(df)
-    _check_paths(df)
-    _check_chrom(df)
-    _check_format(df)
-
-    # add information to df
-    df = _setup_paths(df)
-    df = _check_genotype_field(df)  # dosages
-
-    logger.info("Samplesheet checks complete")
-    (df.drop(["path_prefix"], axis=1).to_json(Config.output_path, orient="records"))
-    logger.info(f"JSON file successfully written to {Config.output_path}")
-
-
-if __name__ == "__main__":
-    check_samplesheet()
diff --git a/pgscatalog_utils/scorefile/combine_scorefiles.py b/pgscatalog_utils/scorefile/combine_scorefiles.py
index 9465484..ffef5e4 100644
--- a/pgscatalog_utils/scorefile/combine_scorefiles.py
+++ b/pgscatalog_utils/scorefile/combine_scorefiles.py
@@ -1,39 +1,17 @@
 import argparse
+import json
 import logging
-import os
+import pathlib
 import sys
 import textwrap
-import json
 
 from pgscatalog_utils.config import set_logging_level
-from pgscatalog_utils.scorefile.effect_type import set_effect_type
-from pgscatalog_utils.scorefile.effect_weight import melt_effect_weights
-from pgscatalog_utils.scorefile.genome_build import build2GRC
-from pgscatalog_utils.scorefile.harmonised import remap_harmonised
-from pgscatalog_utils.scorefile.liftover import liftover
-from pgscatalog_utils.scorefile.qc import quality_control
-from pgscatalog_utils.scorefile.read import load_scorefile, get_scorefile_basename
-from pgscatalog_utils.scorefile.write import write_scorefile
-
-
-headers2logs = [
-    'pgs_id',
-    'pgp_id',
-    'pgs_name',
-    'genome_build',
-    'variants_number',
-    'trait_reported',
-    'trait_efo',
-    'trait_mapped',
-    'weight_type',
-    'citation'
-]
-headers2logs_harmonisation = [
-    'HmPOS_build',
-    'HmPOS_date',
-    'HmPOS_match_chr',
-    'HmPOS_match_pos'
-]
+from pgscatalog_utils.download.GenomeBuild import GenomeBuild
+from pgscatalog_utils.scorefile.config import Config
+from pgscatalog_utils.scorefile.liftover import create_liftover
+from pgscatalog_utils.scorefile.scoringfile import ScoringFile
+from pgscatalog_utils.scorefile.write import write_combined
+
 
 def combine_scorefiles():
     args = _parse_args()
@@ -41,110 +19,48 @@ def combine_scorefiles():
     logger = logging.getLogger(__name__)
     set_logging_level(args.verbose)
 
+    Config.batch_size = 100000  # NOTE(review): presumably a per-batch variant count; confirm
+    Config.drop_missing = args.drop_missing
+    Config.target_build = GenomeBuild.from_string(args.target_build)
+    Config.liftover = args.liftover
+    Config.min_lift = args.min_lift
+
+    if args.chain_dir:  # chain files are only loaded when a chain directory is supplied
+        Config.chain_dir = args.chain_dir  # set first: create_liftover() reads it from Config
+        Config.lo = create_liftover()
+
+    if pathlib.Path(args.outfile).exists():  # never overwrite an existing output file
+        raise FileExistsError(f"{args.outfile}")
+
     paths: list[str] = list(set(args.scorefiles))  # unique paths only
     logger.debug(f"Input scorefiles: {paths}")
 
-    if os.path.exists(args.outfile):
-        logger.critical(f"Output file {args.outfile} already exists")
-        raise Exception
-
-    # Score header logs - init
-    score_logs = {}
-    dir_output = os.path.dirname(args.outfile)
-    if dir_output == '':
-        dir_output = './'
-    elif dir_output.endswith('/') is False:
-        dir_output += '/'
-    json_logs_file =  dir_output + args.logfile
-
-    for x in paths:
-        # Read scorefile df and header
-        h, score = load_scorefile(x)
-        score_shape_original = score.shape
-
-        if score.empty:
-            logger.critical(f"Empty scorefile {x} detected! Please check the input data")
-            raise Exception
+    sfs = [ScoringFile.from_path(x) for x in paths]
 
-        # Check if we should use the harmonized positions
-        use_harmonised = False
-        current_build = None
-        if h.get('HmPOS_build') is not None:
-            if h.get('HmPOS_build') == args.target_build:
-                use_harmonised = True
-                current_build = h.get('HmPOS_build')
-            else:
-                logger.error(
-                    f"Cannot combine {x} (harmonized to {h.get('HmPOS_build')}) in target build {args.target_build}")
-                raise Exception
-
-        # Process/QC score and check variant columns
-        score = (score.pipe(remap_harmonised, use_harmonised=use_harmonised)
-                 .pipe(quality_control, drop_missing=args.drop_missing)
-                 .pipe(melt_effect_weights)
-                 .pipe(set_effect_type))
-
-        # Annotate score with the genome_build (in GRCh notation)
-        if current_build is None:
-            current_build = build2GRC(h.get('genome_build'))
-            if current_build is None:
-                logger.error("Scorefile has no build information, "
-                             "please add the build to the header with "
-                             "('#genome_build=[insert variant build]")
-                raise Exception
-
-        score = score.assign(genome_build=current_build)
-
-        if (current_build != args.target_build) and (args.liftover is False):
-            logger.error(
-                f"Cannot combine {x} (build={h.get('genome_build')}) with target build {args.target_build} without liftover")
-            logger.error("Try running with --liftover and specifying the --chain_dir")
-            raise Exception
+    target_build = GenomeBuild.from_string(args.target_build)
+    bad_builds = [x.accession for x in sfs if x.genome_build != target_build]
 
-        if args.liftover:
-            logger.debug("Annotating scorefile with liftover parameters")
-            score = liftover(score, args.chain_dir, args.min_lift, args.target_build)
-
-        if score.empty and (args.drop_missing is False):
-            logger.critical("Empty output score detected, something went wrong while combining")
+    if not args.liftover:
+        for bad_file in bad_builds:
+            logger.critical(f"{bad_file} doesn't match {target_build}, can't combine")
+        if len(bad_builds) > 0:
             raise Exception
 
-        write_scorefile(score, args.outfile)
-
-        # Build Score header logs
-        score_id = get_scorefile_basename(x)
-        score_header = score_logs[score_id] = {}
-        # Scoring file header information
-        for header in headers2logs:
-            header_val = h.get(header)
-            if (header in ['trait_efo', 'trait_mapped']) and (header_val is not None):
-                header_val = header_val.split('|')
-            score_header[header] = header_val
-        # Other header information
-        score_header['columns'] = list(score.columns)
-        score_header['use_liftover'] = False
-        if args.liftover:
-             score_header['use_liftover'] = True
-        # Harmonized header information
-        score_header['use_harmonised'] = use_harmonised
-        if use_harmonised:
-            score_header['sources'] = sorted(score['hm_source'].unique().tolist())
-            for hm_header in headers2logs_harmonisation:
-                hm_header_val = h.get(hm_header)
-                if hm_header_val:
-                    if hm_header.startswith('HmPOS_match'):
-                        hm_header_val = json.loads(hm_header_val)
-                    score_header[hm_header] = hm_header_val
-        if score_header['variants_number'] is None:
-            score_header['variants_number'] = score_shape_original[0]
-
-    # Write Score header logs file
-    with open(json_logs_file, 'w') as fp:
-        json.dump(score_logs, fp, indent=4)
+    # counts reported by write_combined feed each scoring file's log entry
+    logs: dict[str, int] = write_combined(sfs, args.outfile)
+    # NOTE(review): pairing assumes logs keeps the same order as sfs; confirm in write_combined
+    json_log = [sf.generate_log(count) for count, sf in zip(logs.values(), sfs)]
+
+    # the log file is written to the same directory as the combined scoring file
+    log_out_path = pathlib.Path(args.outfile).parent / args.logfile
+    with open(log_out_path, "w") as f:
+        logger.info(f"Writing log to {f.name}")
+        json.dump(json_log, f, indent=4)
 
 
 def _description_text() -> str:
-    return textwrap.dedent('''\
+    return textwrap.dedent(
+        """\
     Combine multiple scoring files in PGS Catalog format (see https://www.pgscatalog.org/downloads/ 
     for details) to a 'long' table of columns needed for variant matching and subsequent calculation. 
 
@@ -153,43 +69,93 @@ def _description_text() -> str:
     unharmonised and harmonised PGS Catalog data. By default all variants are output (including 
     positions with duplicated data [often caused by rsID/liftover collions across builds]) and 
     variants with missing positions. 
-    ''')
+    """
+    )
 
 
 def _epilog_text() -> str:
-    return textwrap.dedent('''\
+    return textwrap.dedent(
+        """\
     The long table is used to simplify intersecting variants in target genotyping datasets 
     and the scoring files with the match_variants program.
-    ''')
+    """
+    )
 
 
 def _parse_args(args=None) -> argparse.Namespace:
-    parser = argparse.ArgumentParser(description=_description_text(), epilog=_epilog_text(),
-                                     formatter_class=argparse.RawDescriptionHelpFormatter)
-    parser.add_argument('-s', '--scorefiles', dest='scorefiles', nargs='+',
-                        help='<Required> Scorefile path (wildcard * is OK)', required=True)
-    parser.add_argument('--liftover', dest='liftover',
-                        help='<Optional> Convert scoring file variants to target genome build?', action='store_true')
-    parser.add_argument('-t', '--target_build', dest='target_build',
-                        choices=['GRCh37', 'GRCh38'], help='<Required> Build of target genome',
-                        required=True)
-    parser.add_argument('-c', '--chain_dir', dest='chain_dir', help='Path to directory containing chain files',
-                        required="--liftover" in sys.argv)
-    parser.add_argument('-m', '--min_lift', dest='min_lift',
-                        help='<Optional> If liftover, minimum proportion of variants lifted over',
-                        required="--liftover" in sys.argv, default=0.95, type=float)
-    parser.add_argument('--drop_missing', dest='drop_missing', action='store_true',
-                        help='<Optional> Drop variants with missing information (chr/pos) and '
-                             'non-standard alleles (e.g. HLA=P/N) from the output file.')
-    parser.add_argument('-o', '--outfile', dest='outfile', required=True,
-                        default='combined.txt',
-                        help='<Required> Output path to combined long scorefile '
-                             '[ will compress output if filename ends with .gz ]')
-    parser.add_argument('-l', '--logfile', dest='logfile', default='log_combined.json',
-                        help='<Required> Name for the log file (score metadata) for combined scores.'
-                             '[ will write to identical directory as combined scorefile]')
-    parser.add_argument('-v', '--verbose', dest='verbose', action='store_true',
-                        help='<Optional> Extra logging information')
+    parser = argparse.ArgumentParser(
+        description=_description_text(),
+        epilog=_epilog_text(),
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    parser.add_argument(
+        "-s",
+        "--scorefiles",
+        dest="scorefiles",
+        nargs="+",
+        help="<Required> Scorefile path (wildcard * is OK)",
+        required=True,
+    )
+    parser.add_argument(
+        "--liftover",
+        dest="liftover",
+        help="<Optional> Convert scoring file variants to target genome build?",
+        action="store_true",
+    )
+    parser.add_argument(
+        "-t",
+        "--target_build",
+        dest="target_build",
+        choices=["GRCh37", "GRCh38"],
+        help="<Required> Build of target genome",
+        required=True,
+    )
+    parser.add_argument(
+        "-c",
+        "--chain_dir",
+        dest="chain_dir",
+        help="Path to directory containing chain files",
+        required="--liftover" in sys.argv,
+    )
+    parser.add_argument(
+        "-m",
+        "--min_lift",
+        dest="min_lift",
+        help="<Optional> If liftover, minimum proportion of variants lifted over",
+        default=0.95,
+        type=float,
+    )
+    parser.add_argument(
+        "--drop_missing",
+        dest="drop_missing",
+        action="store_true",
+        help="<Optional> Drop variants with missing information (chr/pos) and "
+        "non-standard alleles (e.g. HLA=P/N) from the output file.",
+    )
+    parser.add_argument(
+        "-o",
+        "--outfile",
+        dest="outfile",
+        required=True,
+        default="combined.txt",
+        help="<Required> Output path to combined long scorefile "
+        "[ will compress output if filename ends with .gz ]",
+    )
+    parser.add_argument(
+        "-l",
+        "--logfile",
+        dest="logfile",
+        default="log_combined.json",  # a default exists, so this flag is optional
+        help="<Optional> Name for the log file (score metadata) for combined scores."
+        " [ will write to identical directory as combined scorefile]",
+    )
+    parser.add_argument(
+        "-v",
+        "--verbose",
+        dest="verbose",
+        action="store_true",
+        help="<Optional> Extra logging information",
+    )
     return parser.parse_args(args)
 
 
diff --git a/pgscatalog_utils/scorefile/config.py b/pgscatalog_utils/scorefile/config.py
new file mode 100644
index 0000000..a7540fc
--- /dev/null
+++ b/pgscatalog_utils/scorefile/config.py
@@ -0,0 +1,16 @@
+from dataclasses import dataclass
+
+import pyliftover
+
+from pgscatalog_utils.download.GenomeBuild import GenomeBuild
+
+
+@dataclass
+class Config:  # shared settings; combine_scorefiles() assigns these on the class itself
+    drop_missing: bool  # from --drop_missing
+    liftover: bool  # from --liftover
+    lo: dict[str, pyliftover.LiftOver]  # create_liftover() result: "hg19hg38" / "hg38hg19"
+    chain_dir: str  # directory containing the chain files
+    min_lift: float  # minimum proportion of variants that must lift over
+    batch_size: int
+    target_build: GenomeBuild
diff --git a/pgscatalog_utils/scorefile/effect_type.py b/pgscatalog_utils/scorefile/effect_type.py
deleted file mode 100644
index 50c8c73..0000000
--- a/pgscatalog_utils/scorefile/effect_type.py
+++ /dev/null
@@ -1,34 +0,0 @@
-import logging
-
-import pandas as pd
-
-logger = logging.getLogger(__name__)
-
-
-def set_effect_type(df: pd.DataFrame) -> pd.DataFrame:
-    if {'is_recessive', 'is_dominant'}.issubset(df.columns):
-        _check_effect_types(df)
-        return (df.assign(additive=lambda x: ~x["is_recessive"] & ~x["is_dominant"])
-                .assign(effect_type=lambda x: x[["is_recessive", "is_dominant", "additive"]].idxmax(1)))
-    else:
-        return _set_default_effect_type(df)
-
-
-def _check_effect_types(df: pd.DataFrame):
-    """ Check that only one effect type is set per variant """
-    bad_rows: pd.DataFrame = df[['is_dominant', 'is_recessive']].all(axis=1).any()
-
-    error = ''' ERROR: Bad variants in scorefile
-    is_recessive and is_dominant columns are both TRUE for a variant
-    These columns are mutually exclusive (both can't be true)
-    However, both can be FALSE for additive variant scores
-    '''
-    if bad_rows:
-        logger.error(error)
-        logger.error(bad_rows)
-        raise Exception
-
-
-def _set_default_effect_type(df: pd.DataFrame, effect_type: str = "additive") -> pd.DataFrame:
-    logger.debug(f'No effect types set, using default ({effect_type})')
-    return df.assign(effect_type=effect_type)
diff --git a/pgscatalog_utils/scorefile/effect_weight.py b/pgscatalog_utils/scorefile/effect_weight.py
deleted file mode 100644
index 4b95e0f..0000000
--- a/pgscatalog_utils/scorefile/effect_weight.py
+++ /dev/null
@@ -1,49 +0,0 @@
-import logging
-import re
-
-import pandas as pd
-
-logger = logging.getLogger(__name__)
-
-
-def melt_effect_weights(df: pd.DataFrame) -> pd.DataFrame:
-    """ Ensure all dataframes are in long format, with one effect weight column and a score accession column """
-    elongate = _detect_multiple_weight_columns(df)
-
-    if elongate:
-        logger.debug("Melting effect weights")
-        return _melt(df)
-    else:
-        logger.debug("Skipping melt")
-        df['accession'] = df['filename']
-        return df
-
-
-def _detect_multiple_weight_columns(df: pd.DataFrame) -> bool:
-    """ Detect if multiple effect weight columns are present
-
-    Single weight format:
-    | chr_name | chr_pos | effect_allele | effect_weight
-
-    Multiple weight format:
-    | chr_name | chr_pos | effect_allele | effect_weight_score_1 | ... | effect_weight_score_n
-    """
-    columns: list[re.match | None] = [re.search("^effect_weight$", x) for x in df.columns.to_list()]
-    columns_suffix: list[re.match | None] = [re.search("^effect_weight_[A-Za-z0-9]+$", x) for x
-                                             in df.columns.to_list()]
-
-    if any([col for col in columns]):
-        logger.debug("Single effect weight column detected")
-        return False
-    elif any([col for col in columns_suffix]):
-        logger.debug("Multiple weight weight columns detected")
-        return True
-    else:
-        logger.error("ERROR: Missing valid effect weight columns")
-        raise Exception("Bad effect weights")
-
-
-def _melt(df: pd.DataFrame) -> pd.DataFrame:
-    """ Melt a multiple effect weight format """
-    ew_cols: list[str] = df.filter(regex="effect_weight_*").columns.to_list()
-    return df.melt(value_vars=ew_cols, value_name="effect_weight", var_name="accession")
diff --git a/pgscatalog_utils/scorefile/effectallele.py b/pgscatalog_utils/scorefile/effectallele.py
new file mode 100644
index 0000000..0dffd04
--- /dev/null
+++ b/pgscatalog_utils/scorefile/effectallele.py
@@ -0,0 +1,60 @@
+class EffectAllele:
+    """A class that represents an effect allele found in PGS Catalog scoring files
+
+    The allele whose dosage is counted (e.g. {0, 1, 2}) and multiplied by the variant's
+    weight (effect_weight) when calculating a score; also known as the 'risk allele'.
+    Complex alleles (e.g. HLA or APOE) are not SNPs; users should be warned about them.
+    >>> simple_ea = EffectAllele("A")
+    >>> simple_ea
+    EffectAllele("A")
+    >>> simple_ea.is_snp
+    True
+    >>> str(simple_ea)
+    'A'
+    >>> EffectAllele("AG")
+    EffectAllele("AG")
+    >>> hla_example = EffectAllele("+")
+    >>> hla_example
+    EffectAllele("+")
+    >>> hla_example.is_snp
+    False
+    """
+
+    _valid_snp_bases = frozenset({"A", "C", "T", "G"})
+    __slots__ = ("_allele", "_is_snp")
+
+    def __init__(self, allele) -> None:
+        self._allele = str(allele)
+        self._is_snp = None  # computed when accessed
+
+    def __repr__(self) -> str:
+        return f'{type(self).__name__}("{self.allele}")'
+
+    def __str__(self) -> str:
+        return self.allele
+
+    @property
+    def allele(self) -> str:
+        return self._allele
+
+    @allele.setter
+    def allele(self, value) -> None:
+        self._allele = str(value)
+        self._is_snp = None  # reset _is_snp when allele is changed
+
+    @property
+    def is_snp(self) -> bool:
+        """True only for a non-empty allele made up solely of A, C, G and T bases.
+        >>> ea = EffectAllele("+")
+        >>> ea.is_snp
+        False
+        >>> ea.allele = "A"
+        >>> ea.is_snp
+        True
+        >>> EffectAllele("").is_snp
+        False
+        """
+        if self._is_snp is None:
+            bases = frozenset(self.allele)  # empty allele -> empty set -> not a SNP
+            self._is_snp = bool(bases) and bases <= self._valid_snp_bases
+        return self._is_snp
diff --git a/pgscatalog_utils/scorefile/effecttype.py b/pgscatalog_utils/scorefile/effecttype.py
new file mode 100644
index 0000000..4878072
--- /dev/null
+++ b/pgscatalog_utils/scorefile/effecttype.py
@@ -0,0 +1,14 @@
+from enum import Enum
+
+
+class EffectType(Enum):
+    RECESSIVE = "recessive"
+    DOMINANT = "dominant"
+    ADDITIVE = "additive"
+
+    def __str__(self) -> str:
+        return f"{self.value}"
+
+    def __repr__(self) -> str:
+        # repr text such as EffectType.ADDITIVE evaluates back to the same member
+        return ".".join((type(self).__name__, self.name))
diff --git a/pgscatalog_utils/scorefile/genome_build.py b/pgscatalog_utils/scorefile/genome_build.py
deleted file mode 100644
index 7ea4f09..0000000
--- a/pgscatalog_utils/scorefile/genome_build.py
+++ /dev/null
@@ -1,24 +0,0 @@
-import logging
-
-import pandas as pd
-
-logger = logging.getLogger(__name__)
-
-
-def annotate_build(df: pd.DataFrame, target_build: str) -> pd.DataFrame:
-    """ Annotate the dataframe with genome build data  """
-    logger.debug(f"Annotating target build: {target_build}")
-    build_dict: dict = {'GRCh37': 'hg19', 'GRCh38': 'hg38', 'hg19': 'hg19', 'hg38': 'hg38'}  # standardise build names
-    df['chain_target_build'] = build_dict[target_build]
-    df = df.assign(chain_genome_build=[build_dict[x] for x in df['genome_build']])
-    return df
-
-
-def build2GRC(build):
-    """Map build names so they can be compared with GRCh37 and 38"""
-    build_2_GRC_dict = {'GRCh37': 'GRCh37', 'GRCh38': 'GRCh38', 'hg19': 'GRCh37',
-                        'hg38': 'GRCh38'}  # standardise build names
-    if pd.isnull(build):
-        return None
-    else:
-        return build_2_GRC_dict.get(build)
diff --git a/pgscatalog_utils/scorefile/harmonised.py b/pgscatalog_utils/scorefile/harmonised.py
deleted file mode 100644
index b56fb93..0000000
--- a/pgscatalog_utils/scorefile/harmonised.py
+++ /dev/null
@@ -1,30 +0,0 @@
-import logging
-import re
-
-import pandas as pd
-
-logger = logging.getLogger(__name__)
-
-
-def remap_harmonised(df: pd.DataFrame, use_harmonised) -> pd.DataFrame:
-    """ Replace original columns with harmonised data, if available and appropriate """
-
-    if any([re.match("hm_\\w+", x) for x in df.columns]) and use_harmonised:
-        logger.debug("Harmonised columns detected and used")
-        hm_colnames: dict[str: str] = {'hm_chr': 'chr_name', 'hm_pos': 'chr_position',
-                                       'hm_inferOtherAllele': 'other_allele'}
-
-        if 'other_allele' not in df or all(df['other_allele'].isnull()):
-            logger.debug("other_allele column contains no information, replacing with hm_inferOtherAllele")
-            return (df.drop(['chr_name', 'chr_position', 'other_allele'], axis=1, errors='ignore')
-                    .rename(hm_colnames, axis=1))
-        else:
-            logger.debug("other_allele column contains information, dropping hm_inferOtherAllele")
-            return (df.drop(['chr_name', 'chr_position', 'hm_inferOtherAllele'], axis=1, errors='ignore')
-                    .rename(hm_colnames, axis=1))
-    elif any([re.match("hm_\\w+", x) for x in df.columns]) and not use_harmonised:
-        logger.debug(f"Harmonised columns detected but not used (use_harmonised={use_harmonised})")
-        return df
-    else:
-        logger.debug("Harmonised columns not detected")
-        return df
diff --git a/pgscatalog_utils/scorefile/liftover.py b/pgscatalog_utils/scorefile/liftover.py
index 45258b1..24b9194 100644
--- a/pgscatalog_utils/scorefile/liftover.py
+++ b/pgscatalog_utils/scorefile/liftover.py
@@ -1,103 +1,75 @@
 import logging
 import os
+import typing
 
-import pandas as pd
 import pyliftover
 
-from pgscatalog_utils.scorefile.genome_build import annotate_build
+from pgscatalog_utils.download.GenomeBuild import GenomeBuild
+from pgscatalog_utils.scorefile.config import Config
+from pgscatalog_utils.scorefile.scorevariant import ScoreVariant
 
 logger = logging.getLogger(__name__)
 
 
-def liftover(df: pd.DataFrame, chain_dir: str, min_lift: float, target_build: str) -> pd.DataFrame:
-    """ Liftover genomic coordinates to a different genome build """
-    df = annotate_build(df, target_build)  # get chain_target_build (e.g. in hg notation to match chain files)
-
-    mapped, unmapped = pd.DataFrame(), pd.DataFrame()
-    no_liftover: pd.DataFrame = df.query('chain_target_build == chain_genome_build')
-    to_liftover: pd.DataFrame = df.query('chain_target_build != chain_genome_build')
-
-    if no_liftover.empty:
-        logger.debug("Liftover required for all scorefile variants")
+def liftover(
+    variants: typing.Generator[ScoreVariant, None, None],
+    harmonised: bool,
+    current_build: GenomeBuild,
+    target_build: GenomeBuild,
+) -> typing.Generator[ScoreVariant, None, None]:
+    if harmonised:
+        skip_lo = True
+    elif target_build == current_build:
+        skip_lo = True
     else:
-        logger.debug("Skipping liftover for scorefiles with same build as target genome")
-        no_liftover.loc[:, ['lifted_chr', 'lifted_pos']] = no_liftover[
-            ['chr_name', 'chr_position']]  # assume col structure
-        no_liftover.assign(liftover=None)
+        skip_lo = False
 
-    if to_liftover.empty:
-        logger.debug("Liftover skipped because no variants required it")
+    if skip_lo:
+        logger.info("Skipping liftover")
+        for variant in variants:
+            yield variant
     else:
-        lo: dict[str, pyliftover.LiftOver] = _create_liftover(chain_dir)  # loads chain files
-        logger.debug("Lifting over scoring files")
-        lifted: pd.DataFrame = to_liftover.apply(_convert_coordinates, axis=1, lo_dict=lo)
-        to_liftover = pd.concat([to_liftover, lifted], axis=1)
-        logger.debug("Liftover complete")
-
-        mapped: pd.DataFrame = (to_liftover[~to_liftover[['lifted_chr', 'lifted_pos']].isnull().any(axis=1)]
-                                .assign(liftover=True))
-        unmapped: pd.DataFrame = (to_liftover[to_liftover[['lifted_chr', 'lifted_pos']].isnull().any(axis=1)] \
-                                  .assign(liftover=False))
-        _check_min_liftover(mapped, unmapped, min_lift)
-
-    return pd.concat([mapped, unmapped, no_liftover])
-
-
-def _check_min_liftover(mapped: pd.DataFrame, unmapped: pd.DataFrame, min_lift: float) -> None:
-    """ Check that liftover process met minimum parameters"""
-    df = pd.concat([mapped, unmapped])
-    n_variants: pd.DataFrame = (pd.DataFrame(df.groupby('accession')['liftover'].count())
-                                .reset_index()
-                                .rename({'liftover': 'n_var'}, axis=1))
-    lo_counts = (pd.DataFrame(df.groupby(['accession', 'liftover'])['liftover'].count()) \
-                 .rename_axis(['accession', 'liftover_status'])
-                 .reset_index())
-    summary: pd.DataFrame = lo_counts.merge(n_variants, on='accession')
-    summary['proportion'] = summary['liftover'] / summary['n_var']
-
-    for row in summary.query('liftover_status == True')[['accession', 'proportion']].itertuples():
-        if row.proportion < min_lift:
-            logger.error(f'Liftover failed for scorefile {row.accession}')
-            logger.error(f'{row.proportion} of variants lifted over, less than min_lift parameter ({min_lift})')
+        logger.info("Starting liftover")
+        if current_build == GenomeBuild.GRCh37 and target_build == GenomeBuild.GRCh38:
+            lo: pyliftover.LiftOver = Config.lo["hg19hg38"]
+        elif current_build == GenomeBuild.GRCh38 and target_build == GenomeBuild.GRCh37:
+            lo: pyliftover.LiftOver = Config.lo["hg38hg19"]
+        else:
+            raise Exception("Can't get pyliftover object")
+
+        n_lifted = 0
+        n = 0
+
+        for variant in variants:
+            lifted = None  # variants without coordinates can't be lifted: counted as failed
+            if variant.chr_name is not None and variant.chr_position is not None:
+                pos = int(variant.chr_position) - 1  # VCF -> 1 based, UCSC -> 0 based
+                lifted = lo.convert_coordinate(f"chr{variant.chr_name}", pos)
+            if lifted:
+                variant.chr_name = lifted[0][0][3:].split("_")[0]  # drop "chr" and alt suffix
+                variant.chr_position = lifted[0][1] + 1  # reverse 0 indexing
+                n_lifted += 1
+            else:
+                variant.chr_name = None
+                variant.chr_position = None
+            n += 1
+            yield variant
+
+        if n > 0 and (n_lifted / n) < Config.min_lift:  # no variants: nothing to check
+            logger.error(f"Liftover failed: {n_lifted} of {n} variants lifted")
             raise Exception
         else:
-            logger.debug(f'Minimum liftover threshold passed for scorefile {row.accession}')
-
-
-def _convert_coordinates(df: pd.Series, lo_dict: dict[str, pyliftover.LiftOver]) -> pd.Series:
-    """ Convert genomic coordinates to different build """
-    converted: list[tuple[str, int, str, int]] | None
-
-    if df[['chr_name', 'chr_position']].isnull().values.any():
-        converted = None
-    else:
-        lo = lo_dict[df['chain_genome_build'] + df['chain_target_build']]  # extract lo object from dict
-        chrom: str = 'chr' + str(df['chr_name'])
-        pos: int = int(df['chr_position']) - 1  # liftOver is 0 indexed, VCF is 1 indexed
-        # converted example: [('chr22', 15460378, '+', 3320966530)] or None
-        converted = lo.convert_coordinate(chrom, pos)
-
-    if converted:
-        lifted_chrom: str = _parse_lifted_chrom(converted[0][0][3:])  # return first matching liftover
-        lifted_pos: int = int(converted[0][1]) + 1  # reverse 0 indexing
-        return pd.Series([lifted_chrom, lifted_pos], index=['lifted_chr', 'lifted_pos'])
-    else:
-        return pd.Series([None, None], index=['lifted_chr', 'lifted_pos'])
-
-
-def _parse_lifted_chrom(i: str) -> str:
-    """ Convert lifted chromosomes to tidy integers
-
-    liftover needs chr suffix for chromosome input (1 -> chr1), and it also
-    returns weird chromosomes sometimes (chr22 -> 22_KI270879v1_alt)
-    """
-    return i.split('_')[0]
+            logger.info("Liftover successful")
 
 
-def _create_liftover(chain_dir: str) -> dict['str': pyliftover.LiftOver]:
-    """ Create LiftOver objects that can remap genomic coordinates """
+def create_liftover() -> dict[str, pyliftover.LiftOver]:
+    """Load both chain files in Config.chain_dir; keys are hg19hg38 and hg38hg19"""
+    chain_dir: str = Config.chain_dir
     builds: list[str] = ["hg19hg38", "hg38hg19"]
-    chains: list[str] = [os.path.join(chain_dir, x) for x in ["hg19ToHg38.over.chain.gz", "hg38ToHg19.over.chain.gz"]]
+    chains: list[str] = [
+        os.path.join(chain_dir, x)
+        for x in ["hg19ToHg38.over.chain.gz", "hg38ToHg19.over.chain.gz"]
+    ]
     lo: list[pyliftover.LiftOver] = [pyliftover.LiftOver(x) for x in chains]
     logger.debug("Chain files loaded for liftover")
     return dict(zip(builds, lo))
diff --git a/pgscatalog_utils/scorefile/qc.py b/pgscatalog_utils/scorefile/qc.py
index 68e511c..526fda2 100644
--- a/pgscatalog_utils/scorefile/qc.py
+++ b/pgscatalog_utils/scorefile/qc.py
@@ -1,92 +1,226 @@
 import logging
+import typing
 
-import pandas as pd
 
-logger = logging.getLogger(__name__)
-
-
-def quality_control(df: pd.DataFrame, drop_missing: bool) -> pd.DataFrame:
-    """ Do quality control checks on a scorefile """
-    _check_shape(df)
-    _check_columns(df)
-    logger.debug("Quality control: checking for bad variants")
-    if drop_missing is True:
-        return (df.pipe(_drop_hla)
-                .pipe(_drop_missing_variants)
-                .pipe(_check_duplicate_identifiers)
-                .pipe(_drop_multiple_oa))
-    else:
-        return (df.pipe(_check_duplicate_identifiers)
-                .pipe(_drop_multiple_oa))
-
-
-def _drop_multiple_oa(df: pd.DataFrame) -> pd.DataFrame:
-    """ Set alleles to None in hm_inferOtherAllele if they contain multiple alleles
+from pgscatalog_utils.scorefile.config import Config
+from pgscatalog_utils.scorefile.effecttype import EffectType
+from pgscatalog_utils.scorefile.scoringfileheader import ScoringFileHeader
+from pgscatalog_utils.scorefile.liftover import liftover
+from pgscatalog_utils.scorefile.scorevariant import ScoreVariant
 
-    e.g. A / C / T -> None; A -> A; A / C -> None
-    """
-    if 'other_allele' in df:
-        if df['other_allele'].str.contains('/').any():
-            logger.debug("Multiple inferred other alleles detected, dropping other alleles for ambiguous variants")
-            df['other_allele'] = df['other_allele'].replace(regex='.+\\/.+', value=None)
-            return df
-        else:
-            logger.debug("Only single other alleles detected.")
-            return df
-    else:
-        logger.warning("No other allele data detected, skipping QC of other allele")
-        return df
+logger = logging.getLogger(__name__)
 
 
-def _drop_missing_variants(df: pd.DataFrame) -> pd.DataFrame:
-    no_na: pd.DataFrame = df.dropna(subset=['chr_name', 'chr_position', 'effect_weight'])
-    n_dropped = df.shape[0] - no_na.shape[0]
+def quality_control(
+    variants: typing.Generator[ScoreVariant, None, None],
+    header: ScoringFileHeader,
+    harmonised: bool,
+    wide: bool,
+) -> typing.Generator[ScoreVariant, None, None]:
+    """Chain the QC steps over variants; the order of the first three is important:
+
+    1. liftover non-harmonised data (quite rare), failed lifts get None'd
+    2. remap harmonised data, failed harmonisations get None'd
+    3. check and optionally drop variants with None in a mandatory field
+    """
+    if Config.liftover:
+        variants = liftover(
+            variants,
+            harmonised=harmonised,
+            current_build=header.genome_build,
+            target_build=Config.target_build,
+        )
+
+    variants = remap_harmonised(variants, harmonised)
+    variants = check_bad_variant(variants)
+
+    if Config.drop_missing:
+        variants = drop_hla(variants)
+
+    variants = assign_effect_type(variants)
+    variants = check_effect_weight(variants)
+    variants = assign_other_allele(variants)
+    variants = check_effect_allele(variants)
+    variants = detect_complex(variants)
+
+    if wide:
+        # check_duplicates needs sorted input; ScoreVariant has no __getitem__, use attrs
+        variants = (x for x in sorted(variants, key=lambda x: x.accession))
+
+    variants = check_duplicates(variants)
+
+    return variants
+
+
+def check_duplicates(
+    variants: typing.Generator[ScoreVariant, None, None]
+) -> typing.Generator[ScoreVariant, None, None]:
+    """Flag repeated variants within an accession by setting is_duplicated.
+
+    Input must be grouped by accession: the seen IDs reset whenever the accession
+    changes. The first occurrence of an ID is left unflagged; later ones are flagged.
+    """
+    id_fields = ("chr_name", "chr_position", "effect_allele", "other_allele")
+    seen: set[str] = set()
+    current_accession: typing.Union[str, None] = None
+    n_duplicates: int = 0
+    n_variants: int = 0
+
+    for variant in variants:
+        if variant.accession != current_accession:
+            # new accession: forget the IDs seen so far
+            current_accession = variant.accession
+            seen = set()
+
+        # falsy fields (e.g. a missing other allele) become empty strings
+        key: str = ":".join(str(getattr(variant, f) or "") for f in id_fields)
+        if key in seen:
+            variant.is_duplicated = True
+            n_duplicates += 1
+        else:
+            seen.add(key)
+
+        yield variant
+        n_variants += 1
+
+    if n_duplicates > 0:
+        logger.warning(
+            f"{n_duplicates} of {n_variants} variants are duplicated in: {current_accession}"
+        )
+
+
+def drop_hla(
+    variants: typing.Generator[ScoreVariant, None, None]
+) -> typing.Generator[ScoreVariant, None, None]:
+    """Drop HLA variants whose effect allele string is the presence/absence code P or N."""
+    n_dropped = 0
+    for variant in variants:
+        if variant.effect_allele.allele in ("P", "N"):
+            n_dropped += 1
+        else:
+            yield variant
+
+    if n_dropped > 0:
+        logger.warning(f"{n_dropped} HLA alleles detected and dropped")
+
+
+def check_effect_weight(
+    variants: typing.Generator[ScoreVariant, None, None]
+) -> typing.Generator[ScoreVariant, None, None]:
+    for variant in variants:  # pass-through: variants are yielded unchanged
+        try:
+            float(variant.effect_weight)  # validation only: the weight stays a str
+        except (ValueError, TypeError) as e:  # TypeError: the weight is None
+            logger.critical(f"{variant} has bad effect weight")
+            raise ValueError(f"{variant} has bad effect weight") from e
+        yield variant
+
+
+def assign_other_allele(
+    variants: typing.Generator[ScoreVariant, None, None]
+) -> typing.Generator[ScoreVariant, None, None]:
+    """Set other alleles listing several alleles ("A/C") to None; None passes through."""
+    n_dropped = 0
+    for variant in variants:
+        if variant.other_allele is not None and "/" in variant.other_allele:
+            n_dropped += 1
+            variant.other_allele = None
+        yield variant
 
     if n_dropped > 0:
-        logger.warning(f"{n_dropped} variants with missing values detected and dropped from scoring file")
-
-    return no_na
-
-
-def _drop_hla(df: pd.DataFrame) -> pd.DataFrame:
-    """ Drop HLA effect alleles with present / absent encoding """
-
-    no_hla: pd.DataFrame = df.query('effect_allele != "P" | effect_allele != "N"')
-
-    if df.shape[0] > no_hla.shape[0]:
-        logger.debug("HLA alleles detected and dropped")
-
-    return no_hla
-
-
-def _check_duplicate_identifiers(df: pd.DataFrame) -> pd.DataFrame:
-    if 'other_allele' in df:
-        logger.debug("Other allele column detected, including other_allele in variant identifier")
-        group_cols = ['chr_name', 'chr_position', 'effect_allele', 'other_allele']
+        logger.warning(f"Multiple other_alleles detected in {n_dropped} variants")
+        logger.warning("Other allele for these variants is set to missing")
+
+
+def assign_effect_type(
+    variants: typing.Generator[ScoreVariant, None, None]
+) -> typing.Generator[ScoreVariant, None, None]:
+    """Set effect_type from the (is_recessive, is_dominant) flags; bad combos raise."""
+    for variant in variants:
+        flags = (variant.is_recessive, variant.is_dominant)
+        if flags == ("FALSE", "TRUE"):
+            variant.effect_type = EffectType.DOMINANT
+        elif flags == ("TRUE", "FALSE"):
+            variant.effect_type = EffectType.RECESSIVE
+        elif flags != (None, None) and flags != ("FALSE", "FALSE"):
+            logger.critical(f"Bad effect type setting: {variant}")
+            raise Exception
+        # both flags unset or both "FALSE": keep the existing (default additive) type
+        yield variant
+
+
+def remap_harmonised(
+    variants: typing.Generator[ScoreVariant, None, None], harmonised: bool
+) -> typing.Generator[ScoreVariant, None, None]:
+    """Overwrite coordinates with harmonised (hm_*) values when harmonised is True."""
+    if harmonised:
+        for variant in variants:
+            # gated on the header's harmonised flag so positions in an unharmonised file
+            # aren't overwritten with empty data; failed harmonisation is always kept
+            variant.chr_name = variant.hm_chr
+            variant.chr_position = variant.hm_pos
+            if variant.other_allele is None:
+                variant.other_allele = variant.hm_inferOtherAllele
+            yield variant
     else:
-        logger.warning("Other allele column not detected, dropping other_allele from variant identifier.")
-        group_cols = ['chr_name', 'chr_position', 'effect_allele']
-
-    u_count: pd.Series = df.groupby(group_cols).size()
-
-    if all(u_count == 1):
-        return df.assign(is_duplicated=False)
-    else:
-        logger.warning("Duplicate variants in scoring file: {}".format(df['filename_prefix'].unique()))
-        u_count = u_count > 1
-        u_count.name = 'is_duplicated'
-        df = pd.merge(df, u_count, how='left', left_on=group_cols, right_index=True)
-        df.loc[df.is_duplicated.isnull(), 'is_duplicated'] = False  # handles variants with null chr/pos
-        return df
-
+        for variant in variants:
+            # not harmonised: pass every variant through untouched
+            yield variant
+
+
+def check_bad_variant(
+    variants: typing.Generator[ScoreVariant, None, None]
+) -> typing.Generator[ScoreVariant, None, None]:
+    """Count variants missing a mandatory value; drop them if Config.drop_missing."""
+    n_bad = 0
+    for variant in variants:
+        # effect_allele is always wrapped in an EffectAllele, so test the wrapped
+        # string, not the wrapper (effect weight is checked separately)
+        is_bad = (
+            variant.chr_name is None
+            or variant.chr_position is None
+            or variant.effect_allele.allele is None
+        )
+        if is_bad:
+            n_bad += 1
+        if not (is_bad and Config.drop_missing):
+            yield variant
+
+    if n_bad > 0:  # "> 1" would silently ignore a single bad variant
+        logger.warning(f"{n_bad} bad variants")
+
+
+def check_effect_allele(
+    variants: typing.Generator[ScoreVariant, None, None]
+) -> typing.Generator[ScoreVariant, None, None]:
+    """Yield every variant unchanged, counting effect alleles that aren't SNPs."""
+    n_bad = 0
+    for variant in variants:
+        if not variant.effect_allele.is_snp:
+            n_bad += 1
+        yield variant
+
+    if n_bad > 0:  # "> 1" never reported a single invalid allele
+        logger.warning(f"{n_bad} variants have invalid effect alleles (not ACTG)")
+
+
+def detect_complex(
+    variants: typing.Generator[ScoreVariant, None, None]
+) -> typing.Generator[ScoreVariant, None, None]:
+    """Pass variants through unchanged, warning at the end if any was flagged complex.
+    Complex (often older) PGS Catalog files may need bespoke set up, e.g. interactions
+    """
+    is_complex = False
 
-def _check_shape(df: pd.DataFrame) -> None:
-    assert len(df.columns) > 1, "ERROR: scorefile not formatted correctly (0 columns)"
-    assert df.shape[0] > 0, "ERROR: No variants detected in input file (0 rows)"
+    for variant in variants:
+        if not is_complex:
+            if variant.is_complex:
+                is_complex = True
 
+        yield variant
 
-def _check_columns(df: pd.DataFrame) -> None:
-    assert {'chr_name', 'chr_position'}.issubset(df.columns), "Missing chromosomal positions. If you're " \
-                                                              "using PGS Catalog files with rsIDs you should request " \
-                                                              "harmonised data files (HmPOS) instead."
-    assert 'effect_allele' in df, "ERROR: Missing effect allele column"
+    if is_complex:
+        logger.warning("Complex scoring file detected")
+        logger.warning(
+            "Complex files are difficult to calculate properly and may require manual intervention"
+        )
diff --git a/pgscatalog_utils/scorefile/read.py b/pgscatalog_utils/scorefile/read.py
deleted file mode 100644
index dbd559b..0000000
--- a/pgscatalog_utils/scorefile/read.py
+++ /dev/null
@@ -1,79 +0,0 @@
-import gzip
-import io
-import logging
-import os
-
-import pandas as pd
-
-logger = logging.getLogger(__name__)
-
-
-def load_scorefile(path: str) -> tuple[dict, pd.DataFrame]:
-    logger.debug(f'Reading scorefile {path}')
-    df = pd.read_table(path, dtype=_scorefile_dtypes(), comment='#', na_values=['None'], low_memory=False)
-    return (_read_header(path),
-            df.assign(filename_prefix=get_scorefile_basename(path), filename=path, row_nr=df.index))
-
-
-def _read_header(path: str) -> dict:
-    """Parses the header of a PGS Catalog format scorefle into a dictionary"""
-    f = io.TextIOWrapper(gzip.open(path, 'r'))
-    try:
-        f.readline()
-    except gzip.BadGzipFile:
-        f = open(path, 'r')
-
-    header = {}
-    lastline = '#'
-    while lastline.startswith('#'):
-        lastline = f.readline()
-        line = lastline.strip()
-        if line.startswith('#'):
-            if '=' in line:
-                line = line[1:].split('=')
-                field, val = [x.strip() for x in line]
-                if field in remap_header:
-                    header[remap_header[field]] = val
-                else:
-                    header[field] = val
-
-    if ('genome_build' in header) and (header['genome_build'] == 'NR'):
-        header['genome_build'] = None
-    f.close()
-    return header
-
-
-def _scorefile_dtypes() -> dict[str]:
-    """ Data types for columns that might be found in a scorefile """
-    return {'rsID': str, 'chr_name': str, 'chr_position': pd.UInt64Dtype(), 'effect_allele': 'str',
-            'effect_weight': float, 'locus_name': str, 'OR': float, 'hm_source': str, 'hm_rsID': str,
-            'hm_chr': str, 'hm_pos': pd.UInt64Dtype(), 'hm_inferOtherAllele': str}
-
-
-def get_scorefile_basename(path: str) -> str:
-    """ Return the basename of a scoring file without extension """
-    filename = os.path.basename(path)
-    if filename.endswith('.txt.gz'):
-        filename = filename.replace('.txt.gz', '')
-    elif filename.endswith('.txt'):
-        filename = filename.replace('.txt', '')
-    return filename
-
-
-remap_header = {
-    'PGS ID': 'pgs_id',
-    'PGS Name': 'pgs_name',
-    'Reported Trait': 'trait_reported',
-    'Original Genome Build': 'genome_build',
-    'Number of Variants': 'variants_number',
-    'PGP ID': 'pgp_id',
-    'Citation': 'citation',
-    'LICENSE': 'license',
-    # Harmonization related
-    'HmPOS Build': 'HmPOS_build',
-    'HmPOS Date': 'HmPOS_date',
-    'HmVCF Reference': 'HmVCF_ref',
-    'HmVCF Date': 'HmVCF_date',
-    'HmVCF N Matched Variants': 'HmVCF_n_matched',
-    'HmVCF N Unmapped Variants': 'HmVCF_n_unmapped'
-}  # Used to maintain reverse compatibility to old scoring files
diff --git a/pgscatalog_utils/scorefile/scorevariant.py b/pgscatalog_utils/scorefile/scorevariant.py
new file mode 100644
index 0000000..38135dc
--- /dev/null
+++ b/pgscatalog_utils/scorefile/scorevariant.py
@@ -0,0 +1,137 @@
+from typing import Optional
+
+from pgscatalog_utils.scorefile.effectallele import EffectAllele
+from pgscatalog_utils.scorefile.effecttype import EffectType
+
+
+class ScoreVariant:
+    """One row of a scoring file, normalised into typed attributes (see __init__)."""
+
+    mandatory_fields: tuple[str, ...] = (
+        "effect_allele",
+        "effect_weight",
+        "accession",
+        "row_nr",
+    )
+    optional_fields: tuple[str, ...] = (
+        "chr_name",
+        "chr_position",
+        "rsID",
+        "other_allele",
+        "hm_chr",
+        "hm_pos",
+        "hm_inferOtherAllele",
+        "hm_source",
+        "is_dominant",
+        "is_recessive",
+        "hm_rsID",
+        "hm_match_chr",
+        "hm_match_pos",
+        "is_duplicated",
+        "effect_type",
+    )
+    complex_fields: tuple[str, ...] = ("is_haplotype", "is_diplotype", "is_interaction")
+
+    # column names for output are used by __iter__ and when writing out
+    output_fields: tuple[str, ...] = (
+        "chr_name",
+        "chr_position",
+        "effect_allele",
+        "other_allele",
+        "effect_weight",
+        "effect_type",
+        "is_duplicated",
+        "accession",
+        "row_nr",
+    )
+
+    # slots uses magic to improve speed and memory when making millions of objects
+    __slots__ = mandatory_fields + optional_fields + ("is_complex",)
+
+    # __init__ is intentionally verbose and avoids using loops or trickery to work:
+    #   - attributes won't change often, and type hints are helpful in parameters
+    #   - class accepts keyword parameters only to init (not positional)
+    #   - sensible defaults for optional fields are explicit, avoiding IDE warnings
+    # extra kwargs are ignored, except that complex_fields names set is_complex
+    # (effect_weight is kept as a str to avoid rounding errors at this stage)
+    def __init__(
+        self,
+        *,
+        effect_allele: str,
+        effect_weight: str,
+        accession: str,
+        row_nr: int,
+        chr_name: Optional[str] = None,
+        chr_position: Optional[int] = None,
+        rsID: Optional[str] = None,
+        other_allele: Optional[str] = None,
+        hm_chr: Optional[str] = None,
+        hm_pos: Optional[int] = None,
+        hm_inferOtherAllele: Optional[str] = None,
+        hm_source: Optional[str] = None,
+        is_dominant: Optional[str] = None,
+        is_recessive: Optional[str] = None,
+        hm_rsID: Optional[str] = None,
+        hm_match_chr: Optional[str] = None,
+        hm_match_pos: Optional[str] = None,
+        is_duplicated: bool = False,
+        effect_type: EffectType = EffectType.ADDITIVE,
+        is_complex: bool = False,
+        **kwargs,
+    ):
+        # start with mandatory attributes
+        self.effect_allele: EffectAllele = EffectAllele(effect_allele)
+        self.effect_weight: str = effect_weight
+        self.accession: str = accession
+        self.row_nr: int = int(row_nr)
+
+        # now set optional fields
+        self.chr_name: Optional[str] = chr_name
+
+        # casting to int is important for arrow export; unparseable values become None
+        try:
+            self.chr_position: Optional[int] = int(chr_position)
+        except (ValueError, TypeError):
+            self.chr_position = None
+
+        self.rsID: Optional[str] = rsID
+        self.other_allele: Optional[str] = other_allele
+        self.hm_chr: Optional[str] = hm_chr
+
+        # casting to int is important when harmonised data may replace chr_position
+        try:
+            self.hm_pos: Optional[int] = int(hm_pos)
+        except (ValueError, TypeError):
+            self.hm_pos = None
+
+        self.hm_inferOtherAllele: Optional[str] = hm_inferOtherAllele
+        self.hm_source: Optional[str] = hm_source
+        self.is_dominant: Optional[str] = is_dominant
+        self.is_recessive: Optional[str] = is_recessive
+        self.hm_rsID: Optional[str] = hm_rsID
+        self.hm_match_chr: Optional[str] = hm_match_chr
+        self.hm_match_pos: Optional[str] = hm_match_pos
+        self.is_duplicated: Optional[bool] = is_duplicated
+        self.effect_type: EffectType = effect_type
+
+        # a complex_fields name passed via kwargs marks the variant complex (value unused)
+        if any([x in kwargs for x in self.complex_fields]):
+            is_complex = True
+        self.is_complex: bool = is_complex
+
+    def __repr__(self):
+        class_name = type(self).__name__
+        values = {}
+
+        for key in ScoreVariant.__slots__:
+            values[key] = getattr(self, key, None)
+
+        # extract str parameter for effect allele
+        values["effect_allele"] = values["effect_allele"].allele
+
+        params = ",".join([f"{k}={repr(v)}" for k, v in values.items()])
+        return f"{class_name}({params})"
+
+    def __iter__(self):
+        for attr in self.output_fields:
+            yield getattr(self, attr)
diff --git a/pgscatalog_utils/scorefile/scoringfile.py b/pgscatalog_utils/scorefile/scoringfile.py
new file mode 100644
index 0000000..bb3aaa2
--- /dev/null
+++ b/pgscatalog_utils/scorefile/scoringfile.py
@@ -0,0 +1,188 @@
+import csv
+import logging
+import os
+import pathlib
+import typing
+from dataclasses import dataclass
+from itertools import islice
+
+from pgscatalog_utils.download.GenomeBuild import GenomeBuild
+from pgscatalog_utils.scorefile.config import Config
+from pgscatalog_utils.scorefile.scoringfileheader import ScoringFileHeader, auto_open
+from pgscatalog_utils.scorefile.qc import quality_control
+from pgscatalog_utils.scorefile.scorevariant import ScoreVariant
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class ScoringFile:
+    """A scoring file: its parsed header, column names, and lazily read variants."""
+
+    path: pathlib.Path
+    accession: str
+    header: typing.Union[ScoringFileHeader, None]
+    genome_build: typing.Union[GenomeBuild, None]
+    harmonised: bool
+    fields: list[str]
+    variants: typing.Generator[ScoreVariant, None, None]
+
+    def __post_init__(self):
+        if self.header.HmPOS_build:
+            logger.info(
+                f"{self.path} harmonised data detected: {self.header.HmPOS_build}"
+            )
+            self.genome_build = self.header.HmPOS_build
+        # coordinates are optional; wide files use effect_weight_<suffix> columns
+        has_weight = any("effect_weight" in x for x in self.fields)
+        if "effect_allele" not in self.fields or not has_weight:
+            err_msg = f"{self.path} missing fields"
+            raise Exception(err_msg)
+
+    @classmethod
+    def from_path(cls, path: pathlib.Path):
+        header = ScoringFileHeader.from_path(path)
+        name = os.path.basename(path).split(".")[0]
+        if header:
+            if header.HmPOS_build:
+                harmonised = True
+                genome_build = header.HmPOS_build
+            else:
+                harmonised = False
+                genome_build = header.genome_build
+        else:
+            harmonised = False
+            genome_build = None
+
+        start_line, cols = get_columns(path)
+        is_wide = detect_wide(cols)
+
+        logger.info(f"Lazily reading variants from {path}")
+        variants = ScoringFile.read_variants(
+            path=path, start_line=start_line, fields=cols, name=name, is_wide=is_wide
+        )
+
+        # the quality_control function normalises a list of variants to have a standard representation
+        # attributes are overwritten using harmonised data, etc.
+        variants: typing.Generator[ScoreVariant, None, None] = quality_control(
+            variants, header=header, harmonised=harmonised, wide=is_wide
+        )
+
+        return cls(
+            path=path,
+            header=header,
+            genome_build=genome_build,
+            harmonised=harmonised,
+            fields=cols,
+            variants=variants,
+            accession=name,
+        )
+
+    def generate_log(self, counted: typing.Counter):
+        log = {
+            key: str(value) if value is not None else None
+            for key, value in self.header.__dict__.items()
+        }
+
+        if log["variants_number"] is None:
+            # custom scoring files might not have this information
+            log["variants_number"] = counted["n_variants"]
+
+        if (
+            int(log["variants_number"]) != counted["n_variants"]
+            and not Config.drop_missing
+        ):
+            logger.warning(
+                f"Mismatch between header ({log['variants_number']}) and output row count ({counted['n_variants']}) for {self.accession}"
+            )
+            logger.warning(
+                "This can happen with older scoring files in the PGS Catalog (e.g. PGS000028)"
+            )
+
+        # multiple terms may be separated with a pipe
+        if log["trait_mapped"]:
+            log["trait_mapped"] = log["trait_mapped"].split("|")
+
+        if log["trait_efo"]:
+            log["trait_efo"] = log["trait_efo"].split("|")
+
+        log["columns"] = self.fields
+        log["use_liftover"] = Config.liftover
+        log["use_harmonised"] = self.harmonised
+        log["sources"] = [k for k in counted if k != "n_variants"]
+
+        return {self.accession: log}
+
+    @staticmethod
+    def read_variants(
+        path, fields, start_line, name: str, is_wide: bool
+    ) -> typing.Generator[ScoreVariant, None, None]:
+        open_function = auto_open(path)
+        row_nr = 0
+
+        with open_function(path, mode="rt") as f:
+            for _ in range(start_line + 1):
+                # skip the comment header and the column-name line
+                next(f)
+
+            while True:
+                batch = list(islice(f, Config.batch_size))
+                if not batch:
+                    break
+
+                csv_reader = csv.reader(batch, delimiter="\t")
+                yield from read_rows(csv_reader, fields, name, is_wide, row_nr)
+                # read_rows numbers rows from the offset it is given, so advance it
+                row_nr += len(batch)
+
+
+def read_rows(
+    csv_reader, fields: list[str], name: str, wide: bool, row_nr: int
+) -> typing.Generator[ScoreVariant, None, None]:
+    """Yield one ScoreVariant per row, or one per effect_weight_* column when wide."""
+    # loop-invariant, so computed once rather than for every row
+    weight_names: list[str] = [x for x in fields if "effect_weight_" in x]
+
+    for row in csv_reader:
+        variant = dict(zip(fields, row))
+
+        if wide:
+            # each weight column becomes its own score, named after the column
+            for weight_name in weight_names:
+                yield ScoreVariant(
+                    **variant,
+                    accession=weight_name,
+                    row_nr=row_nr,
+                    effect_weight=variant[weight_name],
+                )
+        else:
+            yield ScoreVariant(**variant, accession=name, row_nr=row_nr)
+
+        row_nr += 1
+
+
+def get_columns(path) -> tuple[int, list[str]]:
+    """Return the (0-based) line number and names of the first non-comment line."""
+    with auto_open(path)(path, mode="rt") as f:
+        for i, line in enumerate(f):
+            if line.startswith("#"):
+                continue
+            cols = line.strip().split("\t")
+            if len(set(cols)) != len(cols):
+                logger.critical(f"Duplicated column names: {cols}")
+                raise ValueError(f"Duplicated column names: {cols}")
+            return i, cols
+    raise ValueError(f"No column names found in {path}")  # only comments, or empty
+
+
+def detect_wide(cols: list[str]) -> bool:
+    """
+    Report whether any column name contains "effect_weight_", which marks a wide file
+    with one effect weight column per score, e.g.:
+    effect_weight_suffix1
+    effect_weight_suffix2
+    """
+    is_wide = any("effect_weight_" in col for col in cols)
+    if is_wide:
+        logger.info("Wide scoring file detected with multiple effect weights")
+    return is_wide
diff --git a/pgscatalog_utils/scorefile/scoringfileheader.py b/pgscatalog_utils/scorefile/scoringfileheader.py
new file mode 100644
index 0000000..06d7f10
--- /dev/null
+++ b/pgscatalog_utils/scorefile/scoringfileheader.py
@@ -0,0 +1,92 @@
+import gzip
+import inspect
+import pathlib
+from dataclasses import dataclass
+
+from pgscatalog_utils.download.GenomeBuild import GenomeBuild
+
+
+@dataclass
+class ScoringFileHeader:
+    """Header metadata parsed from the "#key=value" lines of a scoring file."""
+
+    pgs_id: str
+    pgp_id: str
+    pgs_name: str
+    genome_build: GenomeBuild
+    variants_number: int
+    trait_reported: str
+    trait_efo: str
+    trait_mapped: str
+    weight_type: str
+    citation: str
+    HmPOS_build: GenomeBuild
+    HmPOS_date: str
+    format_version: str
+    license: str = (
+        "PGS obtained from the Catalog should be cited appropriately, and "
+        "used in accordance with any licensing restrictions set by the authors. See EBI "
+        "Terms of Use (https://www.ebi.ac.uk/about/terms-of-use/) for additional details."
+    )
+
+    def __post_init__(self):
+        # normalise raw header values: variant count to int, builds to GenomeBuild
+        if self.variants_number:
+            self.variants_number = int(self.variants_number)
+
+        self.genome_build = GenomeBuild.from_string(self.genome_build)
+        if self.HmPOS_build:
+            self.HmPOS_build = GenomeBuild.from_string(self.HmPOS_build)
+
+    @classmethod
+    def from_path(cls, path: pathlib.Path):
+        """Build a header from the "#key=value" lines of path; raise if none are found."""
+        raw_header: dict = raw_header_to_dict(read_header(path))
+        # only keep keys needed by class but support partial headers with None values
+        keep_keys = inspect.get_annotations(ScoringFileHeader).keys()
+        header_dict = {k: raw_header.get(k) for k in keep_keys}
+
+        # test for an empty header before filling in defaults: a defaulted license
+        # would otherwise make a missing header look like a partial one
+        if all(v is None for v in header_dict.values()):
+            raise Exception(f"No header detected in scoring file {path=}")
+
+        if header_dict.get("license") is None:
+            # missing license data in header means default license
+            # (this may change in the future)
+            header_dict["license"] = cls.license
+
+        return ScoringFileHeader(**header_dict)
+
+
+def raw_header_to_dict(header):
+    """Map "#key=value" header lines to a dict, dropping the leading "#" from keys."""
+    # split on the first "=" only: values (e.g. citation URLs) may contain "="
+    return {
+        key[1:]: value for key, value in (item.split("=", 1) for item in header)
+    }
+
+
+def read_header(path: pathlib.Path):
+    """Lazily yield the "#key=value" header lines of the scoring file at path"""
+    opener = auto_open(path)  # gzip.open or open, chosen by sniffing the file
+    with opener(path, "rt") as handle:
+        yield from _gen_header_lines(handle)
+
+
+def _gen_header_lines(f):
+    """Yield stripped "#...=..." lines from f, stopping at the first non-"#" line."""
+    for line in f:
+        if not line.startswith("#"):
+            return  # first non-comment line ends the header
+        # "#" lines without "=" carry no key/value pair: skip them
+        if "=" in line:
+            yield line.strip()
+
+
+def auto_open(filepath):
+    """Return gzip.open if filepath starts with the gzip magic bytes, else open."""
+    with open(filepath, "rb") as handle:
+        magic = handle.read(2)
+    # 0x1f 0x8b is the two-byte gzip signature
+    return gzip.open if magic == b"\x1f\x8b" else open
diff --git a/pgscatalog_utils/scorefile/write.py b/pgscatalog_utils/scorefile/write.py
index 8a3233b..1e43594 100644
--- a/pgscatalog_utils/scorefile/write.py
+++ b/pgscatalog_utils/scorefile/write.py
@@ -1,43 +1,183 @@
+import csv
+import functools
+import gzip
 import logging
 import os
+import sqlite3
+import typing
+from collections import Counter
+from itertools import islice
 
-import pandas as pd
+from pgscatalog_utils.scorefile.config import Config
+from pgscatalog_utils.scorefile.scorevariant import ScoreVariant
+from pgscatalog_utils.scorefile.scoringfile import ScoringFile
+
+try:
+    import pyarrow as pa
+
+    PYARROW_AVAILABLE = True
+except ImportError:
+    PYARROW_AVAILABLE = False
 
 logger = logging.getLogger(__name__)
 
 
-def write_scorefile(df: pd.DataFrame, path: str) -> None:
-    cols: list[str] = ['chr_name', 'chr_position', 'effect_allele', 'other_allele', 'effect_weight', 'effect_type',
-                       'is_duplicated', 'accession', 'row_nr']
-
-    if os.path.exists(path):
-        logger.debug("Output file exists: setting write mode to append")
-        write_mode = 'a'
-        header = False
-    else:
-        logger.debug("Output file doesn't exist: setting write mode to write (create new file)")
-        write_mode = 'w'
-        header = True
-
-    out_df: pd.DataFrame = (df.drop('accession', axis=1)
-                            .rename({'filename_prefix': 'accession'}, axis=1)
-                            .pipe(_filter_failed_liftover))
-
-    if 'other_allele' not in out_df:
-        logger.warning("No other allele information detected, writing out as missing data")
-        out_df['other_allele'] = None
-
-    if path.endswith('.gz'):
-        logger.debug("Writing out gzip-compressed combined scorefile")
-        out_df[cols].to_csv(path, index=False, sep="\t", compression='gzip', mode=write_mode, header=header)
-    else:
-        logger.debug("Writing out combined scorefile")
-        out_df[cols].to_csv(path, index=False, sep="\t", mode=write_mode, header=header)
-
-
-def _filter_failed_liftover(df: pd.DataFrame) -> pd.DataFrame:
-    if 'liftover' in df:
-        logger.debug("Filtering variants that failed liftover")
-        return df.query('liftover == True')
-    else:
-        return df
+class DataWriter:  # base class for the output writers; subclasses override write()
+    def __init__(self, filename):
+        logger.info(f"Output filename: {filename}")
+        self.filename = filename
+        self.fieldnames = [  # column names; SqliteWriter builds its table from these
+            "chr_name",
+            "chr_position",
+            "effect_allele",
+            "other_allele",
+            "effect_weight",
+            "effect_type",
+            "is_duplicated",
+            "accession",
+            "row_nr",
+        ]
+
+    def write(self, batch):
+        """Write a batch of variants (a no-op here; subclasses implement it)"""
+
+
+class TextFileWriter(DataWriter):
+    """Writes batches as tab-separated text, optionally gzip-compressed"""
+
+    def __init__(self, compress, filename):
+        super().__init__(filename)
+        self.compress = compress
+
+        if not self.compress:
+            logger.info("Writing text file")
+            self.open_function = open
+        else:
+            logger.info("Writing with gzip")
+            self.open_function = functools.partial(gzip.open, compresslevel=6)
+
+    def write(self, batch):
+        """Append batch to the output file, creating it with a header row first"""
+        # a file that already exists is appended to and gets no second header
+        is_new_file = not os.path.exists(self.filename)
+        with self.open_function(self.filename, "wt" if is_new_file else "at") as f:
+            rows = csv.writer(f, delimiter="\t", lineterminator="\n")
+            if is_new_file:
+                rows.writerow(ScoreVariant.output_fields)
+
+            rows.writerows(batch)
+
+
+class SqliteWriter(DataWriter):
+    def __init__(self, filename):
+        super().__init__(filename)
+
+    def write(self, batch):
+        """Insert batch into the "variants" table, creating the table if needed"""
+        columns = ", ".join(self.fieldnames)
+        placeholders = ", ".join("?" for _ in self.fieldnames)
+        values = [
+            tuple(row[key] for key in self.fieldnames if key in row) for row in batch
+        ]
+
+        conn = sqlite3.connect(self.filename)
+        try:  # close the connection even if a statement fails (no leaked handle)
+            conn.execute(f"CREATE TABLE IF NOT EXISTS variants ({columns})")
+            conn.executemany(f"INSERT INTO variants VALUES ({placeholders})", values)
+            conn.commit()
+        finally:
+            conn.close()
+
+
+class PyarrowWriter(DataWriter):  # writes record batches via pa.ipc.new_file
+    if PYARROW_AVAILABLE:  # the schema needs pa, so skip it when the import failed
+        schema = pa.schema(
+            [
+                pa.field("chr_name", pa.string()),
+                pa.field("chr_position", pa.uint64()),
+                pa.field("effect_allele", pa.string()),
+                pa.field("other_allele", pa.string()),
+                pa.field("effect_weight", pa.string()),
+                pa.field("effect_type", pa.string()),
+                pa.field("is_duplicated", pa.bool_()),
+                pa.field("accession", pa.string()),
+                pa.field("row_nr", pa.uint64()),
+            ]
+        )
+
+    def __init__(self, filename):  # raises ImportError when pyarrow is missing
+        if not PYARROW_AVAILABLE:
+            # TODO: provide a pip command
+            raise ImportError(
+                "pyarrow output not available, please install pyarrow as listed in the pyproject.toml extras section"
+            )
+        super().__init__(filename)
+
+        self._sink = pa.OSFile(self.filename, "wb")
+        self._writer: pa.RecordBatchFileWriter = pa.ipc.new_file(
+            self._sink, self.schema
+        )
+
+    def write(self, batch: list[ScoreVariant]):  # appends one record batch
+        batch_dict = {
+            "chr_name": [x.chr_name for x in batch],
+            "chr_position": [x.chr_position for x in batch],
+            "effect_allele": [str(x.effect_allele) for x in batch],
+            "other_allele": [x.other_allele for x in batch],
+            "effect_weight": [x.effect_weight for x in batch],
+            "effect_type": [str(x.effect_type) for x in batch],
+            "is_duplicated": [x.is_duplicated for x in batch],
+            "accession": [x.accession for x in batch],
+            "row_nr": [x.row_nr for x in batch],
+        }
+
+        record_batch = pa.RecordBatch.from_pydict(batch_dict, schema=self.schema)
+        self._writer.write(record_batch)
+
+    def __del__(self):
+        """Close the writer, then the sink: an unclosed output file gets corrupted"""
+        # getattr guards: __init__ may have raised before these attributes were set
+        for resource in (getattr(self, "_writer", None), getattr(self, "_sink", None)):
+            if resource is not None and not getattr(resource, "closed", False):
+                resource.close()
+
+
+def write_combined(
+    scoring_files: list[ScoringFile], out_path: str
+) -> dict[str, typing.Counter]:
+    """Write the variants of every scoring file to one combined output file
+
+    The writer is picked from the out_path extension (gz, txt, sqlite or ipc);
+    any other extension raises ValueError. Returns {accession: Counter of stats}
+    """
+    match fn := out_path.lower():
+        case _ if fn.endswith("gz"):  # gzip default level 9 is slow, so 6 is used
+            writer = TextFileWriter(compress=True, filename=out_path)
+        case _ if fn.endswith("txt"):
+            writer = TextFileWriter(compress=False, filename=out_path)
+        case _ if fn.endswith("sqlite"):
+            writer = SqliteWriter(filename=out_path)
+        case _ if fn.endswith("ipc"):
+            writer = PyarrowWriter(filename=out_path)
+        case _:
+            raise ValueError(f"Unsupported file extension: {out_path}")
+
+    log = {}
+    for scoring_file in scoring_files:
+        logger.info(f"Writing {scoring_file.accession} variants")
+        counts = []
+        while batch := list(islice(scoring_file.variants, Config.batch_size)):
+            writer.write(batch=batch)
+            counts = calculate_log(batch, counts)
+
+        log[scoring_file.accession] = sum(counts, Counter())
+
+    return log
+
+
+def calculate_log(batch: list[ScoreVariant], log: list[Counter]) -> list[Counter]:
+    """Append batch statistics (n_variants, hm_source tallies) to log; return log"""
+    batch_stats = Counter(["n_variants"] * len(batch))
+    batch_stats.update(variant.hm_source for variant in batch)
+    log.append(batch_stats)  # stats can only be gathered while iterating variants
+    return log
diff --git a/poetry.lock b/poetry.lock
index dac6b3d..05b2c77 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand.
 
 [[package]]
 name = "anyio"
@@ -297,6 +297,17 @@ files = [
 [package.dependencies]
 pycparser = "*"
 
+[[package]]
+name = "cfgv"
+version = "3.4.0"
+description = "Validate configuration and produce human readable error messages."
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9"},
+    {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"},
+]
+
 [[package]]
 name = "charset-normalizer"
 version = "3.2.0"
@@ -671,6 +682,17 @@ files = [
     {file = "defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69"},
 ]
 
+[[package]]
+name = "distlib"
+version = "0.3.7"
+description = "Distribution utilities"
+optional = false
+python-versions = "*"
+files = [
+    {file = "distlib-0.3.7-py2.py3-none-any.whl", hash = "sha256:2e24928bc811348f0feb63014e97aaae3037f2cf48712d51ae61df7fd6075057"},
+    {file = "distlib-0.3.7.tar.gz", hash = "sha256:9dafe54b34a028eafd95039d5e5d4851a13734540f1331060d31c9916e7147a8"},
+]
+
 [[package]]
 name = "exceptiongroup"
 version = "1.1.3"
@@ -713,6 +735,22 @@ files = [
 [package.extras]
 devel = ["colorama", "json-spec", "jsonschema", "pylint", "pytest", "pytest-benchmark", "pytest-cache", "validictory"]
 
+[[package]]
+name = "filelock"
+version = "3.13.1"
+description = "A platform independent file lock."
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "filelock-3.13.1-py3-none-any.whl", hash = "sha256:57dbda9b35157b05fb3e58ee91448612eb674172fab98ee235ccb0b5bee19a1c"},
+    {file = "filelock-3.13.1.tar.gz", hash = "sha256:521f5f56c50f8426f5e03ad3b281b490a87ef15bc6c526f168290f0c7148d44e"},
+]
+
+[package.extras]
+docs = ["furo (>=2023.9.10)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.24)"]
+testing = ["covdefaults (>=2.3)", "coverage (>=7.3.2)", "diff-cover (>=8)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)", "pytest-timeout (>=2.2)"]
+typing = ["typing-extensions (>=4.8)"]
+
 [[package]]
 name = "fonttools"
 version = "4.42.1"
@@ -781,6 +819,20 @@ files = [
     {file = "fqdn-1.5.1.tar.gz", hash = "sha256:105ed3677e767fb5ca086a0c1f4bb66ebc3c100be518f0e0d755d9eae164d89f"},
 ]
 
+[[package]]
+name = "identify"
+version = "2.5.31"
+description = "File identification library for Python"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "identify-2.5.31-py2.py3-none-any.whl", hash = "sha256:90199cb9e7bd3c5407a9b7e81b4abec4bb9d249991c79439ec8af740afc6293d"},
+    {file = "identify-2.5.31.tar.gz", hash = "sha256:7736b3c7a28233637e3c36550646fc6389bedd74ae84cb788200cc8e2dd60b75"},
+]
+
+[package.extras]
+license = ["ukkonen"]
+
 [[package]]
 name = "idna"
 version = "3.4"
@@ -1737,6 +1789,20 @@ files = [
     {file = "nest_asyncio-1.5.8.tar.gz", hash = "sha256:25aa2ca0d2a5b5531956b9e273b45cf664cae2b145101d73b86b199978d48fdb"},
 ]
 
+[[package]]
+name = "nodeenv"
+version = "1.8.0"
+description = "Node.js virtual environment builder"
+optional = false
+python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*"
+files = [
+    {file = "nodeenv-1.8.0-py2.py3-none-any.whl", hash = "sha256:df865724bb3c3adc86b3876fa209771517b0cfe596beff01a92700e0e8be4cec"},
+    {file = "nodeenv-1.8.0.tar.gz", hash = "sha256:d51e0c37e64fbf47d017feac3145cdbb58836d7eee8c6f6d3b6880c5456227d2"},
+]
+
+[package.dependencies]
+setuptools = "*"
+
 [[package]]
 name = "notebook"
 version = "7.0.4"
@@ -1871,8 +1937,8 @@ files = [
 
 [package.dependencies]
 numpy = [
-    {version = ">=1.21.0", markers = "python_version >= \"3.10\""},
     {version = ">=1.23.2", markers = "python_version >= \"3.11\""},
+    {version = ">=1.21.0", markers = "python_version >= \"3.10\" and python_version < \"3.11\""},
 ]
 python-dateutil = ">=2.8.1"
 pytz = ">=2020.1"
@@ -2082,6 +2148,24 @@ pyarrow = ["pyarrow (>=4.0.0)"]
 timezone = ["backports.zoneinfo", "tzdata"]
 xlsx2csv = ["xlsx2csv (>=0.8.0)"]
 
+[[package]]
+name = "pre-commit"
+version = "3.5.0"
+description = "A framework for managing and maintaining multi-language pre-commit hooks."
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "pre_commit-3.5.0-py2.py3-none-any.whl", hash = "sha256:841dc9aef25daba9a0238cd27984041fa0467b4199fc4852e27950664919f660"},
+    {file = "pre_commit-3.5.0.tar.gz", hash = "sha256:5804465c675b659b0862f07907f96295d490822a450c4c40e747d0b1c6ebcb32"},
+]
+
+[package.dependencies]
+cfgv = ">=2.0.0"
+identify = ">=1.0.0"
+nodeenv = ">=0.11.1"
+pyyaml = ">=5.1"
+virtualenv = ">=20.10.0"
+
 [[package]]
 name = "prometheus-client"
 version = "0.17.1"
@@ -2161,6 +2245,54 @@ files = [
 [package.extras]
 tests = ["pytest"]
 
+[[package]]
+name = "pyarrow"
+version = "14.0.1"
+description = "Python library for Apache Arrow"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "pyarrow-14.0.1-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:96d64e5ba7dceb519a955e5eeb5c9adcfd63f73a56aea4722e2cc81364fc567a"},
+    {file = "pyarrow-14.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1a8ae88c0038d1bc362a682320112ee6774f006134cd5afc291591ee4bc06505"},
+    {file = "pyarrow-14.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f6f053cb66dc24091f5511e5920e45c83107f954a21032feadc7b9e3a8e7851"},
+    {file = "pyarrow-14.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:906b0dc25f2be12e95975722f1e60e162437023f490dbd80d0deb7375baf3171"},
+    {file = "pyarrow-14.0.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:78d4a77a46a7de9388b653af1c4ce539350726cd9af62e0831e4f2bd0c95a2f4"},
+    {file = "pyarrow-14.0.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:06ca79080ef89d6529bb8e5074d4b4f6086143b2520494fcb7cf8a99079cde93"},
+    {file = "pyarrow-14.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:32542164d905002c42dff896efdac79b3bdd7291b1b74aa292fac8450d0e4dcd"},
+    {file = "pyarrow-14.0.1-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:c7331b4ed3401b7ee56f22c980608cf273f0380f77d0f73dd3c185f78f5a6220"},
+    {file = "pyarrow-14.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:922e8b49b88da8633d6cac0e1b5a690311b6758d6f5d7c2be71acb0f1e14cd61"},
+    {file = "pyarrow-14.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58c889851ca33f992ea916b48b8540735055201b177cb0dcf0596a495a667b00"},
+    {file = "pyarrow-14.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:30d8494870d9916bb53b2a4384948491444741cb9a38253c590e21f836b01222"},
+    {file = "pyarrow-14.0.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:be28e1a07f20391bb0b15ea03dcac3aade29fc773c5eb4bee2838e9b2cdde0cb"},
+    {file = "pyarrow-14.0.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:981670b4ce0110d8dcb3246410a4aabf5714db5d8ea63b15686bce1c914b1f83"},
+    {file = "pyarrow-14.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:4756a2b373a28f6166c42711240643fb8bd6322467e9aacabd26b488fa41ec23"},
+    {file = "pyarrow-14.0.1-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:cf87e2cec65dd5cf1aa4aba918d523ef56ef95597b545bbaad01e6433851aa10"},
+    {file = "pyarrow-14.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:470ae0194fbfdfbf4a6b65b4f9e0f6e1fa0ea5b90c1ee6b65b38aecee53508c8"},
+    {file = "pyarrow-14.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6263cffd0c3721c1e348062997babdf0151301f7353010c9c9a8ed47448f82ab"},
+    {file = "pyarrow-14.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a8089d7e77d1455d529dbd7cff08898bbb2666ee48bc4085203af1d826a33cc"},
+    {file = "pyarrow-14.0.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:fada8396bc739d958d0b81d291cfd201126ed5e7913cb73de6bc606befc30226"},
+    {file = "pyarrow-14.0.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:2a145dab9ed7849fc1101bf03bcdc69913547f10513fdf70fc3ab6c0a50c7eee"},
+    {file = "pyarrow-14.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:05fe7994745b634c5fb16ce5717e39a1ac1fac3e2b0795232841660aa76647cd"},
+    {file = "pyarrow-14.0.1-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:a8eeef015ae69d104c4c3117a6011e7e3ecd1abec79dc87fd2fac6e442f666ee"},
+    {file = "pyarrow-14.0.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3c76807540989fe8fcd02285dd15e4f2a3da0b09d27781abec3adc265ddbeba1"},
+    {file = "pyarrow-14.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:450e4605e3c20e558485f9161a79280a61c55efe585d51513c014de9ae8d393f"},
+    {file = "pyarrow-14.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:323cbe60210173ffd7db78bfd50b80bdd792c4c9daca8843ef3cd70b186649db"},
+    {file = "pyarrow-14.0.1-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:0140c7e2b740e08c5a459439d87acd26b747fc408bde0a8806096ee0baaa0c15"},
+    {file = "pyarrow-14.0.1-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:e592e482edd9f1ab32f18cd6a716c45b2c0f2403dc2af782f4e9674952e6dd27"},
+    {file = "pyarrow-14.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:d264ad13605b61959f2ae7c1d25b1a5b8505b112715c961418c8396433f213ad"},
+    {file = "pyarrow-14.0.1-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:01e44de9749cddc486169cb632f3c99962318e9dacac7778315a110f4bf8a450"},
+    {file = "pyarrow-14.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d0351fecf0e26e152542bc164c22ea2a8e8c682726fce160ce4d459ea802d69c"},
+    {file = "pyarrow-14.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:33c1f6110c386464fd2e5e4ea3624466055bbe681ff185fd6c9daa98f30a3f9a"},
+    {file = "pyarrow-14.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11e045dfa09855b6d3e7705a37c42e2dc2c71d608fab34d3c23df2e02df9aec3"},
+    {file = "pyarrow-14.0.1-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:097828b55321897db0e1dbfc606e3ff8101ae5725673498cbfa7754ee0da80e4"},
+    {file = "pyarrow-14.0.1-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:1daab52050a1c48506c029e6fa0944a7b2436334d7e44221c16f6f1b2cc9c510"},
+    {file = "pyarrow-14.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:3f6d5faf4f1b0d5a7f97be987cf9e9f8cd39902611e818fe134588ee99bf0283"},
+    {file = "pyarrow-14.0.1.tar.gz", hash = "sha256:b8b3f4fe8d4ec15e1ef9b599b94683c5216adaed78d5cb4c606180546d1e2ee1"},
+]
+
+[package.dependencies]
+numpy = ">=1.16.6"
+
 [[package]]
 name = "pycparser"
 version = "2.21"
@@ -3031,6 +3163,26 @@ secure = ["certifi", "cryptography (>=1.9)", "idna (>=2.0.0)", "pyopenssl (>=17.
 socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"]
 zstd = ["zstandard (>=0.18.0)"]
 
+[[package]]
+name = "virtualenv"
+version = "20.24.6"
+description = "Virtual Python Environment builder"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "virtualenv-20.24.6-py3-none-any.whl", hash = "sha256:520d056652454c5098a00c0f073611ccbea4c79089331f60bf9d7ba247bb7381"},
+    {file = "virtualenv-20.24.6.tar.gz", hash = "sha256:02ece4f56fbf939dbbc33c0715159951d6bf14aaf5457b092e4548e1382455af"},
+]
+
+[package.dependencies]
+distlib = ">=0.3.7,<1"
+filelock = ">=3.12.2,<4"
+platformdirs = ">=3.9.1,<4"
+
+[package.extras]
+docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"]
+test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"]
+
 [[package]]
 name = "wcwidth"
 version = "0.2.6"
@@ -3157,4 +3309,4 @@ cffi = ["cffi (>=1.11)"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "2859497817dfd52518f4fa2ba527c716a5bb5e4354175f791b314e80a033edf2"
+content-hash = "397df0f3e64b00fabebb36bf3c3576d94c2f34c2f34dcec223973a19e525d2e6"
diff --git a/poetry.toml b/poetry.toml
new file mode 100644
index 0000000..53b35d3
--- /dev/null
+++ b/poetry.toml
@@ -0,0 +1,3 @@
+[virtualenvs]
+create = true
+in-project = true
diff --git a/pyproject.toml b/pyproject.toml
index 571ed03..098283f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "pgscatalog_utils"
-version = "0.4.3"
+version = "0.5.0"
 description = "Utilities for working with PGS Catalog API and scoring files"
 homepage = "https://github.com/PGScatalog/pgscatalog_utils"
 authors = ["Benjamin Wingfield <bwingfield@ebi.ac.uk>", "Samuel Lambert <sl925@medschl.cam.ac.uk>", "Laurent Gil <lg10@sanger.ac.uk>"]
@@ -16,7 +16,6 @@ aggregate_scores = "pgscatalog_utils.aggregate.aggregate_scores:aggregate_scores
 validate_scorefiles = "pgscatalog_utils.validate.validate_scorefile:validate_scorefile"
 relabel_ids = "pgscatalog_utils.relabel.relabel_ids:relabel_ids"
 ancestry_analysis = "pgscatalog_utils.ancestry.ancestry_analysis:ancestry_analysis"
-samplesheet_to_json = "pgscatalog_utils.samplesheet.check:check_samplesheet"
 
 [tool.poetry.dependencies]
 python = "^3.10"
@@ -30,6 +29,8 @@ polars = "^0.15.0"
 zstandard = "^0.18.0"
 pgzip = "^0.3.2"
 scikit-learn = "^1.2.1"
+pre-commit = "^3.5.0"
+pyarrow = "^14.0.1"
 
 [tool.poetry.group.dev.dependencies]
 jupyter = "^1.0.0"
@@ -43,3 +44,5 @@ seaborn = "^0.12.2"
 requires = ["poetry-core>=1.0.0"]
 build-backend = "poetry.core.masonry.api"
 
+[tool.pytest.ini_options]
+addopts = "--doctest-modules"
\ No newline at end of file
diff --git a/tests/data/combine/PGS001229_22.txt b/tests/data/combine/PGS001229_22.txt
new file mode 100644
index 0000000..4084c13
--- /dev/null
+++ b/tests/data/combine/PGS001229_22.txt
@@ -0,0 +1,850 @@
+###PGS CATALOG SCORING FILE - see https://https://www.pgscatalog.org/downloads/#dl_ftp_scoring_scoring for additional information
+#format_version=2.0
+##POLYGENIC SCORE (PGS) INFORMATION
+#pgs_id=PGS001229
+#pgs_name=GBE_INI50
+#trait_reported=Standing height
+#trait_mapped=body height
+#trait_efo=EFO_0004339
+#weight_type=NR
+#genome_build=GRCh37
+#variants_number=835
+##SOURCE INFORMATION
+#pgp_id=PGP000244
+#citation=Tanigawa Y et al. medRxiv (2021). doi:10.1101/2021.09.02.21262942
+rsID	chr_name	chr_position	effect_allele	other_allele	effect_weight	is_haplotype	imputation_method	locus_name	variant_description
+rs5746679	22	17080378	G	A	1.045457e-02	False			
+rs2192430	22	17300230	A	G	1.411475e-04	False			
+rs165636	22	17318864	A	C	8.166266e-03	False			
+rs165808	22	17327595	T	C	7.791641e-03	False			
+rs5748743	22	17409813	A	G	3.108784e-04	False			
+rs11703655	22	17450952	G	A	-3.033983e-02	False			
+rs2192155	22	17492533	G	A	3.889990e-03	False			
+rs2845402	22	17542810	C	T	8.036290e-03	False			
+rs4819958	22	17565013	G	A	2.135621e-02	False			
+rs879577	22	17589209	T	C	3.026491e-03	False			
+rs5994165	22	17600977	A	G	1.581277e-02	False			
+rs35665085	22	17625915	A	G	-1.172964e-01	False			
+rs1034859	22	17630486	A	C	1.012909e-02	False			
+rs738032	22	17633785	C	T	2.325500e-03	False			
+rs5994185	22	17643689	A	G	3.361814e-03	False			
+rs2231495	22	17669306	C	T	2.145060e-02	False			
+rs5747018	22	17677699	T	C	-7.031384e-04	False			
+rs17807317	22	17680519	C	A	1.079236e-03	False			
+rs9606655	22	17701234	G	A	4.477145e-03	False			
+rs78718739	22	17703119	A	T	7.771872e-04	False			
+rs73153427	22	17718699	C	A	-1.320632e-02	False			
+rs4266110	22	17721595	C	T	9.480363e-03	False			
+rs4819982	22	17727648	T	C	7.811685e-03	False			
+rs5749000	22	17738177	G	A	-4.719812e-03	False			
+rs5749002	22	17749096	A	G	-5.244795e-03	False			
+rs11704699	22	17770181	G	T	-3.101703e-02	False			
+rs5749032	22	17793969	G	A	1.774444e-02	False			
+rs5994272	22	17815696	G	C	-5.516090e-03	False			
+rs4820001	22	17827684	G	A	-5.944752e-03	False			
+rs2040692	22	17831813	T	C	1.061587e-02	False			
+rs9606701	22	17844929	T	G	1.717643e-03	False			
+rs73391753	22	17850661	T	C	-2.805489e-02	False			
+rs4819581	22	17887534	A	G	7.723542e-04	False			
+rs2522310	22	17887725	A	G	7.472703e-03	False			
+rs5747199	22	17958221	C	A	-2.098647e-02	False			
+rs174346	22	18036253	G	A	-1.772981e-02	False			
+rs174351	22	18038786	A	G	-2.119071e-03	False			
+rs9605406	22	18262301	A	T	-5.065485e-03	False			
+rs389496	22	18289204	A	G	5.306345e-03	False			
+rs399757	22	18295575	C	T	2.300129e-02	False			
+rs1550663	22	18296238	G	A	-5.665446e-03	False			
+rs439231	22	18319179	T	C	3.440642e-02	False			
+rs2401424	22	18393534	A	C	1.764269e-02	False			
+rs1076540	22	18439958	T	C	-2.261707e-03	False			
+rs4819654	22	18483388	G	A	3.318724e-02	False			
+rs9617650	22	18488883	C	G	-1.919180e-02	False			
+rs397709	22	18489048	C	A	1.233198e-02	False			
+rs452579	22	18495470	A	G	-5.804926e-03	False			
+rs1992576	22	18537145	G	A	-4.930116e-03	False			
+rs464385	22	18571008	A	G	-8.844726e-05	False			
+rs3827281	22	18584433	C	T	-1.169893e-03	False			
+rs9618216	22	18631365	T	C	-1.551714e-03	False			
+rs3180408	22	18650682	T	C	-1.313784e-02	False			
+rs2010694	22	18890037	A	G	5.968921e-02	False			
+rs454534	22	18891398	G	A	6.891943e-03	False			
+rs2080346	22	18892575	A	G	-2.244470e-03	False			
+rs2016108	22	18915963	A	G	3.719756e-03	False			
+rs2518810	22	18959581	T	C	6.464581e-03	False			
+rs2019061	22	18963340	A	G	-1.397565e-02	False			
+rs11089247	22	18970915	T	C	-1.507131e-03	False			
+rs2073776	22	19024651	T	C	-3.505750e-03	False			
+rs712965	22	19121872	A	G	1.644046e-02	False			
+rs2275901	22	19135603	A	G	-2.970077e-02	False			
+rs11089259	22	19190143	T	C	3.268027e-03	False			
+rs361787	22	19263698	T	C	2.057255e-02	False			
+rs8135222	22	19292446	G	T	1.153989e-02	False			
+rs34292276	22	19371052	T	C	1.055134e-02	False			
+rs1128399	22	19420109	C	T	-8.628228e-03	False			
+rs5748218	22	19451186	A	C	2.141029e-02	False			
+rs5748260	22	19518079	C	T	5.372247e-03	False			
+rs5993713	22	19581331	T	C	1.686942e-02	False			
+rs9606090	22	19593854	C	A	6.544249e-04	False			
+rs8135254	22	19606703	G	A	2.070121e-02	False			
+rs9617823	22	19649005	A	G	2.868601e-03	False			
+rs6518580	22	19735854	C	T	6.262962e-03	False			
+rs1005133	22	19738355	T	C	4.973840e-05	False			
+rs9680615	22	19770886	A	G	-1.013929e-02	False			
+rs2871043	22	19781823	T	C	2.481609e-02	False			
+rs2073750	22	19873357	T	C	1.163020e-02	False			
+rs5748469	22	19907099	A	C	-2.676450e-02	False			
+rs9618723	22	19968597	T	C	-2.203945e-02	False			
+rs5748515	22	20046344	G	A	-9.801428e-03	False			
+rs59528277	22	20084821	C	T	-2.232886e-02	False			
+rs625704	22	20185457	A	G	6.892171e-03	False			
+rs672570	22	20189077	T	C	1.738215e-02	False			
+rs7293032	22	20219648	A	G	9.307625e-03	False			
+rs855050	22	20248391	A	G	-5.405845e-03	False			
+rs855061	22	20267213	A	G	6.713242e-03	False			
+rs741413	22	20286099	G	T	1.574758e-02	False			
+rs35012563	22	20749042	G	A	6.603339e-03	False			
+rs361860	22	20754039	A	G	-1.181141e-02	False			
+rs1771145	22	20775167	T	C	1.160113e-02	False			
+rs9680797	22	20780296	A	G	6.735311e-02	False			
+rs1005640	22	20789074	C	T	2.844307e-02	False			
+rs12628193	22	20791438	A	C	4.734740e-02	False			
+rs1035239	22	20793914	C	T	7.009781e-03	False			
+rs75179603	22	20839810	T	G	3.947346e-03	False			
+rs738092	22	20860931	T	C	5.613511e-04	False			
+rs10427922	22	20979980	G	A	3.231665e-03	False			
+rs2080195	22	20991771	G	A	4.226765e-03	False			
+rs5751800	22	21075537	C	A	-2.096453e-03	False			
+rs361979	22	21154393	G	T	-4.297086e-03	False			
+rs756878	22	21323357	C	T	-6.041745e-03	False			
+rs178275	22	21331918	G	C	-2.280912e-03	False			
+rs105034	22	21334924	C	G	-2.031369e-02	False			
+rs28372939	22	21356824	A	G	1.476577e-02	False			
+rs2072550	22	21386019	A	G	1.435557e-02	False			
+rs431319	22	21449028	G	A	-1.537701e-02	False			
+rs2845419	22	21463515	A	G	-1.335614e-02	False			
+rs2298428	22	21982892	T	C	-6.373335e-02	False			
+rs62235077	22	22001704	T	G	2.809584e-02	False			
+rs76940365	22	22062480	T	C	5.291130e-02	False			
+rs10427813	22	22080735	G	A	-1.394260e-02	False			
+rs78907487	22	22151939	C	A	-8.287849e-03	False			
+rs9607287	22	22163425	G	A	5.518983e-02	False			
+rs412050	22	22307519	C	G	-3.486191e-03	False			
+rs79165737	22	22351283	G	A	-7.483763e-04	False			
+rs5844480	22	22394291	AG	A	4.320583e-03	False			
+rs2213141	22	22395754	T	C	2.587971e-03	False			
+rs6519111	22	22424302	A	C	1.140800e-03	False			
+rs77010661	22	22473905	C	A	1.226009e-02	False			
+rs2073447	22	22550450	G	C	1.773244e-02	False			
+rs5757417	22	22561610	C	T	-6.207024e-03	False			
+rs6001482	22	22581369	G	A	-6.272413e-03	False			
+rs5757569	22	22584678	A	G	-2.176470e-03	False			
+rs736898	22	22711786	T	C	7.779875e-03	False			
+rs738881	22	22726372	T	C	3.496320e-03	False			
+rs2051490	22	22762771	C	T	1.252501e-02	False			
+rs433766	22	22769923	G	A	-1.103632e-02	False			
+rs361959	22	22869742	A	C	-2.412657e-03	False			
+rs362168	22	22871922	A	G	-2.769974e-03	False			
+rs4462880	22	22929268	T	C	-7.035723e-03	False			
+rs456455	22	23001481	A	G	7.524178e-03	False			
+rs11703025	22	23022520	T	C	2.175257e-03	False			
+rs10854762	22	23064982	A	C	-1.255076e-02	False			
+rs2856876	22	23249440	A	C	2.085816e-02	False			
+rs58555503	22	23268677	A	G	1.337349e-02	False			
+rs17514179	22	23279456	C	G	-1.371401e-02	False			
+rs468884	22	23282286	C	T	4.994329e-03	False			
+rs9623992	22	23325722	C	T	8.506657e-04	False			
+rs3788338	22	23412058	A	G	-9.545553e-03	False			
+rs140504	22	23627369	G	A	-1.900175e-02	False			
+rs12168342	22	23644425	G	A	-9.106953e-04	False			
+rs131693	22	23649242	G	T	1.061643e-03	False			
+rs3827368	22	23794844	G	A	-1.198736e-02	False			
+rs11090252	22	23804670	G	T	-1.119846e-03	False			
+rs2330498	22	23819697	T	G	-1.028722e-02	False			
+rs5759884	22	23873076	T	C	9.509027e-03	False			
+rs179303	22	23892145	T	C	1.351280e-02	False			
+rs131429	22	23925779	C	T	-4.127647e-03	False			
+rs6003815	22	23960187	T	C	-8.475905e-03	False			
+rs2070446	22	24035970	T	C	-1.334318e-03	False			
+rs5759985	22	24086107	G	A	-1.652957e-02	False			
+rs73396542	22	24105789	A	G	1.813091e-02	False			
+rs2298375	22	24106448	A	G	1.834095e-03	False			
+rs6003915	22	24186809	C	T	-1.426541e-02	False			
+rs4822446	22	24235360	G	A	3.168635e-04	False			
+rs4822455	22	24255296	T	C	1.624252e-02	False			
+rs144128236	22	24300540	T	C	-3.225760e-03	False			
+rs144686326	22	24376584	A	G	-6.223068e-03	False			
+rs422674	22	24406778	A	C	3.046540e-03	False			
+rs5996675	22	24618331	G	A	-6.506681e-04	False			
+rs5751862	22	24802564	A	G	-6.695797e-03	False			
+rs6004171	22	24912232	T	C	-1.536303e-02	False			
+rs762283	22	24943582	A	G	-1.687764e-03	False			
+rs2006092	22	24995668	G	A	-3.537331e-02	False			
+rs5760609	22	25123505	C	T	-1.600990e-02	False			
+rs5760620	22	25145094	T	C	-5.584047e-03	False			
+rs1892723	22	25145453	T	C	-1.388536e-03	False			
+rs5760661	22	25185823	A	G	-9.228375e-03	False			
+rs11703103	22	25265972	A	G	1.088906e-02	False			
+rs139766	22	25309448	A	G	-2.238693e-03	False			
+rs5752027	22	25363411	A	G	4.035775e-03	False			
+rs34259162	22	25410895	G	A	9.720734e-04	False			
+rs16979472	22	25442369	C	T	1.660527e-02	False			
+rs9612844	22	25454658	C	A	1.200285e-02	False			
+rs6004418	22	25465065	C	T	1.320801e-02	False			
+rs4627697	22	25524916	C	T	1.147501e-02	False			
+rs13055430	22	25603008	T	C	-1.262741e-02	False			
+rs7286982	22	25619025	G	T	-1.212511e-02	False			
+rs5752084	22	25621591	T	C	1.051851e-02	False			
+rs11703955	22	25643483	T	G	1.373474e-02	False			
+rs9612921	22	25661725	A	G	-5.936431e-03	False			
+rs6004519	22	25667883	G	A	1.547775e-02	False			
+rs5996879	22	25668730	A	C	2.616493e-02	False			
+rs67839603	22	25678577	T	C	3.040180e-02	False			
+rs79854676	22	25761309	T	C	-1.760112e-03	False			
+rs713847	22	25761936	T	C	-5.171998e-03	False			
+rs571663	22	25938977	T	C	1.966116e-02	False			
+rs1008673	22	25994013	A	G	6.268228e-04	False			
+rs718163	22	26081873	T	C	5.232603e-02	False			
+rs10212011	22	26132612	A	G	-6.457239e-03	False			
+rs133847	22	26133775	T	C	-1.181527e-03	False			
+rs133885	22	26159289	A	G	-8.399401e-03	False			
+rs3859870	22	26181767	C	T	1.044769e-02	False			
+rs5761201	22	26190915	G	A	4.287533e-03	False			
+rs5761256	22	26218164	G	A	-2.803502e-03	False			
+rs17704912	22	26231312	C	G	6.105629e-03	False			
+rs2269632	22	26237826	C	T	4.981479e-03	False			
+rs5761268	22	26239850	A	C	4.144037e-03	False			
+rs4822668	22	26273893	C	G	5.616213e-03	False			
+rs695809	22	26278128	G	T	-3.965338e-03	False			
+rs2157538	22	26280462	T	C	-8.324497e-04	False			
+rs6004814	22	26290588	T	C	-1.307320e-02	False			
+rs973523	22	26292659	G	A	4.294309e-05	False			
+rs2072006	22	26343593	G	A	7.813758e-03	False			
+rs9306419	22	26369358	T	C	-4.836650e-03	False			
+rs2331198	22	26390964	A	G	-7.849451e-03	False			
+rs5752254	22	26415475	T	C	-1.219281e-03	False			
+rs5752262	22	26456367	G	A	-1.285326e-02	False			
+rs56116806	22	26460519	T	C	-8.695338e-03	False			
+rs78711257	22	26528054	A	G	1.973023e-02	False			
+rs5752282	22	26617260	T	A	-1.384025e-02	False			
+rs4438594	22	26638906	G	T	1.229772e-02	False			
+rs5761484	22	26735648	A	G	7.879673e-04	False			
+rs5752316	22	26782251	G	A	5.096459e-04	False			
+rs7289238	22	26812632	C	T	-1.850814e-02	False			
+rs732933	22	26939781	C	T	-9.222796e-04	False			
+rs2267091	22	26960648	A	C	-5.679255e-03	False			
+rs5752371	22	27038865	T	G	-1.487706e-04	False			
+rs5752372	22	27042828	A	G	2.957737e-02	False			
+rs1476035	22	27161060	A	G	2.844558e-03	False			
+rs56278657	22	27191643	T	C	8.953731e-03	False			
+rs739226	22	27216426	G	A	9.120990e-03	False			
+rs4822804	22	27217018	A	G	1.510616e-02	False			
+rs136511	22	27240025	T	G	-2.971740e-02	False			
+rs136516	22	27242642	G	A	-9.822927e-03	False			
+rs136535	22	27246070	C	T	-1.554199e-03	False			
+rs9306427	22	27252454	C	T	-6.560251e-03	False			
+rs5761797	22	27264880	G	T	-1.323094e-02	False			
+rs4822824	22	27337886	A	G	-9.600014e-03	False			
+rs739257	22	27339284	T	C	-9.944488e-03	False			
+rs5761864	22	27353810	T	C	-2.171555e-03	False			
+rs5761885	22	27370273	T	C	-9.798478e-03	False			
+rs7288253	22	27378884	A	G	5.145072e-02	False			
+rs7287426	22	27398749	C	T	1.012263e-03	False			
+rs9613339	22	27403571	C	T	-1.745865e-02	False			
+rs2516086	22	27405012	T	C	-5.425419e-03	False			
+rs17343637	22	27415255	C	T	-1.499362e-02	False			
+rs60259956	22	27426628	G	C	2.289460e-02	False			
+rs9620654	22	27430724	A	G	-7.068064e-03	False			
+rs760526	22	27435577	C	T	-8.632412e-03	False			
+rs4822847	22	27487580	G	A	3.691502e-03	False			
+rs5761976	22	27498426	A	G	-6.801544e-03	False			
+rs11704703	22	27526095	G	A	-8.086267e-04	False			
+rs9625170	22	27563274	C	A	1.369650e-02	False			
+rs9306437	22	27584680	A	G	-2.139188e-03	False			
+rs546339	22	27628151	C	G	2.130389e-02	False			
+rs134786	22	27652290	T	G	4.815735e-03	False			
+rs760593	22	27660675	A	G	4.899654e-03	False			
+rs134810	22	27674832	G	T	1.248065e-04	False			
+rs736950	22	27718775	A	G	2.292384e-02	False			
+rs568561	22	27729742	G	A	4.951261e-03	False			
+rs6519705	22	27762155	C	T	4.856660e-03	False			
+rs6005412	22	27781736	A	C	-8.336242e-03	False			
+rs5752545	22	27829565	G	A	2.854090e-03	False			
+rs16984654	22	27832985	G	C	-1.668955e-02	False			
+rs4822878	22	27836311	G	A	-7.756250e-03	False			
+rs7288006	22	27839704	T	C	-2.492106e-02	False			
+rs5762173	22	27864471	A	C	2.189950e-03	False			
+rs10439912	22	27873024	G	A	2.721729e-03	False			
+rs5762194	22	27883265	G	A	2.961735e-02	False			
+rs6005471	22	27890684	A	G	-8.057355e-03	False			
+rs761596	22	27927298	T	C	2.054268e-02	False			
+rs5997265	22	27934290	G	A	4.751755e-03	False			
+rs5762249	22	27951176	A	G	-4.329547e-04	False			
+rs762064	22	27974819	C	A	1.439093e-02	False			
+rs4822917	22	27975451	G	A	-3.648208e-02	False			
+rs6005524	22	28007741	C	T	-1.635917e-02	False			
+rs1885362	22	28016883	C	A	8.564085e-03	False			
+rs8135014	22	28046561	T	C	1.535905e-02	False			
+rs9608638	22	28060034	A	G	3.097228e-02	False			
+rs134110	22	28076058	C	T	2.848654e-02	False			
+rs1885364	22	28094845	G	A	-2.659077e-02	False			
+rs7291248	22	28130130	C	T	-1.640387e-02	False			
+rs2079095	22	28136977	A	C	-3.962775e-03	False			
+rs4822935	22	28150109	G	A	6.071392e-04	False			
+rs2283844	22	28150815	A	G	1.604724e-02	False			
+rs2267106	22	28151825	A	G	-5.390282e-03	False			
+rs2267113	22	28155404	T	C	5.030388e-03	False			
+rs4822939	22	28172577	G	T	5.704168e-03	False			
+rs12166473	22	28185452	G	T	-6.896853e-03	False			
+rs5752639	22	28200176	G	A	-6.474674e-03	False			
+rs11705555	22	28206912	C	A	-6.175542e-03	False			
+rs5997320	22	28270372	G	T	-6.768204e-04	False			
+rs742547	22	28412908	G	T	1.763639e-02	False			
+rs77885044	22	28501414	T	C	-2.304747e-01	False			
+rs1884816	22	29106733	C	T	-1.074749e-02	False			
+rs132549	22	29318724	T	C	1.743333e-03	False			
+rs17518058	22	29378610	C	T	6.690876e-04	False			
+rs134620	22	29478760	C	T	-3.029428e-02	False			
+rs34920087	22	29533572	G	C	-1.269604e-02	False			
+rs111625211	22	29626515	A	G	-1.171130e-02	False			
+rs3950176	22	29630337	A	G	2.658049e-02	False			
+rs4820803	22	29669648	C	G	-8.550535e-03	False			
+rs131190	22	29692497	T	G	1.234896e-03	False			
+rs3804076	22	29837537	C	T	1.321112e-02	False			
+rs467768	22	29961986	T	G	1.878853e-03	False			
+rs140130	22	30151687	C	T	3.418302e-03	False			
+rs76013375	22	30163526	G	A	1.576261e-02	False			
+rs2412971	22	30494371	A	G	7.959801e-03	False			
+rs713875	22	30592487	G	C	-1.047403e-01	False			
+rs76168543	22	30621613	A	C	-1.382104e-02	False			
+rs55816744	22	30658082	C	T	-3.794014e-02	False			
+rs4823086	22	30688659	T	C	2.257140e-02	False			
+rs740223	22	30762140	A	G	2.079806e-02	False			
+rs757660	22	30793137	A	G	-4.609306e-03	False			
+rs9608956	22	30901592	C	T	-8.334040e-03	False			
+rs5749118	22	30927975	T	C	3.226189e-03	False			
+rs2267161	22	30953295	T	C	-7.685790e-03	False			
+rs4820875	22	30992651	G	A	-2.565800e-02	False			
+rs1131603	22	31018975	C	T	4.241226e-02	False			
+rs5997714	22	31032920	G	A	-2.311985e-02	False			
+rs34597012	22	31063804	G	GT	-2.081808e-04	False			
+rs136382	22	31114086	G	T	2.825476e-02	False			
+rs5753303	22	31139653	A	G	2.640129e-06	False			
+rs136230	22	31214382	G	A	1.137657e-02	False			
+rs57527354	22	31216506	C	T	5.531311e-03	False			
+rs67441859	22	31272930	T	C	-1.056118e-03	False			
+rs3747151	22	31333631	C	T	-1.235089e-02	False			
+rs4820921	22	31378447	A	G	1.020507e-02	False			
+rs715297	22	31442308	A	G	-2.479126e-03	False			
+rs11089487	22	31477361	C	G	-1.263667e-02	False			
+rs5753465	22	31514348	G	A	5.803240e-03	False			
+rs2240432	22	31521404	A	G	1.097391e-02	False			
+rs5749244	22	31659495	C	T	2.663412e-02	False			
+rs7289941	22	31884405	C	T	-3.950834e-04	False			
+rs41311139	22	32200849	T	C	1.585735e-02	False			
+rs7290696	22	32341684	T	C	-2.960328e-02	False			
+rs8139657	22	32559835	G	A	-2.170436e-02	False			
+rs7291990	22	32569263	C	T	-1.296006e-03	False			
+rs5998321	22	32624139	C	T	5.619574e-03	False			
+rs5753956	22	32702816	A	G	-1.534023e-02	False			
+rs201161881	22	32756652	G	A	2.512177e-02	False			
+rs2076050	22	32831540	T	C	1.868495e-03	False			
+rs2076054	22	32832874	T	C	6.028815e-05	False			
+rs9609559	22	32853660	G	A	1.382210e-02	False			
+rs62241183	22	32854391	C	A	1.960825e-04	False			
+rs11107	22	32875190	A	G	-6.426637e-03	False			
+rs11341975	22	32934713	C	CT	-9.057754e-03	False			
+rs2157189	22	32952012	A	C	-3.802480e-03	False			
+rs2710386	22	32954443	G	A	2.210369e-03	False			
+rs62232741	22	32993032	C	T	-2.429979e-03	False			
+rs966964	22	32997766	T	C	-8.424246e-03	False			
+rs62234573	22	33045573	T	C	-3.107145e-02	False			
+rs762899	22	33046110	G	C	-6.954732e-02	False			
+rs80186738	22	33048039	T	C	1.138346e-02	False			
+rs4821083	22	33056341	C	T	-6.477198e-02	False			
+rs997120	22	33108536	T	C	-3.426392e-02	False			
+rs7286819	22	33108981	T	C	-7.404035e-02	False			
+rs743743	22	33116435	T	C	6.542471e-02	False			
+rs2157133	22	33143528	G	A	2.195059e-02	False			
+rs58039541	22	33146363	A	G	8.105390e-04	False			
+rs5749529	22	33259625	C	T	2.309793e-02	False			
+rs137560	22	33336039	T	G	-2.554387e-02	False			
+rs9609680	22	33408519	T	C	-7.556300e-03	False			
+rs4821137	22	33660345	C	G	2.190743e-03	False			
+rs117531661	22	33804893	C	T	6.680774e-03	False			
+rs5754555	22	33844303	C	T	8.923314e-03	False			
+rs9609802	22	33846914	T	C	6.295378e-03	False			
+rs62225321	22	33898906	A	C	1.958759e-05	False			
+rs86487	22	34022284	A	G	-2.579330e-03	False			
+rs239333	22	34137784	G	A	4.460828e-03	False			
+rs5999111	22	34208570	T	C	-3.365869e-03	False			
+rs9941961	22	34217757	T	C	9.289431e-03	False			
+rs10854640	22	34256923	A	C	1.439384e-02	False			
+rs79442817	22	34265402	G	A	-1.636610e-02	False			
+rs5754747	22	34284173	G	A	-2.315559e-02	False			
+rs2157153	22	34296093	C	A	-4.688326e-03	False			
+rs12169215	22	34378012	A	G	2.276664e-03	False			
+rs242898	22	34436795	C	T	1.337033e-04	False			
+rs2413215	22	34488452	A	G	-4.288310e-04	False			
+rs135198	22	34501541	A	G	2.763614e-03	False			
+rs243001	22	34514810	C	A	3.976601e-03	False			
+rs130668	22	34526428	C	T	1.088864e-02	False			
+rs5999246	22	34583078	A	G	1.802495e-03	False			
+rs753600	22	34620754	T	C	1.466546e-02	False			
+rs2097307	22	34691035	A	G	-2.082615e-04	False			
+rs411451	22	34758540	T	C	5.165532e-03	False			
+rs2609850	22	34851377	A	C	1.371180e-02	False			
+rs737821	22	35371707	T	C	-4.985554e-04	False			
+rs35433006	22	35382268	A	C	-4.931336e-03	False			
+rs7292124	22	35419122	C	T	-1.077953e-02	False			
+rs8140287	22	35478529	A	G	1.760523e-04	False			
+rs61735502	22	35481493	T	C	1.056439e-02	False			
+rs80730	22	35526281	G	A	-2.766891e-03	False			
+rs61134707	22	35603836	A	G	-1.783939e-04	False			
+rs1053593	22	35660875	T	G	3.988231e-02	False			
+rs6518950	22	35745196	G	T	1.750545e-04	False			
+rs17793276	22	35750980	A	G	-7.651136e-03	False			
+rs2071749	22	35783413	G	A	1.649791e-03	False			
+rs35806646	22	35918270	C	T	6.918713e-03	False			
+rs5750115	22	35959242	A	G	1.697538e-02	False			
+rs738368	22	35962060	G	A	5.181476e-03	False			
+rs926338	22	35964158	G	C	2.769931e-03	False			
+rs5995124	22	35984385	A	G	-1.280623e-02	False			
+rs4820205	22	36001258	C	T	1.342405e-02	False			
+rs4327313	22	36072262	T	C	4.895490e-03	False			
+rs6000004	22	36180535	G	A	-3.250252e-02	False			
+rs78188544	22	36517307	C	T	1.366076e-02	False			
+rs6000142	22	36519596	A	C	-3.499560e-03	False			
+rs9610403	22	36532058	A	G	-1.214487e-02	False			
+rs3788518	22	36543489	C	G	7.838149e-03	False			
+rs6000190	22	36600841	G	A	2.644389e-02	False			
+rs2010659	22	36629633	C	A	-6.871468e-03	False			
+rs136145	22	36635967	G	A	-2.634742e-02	False			
+rs78188930	22	36655735	A	G	-5.385142e-03	False			
+rs136176	22	36661646	A	G	-1.560741e-02	False			
+rs2269529	22	36684354	C	T	-5.170111e-03	False			
+rs75138027	22	36705622	A	G	1.713234e-02	False			
+rs3842715	22	36708049	C	CTCCTGTGA	-5.187051e-02	False			
+rs11089788	22	36751101	A	C	-2.440650e-02	False			
+rs16996704	22	36764788	G	A	2.784116e-02	False			
+rs5756223	22	36897427	C	T	2.603792e-02	False			
+rs760718	22	36900806	G	A	7.366207e-03	False			
+rs6000293	22	36923144	T	C	-1.875563e-03	False			
+rs5995298	22	36924714	G	A	-3.632594e-03	False			
+rs140020	22	36946643	T	G	1.333137e-02	False			
+rs4821501	22	36954939	T	C	1.105894e-02	False			
+rs5756255	22	36998907	T	C	-6.084687e-04	False			
+rs9622429	22	37001495	G	T	-1.224147e-02	False			
+rs2267348	22	37013167	G	C	1.866849e-02	False			
+rs6000386	22	37077364	C	T	7.294257e-03	False			
+rs738514	22	37080738	C	G	-4.873355e-03	False			
+rs2746971	22	37101890	C	T	3.991764e-02	False			
+rs933229	22	37118535	A	G	-1.713909e-03	False			
+rs62230508	22	37184521	G	A	6.515894e-03	False			
+rs4820254	22	37206341	G	T	2.566936e-04	False			
+rs11089806	22	37256262	A	G	1.152626e-03	False			
+rs4821544	22	37258503	C	T	-9.761102e-03	False			
+rs909486	22	37323988	T	C	-7.318200e-03	False			
+rs1534882	22	37329545	G	A	5.775806e-03	False			
+rs131843	22	37337409	T	C	-2.534399e-02	False			
+rs2093380	22	37343000	A	C	-4.011777e-04	False			
+rs743749	22	37398195	T	C	-1.001198e-02	False			
+rs2413447	22	37401532	A	G	-3.244795e-03	False			
+rs11554714	22	37407109	C	G	4.335972e-02	False			
+rs2543523	22	37477732	T	C	3.669548e-04	False			
+rs1861947	22	37507019	A	G	-9.259451e-04	False			
+rs28450477	22	37513316	A	G	1.153887e-03	False			
+rs3218297	22	37532441	A	G	1.802306e-02	False			
+rs2543529	22	37571497	G	A	-5.785311e-03	False			
+rs7290488	22	37581383	T	C	3.172492e-02	False			
+rs9798725	22	37621269	C	A	4.460405e-03	False			
+rs10212068	22	37644621	T	C	-8.386907e-03	False			
+rs730422	22	37671896	A	G	2.303688e-02	False			
+rs1041895	22	37679763	G	A	-2.658396e-03	False			
+rs1008184	22	37720268	G	A	2.120184e-02	False			
+rs2069221	22	37753256	C	T	8.984539e-03	False			
+rs4821645	22	37757099	G	A	-1.560347e-02	False			
+rs9610727	22	37780522	C	G	-1.496708e-02	False			
+rs9607459	22	37800175	T	C	-5.510833e-03	False			
+rs6000739	22	37846448	G	A	1.152963e-02	False			
+rs742152	22	37896749	C	T	5.447068e-03	False			
+rs6000756	22	37908435	C	T	1.909131e-03	False			
+rs12167061	22	37977481	T	C	1.465308e-02	False			
+rs75937893	22	37992699	G	A	8.339179e-04	False			
+rs36120988	22	38032762	G	GA	1.693041e-02	False			
+rs9622677	22	38054262	C	A	4.354146e-02	False			
+rs4820295	22	38083101	C	T	-2.092117e-02	False			
+rs12628603	22	38119213	A	G	3.948165e-02	False			
+rs5756795	22	38122122	C	T	4.377277e-02	False			
+rs79849571	22	38204089	T	C	2.977743e-02	False			
+rs117267625	22	38435786	T	G	-7.684278e-03	False			
+rs2284063	22	38544298	G	A	5.090446e-02	False			
+rs4608623	22	38597378	T	G	-1.997927e-02	False			
+rs4444637	22	38606780	G	A	-9.182016e-03	False			
+rs5995554	22	38630272	C	T	7.393137e-03	False			
+rs135720	22	38663819	G	A	-6.392021e-03	False			
+rs135730	22	38673234	A	G	-1.106705e-02	False			
+rs35336050	22	38685131	C	T	-4.493352e-03	False			
+rs5750581	22	38695406	T	C	-1.155972e-02	False			
+rs56182369	22	38708506	A	G	1.701713e-02	False			
+rs5757057	22	38744184	C	T	-2.112956e-02	False			
+rs743942	22	38819613	A	G	-5.625806e-03	False			
+rs12004	22	38877461	G	T	1.108728e-03	False			
+rs5750616	22	38918894	G	T	-8.094286e-03	False			
+rs112010490	22	38928269	G	T	-2.114917e-02	False			
+rs35069730	22	39027286	C	CAG	3.840735e-03	False			
+rs3747172	22	39067524	G	A	1.200232e-02	False			
+rs5757275	22	39159201	C	T	3.096214e-03	False			
+rs760482	22	39178701	G	A	2.148449e-03	False			
+rs735306	22	39260032	T	C	3.574634e-02	False			
+rs760481	22	39268785	T	G	9.377414e-03	False			
+rs5750691	22	39281774	G	T	3.816951e-02	False			
+rs5757355	22	39300265	C	T	3.540156e-02	False			
+rs1014971	22	39332623	T	C	-4.449842e-03	False			
+rs5757424	22	39415780	G	A	1.479946e-02	False			
+rs35860424	22	39448465	A	G	3.065974e-03	False			
+rs2011869	22	39480697	G	A	-4.005617e-02	False			
+rs139272	22	39487665	G	A	-1.218988e-04	False			
+rs55989856	22	39493294	C	T	-3.115929e-02	False			
+rs738469	22	39510995	G	A	-2.069106e-02	False			
+rs877529	22	39542292	A	G	9.653575e-03	False			
+rs73884827	22	39543000	T	C	-4.069841e-03	False			
+rs7287160	22	39573724	A	C	2.683694e-02	False			
+rs5750761	22	39575692	A	C	1.451305e-02	False			
+rs738470	22	39581277	A	C	1.766406e-02	False			
+rs13053714	22	39626572	A	G	-2.901981e-02	False			
+rs5757580	22	39658626	C	T	4.177065e-03	False			
+rs1569497	22	39665395	G	A	1.264611e-02	False			
+rs54211	22	39687484	G	A	5.418141e-03	False			
+rs6519183	22	39708279	A	G	-4.281532e-02	False			
+rs5757611	22	39708357	T	C	8.605574e-03	False			
+rs5750811	22	39793066	G	T	3.658209e-02	False			
+rs34026806	22	39798127	G	A	2.302129e-03	False			
+rs5757678	22	39843409	T	C	1.065699e-02	False			
+rs6001601	22	39865475	G	A	1.588501e-03	False			
+rs5757703	22	39932516	A	G	-1.179841e-02	False			
+rs62228477	22	39963426	G	A	-1.503908e-02	False			
+rs11704409	22	40023636	C	T	6.443146e-03	False			
+rs136829	22	40046176	C	T	-7.416552e-04	False			
+rs5757764	22	40067818	T	C	4.559360e-03	False			
+rs5757777	22	40092864	G	A	2.400297e-02	False			
+rs5757783	22	40127293	T	C	-8.870038e-04	False			
+rs7285609	22	40358148	T	C	-1.079902e-02	False			
+rs8139715	22	40420786	G	C	-8.092115e-03	False			
+rs7291691	22	40454069	G	T	7.898880e-03	False			
+rs732384	22	40541981	G	A	1.742640e-02	False			
+rs12484776	22	40652873	G	A	5.853057e-03	False			
+rs28360630	22	40676672	G	T	-1.894274e-03	False			
+rs470113	22	40729614	G	A	1.959940e-02	False			
+rs5757949	22	40820151	C	T	-1.628066e-02	False			
+rs35898643	22	40986372	G	C	-1.983507e-02	False			
+rs12165625	22	41494925	A	G	-2.918069e-02	False			
+rs11703267	22	41646738	G	A	3.521847e-04	False			
+rs8139705	22	41680898	T	C	1.402732e-02	False			
+rs34011394	22	41704872	T	C	6.681484e-05	False			
+rs2073167	22	41791536	C	T	-5.572333e-05	False			
+rs2076196	22	41895409	A	G	-4.407217e-02	False			
+rs2076198	22	41929175	G	T	-3.186844e-02	False			
+rs739134	22	42089623	C	T	5.322340e-03	False			
+rs147348682	22	42095658	G	T	3.846131e-02	False			
+rs139568	22	42210985	C	T	-3.139710e-03	False			
+rs13055841	22	42279653	G	A	-6.596336e-03	False			
+rs7293091	22	42341308	G	A	-6.862491e-04	False			
+rs35742686	22	42524243	C	CT	-1.181191e-02	False			
+rs762995	22	42672124	G	A	-5.278171e-03	False			
+rs1548304	22	42691238	T	C	-1.642396e-02	False			
+rs8139063	22	42813753	C	T	-3.867750e-03	False			
+rs5758742	22	42867898	G	A	-1.352327e-03	False			
+rs11553441	22	42912097	T	C	-7.295657e-04	False			
+rs4822160	22	42932317	A	G	-5.768556e-02	False			
+rs28627172	22	43010817	A	G	1.722077e-02	False			
+rs130370	22	43080028	T	C	-5.527551e-04	False			
+rs6002910	22	43096507	T	C	-5.556102e-03	False			
+rs738526	22	43112475	T	C	-1.350273e-02	False			
+rs8138149	22	43114824	G	A	-1.963192e-02	False			
+rs5758896	22	43115576	C	T	-1.880097e-02	False			
+rs9623692	22	43154299	G	A	-1.621113e-03	False			
+rs9611885	22	43159948	T	C	-7.980584e-03	False			
+rs1018448	22	43206950	C	A	-5.783037e-03	False			
+rs9607957	22	43218397	C	T	-3.976636e-03	False			
+rs2267463	22	43283255	C	A	-1.426668e-02	False			
+rs4822220	22	43290583	C	T	-3.955775e-02	False			
+rs8140884	22	43333156	A	G	-3.127845e-02	False			
+rs6003002	22	43426262	G	A	-3.668040e-03	False			
+rs8141749	22	43483242	T	C	-2.540203e-02	False			
+rs4988388	22	43515108	C	T	-1.570749e-02	False			
+rs13815	22	43529314	C	G	1.738127e-02	False			
+rs5759199	22	43551513	G	A	2.565386e-02	False			
+rs6972	22	43558972	A	G	-1.962819e-02	False			
+rs4822262	22	43577214	T	C	-2.270478e-02	False			
+rs13058467	22	43579049	C	T	-1.193909e-03	False			
+rs138993	22	43610207	G	A	-7.621661e-03	False			
+rs129415	22	43623395	G	C	-4.852519e-02	False			
+rs11703272	22	43640512	C	T	-5.533207e-03	False			
+rs139027	22	43649701	C	T	7.724845e-02	False			
+rs5751462	22	43661080	T	C	-4.251741e-02	False			
+rs739306	22	43683088	A	G	-3.582388e-03	False			
+rs4820518	22	43707996	A	G	-2.547044e-02	False			
+rs6519367	22	43711080	C	G	-5.784446e-03	False			
+rs6003156	22	43721519	C	A	3.658850e-04	False			
+rs1894717	22	43729401	C	T	8.557013e-03	False			
+rs4820525	22	43763757	T	G	-1.789810e-02	False			
+rs28673361	22	43836198	G	T	2.427697e-03	False			
+rs9614382	22	43976396	A	G	-1.277457e-02	False			
+rs137731	22	44031042	C	T	3.593107e-03	False			
+rs9614187	22	44193626	C	A	-6.865434e-03	False			
+rs138057	22	44221247	G	A	1.833991e-02	False			
+rs4823156	22	44296372	T	C	6.169212e-03	False			
+rs6006453	22	44298838	A	G	7.441756e-03	False			
+rs2294918	22	44342116	G	A	2.810328e-02	False			
+rs3761472	22	44368122	G	A	1.299680e-02	False			
+rs8418	22	44379838	G	A	1.648422e-03	False			
+rs6006598	22	44380033	C	T	-2.136788e-03	False			
+rs1007863	22	44395451	C	T	-6.698507e-03	False			
+rs7285340	22	44419871	C	T	1.816130e-02	False			
+rs6006622	22	44424108	T	C	1.036733e-02	False			
+rs130313	22	44467899	C	T	-2.592364e-03	False			
+rs9614325	22	44498134	T	C	7.281423e-03	False			
+rs1535009	22	44522312	C	T	-2.636447e-04	False			
+rs4823194	22	44526130	G	A	-3.882980e-03	False			
+rs2267613	22	44530286	A	G	2.528159e-02	False			
+rs2267614	22	44530420	C	T	-1.233654e-02	False			
+rs10483222	22	44548944	G	A	-3.947209e-03	False			
+rs77120395	22	44551755	G	A	1.262458e-02	False			
+rs9614359	22	44566434	A	G	-4.290306e-03	False			
+rs139131	22	44581046	T	C	-1.479950e-02	False			
+rs9626137	22	44643161	C	T	1.439493e-02	False			
+rs135400	22	44677081	C	T	-1.030513e-02	False			
+rs135388	22	44681612	G	A	-1.269762e-03	False			
+rs3935378	22	44695088	T	C	6.324859e-03	False			
+rs6519840	22	44707716	G	T	2.288939e-03	False			
+rs62228577	22	44725343	G	A	3.534678e-03	False			
+rs6519897	22	44738406	G	A	2.320049e-02	False			
+rs7289501	22	44746729	A	G	-1.754216e-02	False			
+rs5764718	22	44751158	G	A	-6.539695e-03	False			
+rs9614538	22	44757439	A	G	2.480295e-02	False			
+rs9614823	22	44759519	G	A	2.111274e-03	False			
+rs5765809	22	44761797	A	T	-5.311720e-03	False			
+rs5764921	22	44763352	C	G	1.452737e-02	False			
+rs19985	22	44783779	G	A	9.142699e-03	False			
+rs2071820	22	44791807	C	T	-2.371876e-02	False			
+rs2746583	22	44818986	C	T	-6.740622e-03	False			
+rs5765690	22	44894913	G	A	-5.179871e-05	False			
+rs4508	22	45058431	C	T	1.098259e-02	False			
+rs6006845	22	45066035	A	G	-1.484374e-02	False			
+rs9614870	22	45069410	T	C	1.530441e-02	False			
+rs41515447	22	45081330	G	A	1.350120e-03	False			
+rs28460735	22	45082168	C	A	3.663354e-03	False			
+rs4823364	22	45090008	G	A	2.811861e-03	False			
+rs6006857	22	45116664	C	T	1.247728e-02	False			
+rs2269543	22	45244930	T	C	-1.450041e-02	False			
+rs8881	22	45258457	G	A	-3.500519e-03	False			
+rs9614987	22	45323989	T	C	1.111338e-03	False			
+rs140556	22	45415987	A	G	-1.398184e-02	False			
+rs132067	22	45451355	G	A	-5.566982e-03	False			
+rs5765155	22	45471607	C	T	1.148978e-02	False			
+rs5765167	22	45497738	C	T	-5.029327e-03	False			
+rs7292035	22	45502829	C	T	-3.893521e-02	False			
+rs2018928	22	45519040	T	G	2.377071e-03	False			
+rs6006941	22	45523391	A	G	1.318997e-02	False			
+rs17548742	22	45573450	C	A	4.385600e-03	False			
+rs1125398	22	45589490	G	A	-8.350439e-03	False			
+rs58667	22	45668012	T	C	1.286879e-02	False			
+rs5765242	22	45671343	G	A	-2.940682e-06	False			
+rs2742648	22	45672574	T	C	5.743608e-03	False			
+rs5765250	22	45693923	A	G	-2.675069e-03	False			
+rs7290139	22	45718743	G	A	-2.092804e-02	False			
+rs11556482	22	45723807	C	G	1.670159e-03	False			
+rs6007594	22	45728370	A	G	1.879231e-04	False			
+rs56343022	22	45741537	G	T	1.420045e-02	False			
+rs5764698	22	45749983	T	G	-4.591012e-02	False			
+rs2272804	22	45809624	A	C	2.185772e-03	False			
+rs2142662	22	45821935	A	G	2.250782e-02	False			
+rs6007041	22	45837410	G	A	-2.756449e-03	False			
+rs11090631	22	45846371	T	C	7.910102e-02	False			
+rs713975	22	45864934	T	C	8.535181e-03	False			
+rs10483228	22	45871507	G	C	-7.764056e-03	False			
+rs5765426	22	45892656	G	T	-3.885653e-03	False			
+rs3810631	22	45897997	C	T	3.935204e-04	False			
+rs105199	22	45929577	C	T	-2.532217e-02	False			
+rs136755	22	45936350	A	G	-8.001698e-03	False			
+rs5765463	22	45942726	T	G	-1.415551e-02	False			
+rs13268	22	45996298	G	A	5.643525e-02	False			
+rs17564843	22	46009063	G	A	6.464843e-03	False			
+rs5765546	22	46022070	G	A	2.246740e-02	False			
+rs2239398	22	46155548	G	C	-3.247470e-02	False			
+rs136018	22	46207955	C	T	-1.354554e-03	False			
+rs136029	22	46236425	A	G	8.398423e-02	False			
+rs57514815	22	46275529	T	C	2.264300e-03	False			
+rs75427302	22	46287720	A	G	-2.237482e-02	False			
+rs28473346	22	46289699	T	C	1.872124e-02	False			
+rs9697736	22	46303347	T	C	-1.283734e-02	False			
+rs28663466	22	46316057	A	G	2.312579e-02	False			
+rs9286453	22	46337043	G	C	1.701173e-02	False			
+rs75862558	22	46347519	C	T	1.574289e-02	False			
+rs9330813	22	46364161	A	G	-4.466341e-02	False			
+rs62228062	22	46381234	G	A	4.730559e-02	False			
+rs28628653	22	46396925	G	A	1.783944e-03	False			
+rs28698504	22	46403715	A	G	-2.132589e-02	False			
+rs78358349	22	46406782	A	C	8.439466e-02	False			
+rs9627368	22	46445002	G	C	-7.613496e-02	False			
+rs7292297	22	46458123	G	T	3.328073e-02	False			
+rs9626891	22	46482948	C	T	4.241879e-02	False			
+rs12160757	22	46486508	C	T	-9.684390e-03	False			
+rs3747243	22	46493852	T	C	-6.758580e-03	False			
+rs9616125	22	46499120	C	G	-9.873118e-03	False			
+rs12170325	22	46502870	T	C	-1.792140e-02	False			
+rs76755807	22	46561713	G	A	2.604703e-02	False			
+rs4253701	22	46586110	A	G	-1.256735e-03	False			
+rs59842914	22	46592168	C	T	1.417055e-02	False			
+rs1800206	22	46614274	G	C	-5.854014e-02	False			
+rs4253772	22	46627603	T	C	8.004024e-02	False			
+rs35364389	22	46760086	T	C	3.229515e-03	False			
+rs34267201	22	46782382	T	C	-2.470821e-02	False			
+rs9627450	22	46807234	C	T	2.324176e-03	False			
+rs9306514	22	46837114	G	A	9.440730e-04	False			
+rs5768830	22	46888399	T	C	9.911095e-03	False			
+rs9615374	22	46907779	G	A	6.531440e-03	False			
+rs4823838	22	46909355	T	G	-4.780494e-03	False			
+rs12484501	22	46914277	A	C	9.689535e-03	False			
+rs3810636	22	46943687	G	A	-1.303660e-02	False			
+rs9627514	22	46985917	A	G	1.893397e-02	False			
+rs9615396	22	47021226	G	A	-1.322949e-02	False			
+rs13057352	22	47095235	A	C	-1.156013e-01	False			
+rs13054785	22	47109621	C	T	4.322858e-04	False			
+rs34301321	22	47125474	G	A	-1.746025e-02	False			
+rs17221476	22	47147117	T	C	-2.418349e-02	False			
+rs5769136	22	47156703	C	T	2.628970e-02	False			
+rs6008990	22	47245836	A	G	1.880575e-03	False			
+rs140535	22	47271747	C	T	1.055264e-03	False			
+rs5767397	22	47301822	C	T	3.032158e-03	False			
+rs9616173	22	47345487	T	C	-2.945945e-03	False			
+rs470059	22	47372368	T	C	2.067644e-02	False			
+rs136120	22	47380606	C	T	4.041426e-02	False			
+rs5769300	22	47437808	C	T	1.683027e-03	False			
+rs131924	22	47450911	A	G	1.624479e-02	False			
+rs910541	22	47511864	A	C	-4.226735e-03	False			
+rs2295246	22	47519476	T	C	-3.954111e-03	False			
+rs13055207	22	47529458	A	G	-3.602848e-04	False			
+rs4823597	22	47531320	T	C	-6.899703e-03	False			
+rs738669	22	47548321	T	C	4.925401e-03	False			
+rs2337244	22	47568291	C	T	7.726693e-03	False			
+rs15646	22	47571203	A	G	-9.744751e-03	False			
+rs135368	22	47574009	C	T	-5.327010e-03	False			
+rs136618	22	47642100	T	C	6.976251e-03	False			
+rs136636	22	47657635	T	C	1.798943e-03	False			
+rs6008118	22	47683805	C	T	-3.475544e-02	False			
+rs36008375	22	47720973	T	C	-7.868172e-03	False			
+rs17763944	22	47821952	G	A	-8.854280e-04	False			
+rs2301382	22	47893053	A	G	-2.449056e-02	False			
+rs5767784	22	47935365	C	T	-1.599879e-03	False			
+rs2285093	22	47961708	G	T	-3.593525e-03	False			
+rs131114	22	47986332	T	C	-3.976592e-03	False			
+rs9615626	22	48154645	C	T	7.608639e-03	False			
+rs5845816	22	48165452	C	CT	2.039503e-03	False			
+rs16994709	22	48207318	T	C	-9.725168e-03	False			
+rs4823698	22	48213904	G	C	-1.220367e-02	False			
+rs9615649	22	48215904	A	G	-2.488244e-05	False			
+rs738739	22	48220460	T	C	-2.702163e-03	False			
+rs738743	22	48230941	C	A	-1.129522e-03	False			
+rs4823717	22	48271961	A	G	-5.053446e-03	False			
+rs2338258	22	48284025	T	C	-3.344182e-03	False			
+rs5768135	22	48297953	C	T	-1.046958e-02	False			
+rs1028528	22	48362290	G	A	-2.367254e-02	False			
+rs28537386	22	48362914	C	A	-3.167719e-03	False			
+rs5768244	22	48387670	A	G	-8.243989e-03	False			
+rs7289071	22	48415446	C	T	2.130715e-03	False			
+rs135271	22	48460730	T	C	2.682476e-03	False			
+rs5768344	22	48491160	T	C	1.257794e-03	False			
+rs4823512	22	48519794	C	T	3.680757e-03	False			
+rs6007807	22	48537775	G	A	2.134692e-03	False			
+rs106953	22	48543566	T	C	7.314089e-03	False			
+rs133534	22	48593037	C	T	9.084708e-03	False			
+rs34776844	22	48687509	C	T	-2.771960e-02	False			
+rs5768510	22	48692033	T	C	-2.126264e-02	False			
+rs62223851	22	48699617	T	C	5.093107e-04	False			
+rs34080684	22	48717568	T	C	-8.190281e-04	False			
+rs1475987	22	48811946	C	T	7.916515e-03	False			
+rs7293013	22	48823357	G	A	1.464317e-02	False			
+rs2071750	22	48840428	A	C	3.711229e-03	False			
+rs9615896	22	48851612	T	C	-5.887765e-03	False			
+rs13056230	22	48874310	T	C	-1.106607e-02	False			
+rs761793	22	48968070	C	T	1.280691e-02	False			
+rs28658383	22	48991385	T	C	-1.234119e-02	False			
+rs34694572	22	49004050	G	A	2.290755e-02	False			
+rs28406241	22	49014565	A	G	1.555565e-03	False			
+rs7288241	22	49086481	T	C	-6.196369e-03	False			
+rs4989008	22	49107173	T	C	1.277272e-02	False			
+rs131032	22	49180915	A	G	6.346977e-03	False			
+rs4076042	22	49262579	A	G	2.657134e-02	False			
+rs28726380	22	49270317	C	T	1.447665e-03	False			
+rs2024695	22	49313196	A	G	-7.055532e-03	False			
+rs1467436	22	49335230	T	C	-6.548281e-03	False			
+rs4824067	22	49366123	T	C	1.136486e-02	False			
+rs738596	22	49372356	G	C	-2.420841e-02	False			
+rs17178683	22	49443666	T	C	1.581736e-02	False			
+rs55898343	22	49496835	G	A	-1.355414e-02	False			
+rs1981477	22	49524428	A	G	-4.228482e-03	False			
+rs135257	22	49530553	G	C	8.197389e-03	False			
+rs9627875	22	49537845	T	C	1.112550e-02	False			
+rs5769975	22	49557457	G	A	9.401926e-03	False			
+rs5769981	22	49562666	C	A	1.271701e-02	False			
+rs2318943	22	49574509	C	T	4.703177e-04	False			
+rs5769446	22	49579141	A	G	2.448619e-02	False			
+rs7288983	22	49650863	T	C	6.739571e-03	False			
+rs5770154	22	49662549	T	G	-5.769464e-03	False			
+rs1880009	22	49665841	T	C	-7.037069e-04	False			
+rs62220604	22	49677464	A	G	-2.177735e-02	False			
+rs6009594	22	49696067	C	T	-3.309682e-03	False			
+rs5770223	22	49700272	T	G	-2.541948e-03	False			
+rs1124544	22	49706433	T	C	-1.719402e-02	False			
+rs73173197	22	49713835	G	A	-1.370754e-02	False			
+rs848761	22	49719264	A	C	-1.067852e-02	False			
+rs848721	22	49743627	G	A	-5.970581e-04	False			
+rs9628005	22	49800265	C	T	3.098582e-02	False			
+rs6009666	22	49806863	A	G	3.940447e-03	False			
+rs136795	22	49830851	C	T	-2.742706e-03	False			
+rs11705513	22	49834624	G	A	-2.820163e-03	False			
+rs6009703	22	49843235	G	C	-4.458281e-04	False			
+rs9616311	22	49847501	T	G	2.235016e-03	False			
+rs4823938	22	49861033	C	T	1.721243e-02	False			
+rs5770489	22	49881321	A	G	-5.127800e-04	False			
+rs9628037	22	49908804	G	A	-9.455892e-03	False			
+rs134474	22	49911222	G	T	-1.389666e-02	False			
+rs17779492	22	49925268	A	G	1.679984e-02	False			
+rs134447	22	49927332	T	C	3.929800e-04	False			
+rs111392589	22	50109212	T	C	1.610819e-02	False			
+rs6009846	22	50118149	G	C	7.024666e-03	False			
+rs138844	22	50184484	G	T	1.222581e-02	False			
+rs117613664	22	50219447	T	C	5.091891e-02	False			
+rs910799	22	50278568	G	A	-2.340672e-02	False			
+rs78676969	22	50319170	G	A	1.669806e-02	False			
+rs28372448	22	50350971	A	G	2.640160e-02	False			
+rs4077129	22	50356693	C	T	3.851499e-03	False			
+rs5771069	22	50435480	G	A	1.663630e-02	False			
+rs9617098	22	50439626	A	G	-2.722154e-03	False			
+rs137890	22	50466542	C	T	-2.560094e-03	False			
+rs11101958	22	50470516	T	C	-1.621986e-02	False			
+rs5771133	22	50491150	G	A	1.828674e-02	False			
+rs6010164	22	50515270	C	T	1.439904e-02	False			
+rs56144269	22	50529850	C	T	2.054628e-02	False			
+rs75570992	22	50570755	C	G	7.077514e-03	False			
+rs2272837	22	50582626	G	A	-3.588854e-03	False			
+rs17836662	22	50672154	A	G	7.660848e-03	False			
+rs11547731	22	50722134	C	T	-1.747164e-02	False			
+rs79966207	22	50722408	C	T	-1.063465e-03	False			
+rs28379706	22	50728062	C	T	2.159223e-02	False			
+rs11553142	22	50750481	T	C	1.877272e-02	False			
+rs62241237	22	50758873	T	C	4.001731e-03	False			
+rs9628184	22	50835040	A	G	-6.374259e-03	False			
+rs9616997	22	50859049	C	T	3.480749e-04	False			
+rs1053744	22	50885775	G	A	-1.358311e-02	False			
+rs2232883	22	50926768	T	C	1.798498e-03	False			
+rs2232885	22	50928026	A	G	4.775504e-03	False			
+rs140522	22	50971266	C	T	2.160893e-02	False			
+rs41281529	22	50989197	T	C	-1.328884e-02	False			
+rs131778	22	50989326	G	A	1.037054e-02	False			
+rs5770892	22	50999681	G	A	-1.226224e-02	False			
+rs35826039	22	51046163	T	C	-2.754002e-02	False			
+rs9616915	22	51117580	C	T	3.573542e-02	False			
+rs2301584	22	51171497	A	G	-1.951606e-02	False			
+rs73174435	22	51174939	T	C	-6.178519e-03	False			
diff --git a/tests/data/combine/scorefile.txt b/tests/data/combine/scorefile.txt
new file mode 100644
index 0000000..1043a68
--- /dev/null
+++ b/tests/data/combine/scorefile.txt
@@ -0,0 +1,838 @@
+#pgs_name=PGS001229_22
+#genome_build=GRCh37
+chr_name	chr_position	effect_allele	other_allele	effect_weight
+22	17080378	G	A	0.01045457
+22	17300230	A	G	0.0001411475
+22	17318864	A	C	0.008166266
+22	17327595	T	C	0.007791641
+22	17409813	A	G	0.0003108784
+22	17450952	G	A	-0.03033983
+22	17492533	G	A	0.00388999
+22	17542810	C	T	0.00803629
+22	17565013	G	A	0.02135621
+22	17589209	T	C	0.003026491
+22	17600977	A	G	0.01581277
+22	17625915	A	G	-0.1172964
+22	17630486	A	C	0.01012909
+22	17633785	C	T	0.0023255
+22	17643689	A	G	0.003361814
+22	17669306	C	T	0.0214506
+22	17677699	T	C	-0.0007031384
+22	17680519	C	A	0.001079236
+22	17701234	G	A	0.004477145
+22	17703119	A	T	0.0007771872
+22	17718699	C	A	-0.01320632
+22	17721595	C	T	0.009480363
+22	17727648	T	C	0.007811685
+22	17738177	G	A	-0.004719812
+22	17749096	A	G	-0.005244795
+22	17770181	G	T	-0.03101703
+22	17793969	G	A	0.01774444
+22	17815696	G	C	-0.00551609
+22	17827684	G	A	-0.005944752
+22	17831813	T	C	0.01061587
+22	17844929	T	G	0.001717643
+22	17850661	T	C	-0.02805489
+22	17887534	A	G	0.0007723542
+22	17887725	A	G	0.007472703
+22	17958221	C	A	-0.02098647
+22	18036253	G	A	-0.01772981
+22	18038786	A	G	-0.002119071
+22	18262301	A	T	-0.005065485
+22	18289204	A	G	0.005306345
+22	18295575	C	T	0.02300129
+22	18296238	G	A	-0.005665446
+22	18319179	T	C	0.03440642
+22	18393534	A	C	0.01764269
+22	18439958	T	C	-0.002261707
+22	18483388	G	A	0.03318724
+22	18488883	C	G	-0.0191918
+22	18489048	C	A	0.01233198
+22	18495470	A	G	-0.005804926
+22	18537145	G	A	-0.004930116
+22	18571008	A	G	-8.844726E-05
+22	18584433	C	T	-0.001169893
+22	18631365	T	C	-0.001551714
+22	18650682	T	C	-0.01313784
+22	18890037	A	G	0.05968921
+22	18891398	G	A	0.006891943
+22	18892575	A	G	-0.00224447
+22	18915963	A	G	0.003719756
+22	18959581	T	C	0.006464581
+22	18963340	A	G	-0.01397565
+22	18970915	T	C	-0.001507131
+22	19024651	T	C	-0.00350575
+22	19121872	A	G	0.01644046
+22	19135603	A	G	-0.02970077
+22	19190143	T	C	0.003268027
+22	19263698	T	C	0.02057255
+22	19292446	G	T	0.01153989
+22	19371052	T	C	0.01055134
+22	19420109	C	T	-0.008628228
+22	19451186	A	C	0.02141029
+22	19518079	C	T	0.005372247
+22	19581331	T	C	0.01686942
+22	19593854	C	A	0.0006544249
+22	19606703	G	A	0.02070121
+22	19649005	A	G	0.002868601
+22	19735854	C	T	0.006262962
+22	19738355	T	C	4.97384E-05
+22	19770886	A	G	-0.01013929
+22	19781823	T	C	0.02481609
+22	19873357	T	C	0.0116302
+22	19907099	A	C	-0.0267645
+22	19968597	T	C	-0.02203945
+22	20046344	G	A	-0.009801428
+22	20084821	C	T	-0.02232886
+22	20185457	A	G	0.006892171
+22	20189077	T	C	0.01738215
+22	20219648	A	G	0.009307625
+22	20248391	A	G	-0.005405845
+22	20267213	A	G	0.006713242
+22	20286099	G	T	0.01574758
+22	20749042	G	A	0.006603339
+22	20754039	A	G	-0.01181141
+22	20775167	T	C	0.01160113
+22	20780296	A	G	0.06735311
+22	20789074	C	T	0.02844307
+22	20791438	A	C	0.0473474
+22	20793914	C	T	0.007009781
+22	20839810	T	G	0.003947346
+22	20860931	T	C	0.0005613511
+22	20979980	G	A	0.003231665
+22	20991771	G	A	0.004226765
+22	21075537	C	A	-0.002096453
+22	21154393	G	T	-0.004297086
+22	21323357	C	T	-0.006041745
+22	21331918	G	C	-0.002280912
+22	21334924	C	G	-0.02031369
+22	21356824	A	G	0.01476577
+22	21386019	A	G	0.01435557
+22	21449028	G	A	-0.01537701
+22	21463515	A	G	-0.01335614
+22	21982892	T	C	-0.06373335
+22	22001704	T	G	0.02809584
+22	22062480	T	C	0.0529113
+22	22080735	G	A	-0.0139426
+22	22151939	C	A	-0.008287849
+22	22163425	G	A	0.05518983
+22	22307519	C	G	-0.003486191
+22	22351283	G	A	-0.0007483763
+22	22394291	AG	A	0.004320583
+22	22395754	T	C	0.002587971
+22	22424302	A	C	0.0011408
+22	22473905	C	A	0.01226009
+22	22550450	G	C	0.01773244
+22	22561610	C	T	-0.006207024
+22	22581369	G	A	-0.006272413
+22	22584678	A	G	-0.00217647
+22	22711786	T	C	0.007779875
+22	22726372	T	C	0.00349632
+22	22762771	C	T	0.01252501
+22	22769923	G	A	-0.01103632
+22	22869742	A	C	-0.002412657
+22	22871922	A	G	-0.002769974
+22	22929268	T	C	-0.007035723
+22	23001481	A	G	0.007524178
+22	23022520	T	C	0.002175257
+22	23064982	A	C	-0.01255076
+22	23249440	A	C	0.02085816
+22	23268677	A	G	0.01337349
+22	23279456	C	G	-0.01371401
+22	23282286	C	T	0.004994329
+22	23325722	C	T	0.0008506657
+22	23412058	A	G	-0.009545553
+22	23627369	G	A	-0.01900175
+22	23644425	G	A	-0.0009106953
+22	23649242	G	T	0.001061643
+22	23794844	G	A	-0.01198736
+22	23804670	G	T	-0.001119846
+22	23819697	T	G	-0.01028722
+22	23873076	T	C	0.009509027
+22	23892145	T	C	0.0135128
+22	23925779	C	T	-0.004127647
+22	23960187	T	C	-0.008475905
+22	24035970	T	C	-0.001334318
+22	24086107	G	A	-0.01652957
+22	24105789	A	G	0.01813091
+22	24106448	A	G	0.001834095
+22	24186809	C	T	-0.01426541
+22	24235360	G	A	0.0003168635
+22	24255296	T	C	0.01624252
+22	24300540	T	C	-0.00322576
+22	24376584	A	G	-0.006223068
+22	24406778	A	C	0.00304654
+22	24618331	G	A	-0.0006506681
+22	24802564	A	G	-0.006695797
+22	24912232	T	C	-0.01536303
+22	24943582	A	G	-0.001687764
+22	24995668	G	A	-0.03537331
+22	25123505	C	T	-0.0160099
+22	25145094	T	C	-0.005584047
+22	25145453	T	C	-0.001388536
+22	25185823	A	G	-0.009228375
+22	25265972	A	G	0.01088906
+22	25309448	A	G	-0.002238693
+22	25363411	A	G	0.004035775
+22	25410895	G	A	0.0009720734
+22	25442369	C	T	0.01660527
+22	25454658	C	A	0.01200285
+22	25465065	C	T	0.01320801
+22	25524916	C	T	0.01147501
+22	25603008	T	C	-0.01262741
+22	25619025	G	T	-0.01212511
+22	25621591	T	C	0.01051851
+22	25643483	T	G	0.01373474
+22	25661725	A	G	-0.005936431
+22	25667883	G	A	0.01547775
+22	25668730	A	C	0.02616493
+22	25678577	T	C	0.0304018
+22	25761309	T	C	-0.001760112
+22	25761936	T	C	-0.005171998
+22	25938977	T	C	0.01966116
+22	25994013	A	G	0.0006268228
+22	26081873	T	C	0.05232603
+22	26132612	A	G	-0.006457239
+22	26133775	T	C	-0.001181527
+22	26159289	A	G	-0.008399401
+22	26181767	C	T	0.01044769
+22	26190915	G	A	0.004287533
+22	26218164	G	A	-0.002803502
+22	26231312	C	G	0.006105629
+22	26237826	C	T	0.004981479
+22	26239850	A	C	0.004144037
+22	26273893	C	G	0.005616213
+22	26278128	G	T	-0.003965338
+22	26280462	T	C	-0.0008324497
+22	26290588	T	C	-0.0130732
+22	26292659	G	A	4.294309E-05
+22	26343593	G	A	0.007813758
+22	26369358	T	C	-0.00483665
+22	26390964	A	G	-0.007849451
+22	26415475	T	C	-0.001219281
+22	26456367	G	A	-0.01285326
+22	26460519	T	C	-0.008695338
+22	26528054	A	G	0.01973023
+22	26617260	T	A	-0.01384025
+22	26638906	G	T	0.01229772
+22	26735648	A	G	0.0007879673
+22	26782251	G	A	0.0005096459
+22	26812632	C	T	-0.01850814
+22	26939781	C	T	-0.0009222796
+22	26960648	A	C	-0.005679255
+22	27038865	T	G	-0.0001487706
+22	27042828	A	G	0.02957737
+22	27161060	A	G	0.002844558
+22	27191643	T	C	0.008953731
+22	27216426	G	A	0.00912099
+22	27217018	A	G	0.01510616
+22	27240025	T	G	-0.0297174
+22	27242642	G	A	-0.009822927
+22	27246070	C	T	-0.001554199
+22	27252454	C	T	-0.006560251
+22	27264880	G	T	-0.01323094
+22	27337886	A	G	-0.009600014
+22	27339284	T	C	-0.009944488
+22	27353810	T	C	-0.002171555
+22	27370273	T	C	-0.009798478
+22	27378884	A	G	0.05145072
+22	27398749	C	T	0.001012263
+22	27403571	C	T	-0.01745865
+22	27405012	T	C	-0.005425419
+22	27415255	C	T	-0.01499362
+22	27426628	G	C	0.0228946
+22	27430724	A	G	-0.007068064
+22	27435577	C	T	-0.008632412
+22	27487580	G	A	0.003691502
+22	27498426	A	G	-0.006801544
+22	27526095	G	A	-0.0008086267
+22	27563274	C	A	0.0136965
+22	27584680	A	G	-0.002139188
+22	27628151	C	G	0.02130389
+22	27652290	T	G	0.004815735
+22	27660675	A	G	0.004899654
+22	27674832	G	T	0.0001248065
+22	27718775	A	G	0.02292384
+22	27729742	G	A	0.004951261
+22	27762155	C	T	0.00485666
+22	27781736	A	C	-0.008336242
+22	27829565	G	A	0.00285409
+22	27832985	G	C	-0.01668955
+22	27836311	G	A	-0.00775625
+22	27839704	T	C	-0.02492106
+22	27864471	A	C	0.00218995
+22	27873024	G	A	0.002721729
+22	27883265	G	A	0.02961735
+22	27890684	A	G	-0.008057355
+22	27927298	T	C	0.02054268
+22	27934290	G	A	0.004751755
+22	27951176	A	G	-0.0004329547
+22	27974819	C	A	0.01439093
+22	27975451	G	A	-0.03648208
+22	28007741	C	T	-0.01635917
+22	28016883	C	A	0.008564085
+22	28046561	T	C	0.01535905
+22	28060034	A	G	0.03097228
+22	28076058	C	T	0.02848654
+22	28094845	G	A	-0.02659077
+22	28130130	C	T	-0.01640387
+22	28136977	A	C	-0.003962775
+22	28150109	G	A	0.0006071392
+22	28150815	A	G	0.01604724
+22	28151825	A	G	-0.005390282
+22	28155404	T	C	0.005030388
+22	28172577	G	T	0.005704168
+22	28185452	G	T	-0.006896853
+22	28200176	G	A	-0.006474674
+22	28206912	C	A	-0.006175542
+22	28270372	G	T	-0.0006768204
+22	28412908	G	T	0.01763639
+22	28501414	T	C	-0.2304747
+22	29106733	C	T	-0.01074749
+22	29318724	T	C	0.001743333
+22	29378610	C	T	0.0006690876
+22	29478760	C	T	-0.03029428
+22	29533572	G	C	-0.01269604
+22	29626515	A	G	-0.0117113
+22	29630337	A	G	0.02658049
+22	29669648	C	G	-0.008550535
+22	29692497	T	G	0.001234896
+22	29837537	C	T	0.01321112
+22	29961986	T	G	0.001878853
+22	30151687	C	T	0.003418302
+22	30163526	G	A	0.01576261
+22	30494371	A	G	0.007959801
+22	30592487	G	C	-0.1047403
+22	30621613	A	C	-0.01382104
+22	30658082	C	T	-0.03794014
+22	30688659	T	C	0.0225714
+22	30762140	A	G	0.02079806
+22	30793137	A	G	-0.004609306
+22	30901592	C	T	-0.00833404
+22	30927975	T	C	0.003226189
+22	30953295	T	C	-0.00768579
+22	30992651	G	A	-0.025658
+22	31018975	C	T	0.04241226
+22	31032920	G	A	-0.02311985
+22	31063804	G	GT	-0.0002081808
+22	31114086	G	T	0.02825476
+22	31139653	A	G	2.640129E-06
+22	31214382	G	A	0.01137657
+22	31216506	C	T	0.005531311
+22	31272930	T	C	-0.001056118
+22	31333631	C	T	-0.01235089
+22	31378447	A	G	0.01020507
+22	31442308	A	G	-0.002479126
+22	31477361	C	G	-0.01263667
+22	31514348	G	A	0.00580324
+22	31521404	A	G	0.01097391
+22	31659495	C	T	0.02663412
+22	31884405	C	T	-0.0003950834
+22	32200849	T	C	0.01585735
+22	32341684	T	C	-0.02960328
+22	32559835	G	A	-0.02170436
+22	32569263	C	T	-0.001296006
+22	32624139	C	T	0.005619574
+22	32702816	A	G	-0.01534023
+22	32756652	G	A	0.02512177
+22	32831540	T	C	0.001868495
+22	32832874	T	C	6.028815E-05
+22	32853660	G	A	0.0138221
+22	32854391	C	A	0.0001960825
+22	32875190	A	G	-0.006426637
+22	32934713	C	CT	-0.009057754
+22	32952012	A	C	-0.00380248
+22	32954443	G	A	0.002210369
+22	32993032	C	T	-0.002429979
+22	32997766	T	C	-0.008424246
+22	33045573	T	C	-0.03107145
+22	33046110	G	C	-0.06954732
+22	33048039	T	C	0.01138346
+22	33056341	C	T	-0.06477198
+22	33108536	T	C	-0.03426392
+22	33108981	T	C	-0.07404035
+22	33116435	T	C	0.06542471
+22	33143528	G	A	0.02195059
+22	33146363	A	G	0.000810539
+22	33259625	C	T	0.02309793
+22	33336039	T	G	-0.02554387
+22	33408519	T	C	-0.0075563
+22	33660345	C	G	0.002190743
+22	33804893	C	T	0.006680774
+22	33844303	C	T	0.008923314
+22	33846914	T	C	0.006295378
+22	33898906	A	C	1.958759E-05
+22	34022284	A	G	-0.00257933
+22	34137784	G	A	0.004460828
+22	34208570	T	C	-0.003365869
+22	34217757	T	C	0.009289431
+22	34256923	A	C	0.01439384
+22	34265402	G	A	-0.0163661
+22	34284173	G	A	-0.02315559
+22	34296093	C	A	-0.004688326
+22	34378012	A	G	0.002276664
+22	34436795	C	T	0.0001337033
+22	34488452	A	G	-0.000428831
+22	34501541	A	G	0.002763614
+22	34514810	C	A	0.003976601
+22	34526428	C	T	0.01088864
+22	34583078	A	G	0.001802495
+22	34620754	T	C	0.01466546
+22	34691035	A	G	-0.0002082615
+22	34758540	T	C	0.005165532
+22	34851377	A	C	0.0137118
+22	35371707	T	C	-0.0004985554
+22	35382268	A	C	-0.004931336
+22	35419122	C	T	-0.01077953
+22	35478529	A	G	0.0001760523
+22	35481493	T	C	0.01056439
+22	35526281	G	A	-0.002766891
+22	35603836	A	G	-0.0001783939
+22	35660875	T	G	0.03988231
+22	35745196	G	T	0.0001750545
+22	35750980	A	G	-0.007651136
+22	35783413	G	A	0.001649791
+22	35918270	C	T	0.006918713
+22	35959242	A	G	0.01697538
+22	35962060	G	A	0.005181476
+22	35964158	G	C	0.002769931
+22	35984385	A	G	-0.01280623
+22	36001258	C	T	0.01342405
+22	36072262	T	C	0.00489549
+22	36180535	G	A	-0.03250252
+22	36517307	C	T	0.01366076
+22	36519596	A	C	-0.00349956
+22	36532058	A	G	-0.01214487
+22	36543489	C	G	0.007838149
+22	36600841	G	A	0.02644389
+22	36629633	C	A	-0.006871468
+22	36635967	G	A	-0.02634742
+22	36655735	A	G	-0.005385142
+22	36661646	A	G	-0.01560741
+22	36684354	C	T	-0.005170111
+22	36705622	A	G	0.01713234
+22	36708049	C	CTCCTGTGA	-0.05187051
+22	36751101	A	C	-0.0244065
+22	36764788	G	A	0.02784116
+22	36897427	C	T	0.02603792
+22	36900806	G	A	0.007366207
+22	36923144	T	C	-0.001875563
+22	36924714	G	A	-0.003632594
+22	36946643	T	G	0.01333137
+22	36954939	T	C	0.01105894
+22	36998907	T	C	-0.0006084687
+22	37001495	G	T	-0.01224147
+22	37013167	G	C	0.01866849
+22	37077364	C	T	0.007294257
+22	37080738	C	G	-0.004873355
+22	37101890	C	T	0.03991764
+22	37118535	A	G	-0.001713909
+22	37184521	G	A	0.006515894
+22	37206341	G	T	0.0002566936
+22	37256262	A	G	0.001152626
+22	37258503	C	T	-0.009761102
+22	37323988	T	C	-0.0073182
+22	37329545	G	A	0.005775806
+22	37337409	T	C	-0.02534399
+22	37343000	A	C	-0.0004011777
+22	37398195	T	C	-0.01001198
+22	37401532	A	G	-0.003244795
+22	37407109	C	G	0.04335972
+22	37477732	T	C	0.0003669548
+22	37507019	A	G	-0.0009259451
+22	37513316	A	G	0.001153887
+22	37532441	A	G	0.01802306
+22	37571497	G	A	-0.005785311
+22	37581383	T	C	0.03172492
+22	37621269	C	A	0.004460405
+22	37644621	T	C	-0.008386907
+22	37671896	A	G	0.02303688
+22	37679763	G	A	-0.002658396
+22	37720268	G	A	0.02120184
+22	37753256	C	T	0.008984539
+22	37757099	G	A	-0.01560347
+22	37780522	C	G	-0.01496708
+22	37800175	T	C	-0.005510833
+22	37846448	G	A	0.01152963
+22	37896749	C	T	0.005447068
+22	37908435	C	T	0.001909131
+22	37977481	T	C	0.01465308
+22	37992699	G	A	0.0008339179
+22	38032762	G	GA	0.01693041
+22	38054262	C	A	0.04354146
+22	38083101	C	T	-0.02092117
+22	38119213	A	G	0.03948165
+22	38122122	C	T	0.04377277
+22	38204089	T	C	0.02977743
+22	38435786	T	G	-0.007684278
+22	38544298	G	A	0.05090446
+22	38597378	T	G	-0.01997927
+22	38606780	G	A	-0.009182016
+22	38630272	C	T	0.007393137
+22	38663819	G	A	-0.006392021
+22	38673234	A	G	-0.01106705
+22	38685131	C	T	-0.004493352
+22	38695406	T	C	-0.01155972
+22	38708506	A	G	0.01701713
+22	38744184	C	T	-0.02112956
+22	38819613	A	G	-0.005625806
+22	38877461	G	T	0.001108728
+22	38918894	G	T	-0.008094286
+22	38928269	G	T	-0.02114917
+22	39027286	C	CAG	0.003840735
+22	39067524	G	A	0.01200232
+22	39159201	C	T	0.003096214
+22	39178701	G	A	0.002148449
+22	39260032	T	C	0.03574634
+22	39268785	T	G	0.009377414
+22	39281774	G	T	0.03816951
+22	39300265	C	T	0.03540156
+22	39332623	T	C	-0.004449842
+22	39415780	G	A	0.01479946
+22	39448465	A	G	0.003065974
+22	39480697	G	A	-0.04005617
+22	39487665	G	A	-0.0001218988
+22	39493294	C	T	-0.03115929
+22	39510995	G	A	-0.02069106
+22	39542292	A	G	0.009653575
+22	39543000	T	C	-0.004069841
+22	39573724	A	C	0.02683694
+22	39575692	A	C	0.01451305
+22	39581277	A	C	0.01766406
+22	39626572	A	G	-0.02901981
+22	39658626	C	T	0.004177065
+22	39665395	G	A	0.01264611
+22	39687484	G	A	0.005418141
+22	39708279	A	G	-0.04281532
+22	39708357	T	C	0.008605574
+22	39793066	G	T	0.03658209
+22	39798127	G	A	0.002302129
+22	39843409	T	C	0.01065699
+22	39865475	G	A	0.001588501
+22	39932516	A	G	-0.01179841
+22	39963426	G	A	-0.01503908
+22	40023636	C	T	0.006443146
+22	40046176	C	T	-0.0007416552
+22	40067818	T	C	0.00455936
+22	40092864	G	A	0.02400297
+22	40127293	T	C	-0.0008870038
+22	40358148	T	C	-0.01079902
+22	40420786	G	C	-0.008092115
+22	40454069	G	T	0.00789888
+22	40541981	G	A	0.0174264
+22	40652873	G	A	0.005853057
+22	40676672	G	T	-0.001894274
+22	40729614	G	A	0.0195994
+22	40820151	C	T	-0.01628066
+22	40986372	G	C	-0.01983507
+22	41494925	A	G	-0.02918069
+22	41646738	G	A	0.0003521847
+22	41680898	T	C	0.01402732
+22	41704872	T	C	6.681484E-05
+22	41791536	C	T	-5.572333E-05
+22	41895409	A	G	-0.04407217
+22	41929175	G	T	-0.03186844
+22	42089623	C	T	0.00532234
+22	42095658	G	T	0.03846131
+22	42210985	C	T	-0.00313971
+22	42279653	G	A	-0.006596336
+22	42341308	G	A	-0.0006862491
+22	42524243	C	CT	-0.01181191
+22	42672124	G	A	-0.005278171
+22	42691238	T	C	-0.01642396
+22	42813753	C	T	-0.00386775
+22	42867898	G	A	-0.001352327
+22	42912097	T	C	-0.0007295657
+22	42932317	A	G	-0.05768556
+22	43010817	A	G	0.01722077
+22	43080028	T	C	-0.0005527551
+22	43096507	T	C	-0.005556102
+22	43112475	T	C	-0.01350273
+22	43114824	G	A	-0.01963192
+22	43115576	C	T	-0.01880097
+22	43154299	G	A	-0.001621113
+22	43159948	T	C	-0.007980584
+22	43206950	C	A	-0.005783037
+22	43218397	C	T	-0.003976636
+22	43283255	C	A	-0.01426668
+22	43290583	C	T	-0.03955775
+22	43333156	A	G	-0.03127845
+22	43426262	G	A	-0.00366804
+22	43483242	T	C	-0.02540203
+22	43515108	C	T	-0.01570749
+22	43529314	C	G	0.01738127
+22	43551513	G	A	0.02565386
+22	43558972	A	G	-0.01962819
+22	43577214	T	C	-0.02270478
+22	43579049	C	T	-0.001193909
+22	43610207	G	A	-0.007621661
+22	43623395	G	C	-0.04852519
+22	43640512	C	T	-0.005533207
+22	43649701	C	T	0.07724845
+22	43661080	T	C	-0.04251741
+22	43683088	A	G	-0.003582388
+22	43707996	A	G	-0.02547044
+22	43711080	C	G	-0.005784446
+22	43721519	C	A	0.000365885
+22	43729401	C	T	0.008557013
+22	43763757	T	G	-0.0178981
+22	43836198	G	T	0.002427697
+22	43976396	A	G	-0.01277457
+22	44031042	C	T	0.003593107
+22	44193626	C	A	-0.006865434
+22	44221247	G	A	0.01833991
+22	44296372	T	C	0.006169212
+22	44298838	A	G	0.007441756
+22	44342116	G	A	0.02810328
+22	44368122	G	A	0.0129968
+22	44379838	G	A	0.001648422
+22	44380033	C	T	-0.002136788
+22	44395451	C	T	-0.006698507
+22	44419871	C	T	0.0181613
+22	44424108	T	C	0.01036733
+22	44467899	C	T	-0.002592364
+22	44498134	T	C	0.007281423
+22	44522312	C	T	-0.0002636447
+22	44526130	G	A	-0.00388298
+22	44530286	A	G	0.02528159
+22	44530420	C	T	-0.01233654
+22	44548944	G	A	-0.003947209
+22	44551755	G	A	0.01262458
+22	44566434	A	G	-0.004290306
+22	44581046	T	C	-0.0147995
+22	44643161	C	T	0.01439493
+22	44677081	C	T	-0.01030513
+22	44681612	G	A	-0.001269762
+22	44695088	T	C	0.006324859
+22	44707716	G	T	0.002288939
+22	44725343	G	A	0.003534678
+22	44738406	G	A	0.02320049
+22	44746729	A	G	-0.01754216
+22	44751158	G	A	-0.006539695
+22	44757439	A	G	0.02480295
+22	44759519	G	A	0.002111274
+22	44761797	A	T	-0.00531172
+22	44763352	C	G	0.01452737
+22	44783779	G	A	0.009142699
+22	44791807	C	T	-0.02371876
+22	44818986	C	T	-0.006740622
+22	44894913	G	A	-5.179871E-05
+22	45058431	C	T	0.01098259
+22	45066035	A	G	-0.01484374
+22	45069410	T	C	0.01530441
+22	45081330	G	A	0.00135012
+22	45082168	C	A	0.003663354
+22	45090008	G	A	0.002811861
+22	45116664	C	T	0.01247728
+22	45244930	T	C	-0.01450041
+22	45258457	G	A	-0.003500519
+22	45323989	T	C	0.001111338
+22	45415987	A	G	-0.01398184
+22	45451355	G	A	-0.005566982
+22	45471607	C	T	0.01148978
+22	45497738	C	T	-0.005029327
+22	45502829	C	T	-0.03893521
+22	45519040	T	G	0.002377071
+22	45523391	A	G	0.01318997
+22	45573450	C	A	0.0043856
+22	45589490	G	A	-0.008350439
+22	45668012	T	C	0.01286879
+22	45671343	G	A	-2.940682E-06
+22	45672574	T	C	0.005743608
+22	45693923	A	G	-0.002675069
+22	45718743	G	A	-0.02092804
+22	45723807	C	G	0.001670159
+22	45728370	A	G	0.0001879231
+22	45741537	G	T	0.01420045
+22	45749983	T	G	-0.04591012
+22	45809624	A	C	0.002185772
+22	45821935	A	G	0.02250782
+22	45837410	G	A	-0.002756449
+22	45846371	T	C	0.07910102
+22	45864934	T	C	0.008535181
+22	45871507	G	C	-0.007764056
+22	45892656	G	T	-0.003885653
+22	45897997	C	T	0.0003935204
+22	45929577	C	T	-0.02532217
+22	45936350	A	G	-0.008001698
+22	45942726	T	G	-0.01415551
+22	45996298	G	A	0.05643525
+22	46009063	G	A	0.006464843
+22	46022070	G	A	0.0224674
+22	46155548	G	C	-0.0324747
+22	46207955	C	T	-0.001354554
+22	46236425	A	G	0.08398423
+22	46275529	T	C	0.0022643
+22	46287720	A	G	-0.02237482
+22	46289699	T	C	0.01872124
+22	46303347	T	C	-0.01283734
+22	46316057	A	G	0.02312579
+22	46337043	G	C	0.01701173
+22	46347519	C	T	0.01574289
+22	46364161	A	G	-0.04466341
+22	46381234	G	A	0.04730559
+22	46396925	G	A	0.001783944
+22	46403715	A	G	-0.02132589
+22	46406782	A	C	0.08439466
+22	46445002	G	C	-0.07613496
+22	46458123	G	T	0.03328073
+22	46482948	C	T	0.04241879
+22	46486508	C	T	-0.00968439
+22	46493852	T	C	-0.00675858
+22	46499120	C	G	-0.009873118
+22	46502870	T	C	-0.0179214
+22	46561713	G	A	0.02604703
+22	46586110	A	G	-0.001256735
+22	46592168	C	T	0.01417055
+22	46614274	G	C	-0.05854014
+22	46627603	T	C	0.08004024
+22	46760086	T	C	0.003229515
+22	46782382	T	C	-0.02470821
+22	46807234	C	T	0.002324176
+22	46837114	G	A	0.000944073
+22	46888399	T	C	0.009911095
+22	46907779	G	A	0.00653144
+22	46909355	T	G	-0.004780494
+22	46914277	A	C	0.009689535
+22	46943687	G	A	-0.0130366
+22	46985917	A	G	0.01893397
+22	47021226	G	A	-0.01322949
+22	47095235	A	C	-0.1156013
+22	47109621	C	T	0.0004322858
+22	47125474	G	A	-0.01746025
+22	47147117	T	C	-0.02418349
+22	47156703	C	T	0.0262897
+22	47245836	A	G	0.001880575
+22	47271747	C	T	0.001055264
+22	47301822	C	T	0.003032158
+22	47345487	T	C	-0.002945945
+22	47372368	T	C	0.02067644
+22	47380606	C	T	0.04041426
+22	47437808	C	T	0.001683027
+22	47450911	A	G	0.01624479
+22	47511864	A	C	-0.004226735
+22	47519476	T	C	-0.003954111
+22	47529458	A	G	-0.0003602848
+22	47531320	T	C	-0.006899703
+22	47548321	T	C	0.004925401
+22	47568291	C	T	0.007726693
+22	47571203	A	G	-0.009744751
+22	47574009	C	T	-0.00532701
+22	47642100	T	C	0.006976251
+22	47657635	T	C	0.001798943
+22	47683805	C	T	-0.03475544
+22	47720973	T	C	-0.007868172
+22	47821952	G	A	-0.000885428
+22	47893053	A	G	-0.02449056
+22	47935365	C	T	-0.001599879
+22	47961708	G	T	-0.003593525
+22	47986332	T	C	-0.003976592
+22	48154645	C	T	0.007608639
+22	48165452	C	CT	0.002039503
+22	48207318	T	C	-0.009725168
+22	48213904	G	C	-0.01220367
+22	48215904	A	G	-2.488244E-05
+22	48220460	T	C	-0.002702163
+22	48230941	C	A	-0.001129522
+22	48271961	A	G	-0.005053446
+22	48284025	T	C	-0.003344182
+22	48297953	C	T	-0.01046958
+22	48362290	G	A	-0.02367254
+22	48362914	C	A	-0.003167719
+22	48387670	A	G	-0.008243989
+22	48415446	C	T	0.002130715
+22	48460730	T	C	0.002682476
+22	48491160	T	C	0.001257794
+22	48519794	C	T	0.003680757
+22	48537775	G	A	0.002134692
+22	48543566	T	C	0.007314089
+22	48593037	C	T	0.009084708
+22	48687509	C	T	-0.0277196
+22	48692033	T	C	-0.02126264
+22	48699617	T	C	0.0005093107
+22	48717568	T	C	-0.0008190281
+22	48811946	C	T	0.007916515
+22	48823357	G	A	0.01464317
+22	48840428	A	C	0.003711229
+22	48851612	T	C	-0.005887765
+22	48874310	T	C	-0.01106607
+22	48968070	C	T	0.01280691
+22	48991385	T	C	-0.01234119
+22	49004050	G	A	0.02290755
+22	49014565	A	G	0.001555565
+22	49086481	T	C	-0.006196369
+22	49107173	T	C	0.01277272
+22	49180915	A	G	0.006346977
+22	49262579	A	G	0.02657134
+22	49270317	C	T	0.001447665
+22	49313196	A	G	-0.007055532
+22	49335230	T	C	-0.006548281
+22	49366123	T	C	0.01136486
+22	49372356	G	C	-0.02420841
+22	49443666	T	C	0.01581736
+22	49496835	G	A	-0.01355414
+22	49524428	A	G	-0.004228482
+22	49530553	G	C	0.008197389
+22	49537845	T	C	0.0111255
+22	49557457	G	A	0.009401926
+22	49562666	C	A	0.01271701
+22	49574509	C	T	0.0004703177
+22	49579141	A	G	0.02448619
+22	49650863	T	C	0.006739571
+22	49662549	T	G	-0.005769464
+22	49665841	T	C	-0.0007037069
+22	49677464	A	G	-0.02177735
+22	49696067	C	T	-0.003309682
+22	49700272	T	G	-0.002541948
+22	49706433	T	C	-0.01719402
+22	49713835	G	A	-0.01370754
+22	49719264	A	C	-0.01067852
+22	49743627	G	A	-0.0005970581
+22	49800265	C	T	0.03098582
+22	49806863	A	G	0.003940447
+22	49830851	C	T	-0.002742706
+22	49834624	G	A	-0.002820163
+22	49843235	G	C	-0.0004458281
+22	49847501	T	G	0.002235016
+22	49861033	C	T	0.01721243
+22	49881321	A	G	-0.00051278
+22	49908804	G	A	-0.009455892
+22	49911222	G	T	-0.01389666
+22	49925268	A	G	0.01679984
+22	49927332	T	C	0.00039298
+22	50109212	T	C	0.01610819
+22	50118149	G	C	0.007024666
+22	50184484	G	T	0.01222581
+22	50219447	T	C	0.05091891
+22	50278568	G	A	-0.02340672
+22	50319170	G	A	0.01669806
+22	50350971	A	G	0.0264016
+22	50356693	C	T	0.003851499
+22	50435480	G	A	0.0166363
+22	50439626	A	G	-0.002722154
+22	50466542	C	T	-0.002560094
+22	50470516	T	C	-0.01621986
+22	50491150	G	A	0.01828674
+22	50515270	C	T	0.01439904
+22	50529850	C	T	0.02054628
+22	50570755	C	G	0.007077514
+22	50582626	G	A	-0.003588854
+22	50672154	A	G	0.007660848
+22	50722134	C	T	-0.01747164
+22	50722408	C	T	-0.001063465
+22	50728062	C	T	0.02159223
+22	50750481	T	C	0.01877272
+22	50758873	T	C	0.004001731
+22	50835040	A	G	-0.006374259
+22	50859049	C	T	0.0003480749
+22	50885775	G	A	-0.01358311
+22	50926768	T	C	0.001798498
+22	50928026	A	G	0.004775504
+22	50971266	C	T	0.02160893
+22	50989197	T	C	-0.01328884
+22	50989326	G	A	0.01037054
+22	50999681	G	A	-0.01226224
+22	51046163	T	C	-0.02754002
+22	51117580	C	T	0.03573542
+22	51171497	A	G	-0.01951606
+22	51174939	T	C	-0.006178519
diff --git a/tests/data/combine/scorefile_dominant_and_recessive.txt b/tests/data/combine/scorefile_dominant_and_recessive.txt
new file mode 100644
index 0000000..bbf23f0
--- /dev/null
+++ b/tests/data/combine/scorefile_dominant_and_recessive.txt
@@ -0,0 +1,838 @@
+#pgs_name=PGS001229_22_DominantRecessiveExample
+#genome_build=GRCh37
+chr_name	chr_position	effect_allele	other_allele	effect_weight	is_dominant	is_recessive
+22	17080378	G	A	0.01045457	TRUE	FALSE
+22	17300230	A	G	0.0001411475	FALSE	TRUE
+22	17318864	A	C	0.008166266	FALSE	FALSE
+22	17327595	T	C	0.007791641	FALSE	FALSE
+22	17409813	A	G	0.0003108784	FALSE	FALSE
+22	17450952	G	A	-0.03033983	FALSE	FALSE
+22	17492533	G	A	0.00388999	FALSE	FALSE
+22	17542810	C	T	0.00803629	FALSE	FALSE
+22	17565013	G	A	0.02135621	FALSE	FALSE
+22	17589209	T	C	0.003026491	FALSE	FALSE
+22	17600977	A	G	0.01581277	FALSE	FALSE
+22	17625915	A	G	-0.1172964	FALSE	FALSE
+22	17630486	A	C	0.01012909	FALSE	FALSE
+22	17633785	C	T	0.0023255	FALSE	FALSE
+22	17643689	A	G	0.003361814	FALSE	FALSE
+22	17669306	C	T	0.0214506	FALSE	FALSE
+22	17677699	T	C	-0.0007031384	FALSE	FALSE
+22	17680519	C	A	0.001079236	FALSE	FALSE
+22	17701234	G	A	0.004477145	FALSE	FALSE
+22	17703119	A	T	0.0007771872	FALSE	FALSE
+22	17718699	C	A	-0.01320632	FALSE	FALSE
+22	17721595	C	T	0.009480363	FALSE	FALSE
+22	17727648	T	C	0.007811685	FALSE	FALSE
+22	17738177	G	A	-0.004719812	FALSE	FALSE
+22	17749096	A	G	-0.005244795	FALSE	FALSE
+22	17770181	G	T	-0.03101703	FALSE	FALSE
+22	17793969	G	A	0.01774444	FALSE	FALSE
+22	17815696	G	C	-0.00551609	FALSE	FALSE
+22	17827684	G	A	-0.005944752	FALSE	FALSE
+22	17831813	T	C	0.01061587	FALSE	FALSE
+22	17844929	T	G	0.001717643	FALSE	FALSE
+22	17850661	T	C	-0.02805489	FALSE	FALSE
+22	17887534	A	G	0.0007723542	FALSE	FALSE
+22	17887725	A	G	0.007472703	FALSE	FALSE
+22	17958221	C	A	-0.02098647	FALSE	FALSE
+22	18036253	G	A	-0.01772981	FALSE	FALSE
+22	18038786	A	G	-0.002119071	FALSE	FALSE
+22	18262301	A	T	-0.005065485	FALSE	FALSE
+22	18289204	A	G	0.005306345	FALSE	FALSE
+22	18295575	C	T	0.02300129	FALSE	FALSE
+22	18296238	G	A	-0.005665446	FALSE	FALSE
+22	18319179	T	C	0.03440642	FALSE	FALSE
+22	18393534	A	C	0.01764269	FALSE	FALSE
+22	18439958	T	C	-0.002261707	FALSE	FALSE
+22	18483388	G	A	0.03318724	FALSE	FALSE
+22	18488883	C	G	-0.0191918	FALSE	FALSE
+22	18489048	C	A	0.01233198	FALSE	FALSE
+22	18495470	A	G	-0.005804926	FALSE	FALSE
+22	18537145	G	A	-0.004930116	FALSE	FALSE
+22	18571008	A	G	-8.844726E-05	FALSE	FALSE
+22	18584433	C	T	-0.001169893	FALSE	FALSE
+22	18631365	T	C	-0.001551714	FALSE	FALSE
+22	18650682	T	C	-0.01313784	FALSE	FALSE
+22	18890037	A	G	0.05968921	FALSE	FALSE
+22	18891398	G	A	0.006891943	FALSE	FALSE
+22	18892575	A	G	-0.00224447	FALSE	FALSE
+22	18915963	A	G	0.003719756	FALSE	FALSE
+22	18959581	T	C	0.006464581	FALSE	FALSE
+22	18963340	A	G	-0.01397565	FALSE	FALSE
+22	18970915	T	C	-0.001507131	FALSE	FALSE
+22	19024651	T	C	-0.00350575	FALSE	FALSE
+22	19121872	A	G	0.01644046	FALSE	FALSE
+22	19135603	A	G	-0.02970077	FALSE	FALSE
+22	19190143	T	C	0.003268027	FALSE	FALSE
+22	19263698	T	C	0.02057255	FALSE	FALSE
+22	19292446	G	T	0.01153989	FALSE	FALSE
+22	19371052	T	C	0.01055134	FALSE	FALSE
+22	19420109	C	T	-0.008628228	FALSE	FALSE
+22	19451186	A	C	0.02141029	FALSE	FALSE
+22	19518079	C	T	0.005372247	FALSE	FALSE
+22	19581331	T	C	0.01686942	FALSE	FALSE
+22	19593854	C	A	0.0006544249	FALSE	FALSE
+22	19606703	G	A	0.02070121	FALSE	FALSE
+22	19649005	A	G	0.002868601	FALSE	FALSE
+22	19735854	C	T	0.006262962	FALSE	FALSE
+22	19738355	T	C	4.97384E-05	FALSE	FALSE
+22	19770886	A	G	-0.01013929	FALSE	FALSE
+22	19781823	T	C	0.02481609	FALSE	FALSE
+22	19873357	T	C	0.0116302	FALSE	FALSE
+22	19907099	A	C	-0.0267645	FALSE	FALSE
+22	19968597	T	C	-0.02203945	FALSE	FALSE
+22	20046344	G	A	-0.009801428	FALSE	FALSE
+22	20084821	C	T	-0.02232886	FALSE	FALSE
+22	20185457	A	G	0.006892171	FALSE	FALSE
+22	20189077	T	C	0.01738215	FALSE	FALSE
+22	20219648	A	G	0.009307625	FALSE	FALSE
+22	20248391	A	G	-0.005405845	FALSE	FALSE
+22	20267213	A	G	0.006713242	FALSE	FALSE
+22	20286099	G	T	0.01574758	FALSE	FALSE
+22	20749042	G	A	0.006603339	FALSE	FALSE
+22	20754039	A	G	-0.01181141	FALSE	FALSE
+22	20775167	T	C	0.01160113	FALSE	FALSE
+22	20780296	A	G	0.06735311	FALSE	FALSE
+22	20789074	C	T	0.02844307	FALSE	FALSE
+22	20791438	A	C	0.0473474	FALSE	FALSE
+22	20793914	C	T	0.007009781	FALSE	FALSE
+22	20839810	T	G	0.003947346	FALSE	FALSE
+22	20860931	T	C	0.0005613511	FALSE	FALSE
+22	20979980	G	A	0.003231665	FALSE	FALSE
+22	20991771	G	A	0.004226765	FALSE	FALSE
+22	21075537	C	A	-0.002096453	FALSE	FALSE
+22	21154393	G	T	-0.004297086	FALSE	FALSE
+22	21323357	C	T	-0.006041745	FALSE	FALSE
+22	21331918	G	C	-0.002280912	FALSE	FALSE
+22	21334924	C	G	-0.02031369	FALSE	FALSE
+22	21356824	A	G	0.01476577	FALSE	FALSE
+22	21386019	A	G	0.01435557	FALSE	FALSE
+22	21449028	G	A	-0.01537701	FALSE	FALSE
+22	21463515	A	G	-0.01335614	FALSE	FALSE
+22	21982892	T	C	-0.06373335	FALSE	FALSE
+22	22001704	T	G	0.02809584	FALSE	FALSE
+22	22062480	T	C	0.0529113	FALSE	FALSE
+22	22080735	G	A	-0.0139426	FALSE	FALSE
+22	22151939	C	A	-0.008287849	FALSE	FALSE
+22	22163425	G	A	0.05518983	FALSE	FALSE
+22	22307519	C	G	-0.003486191	FALSE	FALSE
+22	22351283	G	A	-0.0007483763	FALSE	FALSE
+22	22394291	AG	A	0.004320583	FALSE	FALSE
+22	22395754	T	C	0.002587971	FALSE	FALSE
+22	22424302	A	C	0.0011408	FALSE	FALSE
+22	22473905	C	A	0.01226009	FALSE	FALSE
+22	22550450	G	C	0.01773244	FALSE	FALSE
+22	22561610	C	T	-0.006207024	FALSE	FALSE
+22	22581369	G	A	-0.006272413	FALSE	FALSE
+22	22584678	A	G	-0.00217647	FALSE	FALSE
+22	22711786	T	C	0.007779875	FALSE	FALSE
+22	22726372	T	C	0.00349632	FALSE	FALSE
+22	22762771	C	T	0.01252501	FALSE	FALSE
+22	22769923	G	A	-0.01103632	FALSE	FALSE
+22	22869742	A	C	-0.002412657	FALSE	FALSE
+22	22871922	A	G	-0.002769974	FALSE	FALSE
+22	22929268	T	C	-0.007035723	FALSE	FALSE
+22	23001481	A	G	0.007524178	FALSE	FALSE
+22	23022520	T	C	0.002175257	FALSE	FALSE
+22	23064982	A	C	-0.01255076	FALSE	FALSE
+22	23249440	A	C	0.02085816	FALSE	FALSE
+22	23268677	A	G	0.01337349	FALSE	FALSE
+22	23279456	C	G	-0.01371401	FALSE	FALSE
+22	23282286	C	T	0.004994329	FALSE	FALSE
+22	23325722	C	T	0.0008506657	FALSE	FALSE
+22	23412058	A	G	-0.009545553	FALSE	FALSE
+22	23627369	G	A	-0.01900175	FALSE	FALSE
+22	23644425	G	A	-0.0009106953	FALSE	FALSE
+22	23649242	G	T	0.001061643	FALSE	FALSE
+22	23794844	G	A	-0.01198736	FALSE	FALSE
+22	23804670	G	T	-0.001119846	FALSE	FALSE
+22	23819697	T	G	-0.01028722	FALSE	FALSE
+22	23873076	T	C	0.009509027	FALSE	FALSE
+22	23892145	T	C	0.0135128	FALSE	FALSE
+22	23925779	C	T	-0.004127647	FALSE	FALSE
+22	23960187	T	C	-0.008475905	FALSE	FALSE
+22	24035970	T	C	-0.001334318	FALSE	FALSE
+22	24086107	G	A	-0.01652957	FALSE	FALSE
+22	24105789	A	G	0.01813091	FALSE	FALSE
+22	24106448	A	G	0.001834095	FALSE	FALSE
+22	24186809	C	T	-0.01426541	FALSE	FALSE
+22	24235360	G	A	0.0003168635	FALSE	FALSE
+22	24255296	T	C	0.01624252	FALSE	FALSE
+22	24300540	T	C	-0.00322576	FALSE	FALSE
+22	24376584	A	G	-0.006223068	FALSE	FALSE
+22	24406778	A	C	0.00304654	FALSE	FALSE
+22	24618331	G	A	-0.0006506681	FALSE	FALSE
+22	24802564	A	G	-0.006695797	FALSE	FALSE
+22	24912232	T	C	-0.01536303	FALSE	FALSE
+22	24943582	A	G	-0.001687764	FALSE	FALSE
+22	24995668	G	A	-0.03537331	FALSE	FALSE
+22	25123505	C	T	-0.0160099	FALSE	FALSE
+22	25145094	T	C	-0.005584047	FALSE	FALSE
+22	25145453	T	C	-0.001388536	FALSE	FALSE
+22	25185823	A	G	-0.009228375	FALSE	FALSE
+22	25265972	A	G	0.01088906	FALSE	FALSE
+22	25309448	A	G	-0.002238693	FALSE	FALSE
+22	25363411	A	G	0.004035775	FALSE	FALSE
+22	25410895	G	A	0.0009720734	FALSE	FALSE
+22	25442369	C	T	0.01660527	FALSE	FALSE
+22	25454658	C	A	0.01200285	FALSE	FALSE
+22	25465065	C	T	0.01320801	FALSE	FALSE
+22	25524916	C	T	0.01147501	FALSE	FALSE
+22	25603008	T	C	-0.01262741	FALSE	FALSE
+22	25619025	G	T	-0.01212511	FALSE	FALSE
+22	25621591	T	C	0.01051851	FALSE	FALSE
+22	25643483	T	G	0.01373474	FALSE	FALSE
+22	25661725	A	G	-0.005936431	FALSE	FALSE
+22	25667883	G	A	0.01547775	FALSE	FALSE
+22	25668730	A	C	0.02616493	FALSE	FALSE
+22	25678577	T	C	0.0304018	FALSE	FALSE
+22	25761309	T	C	-0.001760112	FALSE	FALSE
+22	25761936	T	C	-0.005171998	FALSE	FALSE
+22	25938977	T	C	0.01966116	FALSE	FALSE
+22	25994013	A	G	0.0006268228	FALSE	FALSE
+22	26081873	T	C	0.05232603	FALSE	FALSE
+22	26132612	A	G	-0.006457239	FALSE	FALSE
+22	26133775	T	C	-0.001181527	FALSE	FALSE
+22	26159289	A	G	-0.008399401	FALSE	FALSE
+22	26181767	C	T	0.01044769	FALSE	FALSE
+22	26190915	G	A	0.004287533	FALSE	FALSE
+22	26218164	G	A	-0.002803502	FALSE	FALSE
+22	26231312	C	G	0.006105629	FALSE	FALSE
+22	26237826	C	T	0.004981479	FALSE	FALSE
+22	26239850	A	C	0.004144037	FALSE	FALSE
+22	26273893	C	G	0.005616213	FALSE	FALSE
+22	26278128	G	T	-0.003965338	FALSE	FALSE
+22	26280462	T	C	-0.0008324497	FALSE	FALSE
+22	26290588	T	C	-0.0130732	FALSE	FALSE
+22	26292659	G	A	4.294309E-05	FALSE	FALSE
+22	26343593	G	A	0.007813758	FALSE	FALSE
+22	26369358	T	C	-0.00483665	FALSE	FALSE
+22	26390964	A	G	-0.007849451	FALSE	FALSE
+22	26415475	T	C	-0.001219281	FALSE	FALSE
+22	26456367	G	A	-0.01285326	FALSE	FALSE
+22	26460519	T	C	-0.008695338	FALSE	FALSE
+22	26528054	A	G	0.01973023	FALSE	FALSE
+22	26617260	T	A	-0.01384025	FALSE	FALSE
+22	26638906	G	T	0.01229772	FALSE	FALSE
+22	26735648	A	G	0.0007879673	FALSE	FALSE
+22	26782251	G	A	0.0005096459	FALSE	FALSE
+22	26812632	C	T	-0.01850814	FALSE	FALSE
+22	26939781	C	T	-0.0009222796	FALSE	FALSE
+22	26960648	A	C	-0.005679255	FALSE	FALSE
+22	27038865	T	G	-0.0001487706	FALSE	FALSE
+22	27042828	A	G	0.02957737	FALSE	FALSE
+22	27161060	A	G	0.002844558	FALSE	FALSE
+22	27191643	T	C	0.008953731	FALSE	FALSE
+22	27216426	G	A	0.00912099	FALSE	FALSE
+22	27217018	A	G	0.01510616	FALSE	FALSE
+22	27240025	T	G	-0.0297174	FALSE	FALSE
+22	27242642	G	A	-0.009822927	FALSE	FALSE
+22	27246070	C	T	-0.001554199	FALSE	FALSE
+22	27252454	C	T	-0.006560251	FALSE	FALSE
+22	27264880	G	T	-0.01323094	FALSE	FALSE
+22	27337886	A	G	-0.009600014	FALSE	FALSE
+22	27339284	T	C	-0.009944488	FALSE	FALSE
+22	27353810	T	C	-0.002171555	FALSE	FALSE
+22	27370273	T	C	-0.009798478	FALSE	FALSE
+22	27378884	A	G	0.05145072	FALSE	FALSE
+22	27398749	C	T	0.001012263	FALSE	FALSE
+22	27403571	C	T	-0.01745865	FALSE	FALSE
+22	27405012	T	C	-0.005425419	FALSE	FALSE
+22	27415255	C	T	-0.01499362	FALSE	FALSE
+22	27426628	G	C	0.0228946	FALSE	FALSE
+22	27430724	A	G	-0.007068064	FALSE	FALSE
+22	27435577	C	T	-0.008632412	FALSE	FALSE
+22	27487580	G	A	0.003691502	FALSE	FALSE
+22	27498426	A	G	-0.006801544	FALSE	FALSE
+22	27526095	G	A	-0.0008086267	FALSE	FALSE
+22	27563274	C	A	0.0136965	FALSE	FALSE
+22	27584680	A	G	-0.002139188	FALSE	FALSE
+22	27628151	C	G	0.02130389	FALSE	FALSE
+22	27652290	T	G	0.004815735	FALSE	FALSE
+22	27660675	A	G	0.004899654	FALSE	FALSE
+22	27674832	G	T	0.0001248065	FALSE	FALSE
+22	27718775	A	G	0.02292384	FALSE	FALSE
+22	27729742	G	A	0.004951261	FALSE	FALSE
+22	27762155	C	T	0.00485666	FALSE	FALSE
+22	27781736	A	C	-0.008336242	FALSE	FALSE
+22	27829565	G	A	0.00285409	FALSE	FALSE
+22	27832985	G	C	-0.01668955	FALSE	FALSE
+22	27836311	G	A	-0.00775625	FALSE	FALSE
+22	27839704	T	C	-0.02492106	FALSE	FALSE
+22	27864471	A	C	0.00218995	FALSE	FALSE
+22	27873024	G	A	0.002721729	FALSE	FALSE
+22	27883265	G	A	0.02961735	FALSE	FALSE
+22	27890684	A	G	-0.008057355	FALSE	FALSE
+22	27927298	T	C	0.02054268	FALSE	FALSE
+22	27934290	G	A	0.004751755	FALSE	FALSE
+22	27951176	A	G	-0.0004329547	FALSE	FALSE
+22	27974819	C	A	0.01439093	FALSE	FALSE
+22	27975451	G	A	-0.03648208	FALSE	FALSE
+22	28007741	C	T	-0.01635917	FALSE	FALSE
+22	28016883	C	A	0.008564085	FALSE	FALSE
+22	28046561	T	C	0.01535905	FALSE	FALSE
+22	28060034	A	G	0.03097228	FALSE	FALSE
+22	28076058	C	T	0.02848654	FALSE	FALSE
+22	28094845	G	A	-0.02659077	FALSE	FALSE
+22	28130130	C	T	-0.01640387	FALSE	FALSE
+22	28136977	A	C	-0.003962775	FALSE	FALSE
+22	28150109	G	A	0.0006071392	FALSE	FALSE
+22	28150815	A	G	0.01604724	FALSE	FALSE
+22	28151825	A	G	-0.005390282	FALSE	FALSE
+22	28155404	T	C	0.005030388	FALSE	FALSE
+22	28172577	G	T	0.005704168	FALSE	FALSE
+22	28185452	G	T	-0.006896853	FALSE	FALSE
+22	28200176	G	A	-0.006474674	FALSE	FALSE
+22	28206912	C	A	-0.006175542	FALSE	FALSE
+22	28270372	G	T	-0.0006768204	FALSE	FALSE
+22	28412908	G	T	0.01763639	FALSE	FALSE
+22	28501414	T	C	-0.2304747	FALSE	FALSE
+22	29106733	C	T	-0.01074749	FALSE	FALSE
+22	29318724	T	C	0.001743333	FALSE	FALSE
+22	29378610	C	T	0.0006690876	FALSE	FALSE
+22	29478760	C	T	-0.03029428	FALSE	FALSE
+22	29533572	G	C	-0.01269604	FALSE	FALSE
+22	29626515	A	G	-0.0117113	FALSE	FALSE
+22	29630337	A	G	0.02658049	FALSE	FALSE
+22	29669648	C	G	-0.008550535	FALSE	FALSE
+22	29692497	T	G	0.001234896	FALSE	FALSE
+22	29837537	C	T	0.01321112	FALSE	FALSE
+22	29961986	T	G	0.001878853	FALSE	FALSE
+22	30151687	C	T	0.003418302	FALSE	FALSE
+22	30163526	G	A	0.01576261	FALSE	FALSE
+22	30494371	A	G	0.007959801	FALSE	FALSE
+22	30592487	G	C	-0.1047403	FALSE	FALSE
+22	30621613	A	C	-0.01382104	FALSE	FALSE
+22	30658082	C	T	-0.03794014	FALSE	FALSE
+22	30688659	T	C	0.0225714	FALSE	FALSE
+22	30762140	A	G	0.02079806	FALSE	FALSE
+22	30793137	A	G	-0.004609306	FALSE	FALSE
+22	30901592	C	T	-0.00833404	FALSE	FALSE
+22	30927975	T	C	0.003226189	FALSE	FALSE
+22	30953295	T	C	-0.00768579	FALSE	FALSE
+22	30992651	G	A	-0.025658	FALSE	FALSE
+22	31018975	C	T	0.04241226	FALSE	FALSE
+22	31032920	G	A	-0.02311985	FALSE	FALSE
+22	31063804	G	GT	-0.0002081808	FALSE	FALSE
+22	31114086	G	T	0.02825476	FALSE	FALSE
+22	31139653	A	G	2.640129E-06	FALSE	FALSE
+22	31214382	G	A	0.01137657	FALSE	FALSE
+22	31216506	C	T	0.005531311	FALSE	FALSE
+22	31272930	T	C	-0.001056118	FALSE	FALSE
+22	31333631	C	T	-0.01235089	FALSE	FALSE
+22	31378447	A	G	0.01020507	FALSE	FALSE
+22	31442308	A	G	-0.002479126	FALSE	FALSE
+22	31477361	C	G	-0.01263667	FALSE	FALSE
+22	31514348	G	A	0.00580324	FALSE	FALSE
+22	31521404	A	G	0.01097391	FALSE	FALSE
+22	31659495	C	T	0.02663412	FALSE	FALSE
+22	31884405	C	T	-0.0003950834	FALSE	FALSE
+22	32200849	T	C	0.01585735	FALSE	FALSE
+22	32341684	T	C	-0.02960328	FALSE	FALSE
+22	32559835	G	A	-0.02170436	FALSE	FALSE
+22	32569263	C	T	-0.001296006	FALSE	FALSE
+22	32624139	C	T	0.005619574	FALSE	FALSE
+22	32702816	A	G	-0.01534023	FALSE	FALSE
+22	32756652	G	A	0.02512177	FALSE	FALSE
+22	32831540	T	C	0.001868495	FALSE	FALSE
+22	32832874	T	C	6.028815E-05	FALSE	FALSE
+22	32853660	G	A	0.0138221	FALSE	FALSE
+22	32854391	C	A	0.0001960825	FALSE	FALSE
+22	32875190	A	G	-0.006426637	FALSE	FALSE
+22	32934713	C	CT	-0.009057754	FALSE	FALSE
+22	32952012	A	C	-0.00380248	FALSE	FALSE
+22	32954443	G	A	0.002210369	FALSE	FALSE
+22	32993032	C	T	-0.002429979	FALSE	FALSE
+22	32997766	T	C	-0.008424246	FALSE	FALSE
+22	33045573	T	C	-0.03107145	FALSE	FALSE
+22	33046110	G	C	-0.06954732	FALSE	FALSE
+22	33048039	T	C	0.01138346	FALSE	FALSE
+22	33056341	C	T	-0.06477198	FALSE	FALSE
+22	33108536	T	C	-0.03426392	FALSE	FALSE
+22	33108981	T	C	-0.07404035	FALSE	FALSE
+22	33116435	T	C	0.06542471	FALSE	FALSE
+22	33143528	G	A	0.02195059	FALSE	FALSE
+22	33146363	A	G	0.000810539	FALSE	FALSE
+22	33259625	C	T	0.02309793	FALSE	FALSE
+22	33336039	T	G	-0.02554387	FALSE	FALSE
+22	33408519	T	C	-0.0075563	FALSE	FALSE
+22	33660345	C	G	0.002190743	FALSE	FALSE
+22	33804893	C	T	0.006680774	FALSE	FALSE
+22	33844303	C	T	0.008923314	FALSE	FALSE
+22	33846914	T	C	0.006295378	FALSE	FALSE
+22	33898906	A	C	1.958759E-05	FALSE	FALSE
+22	34022284	A	G	-0.00257933	FALSE	FALSE
+22	34137784	G	A	0.004460828	FALSE	FALSE
+22	34208570	T	C	-0.003365869	FALSE	FALSE
+22	34217757	T	C	0.009289431	FALSE	FALSE
+22	34256923	A	C	0.01439384	FALSE	FALSE
+22	34265402	G	A	-0.0163661	FALSE	FALSE
+22	34284173	G	A	-0.02315559	FALSE	FALSE
+22	34296093	C	A	-0.004688326	FALSE	FALSE
+22	34378012	A	G	0.002276664	FALSE	FALSE
+22	34436795	C	T	0.0001337033	FALSE	FALSE
+22	34488452	A	G	-0.000428831	FALSE	FALSE
+22	34501541	A	G	0.002763614	FALSE	FALSE
+22	34514810	C	A	0.003976601	FALSE	FALSE
+22	34526428	C	T	0.01088864	FALSE	FALSE
+22	34583078	A	G	0.001802495	FALSE	FALSE
+22	34620754	T	C	0.01466546	FALSE	FALSE
+22	34691035	A	G	-0.0002082615	FALSE	FALSE
+22	34758540	T	C	0.005165532	FALSE	FALSE
+22	34851377	A	C	0.0137118	FALSE	FALSE
+22	35371707	T	C	-0.0004985554	FALSE	FALSE
+22	35382268	A	C	-0.004931336	FALSE	FALSE
+22	35419122	C	T	-0.01077953	FALSE	FALSE
+22	35478529	A	G	0.0001760523	FALSE	FALSE
+22	35481493	T	C	0.01056439	FALSE	FALSE
+22	35526281	G	A	-0.002766891	FALSE	FALSE
+22	35603836	A	G	-0.0001783939	FALSE	FALSE
+22	35660875	T	G	0.03988231	FALSE	FALSE
+22	35745196	G	T	0.0001750545	FALSE	FALSE
+22	35750980	A	G	-0.007651136	FALSE	FALSE
+22	35783413	G	A	0.001649791	FALSE	FALSE
+22	35918270	C	T	0.006918713	FALSE	FALSE
+22	35959242	A	G	0.01697538	FALSE	FALSE
+22	35962060	G	A	0.005181476	FALSE	FALSE
+22	35964158	G	C	0.002769931	FALSE	FALSE
+22	35984385	A	G	-0.01280623	FALSE	FALSE
+22	36001258	C	T	0.01342405	FALSE	FALSE
+22	36072262	T	C	0.00489549	FALSE	FALSE
+22	36180535	G	A	-0.03250252	FALSE	FALSE
+22	36517307	C	T	0.01366076	FALSE	FALSE
+22	36519596	A	C	-0.00349956	FALSE	FALSE
+22	36532058	A	G	-0.01214487	FALSE	FALSE
+22	36543489	C	G	0.007838149	FALSE	FALSE
+22	36600841	G	A	0.02644389	FALSE	FALSE
+22	36629633	C	A	-0.006871468	FALSE	FALSE
+22	36635967	G	A	-0.02634742	FALSE	FALSE
+22	36655735	A	G	-0.005385142	FALSE	FALSE
+22	36661646	A	G	-0.01560741	FALSE	FALSE
+22	36684354	C	T	-0.005170111	FALSE	FALSE
+22	36705622	A	G	0.01713234	FALSE	FALSE
+22	36708049	C	CTCCTGTGA	-0.05187051	FALSE	FALSE
+22	36751101	A	C	-0.0244065	FALSE	FALSE
+22	36764788	G	A	0.02784116	FALSE	FALSE
+22	36897427	C	T	0.02603792	FALSE	FALSE
+22	36900806	G	A	0.007366207	FALSE	FALSE
+22	36923144	T	C	-0.001875563	FALSE	FALSE
+22	36924714	G	A	-0.003632594	FALSE	FALSE
+22	36946643	T	G	0.01333137	FALSE	FALSE
+22	36954939	T	C	0.01105894	FALSE	FALSE
+22	36998907	T	C	-0.0006084687	FALSE	FALSE
+22	37001495	G	T	-0.01224147	FALSE	FALSE
+22	37013167	G	C	0.01866849	FALSE	FALSE
+22	37077364	C	T	0.007294257	FALSE	FALSE
+22	37080738	C	G	-0.004873355	FALSE	FALSE
+22	37101890	C	T	0.03991764	FALSE	FALSE
+22	37118535	A	G	-0.001713909	FALSE	FALSE
+22	37184521	G	A	0.006515894	FALSE	FALSE
+22	37206341	G	T	0.0002566936	FALSE	FALSE
+22	37256262	A	G	0.001152626	FALSE	FALSE
+22	37258503	C	T	-0.009761102	FALSE	FALSE
+22	37323988	T	C	-0.0073182	FALSE	FALSE
+22	37329545	G	A	0.005775806	FALSE	FALSE
+22	37337409	T	C	-0.02534399	FALSE	FALSE
+22	37343000	A	C	-0.0004011777	FALSE	FALSE
+22	37398195	T	C	-0.01001198	FALSE	FALSE
+22	37401532	A	G	-0.003244795	FALSE	FALSE
+22	37407109	C	G	0.04335972	FALSE	FALSE
+22	37477732	T	C	0.0003669548	FALSE	FALSE
+22	37507019	A	G	-0.0009259451	FALSE	FALSE
+22	37513316	A	G	0.001153887	FALSE	FALSE
+22	37532441	A	G	0.01802306	FALSE	FALSE
+22	37571497	G	A	-0.005785311	FALSE	FALSE
+22	37581383	T	C	0.03172492	FALSE	FALSE
+22	37621269	C	A	0.004460405	FALSE	FALSE
+22	37644621	T	C	-0.008386907	FALSE	FALSE
+22	37671896	A	G	0.02303688	FALSE	FALSE
+22	37679763	G	A	-0.002658396	FALSE	FALSE
+22	37720268	G	A	0.02120184	FALSE	FALSE
+22	37753256	C	T	0.008984539	FALSE	FALSE
+22	37757099	G	A	-0.01560347	FALSE	FALSE
+22	37780522	C	G	-0.01496708	FALSE	FALSE
+22	37800175	T	C	-0.005510833	FALSE	FALSE
+22	37846448	G	A	0.01152963	FALSE	FALSE
+22	37896749	C	T	0.005447068	FALSE	FALSE
+22	37908435	C	T	0.001909131	FALSE	FALSE
+22	37977481	T	C	0.01465308	FALSE	FALSE
+22	37992699	G	A	0.0008339179	FALSE	FALSE
+22	38032762	G	GA	0.01693041	FALSE	FALSE
+22	38054262	C	A	0.04354146	FALSE	FALSE
+22	38083101	C	T	-0.02092117	FALSE	FALSE
+22	38119213	A	G	0.03948165	FALSE	FALSE
+22	38122122	C	T	0.04377277	FALSE	FALSE
+22	38204089	T	C	0.02977743	FALSE	FALSE
+22	38435786	T	G	-0.007684278	FALSE	FALSE
+22	38544298	G	A	0.05090446	FALSE	FALSE
+22	38597378	T	G	-0.01997927	FALSE	FALSE
+22	38606780	G	A	-0.009182016	FALSE	FALSE
+22	38630272	C	T	0.007393137	FALSE	FALSE
+22	38663819	G	A	-0.006392021	FALSE	FALSE
+22	38673234	A	G	-0.01106705	FALSE	FALSE
+22	38685131	C	T	-0.004493352	FALSE	FALSE
+22	38695406	T	C	-0.01155972	FALSE	FALSE
+22	38708506	A	G	0.01701713	FALSE	FALSE
+22	38744184	C	T	-0.02112956	FALSE	FALSE
+22	38819613	A	G	-0.005625806	FALSE	FALSE
+22	38877461	G	T	0.001108728	FALSE	FALSE
+22	38918894	G	T	-0.008094286	FALSE	FALSE
+22	38928269	G	T	-0.02114917	FALSE	FALSE
+22	39027286	C	CAG	0.003840735	FALSE	FALSE
+22	39067524	G	A	0.01200232	FALSE	FALSE
+22	39159201	C	T	0.003096214	FALSE	FALSE
+22	39178701	G	A	0.002148449	FALSE	FALSE
+22	39260032	T	C	0.03574634	FALSE	FALSE
+22	39268785	T	G	0.009377414	FALSE	FALSE
+22	39281774	G	T	0.03816951	FALSE	FALSE
+22	39300265	C	T	0.03540156	FALSE	FALSE
+22	39332623	T	C	-0.004449842	FALSE	FALSE
+22	39415780	G	A	0.01479946	FALSE	FALSE
+22	39448465	A	G	0.003065974	FALSE	FALSE
+22	39480697	G	A	-0.04005617	FALSE	FALSE
+22	39487665	G	A	-0.0001218988	FALSE	FALSE
+22	39493294	C	T	-0.03115929	FALSE	FALSE
+22	39510995	G	A	-0.02069106	FALSE	FALSE
+22	39542292	A	G	0.009653575	FALSE	FALSE
+22	39543000	T	C	-0.004069841	FALSE	FALSE
+22	39573724	A	C	0.02683694	FALSE	FALSE
+22	39575692	A	C	0.01451305	FALSE	FALSE
+22	39581277	A	C	0.01766406	FALSE	FALSE
+22	39626572	A	G	-0.02901981	FALSE	FALSE
+22	39658626	C	T	0.004177065	FALSE	FALSE
+22	39665395	G	A	0.01264611	FALSE	FALSE
+22	39687484	G	A	0.005418141	FALSE	FALSE
+22	39708279	A	G	-0.04281532	FALSE	FALSE
+22	39708357	T	C	0.008605574	FALSE	FALSE
+22	39793066	G	T	0.03658209	FALSE	FALSE
+22	39798127	G	A	0.002302129	FALSE	FALSE
+22	39843409	T	C	0.01065699	FALSE	FALSE
+22	39865475	G	A	0.001588501	FALSE	FALSE
+22	39932516	A	G	-0.01179841	FALSE	FALSE
+22	39963426	G	A	-0.01503908	FALSE	FALSE
+22	40023636	C	T	0.006443146	FALSE	FALSE
+22	40046176	C	T	-0.0007416552	FALSE	FALSE
+22	40067818	T	C	0.00455936	FALSE	FALSE
+22	40092864	G	A	0.02400297	FALSE	FALSE
+22	40127293	T	C	-0.0008870038	FALSE	FALSE
+22	40358148	T	C	-0.01079902	FALSE	FALSE
+22	40420786	G	C	-0.008092115	FALSE	FALSE
+22	40454069	G	T	0.00789888	FALSE	FALSE
+22	40541981	G	A	0.0174264	FALSE	FALSE
+22	40652873	G	A	0.005853057	FALSE	FALSE
+22	40676672	G	T	-0.001894274	FALSE	FALSE
+22	40729614	G	A	0.0195994	FALSE	FALSE
+22	40820151	C	T	-0.01628066	FALSE	FALSE
+22	40986372	G	C	-0.01983507	FALSE	FALSE
+22	41494925	A	G	-0.02918069	FALSE	FALSE
+22	41646738	G	A	0.0003521847	FALSE	FALSE
+22	41680898	T	C	0.01402732	FALSE	FALSE
+22	41704872	T	C	6.681484E-05	FALSE	FALSE
+22	41791536	C	T	-5.572333E-05	FALSE	FALSE
+22	41895409	A	G	-0.04407217	FALSE	FALSE
+22	41929175	G	T	-0.03186844	FALSE	FALSE
+22	42089623	C	T	0.00532234	FALSE	FALSE
+22	42095658	G	T	0.03846131	FALSE	FALSE
+22	42210985	C	T	-0.00313971	FALSE	FALSE
+22	42279653	G	A	-0.006596336	FALSE	FALSE
+22	42341308	G	A	-0.0006862491	FALSE	FALSE
+22	42524243	C	CT	-0.01181191	FALSE	FALSE
+22	42672124	G	A	-0.005278171	FALSE	FALSE
+22	42691238	T	C	-0.01642396	FALSE	FALSE
+22	42813753	C	T	-0.00386775	FALSE	FALSE
+22	42867898	G	A	-0.001352327	FALSE	FALSE
+22	42912097	T	C	-0.0007295657	FALSE	FALSE
+22	42932317	A	G	-0.05768556	FALSE	FALSE
+22	43010817	A	G	0.01722077	FALSE	FALSE
+22	43080028	T	C	-0.0005527551	FALSE	FALSE
+22	43096507	T	C	-0.005556102	FALSE	FALSE
+22	43112475	T	C	-0.01350273	FALSE	FALSE
+22	43114824	G	A	-0.01963192	FALSE	FALSE
+22	43115576	C	T	-0.01880097	FALSE	FALSE
+22	43154299	G	A	-0.001621113	FALSE	FALSE
+22	43159948	T	C	-0.007980584	FALSE	FALSE
+22	43206950	C	A	-0.005783037	FALSE	FALSE
+22	43218397	C	T	-0.003976636	FALSE	FALSE
+22	43283255	C	A	-0.01426668	FALSE	FALSE
+22	43290583	C	T	-0.03955775	FALSE	FALSE
+22	43333156	A	G	-0.03127845	FALSE	FALSE
+22	43426262	G	A	-0.00366804	FALSE	FALSE
+22	43483242	T	C	-0.02540203	FALSE	FALSE
+22	43515108	C	T	-0.01570749	FALSE	FALSE
+22	43529314	C	G	0.01738127	FALSE	FALSE
+22	43551513	G	A	0.02565386	FALSE	FALSE
+22	43558972	A	G	-0.01962819	FALSE	FALSE
+22	43577214	T	C	-0.02270478	FALSE	FALSE
+22	43579049	C	T	-0.001193909	FALSE	FALSE
+22	43610207	G	A	-0.007621661	FALSE	FALSE
+22	43623395	G	C	-0.04852519	FALSE	FALSE
+22	43640512	C	T	-0.005533207	FALSE	FALSE
+22	43649701	C	T	0.07724845	FALSE	FALSE
+22	43661080	T	C	-0.04251741	FALSE	FALSE
+22	43683088	A	G	-0.003582388	FALSE	FALSE
+22	43707996	A	G	-0.02547044	FALSE	FALSE
+22	43711080	C	G	-0.005784446	FALSE	FALSE
+22	43721519	C	A	0.000365885	FALSE	FALSE
+22	43729401	C	T	0.008557013	FALSE	FALSE
+22	43763757	T	G	-0.0178981	FALSE	FALSE
+22	43836198	G	T	0.002427697	FALSE	FALSE
+22	43976396	A	G	-0.01277457	FALSE	FALSE
+22	44031042	C	T	0.003593107	FALSE	FALSE
+22	44193626	C	A	-0.006865434	FALSE	FALSE
+22	44221247	G	A	0.01833991	FALSE	FALSE
+22	44296372	T	C	0.006169212	FALSE	FALSE
+22	44298838	A	G	0.007441756	FALSE	FALSE
+22	44342116	G	A	0.02810328	FALSE	FALSE
+22	44368122	G	A	0.0129968	FALSE	FALSE
+22	44379838	G	A	0.001648422	FALSE	FALSE
+22	44380033	C	T	-0.002136788	FALSE	FALSE
+22	44395451	C	T	-0.006698507	FALSE	FALSE
+22	44419871	C	T	0.0181613	FALSE	FALSE
+22	44424108	T	C	0.01036733	FALSE	FALSE
+22	44467899	C	T	-0.002592364	FALSE	FALSE
+22	44498134	T	C	0.007281423	FALSE	FALSE
+22	44522312	C	T	-0.0002636447	FALSE	FALSE
+22	44526130	G	A	-0.00388298	FALSE	FALSE
+22	44530286	A	G	0.02528159	FALSE	FALSE
+22	44530420	C	T	-0.01233654	FALSE	FALSE
+22	44548944	G	A	-0.003947209	FALSE	FALSE
+22	44551755	G	A	0.01262458	FALSE	FALSE
+22	44566434	A	G	-0.004290306	FALSE	FALSE
+22	44581046	T	C	-0.0147995	FALSE	FALSE
+22	44643161	C	T	0.01439493	FALSE	FALSE
+22	44677081	C	T	-0.01030513	FALSE	FALSE
+22	44681612	G	A	-0.001269762	FALSE	FALSE
+22	44695088	T	C	0.006324859	FALSE	FALSE
+22	44707716	G	T	0.002288939	FALSE	FALSE
+22	44725343	G	A	0.003534678	FALSE	FALSE
+22	44738406	G	A	0.02320049	FALSE	FALSE
+22	44746729	A	G	-0.01754216	FALSE	FALSE
+22	44751158	G	A	-0.006539695	FALSE	FALSE
+22	44757439	A	G	0.02480295	FALSE	FALSE
+22	44759519	G	A	0.002111274	FALSE	FALSE
+22	44761797	A	T	-0.00531172	FALSE	FALSE
+22	44763352	C	G	0.01452737	FALSE	FALSE
+22	44783779	G	A	0.009142699	FALSE	FALSE
+22	44791807	C	T	-0.02371876	FALSE	FALSE
+22	44818986	C	T	-0.006740622	FALSE	FALSE
+22	44894913	G	A	-5.179871E-05	FALSE	FALSE
+22	45058431	C	T	0.01098259	FALSE	FALSE
+22	45066035	A	G	-0.01484374	FALSE	FALSE
+22	45069410	T	C	0.01530441	FALSE	FALSE
+22	45081330	G	A	0.00135012	FALSE	FALSE
+22	45082168	C	A	0.003663354	FALSE	FALSE
+22	45090008	G	A	0.002811861	FALSE	FALSE
+22	45116664	C	T	0.01247728	FALSE	FALSE
+22	45244930	T	C	-0.01450041	FALSE	FALSE
+22	45258457	G	A	-0.003500519	FALSE	FALSE
+22	45323989	T	C	0.001111338	FALSE	FALSE
+22	45415987	A	G	-0.01398184	FALSE	FALSE
+22	45451355	G	A	-0.005566982	FALSE	FALSE
+22	45471607	C	T	0.01148978	FALSE	FALSE
+22	45497738	C	T	-0.005029327	FALSE	FALSE
+22	45502829	C	T	-0.03893521	FALSE	FALSE
+22	45519040	T	G	0.002377071	FALSE	FALSE
+22	45523391	A	G	0.01318997	FALSE	FALSE
+22	45573450	C	A	0.0043856	FALSE	FALSE
+22	45589490	G	A	-0.008350439	FALSE	FALSE
+22	45668012	T	C	0.01286879	FALSE	FALSE
+22	45671343	G	A	-2.940682E-06	FALSE	FALSE
+22	45672574	T	C	0.005743608	FALSE	FALSE
+22	45693923	A	G	-0.002675069	FALSE	FALSE
+22	45718743	G	A	-0.02092804	FALSE	FALSE
+22	45723807	C	G	0.001670159	FALSE	FALSE
+22	45728370	A	G	0.0001879231	FALSE	FALSE
+22	45741537	G	T	0.01420045	FALSE	FALSE
+22	45749983	T	G	-0.04591012	FALSE	FALSE
+22	45809624	A	C	0.002185772	FALSE	FALSE
+22	45821935	A	G	0.02250782	FALSE	FALSE
+22	45837410	G	A	-0.002756449	FALSE	FALSE
+22	45846371	T	C	0.07910102	FALSE	FALSE
+22	45864934	T	C	0.008535181	FALSE	FALSE
+22	45871507	G	C	-0.007764056	FALSE	FALSE
+22	45892656	G	T	-0.003885653	FALSE	FALSE
+22	45897997	C	T	0.0003935204	FALSE	FALSE
+22	45929577	C	T	-0.02532217	FALSE	FALSE
+22	45936350	A	G	-0.008001698	FALSE	FALSE
+22	45942726	T	G	-0.01415551	FALSE	FALSE
+22	45996298	G	A	0.05643525	FALSE	FALSE
+22	46009063	G	A	0.006464843	FALSE	FALSE
+22	46022070	G	A	0.0224674	FALSE	FALSE
+22	46155548	G	C	-0.0324747	FALSE	FALSE
+22	46207955	C	T	-0.001354554	FALSE	FALSE
+22	46236425	A	G	0.08398423	FALSE	FALSE
+22	46275529	T	C	0.0022643	FALSE	FALSE
+22	46287720	A	G	-0.02237482	FALSE	FALSE
+22	46289699	T	C	0.01872124	FALSE	FALSE
+22	46303347	T	C	-0.01283734	FALSE	FALSE
+22	46316057	A	G	0.02312579	FALSE	FALSE
+22	46337043	G	C	0.01701173	FALSE	FALSE
+22	46347519	C	T	0.01574289	FALSE	FALSE
+22	46364161	A	G	-0.04466341	FALSE	FALSE
+22	46381234	G	A	0.04730559	FALSE	FALSE
+22	46396925	G	A	0.001783944	FALSE	FALSE
+22	46403715	A	G	-0.02132589	FALSE	FALSE
+22	46406782	A	C	0.08439466	FALSE	FALSE
+22	46445002	G	C	-0.07613496	FALSE	FALSE
+22	46458123	G	T	0.03328073	FALSE	FALSE
+22	46482948	C	T	0.04241879	FALSE	FALSE
+22	46486508	C	T	-0.00968439	FALSE	FALSE
+22	46493852	T	C	-0.00675858	FALSE	FALSE
+22	46499120	C	G	-0.009873118	FALSE	FALSE
+22	46502870	T	C	-0.0179214	FALSE	FALSE
+22	46561713	G	A	0.02604703	FALSE	FALSE
+22	46586110	A	G	-0.001256735	FALSE	FALSE
+22	46592168	C	T	0.01417055	FALSE	FALSE
+22	46614274	G	C	-0.05854014	FALSE	FALSE
+22	46627603	T	C	0.08004024	FALSE	FALSE
+22	46760086	T	C	0.003229515	FALSE	FALSE
+22	46782382	T	C	-0.02470821	FALSE	FALSE
+22	46807234	C	T	0.002324176	FALSE	FALSE
+22	46837114	G	A	0.000944073	FALSE	FALSE
+22	46888399	T	C	0.009911095	FALSE	FALSE
+22	46907779	G	A	0.00653144	FALSE	FALSE
+22	46909355	T	G	-0.004780494	FALSE	FALSE
+22	46914277	A	C	0.009689535	FALSE	FALSE
+22	46943687	G	A	-0.0130366	FALSE	FALSE
+22	46985917	A	G	0.01893397	FALSE	FALSE
+22	47021226	G	A	-0.01322949	FALSE	FALSE
+22	47095235	A	C	-0.1156013	FALSE	FALSE
+22	47109621	C	T	0.0004322858	FALSE	FALSE
+22	47125474	G	A	-0.01746025	FALSE	FALSE
+22	47147117	T	C	-0.02418349	FALSE	FALSE
+22	47156703	C	T	0.0262897	FALSE	FALSE
+22	47245836	A	G	0.001880575	FALSE	FALSE
+22	47271747	C	T	0.001055264	FALSE	FALSE
+22	47301822	C	T	0.003032158	FALSE	FALSE
+22	47345487	T	C	-0.002945945	FALSE	FALSE
+22	47372368	T	C	0.02067644	FALSE	FALSE
+22	47380606	C	T	0.04041426	FALSE	FALSE
+22	47437808	C	T	0.001683027	FALSE	FALSE
+22	47450911	A	G	0.01624479	FALSE	FALSE
+22	47511864	A	C	-0.004226735	FALSE	FALSE
+22	47519476	T	C	-0.003954111	FALSE	FALSE
+22	47529458	A	G	-0.0003602848	FALSE	FALSE
+22	47531320	T	C	-0.006899703	FALSE	FALSE
+22	47548321	T	C	0.004925401	FALSE	FALSE
+22	47568291	C	T	0.007726693	FALSE	FALSE
+22	47571203	A	G	-0.009744751	FALSE	FALSE
+22	47574009	C	T	-0.00532701	FALSE	FALSE
+22	47642100	T	C	0.006976251	FALSE	FALSE
+22	47657635	T	C	0.001798943	FALSE	FALSE
+22	47683805	C	T	-0.03475544	FALSE	FALSE
+22	47720973	T	C	-0.007868172	FALSE	FALSE
+22	47821952	G	A	-0.000885428	FALSE	FALSE
+22	47893053	A	G	-0.02449056	FALSE	FALSE
+22	47935365	C	T	-0.001599879	FALSE	FALSE
+22	47961708	G	T	-0.003593525	FALSE	FALSE
+22	47986332	T	C	-0.003976592	FALSE	FALSE
+22	48154645	C	T	0.007608639	FALSE	FALSE
+22	48165452	C	CT	0.002039503	FALSE	FALSE
+22	48207318	T	C	-0.009725168	FALSE	FALSE
+22	48213904	G	C	-0.01220367	FALSE	FALSE
+22	48215904	A	G	-2.488244E-05	FALSE	FALSE
+22	48220460	T	C	-0.002702163	FALSE	FALSE
+22	48230941	C	A	-0.001129522	FALSE	FALSE
+22	48271961	A	G	-0.005053446	FALSE	FALSE
+22	48284025	T	C	-0.003344182	FALSE	FALSE
+22	48297953	C	T	-0.01046958	FALSE	FALSE
+22	48362290	G	A	-0.02367254	FALSE	FALSE
+22	48362914	C	A	-0.003167719	FALSE	FALSE
+22	48387670	A	G	-0.008243989	FALSE	FALSE
+22	48415446	C	T	0.002130715	FALSE	FALSE
+22	48460730	T	C	0.002682476	FALSE	FALSE
+22	48491160	T	C	0.001257794	FALSE	FALSE
+22	48519794	C	T	0.003680757	FALSE	FALSE
+22	48537775	G	A	0.002134692	FALSE	FALSE
+22	48543566	T	C	0.007314089	FALSE	FALSE
+22	48593037	C	T	0.009084708	FALSE	FALSE
+22	48687509	C	T	-0.0277196	FALSE	FALSE
+22	48692033	T	C	-0.02126264	FALSE	FALSE
+22	48699617	T	C	0.0005093107	FALSE	FALSE
+22	48717568	T	C	-0.0008190281	FALSE	FALSE
+22	48811946	C	T	0.007916515	FALSE	FALSE
+22	48823357	G	A	0.01464317	FALSE	FALSE
+22	48840428	A	C	0.003711229	FALSE	FALSE
+22	48851612	T	C	-0.005887765	FALSE	FALSE
+22	48874310	T	C	-0.01106607	FALSE	FALSE
+22	48968070	C	T	0.01280691	FALSE	FALSE
+22	48991385	T	C	-0.01234119	FALSE	FALSE
+22	49004050	G	A	0.02290755	FALSE	FALSE
+22	49014565	A	G	0.001555565	FALSE	FALSE
+22	49086481	T	C	-0.006196369	FALSE	FALSE
+22	49107173	T	C	0.01277272	FALSE	FALSE
+22	49180915	A	G	0.006346977	FALSE	FALSE
+22	49262579	A	G	0.02657134	FALSE	FALSE
+22	49270317	C	T	0.001447665	FALSE	FALSE
+22	49313196	A	G	-0.007055532	FALSE	FALSE
+22	49335230	T	C	-0.006548281	FALSE	FALSE
+22	49366123	T	C	0.01136486	FALSE	FALSE
+22	49372356	G	C	-0.02420841	FALSE	FALSE
+22	49443666	T	C	0.01581736	FALSE	FALSE
+22	49496835	G	A	-0.01355414	FALSE	FALSE
+22	49524428	A	G	-0.004228482	FALSE	FALSE
+22	49530553	G	C	0.008197389	FALSE	FALSE
+22	49537845	T	C	0.0111255	FALSE	FALSE
+22	49557457	G	A	0.009401926	FALSE	FALSE
+22	49562666	C	A	0.01271701	FALSE	FALSE
+22	49574509	C	T	0.0004703177	FALSE	FALSE
+22	49579141	A	G	0.02448619	FALSE	FALSE
+22	49650863	T	C	0.006739571	FALSE	FALSE
+22	49662549	T	G	-0.005769464	FALSE	FALSE
+22	49665841	T	C	-0.0007037069	FALSE	FALSE
+22	49677464	A	G	-0.02177735	FALSE	FALSE
+22	49696067	C	T	-0.003309682	FALSE	FALSE
+22	49700272	T	G	-0.002541948	FALSE	FALSE
+22	49706433	T	C	-0.01719402	FALSE	FALSE
+22	49713835	G	A	-0.01370754	FALSE	FALSE
+22	49719264	A	C	-0.01067852	FALSE	FALSE
+22	49743627	G	A	-0.0005970581	FALSE	FALSE
+22	49800265	C	T	0.03098582	FALSE	FALSE
+22	49806863	A	G	0.003940447	FALSE	FALSE
+22	49830851	C	T	-0.002742706	FALSE	FALSE
+22	49834624	G	A	-0.002820163	FALSE	FALSE
+22	49843235	G	C	-0.0004458281	FALSE	FALSE
+22	49847501	T	G	0.002235016	FALSE	FALSE
+22	49861033	C	T	0.01721243	FALSE	FALSE
+22	49881321	A	G	-0.00051278	FALSE	FALSE
+22	49908804	G	A	-0.009455892	FALSE	FALSE
+22	49911222	G	T	-0.01389666	FALSE	FALSE
+22	49925268	A	G	0.01679984	FALSE	FALSE
+22	49927332	T	C	0.00039298	FALSE	FALSE
+22	50109212	T	C	0.01610819	FALSE	FALSE
+22	50118149	G	C	0.007024666	FALSE	FALSE
+22	50184484	G	T	0.01222581	FALSE	FALSE
+22	50219447	T	C	0.05091891	FALSE	FALSE
+22	50278568	G	A	-0.02340672	FALSE	FALSE
+22	50319170	G	A	0.01669806	FALSE	FALSE
+22	50350971	A	G	0.0264016	FALSE	FALSE
+22	50356693	C	T	0.003851499	FALSE	FALSE
+22	50435480	G	A	0.0166363	FALSE	FALSE
+22	50439626	A	G	-0.002722154	FALSE	FALSE
+22	50466542	C	T	-0.002560094	FALSE	FALSE
+22	50470516	T	C	-0.01621986	FALSE	FALSE
+22	50491150	G	A	0.01828674	FALSE	FALSE
+22	50515270	C	T	0.01439904	FALSE	FALSE
+22	50529850	C	T	0.02054628	FALSE	FALSE
+22	50570755	C	G	0.007077514	FALSE	FALSE
+22	50582626	G	A	-0.003588854	FALSE	FALSE
+22	50672154	A	G	0.007660848	FALSE	FALSE
+22	50722134	C	T	-0.01747164	FALSE	FALSE
+22	50722408	C	T	-0.001063465	FALSE	FALSE
+22	50728062	C	T	0.02159223	FALSE	FALSE
+22	50750481	T	C	0.01877272	FALSE	FALSE
+22	50758873	T	C	0.004001731	FALSE	FALSE
+22	50835040	A	G	-0.006374259	FALSE	FALSE
+22	50859049	C	T	0.0003480749	FALSE	FALSE
+22	50885775	G	A	-0.01358311	FALSE	FALSE
+22	50926768	T	C	0.001798498	FALSE	FALSE
+22	50928026	A	G	0.004775504	FALSE	FALSE
+22	50971266	C	T	0.02160893	FALSE	FALSE
+22	50989197	T	C	-0.01328884	FALSE	FALSE
+22	50989326	G	A	0.01037054	FALSE	FALSE
+22	50999681	G	A	-0.01226224	FALSE	FALSE
+22	51046163	T	C	-0.02754002	FALSE	FALSE
+22	51117580	C	T	0.03573542	FALSE	FALSE
+22	51171497	A	G	-0.01951606	FALSE	FALSE
+22	51174939	T	C	-0.006178519	FALSE	FALSE
diff --git a/tests/test_combine.py b/tests/test_combine.py
index db92cc9..bc82faf 100644
--- a/tests/test_combine.py
+++ b/tests/test_combine.py
@@ -1,39 +1,136 @@
+import csv
+import importlib.resources
+import json
 from unittest.mock import patch
 
-import jq
-import pandas as pd
 import pytest
 
-from pgscatalog_utils.download.Catalog import CatalogQuery, CatalogResult
-from pgscatalog_utils.download.CatalogCategory import CatalogCategory
 from pgscatalog_utils.scorefile.combine_scorefiles import combine_scorefiles
+from tests.data import combine
 
 
-def test_combine_scorefiles(combined_scorefile, _n_variants):
-    df = pd.read_table(combined_scorefile)
-    cols = {'chr_name', 'chr_position', 'effect_allele', 'other_allele', 'effect_weight', 'effect_type',
-            'is_duplicated', 'accession', 'row_nr'}
-    assert set(df.columns).issubset(cols)
-    assert df.shape[0] == _n_variants
+def test_pgscatalog_combine(pgscatalog_path, tmp_path, combine_output_header):
+    out_path = tmp_path / "combined.txt"
+    args: list[str] = (
+        ["combine_scorefiles", "-t", "GRCh37", "-s"]
+        + [str(pgscatalog_path)]
+        + ["-o", str(out_path.resolve())]
+    )
 
+    with patch("sys.argv", args):
+        combine_scorefiles()
 
-def test_liftover(lifted_scorefiles):
-    df = pd.read_table(lifted_scorefiles)
-    assert df.shape[0] == 832  # approx size
+    n = -1  # skip header line
+    with open(out_path) as f:
+        for i, line in enumerate(f):
+            if i == 0:
+                cols = line.strip().split("\t")
+                assert not set(cols).difference(set(combine_output_header))
+            n += 1
 
+    with open(out_path.parent / "log_combined.json") as f:
+        header = json.load(f)[0]
+        assert header["PGS001229_22"]["pgs_id"] == "PGS001229"
+        assert header["PGS001229_22"]["pgs_name"] == "GBE_INI50"
+        assert header["PGS001229_22"]["genome_build"] == "GRCh37"
+        assert int(header["PGS001229_22"]["variants_number"]) == n
+        assert not header["PGS001229_22"]["use_harmonised"]
 
-def test_fail_combine(scorefiles, tmp_path_factory):
+
+def test_effect_type_combine(effect_type_path, tmp_path, combine_output_header):
+    # the scoring file is in build GRCh37 and is combined with -t GRCh37, so no exception is expected
+    out_path = tmp_path / "combined.txt"
+    args: list[str] = (
+        ["combine_scorefiles", "-t", "GRCh37", "-s"]
+        + [str(effect_type_path)]
+        + ["-o", str(out_path.resolve())]
+    )
+    with patch("sys.argv", args):
+        combine_scorefiles()
+
+    with open(out_path) as f:
+        n = 0
+        for line in csv.DictReader(f, delimiter="\t"):
+            cols = list(line.keys())
+
+            if int(line["row_nr"]) == 0:
+                assert line["effect_type"] == "dominant"
+
+            if int(line["row_nr"]) == 1:
+                assert line["effect_type"] == "recessive"
+
+            n += 1
+
+        assert not set(cols).difference(set(combine_output_header))
+
+    with open(out_path.parent / "log_combined.json") as f:
+        header = json.load(f)[0]
+        assert (
+            header["scorefile_dominant_and_recessive"]["pgs_name"]
+            == "PGS001229_22_DominantRecessiveExample"
+        )
+        assert header["scorefile_dominant_and_recessive"]["genome_build"] == "GRCh37"
+        assert header["scorefile_dominant_and_recessive"]["variants_number"] == n
+        assert not header["scorefile_dominant_and_recessive"]["use_harmonised"]
+
+
+def test_custom_combine(custom_score_path, tmp_path, combine_output_header):
     # these genomes are in build GRCh37, so combining with -t GRCh38 will raise an exception
-    with pytest.raises(Exception):
-        out_path = tmp_path_factory.mktemp("scores") / "combined.txt"
-        args: list[str] = ['combine_scorefiles', '-t', 'GRCh38', '-s'] + scorefiles + ['-o', str(out_path.resolve())]
-        with patch('sys.argv', args):
-            combine_scorefiles()
+    out_path = tmp_path / "combined.txt"
+    args: list[str] = (
+        ["combine_scorefiles", "-t", "GRCh37", "-s"]
+        + [str(custom_score_path)]
+        + ["-o", str(out_path.resolve())]
+    )
+
+    with patch("sys.argv", args):
+        combine_scorefiles()
+
+    # read combined file
+    n = -1  # skip header line
+    with open(out_path) as f:
+        for i, line in enumerate(f):
+            if i == 0:
+                cols = line.strip().split("\t")
+                assert not set(cols).difference(set(combine_output_header))
+            n += 1
+
+    with open(out_path.parent / "log_combined.json") as f:
+        header = json.load(f)[0]
+        assert header["scorefile"]["pgs_name"] == "PGS001229_22"
+        assert header["scorefile"]["genome_build"] == "GRCh37"
+        assert header["scorefile"]["variants_number"] == n
+        assert not header["scorefile"]["use_harmonised"]
+
+
+@pytest.fixture(scope="session")
+def pgscatalog_path():
+    """Return the path of PGS001229_22.txt in the tests.data.combine package."""
+    return importlib.resources.files(combine) / "PGS001229_22.txt"
 
 
 @pytest.fixture
-def _n_variants(pgs_accessions):
-    result = CatalogQuery(CatalogCategory.SCORE, accession=pgs_accessions).get()[0]
-    json = result.response
-    n: list[int] = jq.compile("[.results][][].variants_number").input(json).all()
-    return sum(n)
+def effect_type_path():
+    """Return the path of the dominant/recessive example in tests.data.combine."""
+    return importlib.resources.files(combine) / "scorefile_dominant_and_recessive.txt"
+
+
+@pytest.fixture(scope="session")
+def custom_score_path():
+    """Return the path of scorefile.txt in the tests.data.combine package."""
+    return importlib.resources.files(combine) / "scorefile.txt"
+
+
+@pytest.fixture(scope="session")
+def combine_output_header():
+    return [
+        "chr_name",
+        "chr_position",
+        "effect_allele",
+        "other_allele",
+        "effect_weight",
+        "effect_type",
+        "is_duplicated",
+        "accession",
+        "row_nr",
+    ]
diff --git a/tests/test_liftover.py b/tests/test_liftover.py
index b2f03a0..396c8f8 100644
--- a/tests/test_liftover.py
+++ b/tests/test_liftover.py
@@ -1,9 +1,39 @@
-import pandas as pd
+import copy
 
-from pgscatalog_utils.scorefile.liftover import liftover
+from pgscatalog_utils.scorefile.config import Config
+
+from pgscatalog_utils.download.GenomeBuild import GenomeBuild
+from pgscatalog_utils.scorefile.liftover import liftover, create_liftover
 
 
 def test_liftover(hg38_coords, hg19_coords, chain_files):
-    lifted = liftover(hg38_coords, chain_files, min_lift=0.9, target_build='GRCh37')
-    coords: pd.DataFrame = hg19_coords[['lifted_pos', 'lifted_chr']] == lifted[['lifted_pos', 'lifted_chr']]
-    assert coords.all(axis=None)
+    Config.chain_dir = chain_files  # NOTE(review): Config changes are not undone
+    Config.lo = create_liftover()
+    Config.min_lift = 0.95
+    hg38 = list(hg38_coords)
+    hg19 = list(hg19_coords)
+    hg19_ = copy.deepcopy(hg19)  # expected result of the GRCh38 -> GRCh37 lift
+    hg38_ = copy.deepcopy(hg38)  # expected result of the GRCh37 -> GRCh38 lift back
+    lifted = list(
+        liftover(
+            (x for x in hg38),
+            harmonised=False,
+            current_build=GenomeBuild.GRCh38,
+            target_build=GenomeBuild.GRCh37,
+        )
+    )
+
+    assert [x.chr_position for x in lifted] == [x.chr_position for x in hg19_]
+    assert [x.chr_name for x in lifted] == [x.chr_name for x in hg19_]
+
+    hg19 = copy.deepcopy(hg19)  # fresh copy used as input for the lift back
+    lift_back = list(
+        liftover(
+            (x for x in hg19),
+            harmonised=False,
+            current_build=GenomeBuild.GRCh37,
+            target_build=GenomeBuild.GRCh38,
+        )
+    )
+    assert [x.chr_position for x in lift_back] == [x.chr_position for x in hg38_]
+    assert [x.chr_name for x in lift_back] == [x.chr_name for x in hg38_]
diff --git a/tests/test_samplesheet.py b/tests/test_samplesheet.py
deleted file mode 100644
index 80d77db..0000000
--- a/tests/test_samplesheet.py
+++ /dev/null
@@ -1,101 +0,0 @@
-import json
-import os
-from pathlib import Path
-from unittest.mock import patch
-
-import pandas as pd
-import pytest
-
-from pgscatalog_utils.samplesheet.check import check_samplesheet
-
-
-@pytest.fixture
-def existing_vcf_prefix(tmp_path):
-    vcf_path = tmp_path / "test.vcf.gz"
-    _touch(vcf_path)
-    return str(vcf_path.parent.joinpath(Path(vcf_path.stem).stem))
-
-
-@pytest.fixture
-def samplesheet_df(existing_vcf_prefix):
-    return pd.DataFrame(
-        {"path_prefix": [existing_vcf_prefix], "format": ["vcf"], "sampleset": ["test"], "chrom": [None]})
-
-
-@pytest.fixture
-def good_samplesheet(samplesheet_df, tmp_path):
-    path = tmp_path / "good_samplesheet.csv"
-    samplesheet_df.to_csv(path, index=False)
-    return str(path)
-
-
-@pytest.fixture
-def bad_samplesheet(samplesheet_df, tmp_path):
-    path = tmp_path / "bad_samplesheet.csv"
-    bad_df = samplesheet_df.copy()
-    bad_df['path_prefix'] = 'bad_path'  # path doesn't exist
-    bad_df.to_csv(path, index=False)
-    return str(path)
-
-
-@pytest.fixture
-def multi_samplesets(samplesheet_df, tmp_path):
-    path = tmp_path / "multi_samplesets.csv"
-    multi_samplesets = pd.concat([samplesheet_df, samplesheet_df], ignore_index=True)
-    multi_samplesets.loc[multi_samplesets.index == 1, 'sampleset'] = 'a_different_name'
-    multi_samplesets.to_csv(path, index=False)
-    return str(path)
-
-
-@pytest.fixture
-def vcf_dosage(samplesheet_df, tmp_path):
-    path = tmp_path / "vcf_dosage.csv"
-    dosage_samplesheet = samplesheet_df.copy()
-    dosage_samplesheet["vcf_genotype_field"] = ["DS"]
-    dosage_samplesheet.to_csv(path, index=False)
-    return str(path)
-
-
-def _touch(fname):
-    if os.path.exists(fname):
-        os.utime(fname, None)
-    else:
-        open(fname, 'a').close()
-
-
-def test_good_samplesheet(good_samplesheet, tmp_path):
-    out_path = str(tmp_path / "out.json")
-    args = ['samplesheet_to_json', good_samplesheet, out_path]
-    with patch('sys.argv', args):
-        check_samplesheet()
-
-    assert os.path.exists(out_path), "No file written"
-
-
-def test_bad_samplesheet(bad_samplesheet, tmp_path):
-    out_path = str(tmp_path / "out.json")
-    args = ['samplesheet_to_json', bad_samplesheet, out_path]
-    with patch('sys.argv', args):
-        with pytest.raises(FileNotFoundError):
-            check_samplesheet()
-
-
-def test_multi_samplesets(multi_samplesets, tmp_path):
-    out_path = str(tmp_path / "out.json")
-    args = ['samplesheet_to_json', multi_samplesets, out_path]
-    with patch('sys.argv', args):
-        with pytest.raises(Exception, match="Multiple samplesets"):
-            check_samplesheet()
-
-
-def test_dosage_samplesheet(vcf_dosage, tmp_path):
-    out_path = str(tmp_path / "out.json")
-    args = ['samplesheet_to_json', vcf_dosage, out_path]
-    with patch('sys.argv', args):
-        check_samplesheet()
-
-    assert os.path.exists(out_path), "Missing output file"
-
-    with open(out_path, 'r') as f:
-        converted = json.loads(f.read())
-        assert converted[0]['vcf_import_dosage'], "Not importing dosage correctly"