Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

old changes found at MMIL #35

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions sumstats.py
Original file line number Diff line number Diff line change
Expand Up @@ -661,7 +661,7 @@ def make_csv(args, log):
# Ensure that alleles are coded as capital letters
if cols.A1 in chunk.columns: chunk[cols.A1] = chunk[cols.A1].str.upper().str.strip()
if cols.A2 in chunk.columns: chunk[cols.A2] = chunk[cols.A2].str.upper().str.strip()

# Populate sample size columns (NCASE, NCONTROL, N)
if args.ncase_val is not None: chunk[cols.NCASE] = args.ncase_val
if args.ncontrol_val is not None: chunk[cols.NCONTROL] = args.ncontrol_val
Expand Down Expand Up @@ -1258,6 +1258,9 @@ def make_lift(args, log):

indices_with_old_chrpos = range(len(df)) # indices with original chr:pos
fixes = []
if cols.SNP in df:
df[cols.SNP] = df[cols.SNP].apply(str.lower)

if (cols.SNP in df) and (lift_rs is not None):
# Fix1 brings forward SNP rs# numbers and sets SNP rs# to None for SNPs found in the SNPHistory table
df[cols.SNP], stats = lift_rs.lift(df[cols.SNP])
Expand Down Expand Up @@ -1575,12 +1578,13 @@ def make_ls(args, log):
ml = max([len(os.path.basename(file).replace('.csv.gz', '')) for file in glob.glob(args.path)])
cols_list = [x for x in cols._asdict() if x not in ['A1A2', 'CHRPOS', 'CHRPOSA1A2', 'SNP', 'CHR', 'BP', 'PVAL', 'A1', 'A2']]
log.log('{f}\t{n}\t{c}'.format(f='file'.ljust(ml),n='#snp'.ljust(9),c='\t'.join([x.replace('NCONTROL', 'NCONT.') for x in cols_list])))
result = []
for file in glob.glob(args.path):
if not os.path.isfile(file): continue
if '_noMHC' in file: continue
num_snps = np.nan; n = np.nan; ncase = np.nan; ncontrol = np.nan
try:
file_log = os.path.splitext(file)[0] + '.log'
file_log = file.replace('.sumstats.gz', '.log')
if os.path.isfile(file_log):
lines = open(file_log, 'r').readlines()
num_snps = [int(x.group(1)) for x in [re.search('([0-9]+) SNPs saved to', line.strip()) for line in lines] if x][0]
Expand All @@ -1601,8 +1605,10 @@ def make_ls(args, log):
ncontrol if (x == 'NCONTROL') else
'YES' if x in chunk
else '-') for x in cols_list]
log.log('{f}\t{n}\t{c}'.format(f=os.path.basename(file).replace('.csv.gz', '').ljust(ml), c='\t'.join(yes_no_or_sample_size),n=str(num_snps).ljust(9)))
result.append('{f}\t{n}\t{c}'.format(f=os.path.basename(file).replace('.sumstats.gz', '').ljust(ml), c='\t'.join(yes_no_or_sample_size),n=str(num_snps).ljust(9)))
break
for x in sorted(result):
log.log(x)
log.log('Columns description:')
for cname in sorted(cols._asdict()):
log.log('{c}\t{d}'.format(c=cname, d=describe_cname[cname]))
Expand Down