-
Notifications
You must be signed in to change notification settings - Fork 1
/
dataloader.py
45 lines (31 loc) · 858 Bytes
/
dataloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#!/usr/bin/python3
# Author: Suzanna Sia
# Standard imports
#import random
#import numpy as np
#import pdb
#import math
#import os, sys
# argparser
#import argparse
#from distutils.util import str2bool
#argparser = argparser.ArgumentParser()
#argparser.add_argument('--x', type=float, default=0)
import pdb
# Custom imports
def load_topic_words(topic_wordf):
    """Read a topic-word file and return one list of words per line.

    Words on a line may be separated by whitespace, by commas, or by both,
    so "a b", "a,b" and "a, b" all yield ["a", "b"].

    Args:
        topic_wordf: Path of the text file to read.

    Returns:
        A list with one entry per line of the file, in file order. Each
        entry is the list of words found on that line; a blank line yields
        an empty list, so list positions keep matching line positions.
    """
    DELIM = ","
    with open(topic_wordf, 'r') as f:
        # Turn the delimiter into a space instead of deleting it: deleting it
        # would glue "a,b" into the single token "ab". split() with no
        # argument already discards leading/trailing whitespace and newlines.
        return [line.replace(DELIM, ' ').split() for line in f]
def load_word_docids(word_dcf):
    """Read a word-to-document-ids file into a dict of sets.

    Each non-blank line is expected to look like ``word<TAB>id1;id2;...``.
    Blank lines are skipped, and empty id fields (for example from a
    trailing ";" or a word with nothing after the tab) are ignored.

    Args:
        word_dcf: Path of the text file to read.

    Returns:
        A dict mapping each word to the set of document-id strings listed on
        its line. If a word occurs on several lines, the last line wins.

    Raises:
        ValueError: If a non-blank line does not contain exactly one tab.
    """
    DELIM1 = "\t"
    DELIM2 = ";"
    word_dc = {}
    with open(word_dcf, 'r') as f:
        for line in f:
            # A blank line (commonly the last one in the file) carries no
            # record; unpacking it below would raise.
            if not line.strip():
                continue
            word, docids = line.split(DELIM1)
            # Drop empty strings so a trailing ";" or an empty id field does
            # not add a phantom "" document to the set.
            word_dc[word] = {
                docid for docid in docids.strip().split(DELIM2) if docid
            }
    return word_dc