sequence_utils.py
"""
======================================================================
SEQUENCE_UTILS ---
Utils for the sequence processing.
Author: Zi Liang <[email protected]>
Copyright © 2024, ZiLiang, all rights reserved.
Created: 28 February 2024
======================================================================
"""
# ------------------------ Code --------------------------------------
import random
from typing import List

import torch


def my_padding(ts_ls: List[torch.Tensor], pls, msl, pad_idx):
    """Right-pad a batch of token sequences to length `msl`.

    `pls` holds the prompt tokens for each sequence; the returned
    mask covers the generated part and zeroes out the prompt prefix
    so it can be excluded from the loss.
    """
    num = len(ts_ls)
    target_tensor = (torch.ones((num, msl), dtype=torch.long)
                     * pad_idx).to("cpu")
    mask_tensor = torch.zeros((num, msl)).to("cpu")
    assert len(ts_ls) == len(pls)
    for i, ts in enumerate(ts_ls):
        end_idx = min(msl, len(ts) + 1)
        prompt_idx = min(msl, max(len(pls[i]) - 2, 0))
        target_tensor[i, :end_idx - 1] = torch.tensor(ts[:end_idx - 1],
                                                      dtype=torch.long)
        # The mask extends one position past the last token.
        mask_tensor[i, :end_idx] = torch.ones(end_idx)
        # Zero the prompt region of the mask.
        mask_tensor[i, :prompt_idx] = torch.zeros(prompt_idx)
    return target_tensor, mask_tensor
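
# Example (illustrative values; pad id 0 and msl=8 are assumptions,
# not defaults of this module):
#   seqs = [torch.tensor([1, 2, 3, 4, 5], dtype=torch.long)]
#   toks, mask = my_padding(seqs, [seqs[0][:3]], msl=8, pad_idx=0)
#   toks -> tensor([[1, 2, 3, 4, 5, 0, 0, 0]])
#   mask -> tensor([[0., 1., 1., 1., 1., 1., 0., 0.]])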


def left_pad(tensor_list: List[torch.Tensor], pad_id: int):
    """Left-pad 1-D tensors with `pad_id` to the longest length."""
    longest_len = max(len(x) for x in tensor_list)
    new_tensor_ls = []
    for x in tensor_list:
        num_pad = longest_len - len(x)
        ls = [pad_id for _ in range(num_pad)]
        ls.extend(x.tolist())
        new_tensor_ls.append(ls)
    return torch.tensor(new_tensor_ls, dtype=torch.long)
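
# Example (0 as pad id is an assumption):
#   left_pad([torch.tensor([1, 2, 3]), torch.tensor([7])], 0)
#   -> tensor([[1, 2, 3],
#              [0, 0, 7]])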


def my_padding_token_dist(ts_ls: List[torch.Tensor], msl, pad_idx):
    """Pad per-position candidate-token lists to length `msl`."""
    num = len(ts_ls)
    candidate_num = len(ts_ls[0][0])
    target_tensor = (torch.ones((num, msl, candidate_num),
                                dtype=torch.long) * pad_idx).to("cpu")
    # mask_tensor = torch.zeros((num, msl, candidate_num)).to("cpu")
    for i, ts in enumerate(ts_ls):
        end_idx = min(msl, len(ts))
        target_tensor[i, :end_idx] = torch.tensor(ts[:end_idx],
                                                  dtype=torch.long)
        # mask_tensor[i, :end_idx] = torch.ones_like(ts[:end_idx])
    return target_tensor
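
# Example (illustrative 2-candidate lists; pad id 0 assumed):
#   cands = [[[1, 2], [3, 4]], [[5, 6]]]
#   my_padding_token_dist(cands, msl=3, pad_idx=0)
#   -> tensor([[[1, 2], [3, 4], [0, 0]],
#              [[5, 6], [0, 0], [0, 0]]])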


def my_padding_logits(ts_lss: List[torch.Tensor], msl, pad_idx):
    """Pad per-position log-probability rows to length `msl`.

    Padded positions hold the log of a uniform distribution over
    the vocabulary, log(1/V). `pad_idx` is unused here.
    """
    num = len(ts_lss)
    V = ts_lss[0].shape[1]
    target_tensor = (torch.ones((num, msl, V),
                                dtype=torch.float) * (1 / V)).to("cpu")
    target_tensor = torch.log(target_tensor)
    for i, ts in enumerate(ts_lss):
        end_idx = min(msl, len(ts))
        target_tensor[i, :end_idx] = ts[:end_idx]
    return target_tensor
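
# Example (toy vocabulary of size 4; real inputs would be model
# log-probabilities):
#   rows = [torch.log_softmax(torch.randn(3, 4), dim=-1)]
#   my_padding_logits(rows, msl=5, pad_idx=0).shape
#   -> torch.Size([1, 5, 4]); positions 3 and 4 hold log(1/4)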


def my_padding_logit(ts_lss: List[torch.Tensor], msl, pad_idx):
    """Pad per-position scalar probabilities to length `msl`.

    Padded positions hold the uniform probability 1/V for a
    hard-coded vocabulary size V. `pad_idx` is unused here.
    """
    num = len(ts_lss)
    V = 25600  # hard-coded vocabulary size
    target_tensor = (torch.ones((num, msl),
                                dtype=torch.float) * (1 / V)).to("cpu")
    for i, ts in enumerate(ts_lss):
        end_idx = min(msl, len(ts))
        target_tensor[i, :end_idx] = ts[:end_idx]
    return target_tensor
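
# Example (the trailing positions are filled with 1/25600):
#   probs = [torch.tensor([0.9, 0.1])]
#   my_padding_logit(probs, msl=4, pad_idx=0)
#   -> tensor of shape (1, 4): [0.9, 0.1, 1/25600, 1/25600]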


def random_shut(p_ls):
    """Truncate each sequence to a uniformly random prefix.

    The cut index is drawn from [0, len(x)] inclusive, so a sequence
    may be kept whole or emptied entirely.
    """
    newpls = []
    for x in p_ls:
        lens = len(x)
        rand_shut_idx = random.randint(0, lens)
        newx = x[:rand_shut_idx]
        newpls.append(newx)
    return newpls


# running entry
if __name__ == "__main__":
    p_ls = [
        torch.tensor([1, 2, 3, 4, 5], dtype=torch.long),
        torch.tensor([1, 2, 3, 4], dtype=torch.long),
        torch.tensor([1, 2, 3, 4, 5, 6], dtype=torch.long),
        torch.tensor([5, 5, 5, 5, 5], dtype=torch.long),
    ]
print(random_shut(p_ls))
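    # Hedged demos of the padding helpers; pad id 0, msl=8, and the
    # 3-token "prompts" are illustrative choices, not values fixed
    # by this module.
    prompts = [x[:3] for x in p_ls]
    padded, mask = my_padding(p_ls, prompts, msl=8, pad_idx=0)
    print(padded)
    print(mask)
    print(left_pad(p_ls, 0))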
print("EVERYTHING DONE.")