Patch (#499)
* upgrade to the new lac interface

* remove jieba

* add init_checkpoint for seq_cls demo

* back to jieba
Meiyim authored Jun 18, 2020
1 parent 77d5d10 commit 9b6c683
Showing 7 changed files with 10 additions and 8 deletions.
demo/finetune_classifier.py: 1 change, 0 additions & 1 deletion
@@ -25,7 +25,6 @@
 import numpy as np
 import multiprocessing
 import tempfile
-import jieba
 import re
 
 import paddle
demo/finetune_classifier_dygraph.py: 6 changes, 6 additions & 0 deletions
@@ -58,6 +58,7 @@
 parser.add_argument('--save_dir', type=str, default=None, help='model output directory')
 parser.add_argument('--max_steps', type=int, default=None, help='max_train_steps, set this to EPOCH * NUM_SAMPLES / BATCH_SIZE')
 parser.add_argument('--wd', type=float, default=0.01, help='weight decay, aka L2 regularizer')
+parser.add_argument('--init_checkpoint', type=str, default=None, help='checkpoint to warm start from')
 
 
 args = parser.parse_args()
@@ -103,6 +104,11 @@ def map_fn(seg_a, seg_b, label):
 with FD.guard(place):
     model = ErnieModelForSequenceClassification.from_pretrained(args.from_pretrained, num_labels=3, name='')
 
+    if args.init_checkpoint is not None:
+        log.info('loading checkpoint from %s' % args.init_checkpoint)
+        sd, _ = FD.load_dygraph(args.init_checkpoint)
+        model.set_dict(sd)
+
     g_clip = F.clip.GradientClipByGlobalNorm(1.0) #experimental
     if args.use_lr_decay:
         opt = AdamW(learning_rate=LinearDecay(args.lr, int(args.warmup_proportion * args.max_steps), args.max_steps), parameter_list=model.parameters(), weight_decay=args.wd, grad_clip=g_clip)
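The new --init_checkpoint flag wires a warm-start step into the dygraph demo. Below is a minimal sketch of the same pattern in isolation, assuming a Paddle 1.x dygraph environment where the checkpoint was written by FD.save_dygraph; the warm_start helper name is illustrative and not part of the repo:

    import paddle.fluid.dygraph as FD   # aliased the same way as in the demo

    def warm_start(model, init_checkpoint):
        # load_dygraph returns (parameter state dict, optimizer state dict);
        # only the parameter dict is needed to warm start the model
        sd, _ = FD.load_dygraph(init_checkpoint)
        model.set_dict(sd)
        return model

With that in place, passing --init_checkpoint with a checkpoint path restores previously saved weights before fine-tuning resumes; omitting the flag keeps the plain from_pretrained initialization.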
demo/pretrain/make_pretrain_data.py: 1 change, 0 additions & 1 deletion
@@ -1,7 +1,6 @@
 import sys
 import argparse
 import struct
-#import jieba
 import random as r
 import re
 import gzip
demo/pretrain/pretrain.py: 1 change, 0 additions & 1 deletion
@@ -44,7 +44,6 @@
 import propeller.paddle as propeller
 from propeller.paddle.data import Dataset
 
-#import jieba
 from propeller import log
 
 log.setLevel(logging.DEBUG)
demo/pretrain/pretrain_dygraph.py: 1 change, 0 additions & 1 deletion
@@ -45,7 +45,6 @@
 import propeller.paddle as propeller
 from propeller.paddle.data import Dataset
 
-#import jieba
 from propeller import log
 
 log.setLevel(logging.DEBUG)
ernie/tokenizing_ernie.py: 6 changes, 3 additions & 3 deletions
@@ -222,14 +222,14 @@ def from_pretrained(cls, pretrain_dir_or_url, force_download=False, **kwargs):
     def __init__(self, vocab, sp_model_path, **kwargs):
         super(ErnieTinyTokenizer, self).__init__(vocab, **kwargs)
         import sentencepiece as spm
+        import jieba as jb
         self.sp_model = spm.SentencePieceProcessor()
         self.window_size = 5
         self.sp_model.Load(sp_model_path)
-        from LAC import LAC
-        self.lac = LAC()
+        self.jb = jb
 
     def cut(self, sentence):
-        return self.lac.lexer(sentence)
+        return self.jb.cut(sentence)
 
     def tokenize(self, text):
         if len(text) == 0:
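ErnieTinyTokenizer.cut now delegates to jieba instead of the LAC lexer. A quick illustration of what that call yields, assuming the jieba==0.39 pin from requirements.txt; the sample sentence is arbitrary:

    import jieba

    # jieba.cut returns a generator of surface word strings,
    # which downstream tokenization can iterate over
    words = list(jieba.cut('百度提供的预训练模型'))
    print(words)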
requirements.txt: 2 changes, 1 addition & 1 deletion
@@ -3,4 +3,4 @@ pyzmq==18.0.2
 six==1.11.0
 sklearn==0.0
 sentencepiece==0.1.8
-LAC
+jieba==0.39
