Install Question-Generation via pip rather than cloning #34

Open · wants to merge 12 commits into base `master`
35 changes: 35 additions & 0 deletions .github/workflows/python-publish.yml
@@ -0,0 +1,35 @@
# This workflow will upload a Python Package using Twine when a release is created
# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries

name: Upload Python Package

on:
# release:
# types: [created]
push:
branches: [ master ]

jobs:
deploy:

runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v2
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: '3.x'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install setuptools wheel twine
pip install -r requirements.txt
- name: Build
run: |
python setup.py sdist bdist_wheel
- name: Archive Distribution Files
uses: actions/upload-artifact@v1
with:
name: wheel
path: ./dist/question_generation-**.whl
18 changes: 8 additions & 10 deletions README.md
@@ -131,9 +131,7 @@ The [nlg-eval](https://github.com/Maluuba/nlg-eval) package is used for calculating

## Requirements
```
transformers==3.0.0
nltk
nlp==0.2.0 # only if you want to fine-tune.
python -m pip install git+https://github.com/patil-suraj/question_generation.git
```

after installing `nltk` do
@@ -154,7 +152,7 @@ The pipeline is divided into 3 tasks
#### Question Generation

```python3
from pipelines import pipeline
from question_generation import pipeline

nlp = pipeline("question-generation")
nlp("42 is the answer to life, the universe and everything.")
@@ -224,7 +222,7 @@ The datasets will be saved in `data/` directory. You should provide filenames using

**process data for single task question generation with highlight_qg_format**
```bash
python prepare_data.py \
python -m question_generation.prepare_data \
--task qg \
--model_type t5 \
--dataset_path data/squad_multitask/ \
@@ -240,7 +238,7 @@ python prepare_data.py \
The `valid_for_qg_only` argument decides whether the validation set should contain data for the qg task only. For my multi-task experiments I used validation data with only the qg task so that the eval loss curve can be easily compared with other single-task models

```bash
python prepare_data.py \
python -m question_generation.prepare_data \
--task multi \
--valid_for_qg_only \
--model_type t5 \
@@ -254,7 +252,7 @@ python prepare_data.py \

**process dataset for end-to-end question generation**
```bash
python prepare_data.py \
python -m question_generation.prepare_data \
--task e2e_qg \
--valid_for_qg_only \
--model_type t5 \
@@ -271,7 +269,7 @@ Use the `run_qg.py` script to start training. It uses transformers `Trainer` class


```bash
python run_qg.py \
python -m question_generation.run_qg \
--model_name_or_path t5-small \
--model_type t5 \
--tokenizer_name_or_path t5_qg_tokenizer \
@@ -293,7 +291,7 @@ python run_qg.py \
or, if you want to start training from a script or notebook:

```python3
from run_qg import run_qg
from question_generation import run_qg

args_dict = {
"model_name_or_path": "t5-small",
@@ -323,7 +321,7 @@ run_qg(args_dict)
Use the `eval.py` script for evaluating the model.

```bash
python eval.py \
python -m question_generation.eval \
--model_name_or_path t5-base-qg-hl \
--valid_file_path valid_data_qg_hl_t5.pt \
--model_type t5 \
4 changes: 4 additions & 0 deletions question_generation/__init__.py
@@ -0,0 +1,4 @@
from .pipelines import pipeline
from .run_qg import run_qg

__version__ = "0.1.0"
File renamed without changes.
2 changes: 1 addition & 1 deletion eval.py → question_generation/eval.py
@@ -6,7 +6,7 @@
from tqdm.auto import tqdm
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, HfArgumentParser

from data_collator import T2TDataCollator
from question_generation.data_collator import T2TDataCollator

device = 'cuda' if torch.cuda.is_available else 'cpu'

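One pre-existing detail in the context line above is worth flagging: `torch.cuda.is_available` is referenced without parentheses, and any function object is truthy, so the conditional always selects `'cuda'`. A minimal sketch of the pitfall and the fix, using a stand-in function so no torch install is needed:

```python
# The original line: device = 'cuda' if torch.cuda.is_available else 'cpu'
# The method is referenced, not called; a function object is always truthy.

def is_available():
    return False  # pretend no GPU is present

buggy = 'cuda' if is_available else 'cpu'    # function object -> truthy
fixed = 'cuda' if is_available() else 'cpu'  # call it to get the bool

print(buggy)  # 'cuda' even though is_available() is False
print(fixed)  # 'cpu'
```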
File renamed without changes.
File renamed without changes.
12 changes: 4 additions & 8 deletions run_qg.py → question_generation/run_qg.py
@@ -1,28 +1,24 @@
import dataclasses
import json
import logging
import os
import sys
from dataclasses import dataclass, field
from typing import Dict, List, Optional
from typing import Optional

import numpy as np
import torch

from transformers import (
AutoModelForSeq2SeqLM,
AutoTokenizer,
T5Tokenizer,
BartTokenizer,
HfArgumentParser,
DataCollator,
TrainingArguments,
set_seed,
)

from trainer import Trainer
from data_collator import T2TDataCollator
from utils import freeze_embeds, assert_not_all_frozen
from question_generation.trainer import Trainer
from question_generation.data_collator import T2TDataCollator
from question_generation.utils import freeze_embeds, assert_not_all_frozen

MODEL_TYPE_TO_TOKENIZER = {
"t5": T5Tokenizer,
2 changes: 1 addition & 1 deletion trainer.py → question_generation/trainer.py
@@ -9,7 +9,7 @@
if is_apex_available():
from apex import amp

from utils import label_smoothed_nll_loss
from question_generation.utils import label_smoothed_nll_loss

class Trainer(HFTrainer):
def __init__(self, label_smoothing: float = 0, **kwargs):
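For readers unfamiliar with the `label_smoothing` argument above, here is a minimal sketch of the usual label-smoothed NLL formulation (an assumption about what `label_smoothed_nll_loss` computes, in the style of the fairseq implementation, not the repository's exact code):

```python
import math

# Log-probabilities for a 3-class example; class 0 is the gold label.
log_probs = [math.log(0.7), math.log(0.2), math.log(0.1)]
target = 0
eps = 0.1  # label_smoothing

nll = -log_probs[target]                   # standard NLL on the gold class
smooth = -sum(log_probs) / len(log_probs)  # uniform loss over all classes

# Blend the two: most of the weight on the gold class, eps spread uniformly.
loss = (1 - eps) * nll + eps * smooth
print(round(loss, 4))  # 0.4633
```

With `eps = 0` this reduces to plain NLL, matching the `label_smoothing: float = 0` default above.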
File renamed without changes.
4 changes: 4 additions & 0 deletions requirements.txt
@@ -0,0 +1,4 @@
transformers>=3.0.0
nltk
nlp>=0.2.0
torch
8 changes: 8 additions & 0 deletions setup.cfg
@@ -0,0 +1,8 @@
[metadata]
license = MIT
license-file = LICENSE
description-file = README.md
platform = any

[bdist_wheel]
universal = 1
33 changes: 33 additions & 0 deletions setup.py
@@ -0,0 +1,33 @@
from setuptools import setup, find_packages

from question_generation import __version__

with open("README.md", "r") as f:
long_description = f.read()

setup(
name="question_generation",
packages=find_packages(),
version=__version__,
url="https://github.com/patil-suraj/question_generation",
license="MIT",
author="Suraj Patil",
author_email="[email protected]",
description="Question generation is the task of automatically generating questions from a text paragraph.",
install_requires=["transformers>=3.0.0", "nltk", "nlp>=0.2.0", "torch"],
python_requires=">=3.6",
include_package_data=True,
platforms="any",
long_description=long_description,
long_description_content_type="text/markdown",
classifiers=[
"Operating System :: OS Independent",
"License :: OSI Approved :: MIT License",
"Topic :: Utilities",
"Intended Audience :: Developers",
"Topic :: Software Development :: Libraries :: Python Modules",
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
],
)
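One caveat about `from question_generation import __version__` in the setup.py above: importing the package executes `__init__.py`, which imports `pipelines` and therefore `transformers`, so building from a clean environment can fail before the dependencies are installed. A common dependency-free alternative (a sketch, not the repository's code) parses the version string out of the file text instead:

```python
import re

# Parse __version__ out of the package's __init__.py without importing it.
# (The file contents are inlined as a string here to keep the example
# self-contained; setup.py would read question_generation/__init__.py.)
init_text = """\
from .pipelines import pipeline
from .run_qg import run_qg

__version__ = "0.1.0"
"""

match = re.search(r'__version__\s*=\s*["\']([^"\']+)["\']', init_text)
version = match.group(1)
print(version)  # 0.1.0
```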