From 39f5b2915e1a61f0c696655249e46d4c94f834f2 Mon Sep 17 00:00:00 2001
From: Fahad Ali Shaikh
Date: Thu, 17 Sep 2020 16:24:52 +0530
Subject: [PATCH 01/11] done changes to get the library install from pip using git url

---
 README.md | 16 +++++++---------
 question_generation/__init__.py | 1 +
 .../data_collator.py | 0
 eval.py => question_generation/eval.py | 2 +-
 pipelines.py => question_generation/pipelines.py | 0
 .../prepare_data.py | 0
 run_qg.py => question_generation/run_qg.py | 12 ++++--------
 trainer.py => question_generation/trainer.py | 2 +-
 utils.py => question_generation/utils.py | 0
 9 files changed, 14 insertions(+), 19 deletions(-)
 create mode 100644 question_generation/__init__.py
 rename data_collator.py => question_generation/data_collator.py (100%)
 rename eval.py => question_generation/eval.py (97%)
 rename pipelines.py => question_generation/pipelines.py (100%)
 rename prepare_data.py => question_generation/prepare_data.py (100%)
 rename run_qg.py => question_generation/run_qg.py (96%)
 rename trainer.py => question_generation/trainer.py (96%)
 rename utils.py => question_generation/utils.py (100%)

diff --git a/README.md b/README.md
index 9b90eb7..c3b909a 100644
--- a/README.md
+++ b/README.md
@@ -131,9 +131,7 @@ The [nlg-eval](https://github.com/Maluuba/nlg-eval) package is used for calculat
 
 ## Requirements
 ```
-transformers==3.0.0
-nltk
-nlp==0.2.0 # only if you want to fine-tune.
+python -m pip install https://github.com/patil-suraj/question_generation.git
 ```
 
 after installing `nltk` do
@@ -154,7 +152,7 @@ The pipeline is divided into 3 tasks
 #### Question Generation
 
 ```python3
-from pipelines import pipeline
+from question_generation import pipeline
 
 nlp = pipeline("question-generation")
 nlp("42 is the answer to life, the universe and everything.")
@@ -224,7 +222,7 @@ The datasets will be saved in `data/` directory. You should provide filenames us
 **process data for single task question generation with highlight_qg_format**
 
 ```bash
-python prepare_data.py \
+python question_generation.prepare_data.py \
     --task qg \
     --model_type t5 \
     --dataset_path data/squad_multitask/ \
@@ -240,7 +238,7 @@
 `valid_for_qg_only` argument is used to decide if the validation set should only contain data for qg task. For my multi-task experiments I used validation data with only qg task so that the eval loss curve can be easly compared with other single task models
 
 ```bash
-python prepare_data.py \
+python question_generation.prepare_data.py \
     --task multi \
     --valid_for_qg_only \
     --model_type t5 \
@@ -254,7 +252,7 @@
 **process dataset for end-to-end question generation**
 
 ```bash
-python prepare_data.py \
+python question_generation.prepare_data.py \
     --task e2e_qg \
     --valid_for_qg_only \
     --model_type t5 \
@@ -271,7 +269,7 @@ Use the `run_qg.py` script to start training. It uses transformers `Trainer` cl
 
 
 ```bash
-python run_qg.py \
+python question_generation.run_qg.py \
     --model_name_or_path t5-small \
     --model_type t5 \
     --tokenizer_name_or_path t5_qg_tokenizer \
@@ -323,7 +321,7 @@ run_qg(args_dict)
 Use the `eval.py` script for evaluting the model.
 
 ```bash
-python eval.py \
+python question_generation.eval.py \
     --model_name_or_path t5-base-qg-hl \
     --valid_file_path valid_data_qg_hl_t5.pt \
     --model_type t5 \
diff --git a/question_generation/__init__.py b/question_generation/__init__.py
new file mode 100644
index 0000000..d6c0bd4
--- /dev/null
+++ b/question_generation/__init__.py
@@ -0,0 +1 @@
+from .pipelines import pipeline
\ No newline at end of file
diff --git a/data_collator.py b/question_generation/data_collator.py
similarity index 100%
rename from data_collator.py
rename to question_generation/data_collator.py
diff --git a/eval.py b/question_generation/eval.py
similarity index 97%
rename from eval.py
rename to question_generation/eval.py
index 4b59c6e..bce408a 100644
--- a/eval.py
+++ b/question_generation/eval.py
@@ -6,7 +6,7 @@
 from tqdm.auto import tqdm
 
 from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, HfArgumentParser
 
-from data_collator import T2TDataCollator
+from question_generation.data_collator import T2TDataCollator
 
 device = 'cuda' if torch.cuda.is_available else 'cpu'
diff --git a/pipelines.py b/question_generation/pipelines.py
similarity index 100%
rename from pipelines.py
rename to question_generation/pipelines.py
diff --git a/prepare_data.py b/question_generation/prepare_data.py
similarity index 100%
rename from prepare_data.py
rename to question_generation/prepare_data.py
diff --git a/run_qg.py b/question_generation/run_qg.py
similarity index 96%
rename from run_qg.py
rename to question_generation/run_qg.py
index 20b8abe..f52da4b 100644
--- a/run_qg.py
+++ b/question_generation/run_qg.py
@@ -1,28 +1,24 @@
-import dataclasses
 import json
 import logging
 import os
 import sys
 
 from dataclasses import dataclass, field
-from typing import Dict, List, Optional
+from typing import Optional
 
-import numpy as np
 import torch
 from transformers import (
     AutoModelForSeq2SeqLM,
-    AutoTokenizer,
     T5Tokenizer,
     BartTokenizer,
     HfArgumentParser,
-    DataCollator,
     TrainingArguments,
     set_seed,
 )
 
-from trainer import Trainer
-from data_collator import T2TDataCollator
-from utils import freeze_embeds, assert_not_all_frozen
+from question_generation.trainer import Trainer
+from question_generation.data_collator import T2TDataCollator
+from question_generation.utils import freeze_embeds, assert_not_all_frozen
 
 MODEL_TYPE_TO_TOKENIZER = {
     "t5": T5Tokenizer,
diff --git a/trainer.py b/question_generation/trainer.py
similarity index 96%
rename from trainer.py
rename to question_generation/trainer.py
index 29612d8..67711a6 100644
--- a/trainer.py
+++ b/question_generation/trainer.py
@@ -9,7 +9,7 @@
 if is_apex_available():
     from apex import amp
 
-from utils import label_smoothed_nll_loss
+from question_generation.utils import label_smoothed_nll_loss
 
 class Trainer(HFTrainer):
     def __init__(self, label_smoothing: float = 0, **kwargs):
diff --git a/utils.py b/question_generation/utils.py
similarity index 100%
rename from utils.py
rename to question_generation/utils.py
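
The net effect of this first patch is that the former top-level modules now live inside a `question_generation` package, with `pipeline` re-exported from `question_generation/__init__.py`. A minimal usage sketch of the new import path (the task name and example sentence are the ones from the README diff above; it assumes the package is installed and the default models the pipeline downloads are reachable):

```python3
# Minimal sketch of the import path introduced by patch 01: `pipeline` is
# re-exported from question_generation/__init__.py, so callers no longer need
# to import the top-level pipelines module directly.
from question_generation import pipeline

nlp = pipeline("question-generation")  # single-task QG pipeline from the README
print(nlp("42 is the answer to life, the universe and everything."))
```
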
From 54b79c935e89226bef4afd9b3a6ff498b137050f Mon Sep 17 00:00:00 2001
From: Fahad Ali Shaikh
Date: Thu, 17 Sep 2020 16:34:40 +0530
Subject: [PATCH 02/11] added setup files

---
 README.md | 2 +-
 requirements.txt | 3 +++
 setup.cfg | 8 ++++++++
 setup.py | 33 +++++++++++++++++++++++++++++++++
 4 files changed, 45 insertions(+), 1 deletion(-)
 create mode 100644 requirements.txt
 create mode 100644 setup.cfg
 create mode 100644 setup.py

diff --git a/README.md b/README.md
index c3b909a..fe49857 100644
--- a/README.md
+++ b/README.md
@@ -131,7 +131,7 @@ The [nlg-eval](https://github.com/Maluuba/nlg-eval) package is used for calculat
 
 ## Requirements
 ```
-python -m pip install https://github.com/patil-suraj/question_generation.git
+python -m pip install git+https://github.com/patil-suraj/question_generation.git
 ```
 
 after installing `nltk` do
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..ba0da64
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+transformers>=3.0.0
+nltk
+nlp>=0.2.0
\ No newline at end of file
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..a253057
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,8 @@
+[metadata]
+license = MIT
+license-file = LICENSE
+description-file = README.md
+platform = any
+
+[bdist_wheel]
+universal = 1
\ No newline at end of file
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..a43d184
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,33 @@
+from setuptools import setup, find_packages
+
+from smart_config import __version__
+
+with open("README.md", "r") as f:
+    long_description = f.read()
+
+setup(
+    name="question_generation",
+    packages=find_packages(),
+    version=__version__,
+    url="https://github.com/patil-suraj/question_generation",
+    license="MIT",
+    author="Suraj Patil",
+    author_email="surajp815@gmail.com",
+    description="Question generation is the task of automatically generating questions from a text paragraph.",
+    install_requires=["transformers>=3.0.0", "nltk", "nlp>=0.2.0"],
+    python_requires=">=3.6",
+    include_package_data=True,
+    platforms="any",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    classifiers=[
+        "Operating System :: OS Independent",
+        "License :: OSI Approved :: MIT License",
+        "Topic :: Utilities",
+        "Intended Audience :: Developers",
+        "Topic :: Software Development :: Libraries :: Python Modules",
+        "Programming Language :: Python :: 3.6",
+        "Programming Language :: Python :: 3.7",
+        "Programming Language :: Python :: 3.8",
+    ],
+)
\ No newline at end of file

From af63982d0bfab7e07a0b7459557e3c36428d18eb Mon Sep 17 00:00:00 2001
From: Fahad Ali Shaikh
Date: Thu, 17 Sep 2020 16:47:50 +0530
Subject: [PATCH 03/11] done document fixes and minor changes

---
 README.md | 12 ++++++------
 question_generation/__init__.py | 5 ++++-
 setup.py | 2 +-
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index fe49857..515801e 100644
--- a/README.md
+++ b/README.md
@@ -222,7 +222,7 @@ The datasets will be saved in `data/` directory. You should provide filenames us
 **process data for single task question generation with highlight_qg_format**
 
 ```bash
-python question_generation.prepare_data.py \
+python -m question_generation.prepare_data.py \
     --task qg \
     --model_type t5 \
     --dataset_path data/squad_multitask/ \
@@ -238,7 +238,7 @@
 `valid_for_qg_only` argument is used to decide if the validation set should only contain data for qg task. For my multi-task experiments I used validation data with only qg task so that the eval loss curve can be easly compared with other single task models
 
 ```bash
-python question_generation.prepare_data.py \
+python -m question_generation.prepare_data.py \
     --task multi \
     --valid_for_qg_only \
     --model_type t5 \
@@ -252,7 +252,7 @@
 **process dataset for end-to-end question generation**
 
 ```bash
-python question_generation.prepare_data.py \
+python -m question_generation.prepare_data.py \
     --task e2e_qg \
     --valid_for_qg_only \
     --model_type t5 \
@@ -269,7 +269,7 @@ Use the `run_qg.py` script to start training. It uses transformers `Trainer` cl
 
 
 ```bash
-python question_generation.run_qg.py \
+python -m question_generation.run_qg.py \
     --model_name_or_path t5-small \
     --model_type t5 \
     --tokenizer_name_or_path t5_qg_tokenizer \
@@ -291,7 +291,7 @@
 or if you want to train it from script or notebook then
 
 ```python3
-from run_qg import run_qg
+from question_generation import run_qg
 
 args_dict = {
     "model_name_or_path": "t5-small",
@@ -321,7 +321,7 @@ run_qg(args_dict)
 Use the `eval.py` script for evaluting the model.
 
 ```bash
-python question_generation.eval.py \
+python -m question_generation.eval.py \
     --model_name_or_path t5-base-qg-hl \
     --valid_file_path valid_data_qg_hl_t5.pt \
     --model_type t5 \
diff --git a/question_generation/__init__.py b/question_generation/__init__.py
index d6c0bd4..e129ec7 100644
--- a/question_generation/__init__.py
+++ b/question_generation/__init__.py
@@ -1 +1,4 @@
-from .pipelines import pipeline
\ No newline at end of file
+from .pipelines import pipeline
+from .run_qg import run_qg
+
+__version__ = "0.1.0"
diff --git a/setup.py b/setup.py
index a43d184..e76b20d 100644
--- a/setup.py
+++ b/setup.py
@@ -1,6 +1,6 @@
 from setuptools import setup, find_packages
 
-from smart_config import __version__
+from question_generation import __version__
 
 with open("README.md", "r") as f:
     long_description = f.read()
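
After patch 03 the package's public surface is `pipeline`, `run_qg`, and `__version__`, all defined in `question_generation/__init__.py`, and `setup.py` now reads that same `__version__` so the version string lives in one place. A small sketch of what that surface looks like to a caller (the names and the 0.1.0 value are taken from the diff above; nothing else is assumed):

```python3
# Sketch of the package surface after patch 03; every name used here is
# defined or re-exported in question_generation/__init__.py above.
import question_generation

print(question_generation.__version__)   # "0.1.0" in this patch series

qg = question_generation.pipeline("question-generation")  # re-exported pipeline factory
# question_generation.run_qg(args_dict) is the programmatic training entry
# point shown in the README example above.
```

One consequence of single-sourcing the version this way is that `setup.py` imports the package itself, so the package's runtime dependencies have to be importable at the time `setup.py` runs.
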
From ae0bbb0aeb7fe7f63615c0109eee23894fe5079c Mon Sep 17 00:00:00 2001
From: Fahad Ali Shaikh
Date: Mon, 21 Sep 2020 15:20:19 +0530
Subject: [PATCH 04/11] Update requirements.txt

added missing dependency
---
 requirements.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index ba0da64..5ccc3fe 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
 transformers>=3.0.0
 nltk
-nlp>=0.2.0
\ No newline at end of file
+nlp>=0.2.0
+torch

From 3575c5a8620f54a6cff592da48757ac1d983a341 Mon Sep 17 00:00:00 2001
From: Fahad Ali Shaikh
Date: Mon, 21 Sep 2020 15:21:00 +0530
Subject: [PATCH 05/11] Update setup.py

added missing dependency
---
 setup.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index e76b20d..fd619a6 100644
--- a/setup.py
+++ b/setup.py
@@ -14,7 +14,7 @@
     author="Suraj Patil",
     author_email="surajp815@gmail.com",
     description="Question generation is the task of automatically generating questions from a text paragraph.",
-    install_requires=["transformers>=3.0.0", "nltk", "nlp>=0.2.0"],
+    install_requires=["transformers>=3.0.0", "nltk", "nlp>=0.2.0", "torch"],
     python_requires=">=3.6",
     include_package_data=True,
     platforms="any",
@@ -30,4 +30,4 @@
         "Programming Language :: Python :: 3.7",
         "Programming Language :: Python :: 3.8",
     ],
-)
\ No newline at end of file
+)
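
Patches 04 and 05 bring `requirements.txt` and `install_requires` into agreement by adding the missing `torch` dependency. A quick, optional smoke test for the declared runtime dependencies (the module names are exactly the four listed in the diffs; the version attribute is read only if a module exposes one):

```python3
# Hedged sketch: check that the dependencies declared in requirements.txt and
# setup.py after patches 04-05 can actually be imported.
import importlib

for module_name in ("transformers", "nltk", "nlp", "torch"):
    module = importlib.import_module(module_name)
    print(module_name, getattr(module, "__version__", "unknown"))
```
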
From 56f4963f20b19964cf6f496072a5eb35db0c3af6 Mon Sep 17 00:00:00 2001
From: Fahad Ali Shaikh
Date: Mon, 21 Sep 2020 15:21:19 +0530
Subject: [PATCH 06/11] Create python-publish.yml

---
 .github/workflows/python-publish.yml | 33 ++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)
 create mode 100644 .github/workflows/python-publish.yml

diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
new file mode 100644
index 0000000..199343c
--- /dev/null
+++ b/.github/workflows/python-publish.yml
@@ -0,0 +1,33 @@
+# This workflows will upload a Python Package using Twine when a release is created
+# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
+
+name: Upload Python Package
+
+on:
+  release:
+    types: [created]
+
+jobs:
+  deploy:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v2
+    - name: Set up Python
+      uses: actions/setup-python@v2
+      with:
+        python-version: '3.x'
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install setuptools wheel twine
+        pip install -r requirements.txt
+    - name: Build
+      run: |
+        python setup.py sdist bdist_wheel
+    - name: Archive pytest results
+      uses: actions/upload-artifact@v1
+      with:
+        name: distribution
+        path: dist/*

From f104cd2ef7e2d0a52125ef95a4363d005b537dbe Mon Sep 17 00:00:00 2001
From: Fahad Ali Shaikh
Date: Mon, 21 Sep 2020 15:33:37 +0530
Subject: [PATCH 07/11] achive stage name changed

---
 .github/workflows/python-publish.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
index 199343c..35f73a2 100644
--- a/.github/workflows/python-publish.yml
+++ b/.github/workflows/python-publish.yml
@@ -26,7 +26,7 @@ jobs:
     - name: Build
       run: |
         python setup.py sdist bdist_wheel
-    - name: Archive pytest results
+    - name: Archive Distribution Files
      uses: actions/upload-artifact@v1
       with:
         name: distribution

From e710128634b34b9850f9be22004aee8bec9fadee Mon Sep 17 00:00:00 2001
From: Fahad Ali Shaikh
Date: Mon, 21 Sep 2020 16:19:28 +0530
Subject: [PATCH 08/11] done changes in file path

---
 .github/workflows/python-publish.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
index 35f73a2..ac2d547 100644
--- a/.github/workflows/python-publish.yml
+++ b/.github/workflows/python-publish.yml
@@ -30,4 +30,4 @@ jobs:
       uses: actions/upload-artifact@v1
       with:
         name: distribution
-        path: dist/*
+        path: /home/runner/work/question_generation/dist

From fbe7a77f82001fcbcf09a15f75008670c39e8c4a Mon Sep 17 00:00:00 2001
From: Fahad Ali Shaikh
Date: Mon, 21 Sep 2020 16:24:05 +0530
Subject: [PATCH 09/11] done changes in flow

---
 .github/workflows/python-publish.yml | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
index ac2d547..67d5d01 100644
--- a/.github/workflows/python-publish.yml
+++ b/.github/workflows/python-publish.yml
@@ -4,8 +4,10 @@
 name: Upload Python Package
 
 on:
-  release:
-    types: [created]
+# release:
+#   types: [created]
+  push:
+    branches: [ master ]
 
 jobs:
   deploy:

From 0e5c2872e3848a3b4cc37b6237fd112de211f808 Mon Sep 17 00:00:00 2001
From: Fahad Ali Shaikh
Date: Mon, 21 Sep 2020 16:29:42 +0530
Subject: [PATCH 10/11] done changes in dist path

---
 .github/workflows/python-publish.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
index 67d5d01..dfd6532 100644
--- a/.github/workflows/python-publish.yml
+++ b/.github/workflows/python-publish.yml
@@ -32,4 +32,4 @@ jobs:
       uses: actions/upload-artifact@v1
       with:
         name: distribution
-        path: /home/runner/work/question_generation/dist
+        path: ./dist
From bb99426936ee88174c7f2f3959c9832c6732c17c Mon Sep 17 00:00:00 2001
From: Fahad Ali Shaikh
Date: Mon, 21 Sep 2020 17:43:23 +0530
Subject: [PATCH 11/11] updated file

---
 .github/workflows/python-publish.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
index dfd6532..84c96cc 100644
--- a/.github/workflows/python-publish.yml
+++ b/.github/workflows/python-publish.yml
@@ -31,5 +31,5 @@ jobs:
     - name: Archive Distribution Files
       uses: actions/upload-artifact@v1
       with:
-        name: distribution
-        path: ./dist
+        name: wheel
+        path: ./dist/question_generation-**.whl
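
Taken together, patches 06-11 make the workflow build an sdist and a wheel on every push to master and upload only the wheel as a build artifact named `wheel`; no PyPI upload step is configured even though the workflow keeps the template's "Upload Python Package" name. A small sketch for checking an installed copy of that wheel against the metadata declared in `setup.py` (it assumes the wheel has already been installed, for example with pip, and a Python 3.8+ interpreter for `importlib.metadata`):

```python3
# Hedged sketch: confirm the installed distribution matches what setup.py and
# question_generation/__init__.py declare for this patch series.
from importlib.metadata import metadata, version  # stdlib since Python 3.8

print(version("question_generation"))     # expected "0.1.0" for this series
meta = metadata("question_generation")
print(meta["Summary"])                    # the description= string from setup.py
print(meta.get_all("Requires-Dist"))      # transformers, nltk, nlp, torch
```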