From 666fa78e2ad19c7d7cbd7aed372cbaa49d4395da Mon Sep 17 00:00:00 2001
From: Ed Summers <ehs@pobox.com>
Date: Wed, 7 Oct 2015 13:16:15 -0400
Subject: [PATCH] fixes #80

---
 README.md                              | 13 +++++++++++++
 setup.py                               |  4 ++--
 utils/{archive.py => twarc-archive.py} |  9 ++++-----
 3 files changed, 19 insertions(+), 7 deletions(-)
 rename utils/{archive.py => twarc-archive.py} (93%)

diff --git a/README.md b/README.md
index 7f0ef66e..de0e572a 100644
--- a/README.md
+++ b/README.md
@@ -101,6 +101,19 @@ fetch the full JSON for each tweet and write it to stdout as line-oriented JSON:
 
     twarc.py --hydrate ids.txt > tweets.json
 
+## Archive
+
+In addition to `twarc.py`, when you install twarc you will also get a
+`twarc-archive.py` command line tool. This uses twarc as a library to
+periodically collect data matching a particular search query. It's useful if you
+don't necessarily want to collect tweets as they happen with the streaming
+API, and are content to run it periodically (perhaps every day from cron) to
+collect what you can. The script will keep the files organized, and is smart
+enough to use the most recent file to determine when it can stop collecting so
+there are no duplicates.
+
+    twarc-archive.py ferguson /mnt/tweets/ferguson
+
 ## Use as a Library
 
 If you want you can use twarc programatically as a library to collect
diff --git a/setup.py b/setup.py
index a9be4e5e..7aabb086 100644
--- a/setup.py
+++ b/setup.py
@@ -28,12 +28,12 @@ def run(self):
 
 setup(
     name='twarc',
-    version='0.3.3',
+    version='0.3.4',
     url='http://github.com/edsu/twarc',
     author='Ed Summers',
     author_email='ehs@pobox.com',
     py_modules=['twarc', ],
-    scripts=['twarc.py'],
+    scripts=['twarc.py', 'utils/twarc-archive.py'],
     description='command line utility to archive Twitter search results as line-oriented-json',
     cmdclass={'test': PyTest},
     install_requires=dependencies,
diff --git a/utils/archive.py b/utils/twarc-archive.py
similarity index 93%
rename from utils/archive.py
rename to utils/twarc-archive.py
index b819f85f..1455cc31 100755
--- a/utils/archive.py
+++ b/utils/twarc-archive.py
@@ -8,7 +8,7 @@
 So for example if you want to search for tweets mentioning "ferguson" you can 
 run it:
 
-    ./archive.py ferguson /mnt/tweets/ferguson
+    % twarc-archive.py ferguson /mnt/tweets/ferguson
 
 The first time you run this it will search twitter for tweets matching 
 "ferguson" and write them to a file:
@@ -17,16 +17,15 @@
 
 When you run the exact same command again:
 
-    ./archive.py ferguson /mnt/tweets/ferguson
+    % twarc-archive.py ferguson /mnt/tweets/ferguson
 
 it will get the first tweet id in tweets-0001.json and use it to write another 
 file which includes any new tweets since that tweet:
 
     /mnt/tweets/ferguson/tweets-0002.json
 
-This functionality was initially part of twarc.py itself (not in a utility).
-If it proves useful perhaps it can go back in. But for now twarc.py writes
-to stdout to let you manage your data the way you want to.
+This functionality was initially part of twarc.py itself, but has been split out
+into a separate utility.
 
 """