Skip to content

Commit

Permalink
Add test
Browse files Browse the repository at this point in the history
  • Loading branch information
awdeorio committed Jan 29, 2024
1 parent 273758d commit f595c1a
Show file tree
Hide file tree
Showing 8 changed files with 64 additions and 0 deletions.
18 changes: 18 additions & 0 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,21 @@ def test_ignores_subdirs(tmpdir):
TESTDATA_DIR/"word_count/correct/output",
tmpdir/"output",
)


def test_input_path_spaces(tmpdir):
    """Run a simple MapReduce job whose paths contain spaces.

    The input directory, the map executable and the reduce executable all
    live under a fixture directory with spaces in its name, and each of
    their own names contains a space as well.  The job output is compared
    against the expected output of the regular word count fixture.
    """
    # Every path handed to the job sits below this space-containing directory.
    spaced_dir = TESTDATA_DIR/"word_count SPACE"
    with tmpdir.as_cwd():
        madoop.mapreduce(
            input_path=spaced_dir/"input SPACE",
            output_dir="output",
            map_exe=spaced_dir/"map SPACE.py",
            reduce_exe=spaced_dir/"reduce SPACE.py",
            num_reducers=4,
        )
    utils.assert_dirs_eq(
        TESTDATA_DIR/"word_count/correct/output",
        tmpdir/"output",
    )
1 change: 1 addition & 0 deletions tests/testdata/word_count SPACE/correct/output/part-00000
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Goodbye 1
3 changes: 3 additions & 0 deletions tests/testdata/word_count SPACE/correct/output/part-00001
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Bye 1
Hadoop 2
World 2
1 change: 1 addition & 0 deletions tests/testdata/word_count SPACE/correct/output/part-00002
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Hello 2
2 changes: 2 additions & 0 deletions tests/testdata/word_count SPACE/input SPACE/input 01.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Hello World
Bye World
2 changes: 2 additions & 0 deletions tests/testdata/word_count SPACE/input SPACE/input 02.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Hello Hadoop
Goodbye Hadoop
9 changes: 9 additions & 0 deletions tests/testdata/word_count SPACE/map SPACE.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/usr/bin/env python3
"""Word count mapper."""
import sys


# Emit one "<word><TAB>1" pair for every whitespace-separated token on stdin.
for record in sys.stdin:
    for token in record.split():
        print(f"{token}\t1")
28 changes: 28 additions & 0 deletions tests/testdata/word_count SPACE/reduce SPACE.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/usr/bin/env python3
"""Word count reducer."""
import sys
import itertools


def main():
    """Group consecutive stdin lines by key and reduce each group.

    Lines are grouped with ``itertools.groupby``, which only merges
    adjacent lines sharing a key, so the input is expected to be sorted.
    """
    grouped_lines = itertools.groupby(sys.stdin, keyfunc)
    for group_key, group_lines in grouped_lines:
        reduce_one_group(group_key, group_lines)


def keyfunc(line):
    """Extract the key, i.e. everything before the first TAB in *line*.

    When the line contains no TAB, the whole line is returned.
    """
    key, _sep, _value = line.partition("\t")
    return key


def reduce_one_group(key, group):
    """Sum the counts of one group of lines and print the key with its total.

    Each line in *group* is a TAB-delimited key-value pair; the text after
    the first TAB is parsed as an integer count.
    """
    total = sum(int(entry.partition("\t")[2]) for entry in group)
    print(f"{key} {total}")


# Run the reducer only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

0 comments on commit f595c1a

Please sign in to comment.