diff --git a/%3E %3Cinput%3E %3CintermediateDir%3E %3CoutDir%3E b/%3E %3Cinput%3E %3CintermediateDir%3E %3CoutDir%3E
new file mode 100644
index 0000000..84d951f
--- /dev/null
+++ b/%3E %3Cinput%3E %3CintermediateDir%3E %3CoutDir%3E	
@@ -0,0 +1,231 @@
+
+                   SSUUMMMMAARRYY OOFF LLEESSSS CCOOMMMMAANNDDSS
+
+      Commands marked with * may be preceded by a number, _N.
+      Notes in parentheses indicate the behavior if _N is given.
+
+  h  H                 Display this help.
+  q  :q  Q  :Q  ZZ     Exit.
+ ---------------------------------------------------------------------------
+
+                           MMOOVVIINNGG
+
+  e  ^E  j  ^N  CR  *  Forward  one line   (or _N lines).
+  y  ^Y  k  ^K  ^P  *  Backward one line   (or _N lines).
+  f  ^F  ^V  SPACE  *  Forward  one window (or _N lines).
+  b  ^B  ESC-v      *  Backward one window (or _N lines).
+  z                 *  Forward  one window (and set window to _N).
+  w                 *  Backward one window (and set window to _N).
+  ESC-SPACE         *  Forward  one window, but don't stop at end-of-file.
+  d  ^D             *  Forward  one half-window (and set half-window to _N).
+  u  ^U             *  Backward one half-window (and set half-window to _N).
+  ESC-)  RightArrow *  Left  one half screen width (or _N positions).
+  ESC-(  LeftArrow  *  Right one half screen width (or _N positions).
+  F                    Forward forever; like "tail -f".
+  r  ^R  ^L            Repaint screen.
+  R                    Repaint screen, discarding buffered input.
+        ---------------------------------------------------
+        Default "window" is the screen height.
+        Default "half-window" is half of the screen height.
+ ---------------------------------------------------------------------------
+
+                          SSEEAARRCCHHIINNGG
+
+  /_p_a_t_t_e_r_n          *  Search forward for (_N-th) matching line.
+  ?_p_a_t_t_e_r_n          *  Search backward for (_N-th) matching line.
+  n                 *  Repeat previous search (for _N-th occurrence).
+  N                 *  Repeat previous search in reverse direction.
+  ESC-n             *  Repeat previous search, spanning files.
+  ESC-N             *  Repeat previous search, reverse dir. & spanning files.
+  ESC-u                Undo (toggle) search highlighting.
+  &_p_a_t_t_e_r_n          *  Display only matching lines
+        ---------------------------------------------------
+        Search patterns may be modified by one or more of:
+        ^N or !  Search for NON-matching lines.
+        ^E or *  Search multiple files (pass thru END OF FILE).
+        ^F or @  Start search at FIRST file (for /) or last file (for ?).
+        ^K       Highlight matches, but don't move (KEEP position).
+        ^R       Don't use REGULAR EXPRESSIONS.
+ ---------------------------------------------------------------------------
+
+                           JJUUMMPPIINNGG
+
+  g  <  ESC-<       *  Go to first line in file (or line _N).
+  G  >  ESC->       *  Go to last line in file (or line _N).
+  p  %              *  Go to beginning of file (or _N percent into file).
+  t                 *  Go to the (_N-th) next tag.
+  T                 *  Go to the (_N-th) previous tag.
+  {  (  [           *  Find close bracket } ) ].
+  }  )  ]           *  Find open bracket { ( [.
+  ESC-^F _<_c_1_> _<_c_2_>  *  Find close bracket _<_c_2_>.
+  ESC-^B _<_c_1_> _<_c_2_>  *  Find open bracket _<_c_1_> 
+        ---------------------------------------------------
+        Each "find close bracket" command goes forward to the close bracket 
+          matching the (_N-th) open bracket in the top line.
+        Each "find open bracket" command goes backward to the open bracket 
+          matching the (_N-th) close bracket in the bottom line.
+
+  m_<_l_e_t_t_e_r_>            Mark the current position with <letter>.
+  '_<_l_e_t_t_e_r_>            Go to a previously marked position.
+  ''                   Go to the previous position.
+  ^X^X                 Same as '.
+        ---------------------------------------------------
+        A mark is any upper-case or lower-case letter.
+        Certain marks are predefined:
+             ^  means  beginning of the file
+             $  means  end of the file
+ ---------------------------------------------------------------------------
+
+                        CCHHAANNGGIINNGG FFIILLEESS
+
+  :e [_f_i_l_e]            Examine a new file.
+  ^X^V                 Same as :e.
+  :n                *  Examine the (_N-th) next file from the command line.
+  :p                *  Examine the (_N-th) previous file from the command line.
+  :x                *  Examine the first (or _N-th) file from the command line.
+  :d                   Delete the current file from the command line list.
+  =  ^G  :f            Print current file name.
+ ---------------------------------------------------------------------------
+
+                    MMIISSCCEELLLLAANNEEOOUUSS CCOOMMMMAANNDDSS
+
+  -_<_f_l_a_g_>              Toggle a command line option [see OPTIONS below].
+  --_<_n_a_m_e_>             Toggle a command line option, by name.
+  __<_f_l_a_g_>              Display the setting of a command line option.
+  ___<_n_a_m_e_>             Display the setting of an option, by name.
+  +_c_m_d                 Execute the less cmd each time a new file is examined.
+
+  !_c_o_m_m_a_n_d             Execute the shell command with $SHELL.
+  |XX_c_o_m_m_a_n_d            Pipe file between current pos & mark XX to shell command.
+  v                    Edit the current file with $VISUAL or $EDITOR.
+  V                    Print version number of "less".
+ ---------------------------------------------------------------------------
+
+                           OOPPTTIIOONNSS
+
+        Most options may be changed either on the command line,
+        or from within less by using the - or -- command.
+        Options may be given in one of two forms: either a single
+        character preceded by a -, or a name preceded by --.
+
+  -?  ........  --help
+                  Display help (from command line).
+  -a  ........  --search-skip-screen
+                  Forward search skips current screen.
+  -A  ........  --SEARCH-SKIP-SCREEN
+                  Forward search always skips target line.
+  -b [_N]  ....  --buffers=[_N]
+                  Number of buffers.
+  -B  ........  --auto-buffers
+                  Don't automatically allocate buffers for pipes.
+  -c  ........  --clear-screen
+                  Repaint by clearing rather than scrolling.
+  -d  ........  --dumb
+                  Dumb terminal.
+  -D [_x_n_._n]  .  --color=_x_n_._n
+                  Set screen colors. (MS-DOS only)
+  -e  -E  ....  --quit-at-eof  --QUIT-AT-EOF
+                  Quit at end of file.
+  -f  ........  --force
+                  Force open non-regular files.
+  -F  ........  --quit-if-one-screen
+                  Quit if entire file fits on first screen.
+  -g  ........  --hilite-search
+                  Highlight only last match for searches.
+  -G  ........  --HILITE-SEARCH
+                  Don't highlight any matches for searches.
+  -h [_N]  ....  --max-back-scroll=[_N]
+                  Backward scroll limit.
+  -i  ........  --ignore-case
+                  Ignore case in searches that do not contain uppercase.
+  -I  ........  --IGNORE-CASE
+                  Ignore case in all searches.
+  -j [_N]  ....  --jump-target=[_N]
+                  Screen position of target lines.
+  -J  ........  --status-column
+                  Display a status column at left edge of screen.
+  -k [_f_i_l_e]  .  --lesskey-file=[_f_i_l_e]
+                  Use a lesskey file.
+  -K            --quit-on-intr
+                  Exit less in response to ctrl-C.
+  -L  ........  --no-lessopen
+                  Ignore the LESSOPEN environment variable.
+  -m  -M  ....  --long-prompt  --LONG-PROMPT
+                  Set prompt style.
+  -n  -N  ....  --line-numbers  --LINE-NUMBERS
+                  Don't use line numbers.
+  -o [_f_i_l_e]  .  --log-file=[_f_i_l_e]
+                  Copy to log file (standard input only).
+  -O [_f_i_l_e]  .  --LOG-FILE=[_f_i_l_e]
+                  Copy to log file (unconditionally overwrite).
+  -p [_p_a_t_t_e_r_n]  --pattern=[_p_a_t_t_e_r_n]
+                  Start at pattern (from command line).
+  -P [_p_r_o_m_p_t]   --prompt=[_p_r_o_m_p_t]
+                  Define new prompt.
+  -q  -Q  ....  --quiet  --QUIET  --silent --SILENT
+                  Quiet the terminal bell.
+  -r  -R  ....  --raw-control-chars  --RAW-CONTROL-CHARS
+                  Output "raw" control characters.
+  -s  ........  --squeeze-blank-lines
+                  Squeeze multiple blank lines.
+  -S  ........  --chop-long-lines
+                  Chop long lines.
+  -t [_t_a_g]  ..  --tag=[_t_a_g]
+                  Find a tag.
+  -T [_t_a_g_s_f_i_l_e] --tag-file=[_t_a_g_s_f_i_l_e]
+                  Use an alternate tags file.
+  -u  -U  ....  --underline-special  --UNDERLINE-SPECIAL
+                  Change handling of backspaces.
+  -V  ........  --version
+                  Display the version number of "less".
+  -w  ........  --hilite-unread
+                  Highlight first new line after forward-screen.
+  -W  ........  --HILITE-UNREAD
+                  Highlight first new line after any forward movement.
+  -x [_N[,...]]  --tabs=[_N[,...]]
+                  Set tab stops.
+  -X  ........  --no-init
+                  Don't use termcap init/deinit strings.
+                --no-keypad
+                  Don't use termcap keypad init/deinit strings.
+  -y [_N]  ....  --max-forw-scroll=[_N]
+                  Forward scroll limit.
+  -z [_N]  ....  --window=[_N]
+                  Set size of window.
+  -" [_c[_c]]  .  --quotes=[_c[_c]]
+                  Set shell quote characters.
+  -~  ........  --tilde
+                  Don't display tildes after end of file.
+  -# [_N]  ....  --shift=[_N]
+                  Horizontal scroll amount (0 = one half screen width)
+      ........  --no-keypad
+                  Don't send keypad init/deinit sequence.
+      ........  --follow-name
+                  The F command changes files if the input file is renamed.
+
+
+ ---------------------------------------------------------------------------
+
+                          LLIINNEE EEDDIITTIINNGG
+
+        These keys can be used to edit text being entered 
+        on the "command line" at the bottom of the screen.
+
+ RightArrow                       ESC-l     Move cursor right one character.
+ LeftArrow                        ESC-h     Move cursor left one character.
+ CNTL-RightArrow  ESC-RightArrow  ESC-w     Move cursor right one word.
+ CNTL-LeftArrow   ESC-LeftArrow   ESC-b     Move cursor left one word.
+ HOME                             ESC-0     Move cursor to start of line.
+ END                              ESC-$     Move cursor to end of line.
+ BACKSPACE                                  Delete char to left of cursor.
+ DELETE                           ESC-x     Delete char under cursor.
+ CNTL-BACKSPACE   ESC-BACKSPACE             Delete word to left of cursor.
+ CNTL-DELETE      ESC-DELETE      ESC-X     Delete word under cursor.
+ CNTL-U           ESC (MS-DOS only)         Delete entire line.
+ UpArrow                          ESC-k     Retrieve previous command line.
+ DownArrow                        ESC-j     Retrieve next command line.
+ TAB                                        Complete filename & cycle.
+ SHIFT-TAB                        ESC-TAB   Complete filename & reverse cycle.
+ CNTL-L                                     Complete filename, list all.
+
+
diff --git a/DoublePair.java b/DoublePair.java
new file mode 100644
index 0000000..b1fb494
--- /dev/null
+++ b/DoublePair.java
@@ -0,0 +1,89 @@
+/*
+ * CS 61C Fall 2013 Project 1
+ *
+ * DoublePair.java is a class which stores two doubles and 
+ * implements the Writable interface. It can be used as a 
+ * custom value for Hadoop. To use this as a key, you can
+ * choose to implement the WritableComparable interface,
+ * although that is not necessary for credit.
+ */
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.Writable;
+
+public class DoublePair implements Writable {
+    /** First component of the pair. */
+    public double double1;
+    /** Second component of the pair. */
+    public double double2;
+
+    /**
+     * Creates a pair whose two components are both zero.
+     */
+    public DoublePair() {
+        this(0, 0);
+    }
+
+    /**
+     * Creates a pair holding the given components.
+     *
+     * @param double1 value stored as the first component
+     * @param double2 value stored as the second component
+     */
+    public DoublePair(double double1, double double2) {
+        this.double1 = double1;
+        this.double2 = double2;
+    }
+
+    /** Returns the first component. */
+    public double getDouble1() {
+        return double1;
+    }
+
+    /** Returns the second component. */
+    public double getDouble2() {
+        return double2;
+    }
+
+    /**
+     * Replaces the first component.
+     *
+     * @param val new value of the first component
+     */
+    public void setDouble1(double val) {
+        double1 = val;
+    }
+
+    /**
+     * Replaces the second component.
+     *
+     * @param val new value of the second component
+     */
+    public void setDouble2(double val) {
+        double2 = val;
+    }
+
+    /**
+     * Writable hook: serializes the pair as two consecutive doubles, first component first.
+     *
+     * @param out sink the two values are written to
+     * @throws IOException if either write fails
+     */
+    public void write(DataOutput out) throws IOException {
+        out.writeDouble(double1);
+        out.writeDouble(double2);
+    }
+
+    /**
+     * Writable hook: restores the pair by reading two consecutive doubles, first component first.
+     * @param in source the two values are read from
+     * @throws IOException if either read fails
+     */
+    public void readFields(DataInput in) throws IOException {
+        double1 = in.readDouble();
+        double2 = in.readDouble();
+    }
+}
diff --git a/Func.java b/Func.java
new file mode 100644
index 0000000..2bfd38e
--- /dev/null
+++ b/Func.java
@@ -0,0 +1,5 @@
+
+/**
+ * A class representing a function from doubles to doubles.
+ * Concrete subclasses define the mapping by implementing {@link #f(double)}.
+ */
+public abstract class Func {
+    /**
+     * Evaluates this function.
+     *
+     * @param d the argument
+     * @return the value of the function at d
+     */
+    public abstract double f(double d);
+}
diff --git a/Importer.java b/Importer.java
new file mode 100644
index 0000000..744a32e
--- /dev/null
+++ b/Importer.java
@@ -0,0 +1,183 @@
+/*  Written by Ariel Rabkin <asrabkin@gmail.com>, 2011.
+ * Copyright 2011, the Regents of the University of California.
+  
+  Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright notice,
+      this list of conditions and the following disclaimer in the documentation
+      and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * 
+ */
+
+import java.io.*;
+import java.security.MessageDigest;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.compress.*;
+import org.apache.hadoop.io.compress.bzip2.CBZip2InputStream;
+
+/**
+ * Converts text files to sequence files, suitable for cs61c project 1, Spring 2011
+ *
+ * Usage: Importer <file or directory> [output]
+ * If invoked on a text file, converts that file to compressed sequence file, writing the output
+ * in output dir.
+ * If invoked on a directory, recursively scans that directory and subdirs for .txt
+ * files, storing output to output dir.
+ * 
+ * Each input file is split at boundaries, where a boundary is a line containing
+ * exactly the text: "---END.OF.DOCUMENT---"
+ * 
+ * Will also process .bz2 files, first decompressing them.
+ * 
+ * Default output dir is "convertedOut"
+ *
+ *  Written by Ariel Rabkin, asrabkin@gmail.com
+ *  Licensed under the terms of the New BSD License. 
+ *  
+ */
+public class Importer {
+
+  static SequenceFile.Writer seqFileWriter;
+  static long totalBytes = 0;
+  static long totalRecords = 0;
+  static long files = 0;
+  static File outDir = new File("convertedOut");
+  public static void main(String[] args) {
+    try {
+      if(args.length < 1) {
+        System.err.println("can't run. Not enough args. Need to specify input file or dir");
+        System.exit(-1);
+      } else
+        System.out.println("starting scan of " + args[0]);
+      
+      if(args.length > 1)
+        outDir = new File(args[1]);
+      System.out.println("dumping output to " + outDir.getAbsolutePath());
+      
+      lookForFiles(new File(args[0]));
+      long avgRecLength = totalBytes / totalRecords;
+      System.out.println("total data, uncompressed was " + totalBytes/ (1024 * 1024) + " MB");
+      System.out.println("total of " + totalRecords + " records. (Avg uncompressed size " + avgRecLength + " bytes)");
+    } catch(Exception e) {
+      e.printStackTrace();
+    }
+  }
+  
+  public static Text hash(Text content) throws Exception {
+    StringBuilder sb = new StringBuilder();
+    sb.append("post_");
+
+    MessageDigest md = MessageDigest.getInstance("MD5");
+
+    md.update(content.getBytes(), 0, content.getLength());
+    byte[] bytes = md.digest();
+    for(int i=0; i < bytes.length; ++i) {
+      if( (bytes[i] & 0xF0) == 0)
+        sb.append('0');
+      sb.append( Integer.toHexString(0xFF & bytes[i]) );
+    }
+    return new Text(sb.toString());
+  }
+  
+  static void lookForFiles(File file) throws Exception {
+    if(file.isDirectory()) {
+      File[] contents = file.listFiles();
+      if(contents == null) {
+        System.out.println("WARN: null list of contents for " + file.getAbsolutePath());
+        return;
+      }
+      for(File sub: contents)
+        lookForFiles(sub);
+    } else {
+      if(file.getName().endsWith(".bz2") || file.getName().contains(".txt"))
+        copyFile(file);
+    }
+  }
+  
+  public static void copyFile(File file) throws Exception {
+//    String TEST_PREFIX = "";
+    File destFile = new File(outDir,file.getName()+".seq");
+    Path dest = new Path(destFile.getAbsolutePath());
+    
+    Configuration conf = new Configuration();
+    FileSystem fileSys = org.apache.hadoop.fs.FileSystem.get(new java.net.URI(conf.get("fs.default.name")),conf);
+    CompressionCodec codec = new DefaultCodec();
+    fileSys.mkdirs(dest.getParent());
+    FSDataOutputStream outputStr = fileSys.create(dest);
+    seqFileWriter = SequenceFile.createWriter(conf, outputStr,
+        Text.class, Text.class,
+        SequenceFile.CompressionType.BLOCK, codec);
+    String filename = file.getName();
+    InputStream in = new BufferedInputStream(new FileInputStream(file));
+    if(filename.endsWith(".bz2")) {
+     in.read();
+     in.read(); //snarf header
+     in = new CBZip2InputStream(in);
+    }
+    BufferedReader br = new BufferedReader(new InputStreamReader(in, "US-ASCII"));
+    
+    System.out.println("working on file " + file);
+    int records = 0;
+    long bytes = 0, bytes_since_status = 0;
+    long startTime= System.currentTimeMillis();
+    String s = null;
+    Text content = new Text();
+    while( (s = br.readLine()) != null) {
+      if(s.startsWith("---END.OF.DOCUMENT---")) {
+        Text name = new Text(hash(content));
+        seqFileWriter.append(name, content);
+        records ++;
+        content = new Text();
+      } else {
+        byte[] line_as_bytes = (s+ " ").getBytes();
+        for(byte b: line_as_bytes) {
+          assert b < 128: "found an unexpected high-bit set";
+        }
+
+        content.append(line_as_bytes, 0, line_as_bytes.length);
+        bytes += line_as_bytes.length;
+        /*
+        bytes_since_status += line_as_bytes.length;
+        if(bytes_since_status > 10 * 1024 * 1024) { //every 10 MB
+          System.err.print('.');
+          bytes_since_status = 0;
+        }*/
+      }
+    } //end while
+    if(content.getLength() > 5) {
+      Text name = new Text(hash(content));
+      seqFileWriter.append(name, content);
+      records ++;
+    }
+    totalBytes += bytes;
+    totalRecords += records;
+    long time = (System.currentTimeMillis() - startTime)/ 1000 + 1;
+    long kbSec = bytes / 1024 / time;
+    System.out.println(new java.util.Date());
+    System.out.println("File " + file.getName() + " " + records+ " records, " + 
+        bytes + " bytes in " + time+ " seconds ("  +kbSec + " KB/sec).");
+    in.close();
+    seqFileWriter.close();
+    outputStr.close();
+  }
+}
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..20683a0
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,49 @@
+# Build file for CS61C Project 1  [Fall 2013]
+# You should not need to edit this file if you're working on the inst machines.
+
+# This file requires GNU make and depends on paths on instruction machines.
+
+# If you are working on your own machine, you will need to edit the paths.
+
+
+####
+
+## Variables
+
+# Source files (java code). wildcard selects all files matching a pattern.
+SOURCES = $(wildcard *.java)
+# Output JAR file
+TARGET = proj1.jar
+# Extra JARs to have on the classpath when compiling.
+CLASSPATH = /home/ff/cs61c/hadoop/hadoop-core.jar:/home/ff/cs61c/hadoop/lib/commons-cli.jar
+# Compatibility flags to build for Java 6. Remove these flags if in the future
+# the EC2 servers support Java 7 (or later versions)
+COMPAT_FLAGS = -source 6 -target 6
+# javac command to use
+JAVAC = javac -g $(COMPAT_FLAGS) -deprecation -cp $(CLASSPATH)
+# jar command to use
+JAR = jar
+
+## Make targets
+
+# General form is target: dependencies (targets or files), followed by
+# commands to run to build the target from the dependencies.
+
+# Default target.
+all: $(TARGET)
+
+# The classes directory is an order-only prerequisite (listed after the |):
+# it must exist before compiling, but a change of its timestamp must not by
+# itself make the JAR look out of date.
+$(TARGET): $(SOURCES) | classes
+	$(JAVAC) -d classes $(SOURCES)
+	$(JAR) cf $(TARGET) -C classes .
+
+# -p makes the rule succeed when the directory already exists.
+classes:
+	mkdir -p classes
+
+clean:
+	rm -rf classes $(TARGET)
+
+# Compiles DoublePair on its own and runs it; running it requires DoublePair
+# to define a main method.
+doublepair: | classes
+	$(JAVAC) -d classes DoublePair.java
+	java -cp $(CLASSPATH):classes DoublePair
+
+# doublepair names an action, not a file, so it is phony like clean and all.
+.PHONY: clean all doublepair
diff --git a/Proj1.java b/Proj1.java
new file mode 100644
index 0000000..20f9009
--- /dev/null
+++ b/Proj1.java
@@ -0,0 +1,333 @@
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.*;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.lang.Math;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.*;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
+import org.apache.hadoop.util.GenericOptionsParser;
+
+/*
+ * This is the skeleton for CS61c project 1, Fall 2013.
+ *
+ * Reminder:  DO NOT SHARE CODE OR ALLOW ANOTHER STUDENT TO READ YOURS.
+ * EVEN FOR DEBUGGING. THIS MEANS YOU.
+ *
+ */
+public class Proj1{
+
+    /*
+     * Inputs is a set of (docID, document contents) pairs.
+     */
+    public static class Map1 extends Mapper<WritableComparable, Text, Text, DoublePair> {
+        /** Regex pattern to find words (alphanumeric + _). */
+        final static Pattern WORD_PATTERN = Pattern.compile("\\w+");
+
+        /** Lower-cased target word, taken from the "targetWord" configuration entry. */
+        private String targetGram = null;
+        /** Selects the distance function; taken from "funcNum", 0 when absent or not a number. */
+        private int funcNum = 0;
+
+        /**
+         * Reads the target word and the function number from the job configuration.
+         * Setup gets called exactly once for each mapper, before map() gets called the first time.
+         *
+         * @throws IllegalStateException if "targetWord" is not configured
+         */
+        @Override
+        public void setup(Context context) {
+            String target = context.getConfiguration().get("targetWord");
+            if (target == null) {
+                // Fail with an explanation instead of a bare NullPointerException.
+                throw new IllegalStateException("configuration entry \"targetWord\" is not set");
+            }
+            targetGram = target.toLowerCase();
+            try {
+                funcNum = Integer.parseInt(context.getConfiguration().get("funcNum"));
+            } catch (NumberFormatException e) {
+                /* Missing or malformed value: keep the default of 0. */
+            }
+        }
+
+        /**
+         * Emits, for every word of the document other than the target word, the pair
+         * (1, f(d)). Here d is the distance, counted in words, to the nearest occurrence
+         * of the target word, or positive infinity when the document does not contain it.
+         * Words are lower-cased before being compared and emitted.
+         *
+         * @param docID       key of the document (not used)
+         * @param docContents text of the document
+         * @param context     receives the (word, pair) outputs
+         */
+        @Override
+        public void map(WritableComparable docID, Text docContents, Context context)
+                throws IOException, InterruptedException {
+            Matcher matcher = WORD_PATTERN.matcher(docContents.toString());
+            Func func = funcFromNum(funcNum);
+
+            ArrayList<Double> targetWordIndices = targetWordIndex(docContents);
+            // Index of the first target position that is not before the current word.
+            // Positions are ascending and the word counter only grows, so this cursor
+            // only moves forward and the whole document is handled in linear time.
+            int nextTarget = 0;
+            int counter = 0;
+            while (matcher.find()) {
+                String word = matcher.group().toLowerCase();
+                if (!word.equals(targetGram)) {
+                    while (nextTarget < targetWordIndices.size()
+                            && targetWordIndices.get(nextTarget) < counter) {
+                        nextTarget++;
+                    }
+                    double d = distance(targetWordIndices, nextTarget, counter);
+                    context.write(new Text(word), new DoublePair(1, func.f(d)));
+                }
+                counter++;
+            }
+        }
+
+        /**
+         * Returns the distance from currentIndex to the closer of its two neighbouring
+         * target positions, or positive infinity when there is no target position.
+         *
+         * @param targetIndices ascending positions of the target word
+         * @param next          index of the first position in targetIndices that is not
+         *                      before currentIndex (targetIndices.size() if none)
+         * @param currentIndex  position of the word being scored
+         */
+        private double distance(ArrayList<Double> targetIndices, int next, double currentIndex) {
+            double minDistance = Double.POSITIVE_INFINITY;
+            if (next < targetIndices.size()) {
+                minDistance = targetIndices.get(next) - currentIndex;
+            }
+            if (next > 0) {
+                minDistance = Math.min(minDistance, currentIndex - targetIndices.get(next - 1));
+            }
+            return minDistance;
+        }
+
+        /**
+         * Returns the word positions (0-based, in ascending order) at which the target
+         * word occurs in docContents.
+         */
+        private ArrayList<Double> targetWordIndex(Text docContents) {
+            Matcher matcher = WORD_PATTERN.matcher(docContents.toString());
+            ArrayList<Double> targetIndices = new ArrayList<Double>();
+            double counter = 0;
+            while (matcher.find()) {
+                String word = matcher.group().toLowerCase();
+                if (word.equals(targetGram)) {
+                    targetIndices.add(counter);
+                }
+                counter++;
+            }
+            return targetIndices;
+        }
+
+        /**
+         * Returns the Func corresponding to FUNCNUM. Every function maps positive
+         * infinity (target word absent) to 0.
+         *
+         * @throws IllegalArgumentException if funcNum is not 0, 1 or 2
+         */
+        private Func funcFromNum(int funcNum) {
+            Func func = null;
+            switch (funcNum) {
+                case 0:
+                    func = new Func() {
+                        public double f(double d) {
+                            return d == Double.POSITIVE_INFINITY ? 0.0 : 1.0;
+                        }
+                    };
+                    break;
+                case 1:
+                    func = new Func() {
+                        public double f(double d) {
+                            return d == Double.POSITIVE_INFINITY ? 0.0 : 1.0 + 1.0 / d;
+                        }
+                    };
+                    break;
+                case 2:
+                    func = new Func() {
+                        public double f(double d) {
+                            return d == Double.POSITIVE_INFINITY ? 0.0 : 1.0 + Math.sqrt(d);
+                        }
+                    };
+                    break;
+                default:
+                    // Previously this fell through and returned null, which surfaced
+                    // later as an unexplained NullPointerException inside map().
+                    throw new IllegalArgumentException("unsupported funcNum: " + funcNum);
+            }
+            return func;
+        }
+    }
+
+    /** Here's where you'll be implementing your combiner. It must be non-trivial for you to receive credit. */
+    public static class Combine1 extends Reducer<Text, DoublePair, Text, DoublePair> {
+
+        @Override
+            public void reduce(Text key, Iterable<DoublePair> values,
+                    Context context) throws IOException, InterruptedException {
+
+                 // YOUR CODE HERE
+                double instances = 0;
+                double score=0;
+                for (DoublePair value: values) {
+                    instances +=value.getDouble1();
+                    score += value.getDouble2();
+                }
+                context.write(key, new DoublePair(instances, score));
+
+
+            }
+    }
+
+
+    public static class Reduce1 extends Reducer<Text, DoublePair, Text, DoublePair> {
+        @Override
+            public void reduce(Text key, Iterable<DoublePair> values,
+                    Context context) throws IOException, InterruptedException {
+
+                // YOUR CODE HERE
+                double instances = 0;
+                double scores = 0;
+                for (DoublePair value : values) {
+                	instances +=value.getDouble1();
+                	scores += value.getDouble2();
+                }
+                context.write(key, new DoublePair(instances, scores));
+            }
+    }
+
+    public static class Map2 extends Mapper<Text, DoublePair, DoubleWritable, Text> {
+        /**
+         * Turns a (word, pair) record into (negated score, word). With a the first
+         * and s the second component of the pair, the score is s * ln(s)^3 / a; it is
+         * 0 when s is 0. The score is written negated so that keys sorted in
+         * ascending order list the largest scores first.
+         *
+         * @param key     the word
+         * @param value   pair holding a and s
+         * @param context receives the (negated score, word) output
+         */
+        @Override
+        public void map(Text key, DoublePair value, Context context)
+                throws IOException, InterruptedException {
+            double a = value.getDouble1();
+            double s = value.getDouble2();
+            double negatedScore;
+            if (s == 0) {
+                negatedScore = 0;
+            } else {
+                double cubedLog = Math.pow(Math.log(s), 3);
+                negatedScore = -s * cubedLog / a;
+            }
+            context.write(new DoubleWritable(negatedScore), key);
+        }
+    }
+
+    public static class Reduce2 extends Reducer<DoubleWritable, Text, DoubleWritable, Text> {
+
+        /** Number of words this reducer has seen so far. */
+        int n = 0;
+        /** Maximum number of (score, word) records written by one reducer. */
+        static int N_TO_OUTPUT = 100;
+
+        /**
+         * Resets the count of words seen. Setup gets called exactly once for each
+         * reducer, before reduce() gets called the first time.
+         */
+        @Override
+        protected void setup(Context c) {
+            n = 0;
+        }
+
+        /**
+         * Writes (score, word) for each incoming word until N_TO_OUTPUT records have
+         * been written; later words are counted but not written. The incoming key is
+         * the negated score, so its absolute value is written to make the score
+         * positive again. Output should be in the form of (DoubleWritable score,
+         * Text word), sorted from largest co-occurrence to smallest.
+         *
+         * @param key     negated score shared by the words in values
+         * @param values  words having that score
+         * @param context receives the (score, word) outputs
+         */
+        @Override
+        public void reduce(DoubleWritable key, Iterable<Text> values,
+                Context context) throws IOException, InterruptedException {
+            for (Text word : values) {
+                if (n >= N_TO_OUTPUT) {
+                    n++;
+                    continue;
+                }
+                DoubleWritable score = new DoubleWritable(Math.abs(key.get()));
+                context.write(score, word);
+                n++;
+            }
+        }
+    }
+
+    /*
+     *  You shouldn't need to modify this function much. If you think you have a good reason to,
+     *  you might want to discuss with staff.
+     *
+     *  The skeleton supports several options.
+     *  if you set runJob2 to false, only the first job will run and output will be
+     *  in TextFile format, instead of SequenceFile. This is intended as a debugging aid.
+     *
+     *  If you set combiner to false, the combiner will not run. This is also
+     *  intended as a debugging aid. Turning on and off the combiner shouldn't alter
+     *  your results. Since the framework doesn't make promises about when it'll
+     *  invoke combiners, it's an error to assume anything about how many times
+     *  values will be combined.
+     */
+    public static void main(String[] rawArgs) throws Exception {
+        GenericOptionsParser parser = new GenericOptionsParser(rawArgs);
+        Configuration conf = parser.getConfiguration();
+        String[] args = parser.getRemainingArgs();
+        // args[0..2] are read below; report a short argument list instead of throwing.
+        if (args.length < 3) {
+            System.err.println("usage: Proj1 [generic options] <input> <intermediateDir> <outDir>");
+            System.exit(1);
+        }
+
+        boolean runJob2 = conf.getBoolean("runJob2", true);
+        boolean combiner = conf.getBoolean("combiner", false);
+
+        System.out.println("Target word: " + conf.get("targetWord"));
+        System.out.println("Function num: " + conf.get("funcNum"));
+        if(runJob2)
+            System.out.println("running both jobs");
+        else
+            System.out.println("for debugging, only running job 1");
+        if(combiner)
+            System.out.println("using combiner");
+        else
+            System.out.println("NOT using combiner");
+
+        Path inputPath = new Path(args[0]);
+        Path middleOut = new Path(args[1]);
+        Path finalOut = new Path(args[2]);
+        FileSystem hdfs = middleOut.getFileSystem(conf);
+        int reduceCount = conf.getInt("reduces", 32);
+
+        if(hdfs.exists(middleOut)) {
+            System.err.println("can't run: " + middleOut.toUri().toString() + " already exists");
+            System.exit(1);
+        }
+        if(finalOut.getFileSystem(conf).exists(finalOut) ) {
+            System.err.println("can't run: " + finalOut.toUri().toString() + " already exists");
+            System.exit(1);
+        }
+
+        {
+            Job firstJob = new Job(conf, "job1");
+            firstJob.setJarByClass(Map1.class);
+            /* You may need to change things here */
+            firstJob.setMapOutputKeyClass(Text.class);
+            firstJob.setMapOutputValueClass(DoublePair.class);
+            firstJob.setOutputKeyClass(Text.class);
+            firstJob.setOutputValueClass(DoublePair.class);
+            /* End region where we expect you to perhaps need to change things. */
+
+            firstJob.setMapperClass(Map1.class);
+            firstJob.setReducerClass(Reduce1.class);
+            firstJob.setNumReduceTasks(reduceCount);
+            if(combiner)
+                firstJob.setCombinerClass(Combine1.class);
+
+            firstJob.setInputFormatClass(SequenceFileInputFormat.class);
+            if(runJob2)
+                firstJob.setOutputFormatClass(SequenceFileOutputFormat.class);
+            FileInputFormat.addInputPath(firstJob, inputPath);
+            FileOutputFormat.setOutputPath(firstJob, middleOut);
+            // Do not start job 2 on missing or partial intermediate output.
+            if (!firstJob.waitForCompletion(true)) {
+                System.err.println("job1 failed");
+                System.exit(1);
+            }
+        }
+
+        if(runJob2) {
+            Job secondJob = new Job(conf, "job2");
+            secondJob.setJarByClass(Map1.class);
+            /* You may need to change things here */
+            secondJob.setMapOutputKeyClass(DoubleWritable.class);
+            secondJob.setMapOutputValueClass(Text.class);
+            secondJob.setOutputKeyClass(DoubleWritable.class);
+            secondJob.setOutputValueClass(Text.class);
+            /* End region where we expect you to perhaps need to change things. */
+
+            secondJob.setMapperClass(Map2.class);
+            secondJob.setReducerClass(Reduce2.class);
+            secondJob.setInputFormatClass(SequenceFileInputFormat.class);
+            secondJob.setOutputFormatClass(TextOutputFormat.class);
+            secondJob.setNumReduceTasks(1);
+            FileInputFormat.addInputPath(secondJob, middleOut);
+            FileOutputFormat.setOutputPath(secondJob, finalOut);
+            // Surface a job 2 failure through the process exit status.
+            if (!secondJob.waitForCompletion(true)) {
+                System.err.println("job2 failed");
+                System.exit(1);
+            }
+        }
+    }
+
+}
\ No newline at end of file
diff --git a/Tests.class b/Tests.class
new file mode 100644
index 0000000..5c60c24
Binary files /dev/null and b/Tests.class differ
diff --git a/Tests.java b/Tests.java
new file mode 100644
index 0000000..5a77437
--- /dev/null
+++ b/Tests.java
@@ -0,0 +1,55 @@
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.*;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.lang.Math;
+
+public class Tests{
+    /**
+     * Prints distance() results for a few fixed inputs, each followed by a line stating
+     * the expected value, for manual comparison.
+     */
+    public static void main(String[] args) {
+        ArrayList<Double> twoAndFour = new ArrayList<Double>(Arrays.asList(2.00, 4.00));
+        System.out.println("Test 1 Distance.....");
+        report(twoAndFour, 1.00, "Should be 1");
+        report(twoAndFour, 7, "Should be 3");
+
+        ArrayList<Double> twoAndFive = new ArrayList<Double>(Arrays.asList(2.00, 5.00));
+        report(twoAndFive, 3, "Should be 1.0");
+        report(twoAndFive, 4, "Should be 1.0");
+
+        ArrayList<Double> zeroAndSeven = new ArrayList<Double>(Arrays.asList(0.00, 7.00));
+        report(zeroAndSeven, 3, "Should be 3.0");
+        report(zeroAndSeven, 4, "Should be 3.0");
+
+        ArrayList<Double> twoAndThree = new ArrayList<Double>(Arrays.asList(2.00, 3.00));
+        System.out.println("Test 4 Distance.....");
+        report(twoAndThree, 4, "Should be 1.0");
+        report(twoAndThree, 5, "Should be 2.0");
+    }
+
+    /** Prints the computed distance on one line and the expectation text on the next. */
+    private static void report(ArrayList<Double> targets, double index, String expectation) {
+        System.out.println(distance(targets, index));
+        System.out.println(expectation);
+    }
+
+    /**
+     * Returns the smallest absolute difference between currentIndex and any element of
+     * targetIndices, or Double.POSITIVE_INFINITY when the list is empty.
+     */
+    private static double distance(ArrayList<Double> targetIndices, double currentIndex) {
+        double best = Double.POSITIVE_INFINITY;
+        for (double target : targetIndices) {
+            double gap = Math.abs(target - currentIndex);
+            if (gap < best) {
+                best = gap;
+            }
+        }
+        return best;
+    }
+
+}
\ No newline at end of file
diff --git a/bench.sh b/bench.sh
new file mode 100644
index 0000000..7411483
--- /dev/null
+++ b/bench.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+# Clear earlier test-bench-* output, then time a combiner-enabled Proj1 run on the sample data.
+rm -rf test-bench-*
+time hadoop jar proj1.jar Proj1 -conf conf.xml -Dcombiner=true ~cs61c/data/sample.seq test-bench-int test-bench-out
+printf '%s\n' "You should make sure that the time reported under the category 'user' is less than 2 minutes."
+
+# The script reports success regardless of how the hadoop command exited.
+exit 0
diff --git a/billOfRights-freedom-0 b/billOfRights-freedom-0
new file mode 100644
index 0000000..39d6b48
--- /dev/null
+++ b/billOfRights-freedom-0
@@ -0,0 +1,100 @@
+0.8685232425595109	of
+0.8628402263382491	the
+0.7104525909097161	or
+0.03027496836262995	to
+0.0	a
+0.0	abridging
+0.0	according
+0.0	accusation
+0.0	accused
+0.0	actual
+0.0	affirmation
+0.0	against
+0.0	all
+0.0	amendment
+0.0	an
+0.0	and
+0.0	answer
+0.0	any
+0.0	are
+0.0	arising
+0.0	arms
+0.0	ascertained
+0.0	assemble
+0.0	assistance
+0.0	at
+0.0	bail
+0.0	be
+0.0	bear
+0.0	been
+0.0	being
+0.0	but
+0.0	by
+0.0	capital
+0.0	case
+0.0	cases
+0.0	cause
+0.0	certain
+0.0	committed
+0.0	common
+0.0	compelled
+0.0	compensation
+0.0	compulsory
+0.0	confronted
+0.0	congress
+0.0	consent
+0.0	constitution
+0.0	construed
+0.0	controversy
+0.0	counsel
+0.0	court
+0.0	crime
+0.0	criminal
+0.0	cruel
+0.0	danger
+0.0	defence
+0.0	delegated
+0.0	deny
+0.0	deprived
+0.0	describing
+0.0	disparage
+0.0	district
+0.0	dollars
+0.0	due
+0.0	effects
+0.0	enjoy
+0.0	enumeration
+0.0	establishment
+0.0	examined
+0.0	exceed
+0.0	except
+0.0	excessive
+0.0	exercise
+0.0	fact
+0.0	favor
+0.0	fines
+0.0	for
+0.0	forces
+0.0	free
+0.0	government
+0.0	grand
+0.0	grievances
+0.0	have
+0.0	held
+0.0	him
+0.0	himself
+0.0	his
+0.0	house
+0.0	houses
+0.0	i
+0.0	ii
+0.0	iii
+0.0	impartial
+0.0	imposed
+0.0	in
+0.0	indictment
+0.0	infamous
+0.0	inflicted
+0.0	informed
+0.0	infringed
+0.0	issue
diff --git a/billOfRights-freedom-1 b/billOfRights-freedom-1
new file mode 100644
index 0000000..6e6eb37
--- /dev/null
+++ b/billOfRights-freedom-1
@@ -0,0 +1,100 @@
+1.7637883358888644	of
+1.5909841682918917	the
+1.3271959397452828	or
+0.09998888401272867	abridging
+0.09998888401272867	speech
+0.04187818243636804	to
+0.013888745973675832	thereof
+0.007272693876848162	exercise
+0.004273494238076529	press
+0.0021367471190382645	free
+0.0018382338216602085	prohibiting
+9.523806251581638E-4	religion
+5.555554605864092E-4	establishment
+4.383080755555035E-4	peaceably
+3.518648506785155E-4	respecting
+2.8673833053365614E-4	assemble
+2.5990889850540803E-4	right
+2.1915403777775176E-4	an
+1.977261404917842E-4	make
+1.6683349422791388E-4	petition
+1.4205553962122824E-4	congress
+1.2195121663332598E-4	i
+1.2195121663332598E-4	government
+1.1111109211728183E-4	people
+8.044081457037506E-5	redress
+6.274509742066204E-5	grievances
+4.778972175560936E-5	law
+4.734848218430008E-5	no
+1.7578090324830294E-5	for
+1.69101722086786E-5	and
+1.0546854194898178E-5	amendment
+9.813734954583169E-6	shall
+9.182736306098745E-6	a
+0.0	committed
+0.0	common
+0.0	compelled
+0.0	compensation
+0.0	compulsory
+0.0	confronted
+0.0	certain
+0.0	consent
+0.0	constitution
+0.0	construed
+0.0	controversy
+0.0	counsel
+0.0	court
+0.0	crime
+0.0	criminal
+0.0	cruel
+0.0	danger
+0.0	defence
+0.0	delegated
+0.0	deny
+0.0	deprived
+0.0	describing
+0.0	disparage
+0.0	district
+0.0	dollars
+0.0	due
+0.0	effects
+0.0	enjoy
+0.0	enumeration
+0.0	cause
+0.0	examined
+0.0	exceed
+0.0	except
+0.0	excessive
+0.0	cases
+0.0	fact
+0.0	favor
+0.0	fines
+0.0	case
+0.0	forces
+0.0	capital
+0.0	by
+0.0	grand
+0.0	but
+0.0	have
+0.0	held
+0.0	him
+0.0	himself
+0.0	his
+0.0	house
+0.0	houses
+0.0	being
+0.0	ii
+0.0	iii
+0.0	impartial
+0.0	imposed
+0.0	in
+0.0	indictment
+0.0	infamous
+0.0	inflicted
+0.0	informed
+0.0	infringed
+0.0	issue
+0.0	it
+0.0	iv
+0.0	ix
+0.0	jeopardy
diff --git a/billOfRights-jury-0 b/billOfRights-jury-0
new file mode 100644
index 0000000..46b328a
--- /dev/null
+++ b/billOfRights-jury-0
@@ -0,0 +1,100 @@
+10.229136542850217	in
+8.525431090916594	the
+7.671852407137663	of
+7.5832142098589985	be
+7.181218561035801	shall
+4.339534778652788	to
+3.4740929702380434	for
+3.4513609053529963	a
+3.4385483109555306	or
+2.6641972159114355	have
+2.4652577895378545	and
+1.7761314772742904	law
+1.3320986079557178	nor
+1.3259689601439075	public
+1.1840876515161936	by
+0.9944767201079306	any
+0.39779068804317225	amendment
+0.33302465198892944	trial
+0.33302465198892944	been
+0.33302465198892944	otherwise
+0.33302465198892944	witnesses
+0.33302465198892944	common
+0.33302465198892944	life
+0.33302465198892944	property
+0.33302465198892944	person
+0.33302465198892944	district
+0.33302465198892944	crime
+0.33302465198892944	his
+0.33302465198892944	criminal
+0.33302465198892944	process
+0.2220164346592863	against
+0.2220164346592863	without
+0.1332098607955718	no
+0.1332098607955718	right
+0.0	make
+0.0	abridging
+0.0	according
+0.0	accusation
+0.0	accused
+0.0	actual
+0.0	affirmation
+0.0	all
+0.0	an
+0.0	answer
+0.0	are
+0.0	arising
+0.0	arms
+0.0	ascertained
+0.0	assemble
+0.0	assistance
+0.0	at
+0.0	bail
+0.0	bear
+0.0	being
+0.0	but
+0.0	capital
+0.0	case
+0.0	cases
+0.0	cause
+0.0	certain
+0.0	committed
+0.0	compelled
+0.0	compensation
+0.0	compulsory
+0.0	confronted
+0.0	congress
+0.0	consent
+0.0	constitution
+0.0	construed
+0.0	controversy
+0.0	counsel
+0.0	court
+0.0	cruel
+0.0	danger
+0.0	defence
+0.0	delegated
+0.0	deny
+0.0	deprived
+0.0	describing
+0.0	disparage
+0.0	dollars
+0.0	due
+0.0	effects
+0.0	enjoy
+0.0	enumeration
+0.0	establishment
+0.0	examined
+0.0	exceed
+0.0	except
+0.0	excessive
+0.0	exercise
+0.0	fact
+0.0	favor
+0.0	fines
+0.0	forces
+0.0	free
+0.0	freedom
+0.0	government
+0.0	grand
+0.0	grievances
diff --git a/capital-0.txt b/capital-0.txt
new file mode 100644
index 0000000..bda70ca
--- /dev/null
+++ b/capital-0.txt
@@ -0,0 +1,20 @@
+1540.1366592122918      judg
+1417.6254079246696      nasdaq
+1338.8978649602273      professionalism
+1092.1147057120354      placement
+1002.3899960841976      nationwide
+830.9056466367553       staffing
+815.6293908704927       determination
+688.1526545812279       delivered
+657.2047813498331       resource
+612.9782523265053       hcmi
+568.5864894821221       5kv
+558.0998817545266       rmrp
+540.9619834856368       consistent
+523.9782984949501       permanent
+512.2788340120392       managed
+380.08579186862875      kv
+348.00854673237137      gesnetwork
+322.815247308376        art
+319.6455493278869       solutions
+306.2697516672234       judge
diff --git a/classes/DoublePair.class b/classes/DoublePair.class
new file mode 100644
index 0000000..c429203
Binary files /dev/null and b/classes/DoublePair.class differ
diff --git a/classes/Func.class b/classes/Func.class
new file mode 100644
index 0000000..cfff4e0
Binary files /dev/null and b/classes/Func.class differ
diff --git a/classes/Importer.class b/classes/Importer.class
new file mode 100644
index 0000000..ef7f599
Binary files /dev/null and b/classes/Importer.class differ
diff --git a/classes/Proj1$Combine1.class b/classes/Proj1$Combine1.class
new file mode 100644
index 0000000..54cd413
Binary files /dev/null and b/classes/Proj1$Combine1.class differ
diff --git a/classes/Proj1$Map1$1.class b/classes/Proj1$Map1$1.class
new file mode 100644
index 0000000..5ae4878
Binary files /dev/null and b/classes/Proj1$Map1$1.class differ
diff --git a/classes/Proj1$Map1$2.class b/classes/Proj1$Map1$2.class
new file mode 100644
index 0000000..e90638e
Binary files /dev/null and b/classes/Proj1$Map1$2.class differ
diff --git a/classes/Proj1$Map1$3.class b/classes/Proj1$Map1$3.class
new file mode 100644
index 0000000..f7a7d59
Binary files /dev/null and b/classes/Proj1$Map1$3.class differ
diff --git a/classes/Proj1$Map1.class b/classes/Proj1$Map1.class
new file mode 100644
index 0000000..3b0854b
Binary files /dev/null and b/classes/Proj1$Map1.class differ
diff --git a/classes/Proj1$Map2.class b/classes/Proj1$Map2.class
new file mode 100644
index 0000000..33ab526
Binary files /dev/null and b/classes/Proj1$Map2.class differ
diff --git a/classes/Proj1$Reduce1.class b/classes/Proj1$Reduce1.class
new file mode 100644
index 0000000..aa62007
Binary files /dev/null and b/classes/Proj1$Reduce1.class differ
diff --git a/classes/Proj1$Reduce2.class b/classes/Proj1$Reduce2.class
new file mode 100644
index 0000000..a37663f
Binary files /dev/null and b/classes/Proj1$Reduce2.class differ
diff --git a/classes/Proj1.class b/classes/Proj1.class
new file mode 100644
index 0000000..d0d16d8
Binary files /dev/null and b/classes/Proj1.class differ
diff --git a/classes/Tests.class b/classes/Tests.class
new file mode 100644
index 0000000..1e718da
Binary files /dev/null and b/classes/Tests.class differ
diff --git a/conf.xml b/conf.xml
new file mode 100644
index 0000000..b71f2ab
--- /dev/null
+++ b/conf.xml
@@ -0,0 +1,15 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<configuration>
+
+<property>
+<name>targetWord</name>
+<value>monument</value>
+</property>
+
+<property>
+<name>funcNum</name>
+<value>2</value>
+</property>
+
+</configuration>
diff --git a/ec2experience.txt b/ec2experience.txt
new file mode 100644
index 0000000..710d4c5
--- /dev/null
+++ b/ec2experience.txt
@@ -0,0 +1,73 @@
+1. How long did each of the six runs take? How many mappers and how many reducers did you use?
+
+	run 1: (freedom, 0) on the 2005 dataset with combiner off
+		time: 16 minutes, 52 seconds
+		mappers: 242
+		reducers: 33
+
+	run 2: (freedom, 0) on the 2005 dataset with combiner on
+		time: 6 minutes, 34 seconds
+		mappers: 242
+		reducers: 33
+
+	run 3: (capital, 0) on the 2006 dataset with combiner on, 5 workers
+		time: 15 mins, 20 seconds
+		mappers: 348
+		reducers: 33
+		processing rate: 0.01938 gb/s
+
+	run 4: (capital, 0) on the 2006 dataset with combiner on, 9 workers
+		time: 8 mins, 58 seconds
+		mappers: 348
+		reducers: 33
+		processing rate: 0.03313 gb/s
+
+	run 5: (landmark, 1) on the 2006 dataset with combiner on
+		time: 8 mins, 50 seconds
+		mappers: 348
+		reducers: 33
+		processing rate: 0.03363 gb/s
+
+	run 6: (monument, 2) on the 2006 dataset with combiner on
+		time: 8 mins, 51 seconds
+		mappers: 348
+		reducers: 33
+		processing rate: 0.03357 gb/s
+
+
+2. For the two runs with (freedom, 0), how much faster did your code run on the 5 workers with the combiner turned on than with the combiner turned off? Express your answer as a percentage.
+
+((16 minutes 52 seconds)-(6 minutes 34 seconds))/(6 minutes 34 seconds)=1.5685, so 156.85% faster
+
+
+3. For the runs on the 2006 dataset, what was the median processing rate per GB (= 2^30 bytes) of input for the tests using 5 workers? Using 9 workers?
+
+The processing rate of each run is listed above in part 1. With 5 workers there was only one run on the 2006 dataset, so the median is that run's rate, 0.01938 gb/s; with 9 workers the median of the three runs is 0.03357 gb/s.
+
+
+4. What was the percent speedup of running (capital, 0) with 9 workers over 5 workers? What is the maximum possible speedup, assuming your code is fully parallelizable? How well, in your opinion, does Hadoop parallelize your code? Justify your answer in 1-2 sentences.
+
+(0.03313-0.01938)/0.01938 = 0.7095 = 70.95% faster (using the (capital, 0) rates from runs 4 and 3)
+Optimal is (9-5)/5 = 0.8 = 80% faster
+
+70.95/80 = 0.8869 = 88.69% efficient
+
+In my opinion, Hadoop parallelizes code pretty well: the measured speedup is 88.69% of the maximum possible speedup.
+
+
+5. For a single run on the 2006 dataset, what was the price per GB processed on with 5 workers? With 9 workers? (Recall that an extra-large instance costs $0.58 per hour, rounded up to the nearest hour.)
+
+	($0.58)*(5 workers)*(1 hour) = $2.90
+	GB Processed: (19,139,821,102 bytes)*(1/2^30 gb/bytes) = 17.82534 gb
+	$2.90/17.82534 gb = 
+		$0.16 per gb
+
+	($0.58)*(9 workers)*(1 hour) = $5.22
+	GB Processed: (19,141,786,065 bytes)*(1/2^30 gb/bytes) = 17.82718 gb
+	$5.22/17.82718 gb = 
+		$0.29 per gb
+
+
+6. How much total money did you use to complete this project?
+
+($0.58)*(5 workers)*(1 hour)*(3 jobs) + ($0.58)*(9 workers)*(1 hour)*(3 jobs) = $24.36
\ No newline at end of file
diff --git a/landmark-1.txt b/landmark-1.txt
new file mode 100644
index 0000000..b64cf24
--- /dev/null
+++ b/landmark-1.txt
@@ -0,0 +1,20 @@
+100.39530100764568      techese
+92.03629080941323       somehiow
+69.4081622852701        nixed
+69.17064746022587       siiiiiiigh
+64.17540253901382       prevenzione
+59.92735916055491       journee
+55.22177448564794       chiroparctor
+54.187496433420826      loizidou
+53.230190829283416      undaunting
+48.2937701513063        deauthenticizes
+42.33005145025361       wunnafulness
+39.70436779185503       irala
+33.60318835396598       gissi
+31.324077477551374      bushido
+30.95107412060516       troiseime
+30.03294237677179       erhard
+28.976262090783774      iyyengar
+27.05044811969202       filartiga
+26.445904902976714      scammark
+25.487072226209133      peqa
diff --git a/monument-2.txt b/monument-2.txt
new file mode 100644
index 0000000..4455ce7
--- /dev/null
+++ b/monument-2.txt
@@ -0,0 +1,20 @@
+556.0618844224659       dachaus
+443.1566899233314       buchenwalds
+432.11171012313133      bandelier
+372.51224507027706      ranelagh
+291.8976881201187       aftrighted
+248.99131468692107      eventsit
+220.9959615432032       2714
+214.8765710028718       belsens
+205.89149349373483      waalsdorper
+194.2870172417444       3144
+171.9677863857934       94520
+170.84364974523712      pompeys
+165.5002825746365       theexisting
+143.48903370060955      rubot
+139.86197928751682      _inscription
+125.66456252541532      restauant
+123.73918881704124      80132
+115.99254115323151      gravediggers
+107.0751029806899       jennuars
+107.0751029806899       26do
diff --git a/onetest1/._SUCCESS.crc b/onetest1/._SUCCESS.crc
new file mode 100644
index 0000000..3b7b044
Binary files /dev/null and b/onetest1/._SUCCESS.crc differ
diff --git a/onetest1/.part-r-00000.crc b/onetest1/.part-r-00000.crc
new file mode 100644
index 0000000..95fcac4
Binary files /dev/null and b/onetest1/.part-r-00000.crc differ
diff --git a/onetest1/_SUCCESS b/onetest1/_SUCCESS
new file mode 100644
index 0000000..e69de29
diff --git a/onetest1/part-r-00000 b/onetest1/part-r-00000
new file mode 100644
index 0000000..c5fb96d
Binary files /dev/null and b/onetest1/part-r-00000 differ
diff --git a/onetest2/._SUCCESS.crc b/onetest2/._SUCCESS.crc
new file mode 100644
index 0000000..3b7b044
Binary files /dev/null and b/onetest2/._SUCCESS.crc differ
diff --git a/onetest2/.part-r-00000.crc b/onetest2/.part-r-00000.crc
new file mode 100644
index 0000000..ff30803
Binary files /dev/null and b/onetest2/.part-r-00000.crc differ
diff --git a/onetest2/_SUCCESS b/onetest2/_SUCCESS
new file mode 100644
index 0000000..e69de29
diff --git a/onetest2/part-r-00000 b/onetest2/part-r-00000
new file mode 100644
index 0000000..cbc9ce1
--- /dev/null
+++ b/onetest2/part-r-00000
@@ -0,0 +1,100 @@
+1.7637883358888644	of
+1.590984168291892	the
+1.3271959397452826	or
+0.09998888401272868	abridging
+0.09998888401272868	speech
+0.04187818243636804	to
+0.013888745973675832	thereof
+0.007272693876848162	exercise
+0.004273494238076528	press
+0.002136747119038264	free
+0.0018382338216602087	prohibiting
+9.523806251581638E-4	religion
+5.555554605864092E-4	establishment
+4.383080755555035E-4	peaceably
+3.5186485067851544E-4	respecting
+2.867383305336562E-4	assemble
+2.5990889850540803E-4	right
+2.1915403777775176E-4	an
+1.977261404917842E-4	make
+1.6683349422791388E-4	petition
+1.4205553962122824E-4	congress
+1.2195121663332598E-4	i
+1.2195121663332598E-4	government
+1.1111109211728183E-4	people
+8.044081457037506E-5	redress
+6.274509742066204E-5	grievances
+4.7789721755609366E-5	law
+4.7348482184300085E-5	no
+1.7578090324830298E-5	for
+1.69101722086786E-5	and
+1.0546854194898178E-5	amendment
+9.813734954583169E-6	shall
+9.182736306098745E-6	a
+0.0	committed
+0.0	common
+0.0	compelled
+0.0	compensation
+0.0	compulsory
+0.0	confronted
+0.0	certain
+0.0	consent
+0.0	constitution
+0.0	construed
+0.0	controversy
+0.0	counsel
+0.0	court
+0.0	crime
+0.0	criminal
+0.0	cruel
+0.0	danger
+0.0	defence
+0.0	delegated
+0.0	deny
+0.0	deprived
+0.0	describing
+0.0	disparage
+0.0	district
+0.0	dollars
+0.0	due
+0.0	effects
+0.0	enjoy
+0.0	enumeration
+0.0	cause
+0.0	examined
+0.0	exceed
+0.0	except
+0.0	excessive
+0.0	cases
+0.0	fact
+0.0	favor
+0.0	fines
+0.0	case
+0.0	forces
+0.0	capital
+0.0	by
+0.0	grand
+0.0	but
+0.0	have
+0.0	held
+0.0	him
+0.0	himself
+0.0	his
+0.0	house
+0.0	houses
+0.0	being
+0.0	ii
+0.0	iii
+0.0	impartial
+0.0	imposed
+0.0	in
+0.0	indictment
+0.0	infamous
+0.0	inflicted
+0.0	informed
+0.0	infringed
+0.0	issue
+0.0	it
+0.0	iv
+0.0	ix
+0.0	jeopardy
diff --git a/proj1.jar b/proj1.jar
new file mode 100644
index 0000000..ee1f63a
Binary files /dev/null and b/proj1.jar differ