forked from anjuke/romar
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
10 changed files
with
540 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
102 changes: 102 additions & 0 deletions
102
src/main/java/com/anjuke/romar/mahout/similarity/RomarFileItemSimilarity.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
package com.anjuke.romar.mahout.similarity; | ||
|
||
import java.io.File; | ||
import java.util.Collection; | ||
import java.util.concurrent.locks.ReentrantLock; | ||
|
||
import org.apache.mahout.cf.taste.common.Refreshable; | ||
import org.apache.mahout.cf.taste.common.TasteException; | ||
import org.apache.mahout.cf.taste.impl.similarity.GenericItemSimilarity; | ||
import org.apache.mahout.cf.taste.impl.similarity.GenericItemSimilarity.ItemItemSimilarity; | ||
import org.apache.mahout.cf.taste.impl.similarity.file.FileItemSimilarity; | ||
import org.apache.mahout.cf.taste.similarity.ItemSimilarity; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
import com.anjuke.romar.mahout.similarity.file.RomarFileSimilarityIterator.IteratorBuiler; | ||
import com.anjuke.romar.mahout.util.Util; | ||
import com.google.common.base.Preconditions; | ||
import com.google.common.collect.ImmutableList; | ||
|
||
public class RomarFileItemSimilarity implements ItemSimilarity { | ||
|
||
private final IteratorBuiler<ItemItemSimilarity> iteratorBuilder; | ||
private final File dataFile; | ||
private ItemSimilarity delegate; | ||
private final ReentrantLock reloadLock; | ||
private long lastModified; | ||
private final long minReloadIntervalMS; | ||
|
||
private static final Logger log = LoggerFactory.getLogger(FileItemSimilarity.class); | ||
|
||
/** | ||
* @param dataFile | ||
* file containing the similarity data | ||
*/ | ||
public RomarFileItemSimilarity(File dataFile, | ||
IteratorBuiler<ItemItemSimilarity> iteratorBuilder) { | ||
this(dataFile, iteratorBuilder, FileItemSimilarity.DEFAULT_MIN_RELOAD_INTERVAL_MS); | ||
} | ||
|
||
/** | ||
* @param minReloadIntervalMS | ||
* the minimum interval in milliseconds after which a full reload | ||
* of the original datafile is done when refresh() is called | ||
* @see #FileItemSimilarity(File) | ||
*/ | ||
public RomarFileItemSimilarity(File dataFile, | ||
IteratorBuiler<ItemItemSimilarity> iteratorBuilder, long minReloadIntervalMS) { | ||
Preconditions.checkArgument(dataFile != null, "dataFile is null"); | ||
Preconditions.checkArgument(dataFile.exists() && !dataFile.isDirectory(), | ||
"dataFile is missing or a directory: %s", dataFile); | ||
log.info("Creating FileItemSimilarity for file {}", dataFile); | ||
this.lastModified = dataFile.lastModified(); | ||
this.dataFile = dataFile; | ||
this.iteratorBuilder = iteratorBuilder; | ||
this.minReloadIntervalMS = minReloadIntervalMS; | ||
this.reloadLock = new ReentrantLock(); | ||
reload(); | ||
} | ||
|
||
@Override | ||
public double[] itemSimilarities(long itemID1, long[] itemID2s) throws TasteException { | ||
return delegate.itemSimilarities(itemID1, itemID2s); | ||
} | ||
|
||
@Override | ||
public long[] allSimilarItemIDs(long itemID) throws TasteException { | ||
return delegate.allSimilarItemIDs(itemID); | ||
} | ||
|
||
@Override | ||
public double itemSimilarity(long itemID1, long itemID2) throws TasteException { | ||
return delegate.itemSimilarity(itemID1, itemID2); | ||
} | ||
|
||
@Override | ||
public void refresh(Collection<Refreshable> alreadyRefreshed) { | ||
if (dataFile.lastModified() > lastModified + minReloadIntervalMS) { | ||
log.debug("File has changed; reloading..."); | ||
reload(); | ||
} | ||
} | ||
|
||
protected void reload() { | ||
if (reloadLock.tryLock()) { | ||
try { | ||
long newLastModified = dataFile.lastModified(); | ||
delegate = new GenericItemSimilarity(Util.iterable(iteratorBuilder | ||
.build(dataFile))); | ||
lastModified = newLastModified; | ||
} finally { | ||
reloadLock.unlock(); | ||
} | ||
} | ||
} | ||
|
||
@Override | ||
public String toString() { | ||
return "FileItemSimilarity[dataFile:" + dataFile + ']'; | ||
} | ||
|
||
} |
64 changes: 64 additions & 0 deletions
64
src/main/java/com/anjuke/romar/mahout/similarity/file/DataFileIterator.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
package com.anjuke.romar.mahout.similarity.file; | ||
|
||
import java.io.Closeable; | ||
import java.io.DataInputStream; | ||
import java.io.EOFException; | ||
import java.io.File; | ||
import java.io.FileInputStream; | ||
import java.io.IOException; | ||
import java.io.InputStream; | ||
import java.util.zip.GZIPInputStream; | ||
import java.util.zip.ZipInputStream; | ||
|
||
import com.google.common.collect.AbstractIterator; | ||
import com.google.common.io.Closeables; | ||
|
||
public class DataFileIterator extends AbstractIterator<byte[]> implements Closeable { | ||
|
||
private final DataInputStream inputStream; | ||
private final int dataSize; | ||
|
||
public DataFileIterator(File file, int dataSize) throws IOException { | ||
this(getFileInputStream(file), dataSize); | ||
} | ||
|
||
public DataFileIterator(InputStream is, int dataSize) throws IOException { | ||
this.inputStream = new DataInputStream(is); | ||
this.dataSize = dataSize; | ||
|
||
} | ||
|
||
static InputStream getFileInputStream(File file) throws IOException { | ||
InputStream is = new FileInputStream(file); | ||
String name = file.getName(); | ||
if (name.endsWith(".gz")) { | ||
return new GZIPInputStream(is); | ||
} else if (name.endsWith(".zip")) { | ||
return new ZipInputStream(is); | ||
} else { | ||
return is; | ||
} | ||
} | ||
|
||
@Override | ||
protected byte[] computeNext() { | ||
byte[] data = new byte[dataSize]; | ||
try { | ||
inputStream.readFully(data); | ||
} catch (EOFException eof) { | ||
close(); | ||
data=null; | ||
} catch (IOException ioe) { | ||
close(); | ||
throw new IllegalStateException(ioe); | ||
} | ||
return data; | ||
} | ||
|
||
@Override | ||
public void close() { | ||
endOfData(); | ||
Closeables.closeQuietly(inputStream); | ||
} | ||
|
||
} |
Oops, something went wrong.