Skip to content

Commit

Permalink
solve anjuke#5
Browse files Browse the repository at this point in the history
  • Loading branch information
kdlan committed Mar 28, 2013
1 parent af0e6d0 commit 68ba068
Show file tree
Hide file tree
Showing 10 changed files with 540 additions and 8 deletions.
9 changes: 9 additions & 0 deletions conf/romar.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,19 @@
#value can only be item,user or common
recommendType: item

# when file similarity is used, these settings will not be used
useSimilarityCache: true

similarityCacheSize: 10240


useFileSimilarity: true

similarityFile: data/similarity

# true to use binary file long,long,double
binarySimilarityFile: false

#item similarity can only be
# EuclideanDistanceSimilarity
# PearsonCorrelationSimilarity
Expand Down
54 changes: 54 additions & 0 deletions src/main/java/com/anjuke/romar/core/RomarConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ public static class RomarConfigHolder {
private Integer _userNeighborhoodNearestN;
private CommonRecommenderClass _commonRecommenderClass;
private String _persistencePath;
private Boolean _useFileSimilarity;
private Boolean _binarySimilarityFile;
private String _similarityFile;

public Boolean getAllowUserStringID() {
return _allowUserStringID;
Expand Down Expand Up @@ -162,6 +165,33 @@ public void setPersistencePath(String persistencePath) {
_persistencePath = persistencePath;
}

/**
 * @return the useFileSimilarity value stored in this holder; may be
 *         {@code null} when it has not been set
 */
public Boolean getUseFileSimilarity() {
    return this._useFileSimilarity;
}

/**
 * Stores the useFileSimilarity value in this holder.
 *
 * @param useFileSimilarity the value to store; {@code null} is stored as-is
 */
public void setUseFileSimilarity(Boolean useFileSimilarity) {
    this._useFileSimilarity = useFileSimilarity;
}

/**
 * @return the binarySimilarityFile value stored in this holder; may be
 *         {@code null} when it has not been set
 */
public Boolean getBinarySimilarityFile() {
    return this._binarySimilarityFile;
}

/**
 * Stores the binarySimilarityFile value in this holder.
 *
 * @param binarySimilarityFile the value to store; {@code null} is stored as-is
 */
public void setBinarySimilarityFile(Boolean binarySimilarityFile) {
    this._binarySimilarityFile = binarySimilarityFile;
}

/**
 * @return the similarityFile value stored in this holder; may be
 *         {@code null} when it has not been set
 */
public String getSimilarityFile() {
    return this._similarityFile;
}

/**
 * Stores the similarityFile value in this holder.
 *
 * @param similarityFile the value to store; {@code null} is stored as-is
 */
public void setSimilarityFile(String similarityFile) {
    this._similarityFile = similarityFile;
}




}

static {
Expand Down Expand Up @@ -410,4 +440,28 @@ public boolean isAllowItemStringID() {
return _defaultHolder._allowItemStringID;
}

/**
 * Resolves the effective useFileSimilarity setting.
 *
 * @return the customer holder's value when it is non-null, otherwise the
 *         default holder's value
 */
public Boolean isUseFileSimilarity() {
    Boolean customValue = _customerHolder.getUseFileSimilarity();
    // Both operands are boxed Boolean, so no unboxing happens here.
    return customValue != null ? customValue : _defaultHolder._useFileSimilarity;
}

/**
 * Resolves the effective binarySimilarityFile setting.
 *
 * @return the customer holder's value when it is non-null, otherwise the
 *         default holder's value
 */
public Boolean isBinarySimilarityFile() {
    Boolean customValue = _customerHolder.getBinarySimilarityFile();
    // Both operands are boxed Boolean, so no unboxing happens here.
    return customValue != null ? customValue : _defaultHolder._binarySimilarityFile;
}

/**
 * Resolves the effective similarityFile setting.
 *
 * @return the customer holder's value when it is non-null, otherwise the
 *         default holder's value
 */
public String getSimilarityFile() {
    String customValue = _customerHolder.getSimilarityFile();
    return customValue != null ? customValue : _defaultHolder._similarityFile;
}

}
4 changes: 3 additions & 1 deletion src/main/java/com/anjuke/romar/http/jetty/RomarRESTMain.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

import ch.qos.logback.access.jetty.RequestLogImpl;

import com.anjuke.romar.core.CoreContainer;
import com.anjuke.romar.core.RomarConfig;
import com.sun.jersey.spi.container.servlet.ServletContainer;

Expand All @@ -42,7 +43,8 @@ public static void main(String[] args) throws Exception {
}
org.slf4j.bridge.SLF4JBridgeHandler.install();
RomarConfig config = RomarConfig.getInstance();

//fast fail init
CoreContainer.getCore();
Server server = new Server(config.getServerPort());
ServletContextHandler context = new ServletContextHandler(
ServletContextHandler.NO_SESSIONS);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,11 @@
*/
package com.anjuke.romar.mahout.factory;

import java.io.File;

import org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.CachingItemSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.GenericItemSimilarity.ItemItemSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
Expand All @@ -26,6 +29,9 @@
import com.anjuke.romar.mahout.GenericReloadDataModel;
import com.anjuke.romar.mahout.MahoutService;
import com.anjuke.romar.mahout.RecommenderWrapper;
import com.anjuke.romar.mahout.similarity.RomarFileItemSimilarity;
import com.anjuke.romar.mahout.similarity.file.RomarFileSimilarityIterator;
import com.anjuke.romar.mahout.similarity.file.RomarFileSimilarityIterator.IteratorBuiler;

public class MahoutServiceItemRecommendFactory extends
AbstractMahoutServiceFactory implements MahoutServiceFactory {
Expand All @@ -35,13 +41,32 @@ public MahoutService createService() {
RomarConfig config = RomarConfig.getInstance();
Recommender recommender;
DataModel dataModel = wrapDataModel(new GenericReloadDataModel());
ItemSimilarity similarity;
if(config.isUseFileSimilarity()){
File file=new File(config.getSimilarityFile());
if(!file.exists()){
throw new IllegalArgumentException("similairy file not exists");
}

if(!file.isFile()){
throw new IllegalArgumentException("similairy file is a directory");
}

ItemSimilarity similarity = ClassUtils.instantiateAs(
config.getItemSimilarityClass(), ItemSimilarity.class,
new Class<?>[] {DataModel.class}, new Object[] {dataModel});
if (config.isUseSimilariyCache()) {
similarity = new CachingItemSimilarity(similarity,
config.getSimilarityCacheSize());
IteratorBuiler<ItemItemSimilarity> iteratorBuilder;
if(config.isBinarySimilarityFile()){
iteratorBuilder=RomarFileSimilarityIterator.dataFileItemIteratorBuilder();
}else{
iteratorBuilder=RomarFileSimilarityIterator.lineFileItemIteratorBuilder();
}
similarity=new RomarFileItemSimilarity(file,iteratorBuilder);
}else{
similarity = ClassUtils.instantiateAs(
config.getItemSimilarityClass(), ItemSimilarity.class,
new Class<?>[] {DataModel.class}, new Object[] {dataModel});
if (config.isUseSimilariyCache()) {
similarity = new CachingItemSimilarity(similarity,
config.getSimilarityCacheSize());
}
}
recommender = new GenericItemBasedRecommender(dataModel, similarity);
return new RecommenderWrapper(recommender);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
package com.anjuke.romar.mahout.similarity;

import java.io.File;
import java.util.Collection;
import java.util.concurrent.locks.ReentrantLock;

import org.apache.mahout.cf.taste.common.Refreshable;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.similarity.GenericItemSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.GenericItemSimilarity.ItemItemSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.file.FileItemSimilarity;
import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.anjuke.romar.mahout.similarity.file.RomarFileSimilarityIterator.IteratorBuiler;
import com.anjuke.romar.mahout.util.Util;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;

/**
 * An {@link ItemSimilarity} whose similarities are loaded from a file and
 * held in a {@link GenericItemSimilarity} delegate. All similarity queries
 * are forwarded to the current delegate; {@link #refresh(Collection)}
 * rebuilds the delegate when the file's last-modified timestamp has advanced
 * by more than the configured minimum reload interval.
 *
 * <p>{@code delegate} and {@code lastModified} are replaced by
 * {@link #reload()} while queries may be running on other threads, so both
 * fields are {@code volatile} to make a finished reload visible to readers.
 */
public class RomarFileItemSimilarity implements ItemSimilarity {

    // Bound to this class (not Mahout's FileItemSimilarity) so log output is
    // attributed to the component that actually produced it.
    private static final Logger log = LoggerFactory.getLogger(RomarFileItemSimilarity.class);

    private final IteratorBuiler<ItemItemSimilarity> iteratorBuilder;
    private final File dataFile;
    private final ReentrantLock reloadLock;
    private final long minReloadIntervalMS;

    // Written only inside reload() under reloadLock, read without the lock.
    private volatile ItemSimilarity delegate;
    private volatile long lastModified;

    /**
     * Creates the similarity with
     * {@link FileItemSimilarity#DEFAULT_MIN_RELOAD_INTERVAL_MS} as the
     * minimum reload interval.
     *
     * @param dataFile
     *            file containing the similarity data
     * @param iteratorBuilder
     *            builds the iterator that reads similarities from the file
     */
    public RomarFileItemSimilarity(File dataFile,
            IteratorBuiler<ItemItemSimilarity> iteratorBuilder) {
        this(dataFile, iteratorBuilder, FileItemSimilarity.DEFAULT_MIN_RELOAD_INTERVAL_MS);
    }

    /**
     * Creates the similarity and performs the initial load of the file.
     *
     * @param dataFile
     *            file containing the similarity data
     * @param iteratorBuilder
     *            builds the iterator that reads similarities from the file
     * @param minReloadIntervalMS
     *            the minimum interval in milliseconds after which a full
     *            reload of the original datafile is done when refresh() is
     *            called
     * @throws IllegalArgumentException
     *             if {@code dataFile} is null, missing or a directory, or if
     *             {@code iteratorBuilder} is null
     * @see #RomarFileItemSimilarity(File, IteratorBuiler)
     */
    public RomarFileItemSimilarity(File dataFile,
            IteratorBuiler<ItemItemSimilarity> iteratorBuilder, long minReloadIntervalMS) {
        Preconditions.checkArgument(dataFile != null, "dataFile is null");
        Preconditions.checkArgument(dataFile.exists() && !dataFile.isDirectory(),
                "dataFile is missing or a directory: %s", dataFile);
        // Fail with a clear message instead of a NullPointerException inside reload().
        Preconditions.checkArgument(iteratorBuilder != null, "iteratorBuilder is null");
        log.info("Creating FileItemSimilarity for file {}", dataFile);
        this.lastModified = dataFile.lastModified();
        this.dataFile = dataFile;
        this.iteratorBuilder = iteratorBuilder;
        this.minReloadIntervalMS = minReloadIntervalMS;
        this.reloadLock = new ReentrantLock();
        reload();
    }

    /** Forwards to the current delegate. */
    @Override
    public double[] itemSimilarities(long itemID1, long[] itemID2s) throws TasteException {
        return delegate.itemSimilarities(itemID1, itemID2s);
    }

    /** Forwards to the current delegate. */
    @Override
    public long[] allSimilarItemIDs(long itemID) throws TasteException {
        return delegate.allSimilarItemIDs(itemID);
    }

    /** Forwards to the current delegate. */
    @Override
    public double itemSimilarity(long itemID1, long itemID2) throws TasteException {
        return delegate.itemSimilarity(itemID1, itemID2);
    }

    /**
     * Reloads the file when its last-modified timestamp is more than
     * {@code minReloadIntervalMS} later than the timestamp recorded at the
     * last load. The {@code alreadyRefreshed} argument is not used.
     */
    @Override
    public void refresh(Collection<Refreshable> alreadyRefreshed) {
        if (dataFile.lastModified() > lastModified + minReloadIntervalMS) {
            log.debug("File has changed; reloading...");
            reload();
        }
    }

    /**
     * Rebuilds the delegate from the data file. If another thread currently
     * holds the reload lock, this call returns without doing anything.
     */
    protected void reload() {
        if (reloadLock.tryLock()) {
            try {
                // Capture the timestamp before reading so a modification made
                // during the load is picked up by a later refresh().
                long newLastModified = dataFile.lastModified();
                delegate = new GenericItemSimilarity(Util.iterable(iteratorBuilder
                        .build(dataFile)));
                lastModified = newLastModified;
            } finally {
                reloadLock.unlock();
            }
        }
    }

    @Override
    public String toString() {
        return "FileItemSimilarity[dataFile:" + dataFile + ']';
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
package com.anjuke.romar.mahout.similarity.file;

import java.io.Closeable;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.zip.GZIPInputStream;
import java.util.zip.ZipInputStream;

import com.google.common.collect.AbstractIterator;
import com.google.common.io.Closeables;

/**
 * Iterates over a stream as consecutive fixed-size binary records, returning
 * each record as a fresh {@code byte[]} of {@code dataSize} bytes.
 *
 * <p>Iteration ends when the stream reaches end-of-file; the underlying
 * stream is closed at that point. If the stream ends in the middle of a
 * record, that trailing partial record is not returned.
 */
public class DataFileIterator extends AbstractIterator<byte[]> implements Closeable {

    private final DataInputStream inputStream;
    private final int dataSize;

    /**
     * Opens {@code file} and iterates over its records. Files whose names end
     * in {@code .gz} or {@code .zip} are decompressed on the fly.
     *
     * @param file
     *            the file to read
     * @param dataSize
     *            size of one record in bytes; must be positive
     * @throws IOException
     *             if the file cannot be opened
     * @throws IllegalArgumentException
     *             if {@code dataSize} is not positive
     */
    public DataFileIterator(File file, int dataSize) throws IOException {
        this(openForRecords(file, dataSize), dataSize);
    }

    /**
     * Iterates over the records of an already opened stream.
     *
     * @param is
     *            the stream to read; closed when iteration ends or
     *            {@link #close()} is called
     * @param dataSize
     *            size of one record in bytes; must be positive
     * @throws IllegalArgumentException
     *             if {@code dataSize} is not positive
     */
    public DataFileIterator(InputStream is, int dataSize) throws IOException {
        checkDataSize(dataSize);
        this.inputStream = new DataInputStream(is);
        this.dataSize = dataSize;
    }

    /** Validates the record size before the file is opened, so nothing leaks on rejection. */
    private static InputStream openForRecords(File file, int dataSize) throws IOException {
        checkDataSize(dataSize);
        return getFileInputStream(file);
    }

    /**
     * Rejects non-positive record sizes: zero would make the iterator return
     * empty arrays forever, and a negative size cannot be allocated.
     */
    private static void checkDataSize(int dataSize) {
        if (dataSize <= 0) {
            throw new IllegalArgumentException("dataSize must be positive: " + dataSize);
        }
    }

    /**
     * Opens {@code file}, wrapping it in a decompressing stream when the file
     * name ends in {@code .gz} or {@code .zip}. For a zip file the returned
     * stream is positioned at the first entry.
     *
     * @throws IOException
     *             if the file cannot be opened, the compressed header is
     *             invalid, or a zip file contains no entries
     */
    static InputStream getFileInputStream(File file) throws IOException {
        InputStream raw = new FileInputStream(file);
        boolean opened = false;
        try {
            String name = file.getName();
            InputStream result;
            if (name.endsWith(".gz")) {
                result = new GZIPInputStream(raw);
            } else if (name.endsWith(".zip")) {
                ZipInputStream zip = new ZipInputStream(raw);
                // A ZipInputStream yields no data until it is positioned at
                // an entry; without this call every read reports EOF.
                if (zip.getNextEntry() == null) {
                    throw new IOException("zip file contains no entries: " + file);
                }
                result = zip;
            } else {
                result = raw;
            }
            opened = true;
            return result;
        } finally {
            if (!opened) {
                // Do not leak the file handle when a wrapper fails to initialize.
                Closeables.closeQuietly(raw);
            }
        }
    }

    /**
     * Reads the next record.
     *
     * @return the next {@code dataSize} bytes, or the end-of-data marker once
     *         the stream is exhausted
     * @throws IllegalStateException
     *             wrapping any {@link IOException} other than end-of-file
     */
    @Override
    protected byte[] computeNext() {
        byte[] data = new byte[dataSize];
        try {
            inputStream.readFully(data);
        } catch (EOFException eof) {
            // close() has already signalled endOfData(); the returned value is ignored.
            close();
            return null;
        } catch (IOException ioe) {
            close();
            throw new IllegalStateException(ioe);
        }
        return data;
    }

    /** Marks the iteration as finished and closes the underlying stream, ignoring close errors. */
    @Override
    public void close() {
        endOfData();
        Closeables.closeQuietly(inputStream);
    }

}
Loading

0 comments on commit 68ba068

Please sign in to comment.