Skip to content

Commit

Permalink
Random sampling combos #23 Subset sampling #25
Browse files Browse the repository at this point in the history
  • Loading branch information
vsuthichai committed Aug 18, 2016
1 parent 95daa98 commit 24247ec
Show file tree
Hide file tree
Showing 27 changed files with 421 additions and 213 deletions.
4 changes: 4 additions & 0 deletions core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
</dependency>
<dependency>
<groupId>com.github.dpaukov</groupId>
<artifactId>combinatoricslib3</artifactId>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
package com.eharmony.spotz.backend

import com.eharmony.spotz.objective.Objective
import com.eharmony.spotz.optimizer.RandomSampler
import com.eharmony.spotz.optimizer.grid.Grid
import com.eharmony.spotz.optimizer.hyperparam.RandomSampler

import scala.reflect.ClassTag

Expand All @@ -15,13 +15,13 @@ import scala.reflect.ClassTag
*/
trait BackendFunctions {
protected def bestRandomPointAndLoss[P, L](
startIndex: Long,
batchSize: Long,
objective: Objective[P, L],
reducer: ((P, L), (P, L)) => (P, L),
hyperParams: Map[String, RandomSampler[_]],
seed: Long = 0,
sampleFunction: (Map[String, RandomSampler[_]], Long) => P): (P, L)
startIndex: Long,
batchSize: Long,
objective: Objective[P, L],
reducer: ((P, L), (P, L)) => (P, L),
hyperParams: Map[String, RandomSampler[_]],
seed: Long = 0,
sampleFunction: (Map[String, RandomSampler[_]], Long) => P): (P, L)

protected def bestGridPointAndLoss[P, L](
startIndex: Long,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
package com.eharmony.spotz.backend

import com.eharmony.spotz.objective.Objective
import com.eharmony.spotz.optimizer.RandomSampler
import com.eharmony.spotz.optimizer.grid.Grid
import com.eharmony.spotz.optimizer.hyperparam.RandomSampler

import scala.reflect.ClassTag

Expand Down Expand Up @@ -31,13 +31,13 @@ trait ParallelFunctions extends BackendFunctions {
* @return the best point with the best loss as a tuple
*/
protected override def bestRandomPointAndLoss[P, L](
startIndex: Long,
batchSize: Long,
objective: Objective[P, L],
reducer: ((P, L), (P, L)) => (P, L),
hyperParams: Map[String, RandomSampler[_]],
seed: Long = 0,
sampleFunction: (Map[String, RandomSampler[_]], Long) => P): (P, L) = {
startIndex: Long,
batchSize: Long,
objective: Objective[P, L],
reducer: ((P, L), (P, L)) => (P, L),
hyperParams: Map[String, RandomSampler[_]],
seed: Long = 0,
sampleFunction: (Map[String, RandomSampler[_]], Long) => P): (P, L) = {

val pointsAndLosses = (startIndex until (startIndex + batchSize)).par.map { trial =>
val point = sampleFunction(hyperParams, seed + trial)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
package com.eharmony.spotz.backend

import com.eharmony.spotz.objective.Objective
import com.eharmony.spotz.optimizer.RandomSampler
import com.eharmony.spotz.optimizer.grid.Grid
import com.eharmony.spotz.optimizer.hyperparam.RandomSampler
import org.apache.spark.SparkContext

import scala.reflect.ClassTag
Expand Down Expand Up @@ -33,13 +33,13 @@ trait SparkFunctions extends BackendFunctions {
* @return the best point with the best loss as a tuple
*/
protected override def bestRandomPointAndLoss[P, L](
startIndex: Long,
batchSize: Long,
objective: Objective[P, L],
reducer: ((P, L), (P, L)) => (P, L),
hyperParams: Map[String, RandomSampler[_]],
seed: Long = 0,
sampleFunction: (Map[String, RandomSampler[_]], Long) => P): (P, L) = {
startIndex: Long,
batchSize: Long,
objective: Objective[P, L],
reducer: ((P, L), (P, L)) => (P, L),
hyperParams: Map[String, RandomSampler[_]],
seed: Long = 0,
sampleFunction: (Map[String, RandomSampler[_]], Long) => P): (P, L) = {

assert(batchSize > 0, "batchSize must be greater than 0")

Expand Down
150 changes: 0 additions & 150 deletions core/src/main/scala/com/eharmony/spotz/optimizer/HyperParameter.scala

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
package com.eharmony.spotz.optimizer.hyperparam

import scala.util.Random


/** A [[RandomSampler]] whose sampled value is a collection of collections of `T`. */
trait CombinatoricRandomSampler[T] extends RandomSampler[Iterable[Iterable[T]]]
/** A [[RandomSampler]] whose sampled value is a single collection of `T`. */
trait IterableRandomSampler[T] extends RandomSampler[Iterable[T]]

/**
 * Shared machinery for samplers that draw `k`-element combinations from a finite iterable.
 *
 * Every `k`-element combination of `iterable` is enumerated eagerly when the instance is
 * constructed and kept in `combinations`; `combos` then selects `x` of them at random.
 *
 * @param iterable    the elements to build combinations from
 * @param k           the number of elements in each combination; asserted to be greater than 0
 *                    and no larger than the number of elements in `iterable`
 * @param x           the number of combinations to return from each call to `combos`
 * @param replacement when true, each of the `x` picks is independent, so the same combination
 *                    may appear more than once; when false, every returned combination comes
 *                    from a distinct position in the enumeration
 * @tparam T          the element type of the iterable
 */
abstract class AbstractCombinations[T](
    iterable: Iterable[T],
    k: Int,
    x: Int = 1,
    replacement: Boolean = false) extends Serializable {

  import org.paukov.combinatorics3.Generator

  import scala.collection.JavaConverters._

  private val values = iterable.toSeq

  assert(k > 0, "k must be greater than 0")
  assert(k <= values.length, s"k must be less than or equal to length of the iterable, ${values.length}")

  // TODO: This is hideous! Rewrite this to be more memory efficient by unranking combinations. For now, use a Java lib.
  val combinations = Generator
    .combination(iterable.asJavaCollection)
    .simple(k)
    .asScala
    .toIndexedSeq
    .map(l => l.asScala.toIndexedSeq)

  /**
   * Draw combinations at random from the pre-computed enumeration.
   *
   * @param rng the random number generator that supplies the positions to pick
   * @return `x` combinations when sampling with replacement; otherwise the smaller of `x` and
   *         the total number of combinations, each taken from a different position
   */
  def combos(rng: Random): Iterable[Iterable[T]] = {
    val total = combinations.size
    if (!replacement) {
      // Cannot return more distinct combinations than exist.
      val wanted = scala.math.min(x, total)
      val seen = collection.mutable.Set[Int]()
      val picked = new collection.mutable.ArrayBuffer[Iterable[T]](wanted)
      // Rejection sampling: keep drawing positions, ignoring any that were already taken.
      while (seen.size < wanted) {
        val candidate = rng.nextInt(total)
        // `add` reports true only when the position was not already in the set.
        if (seen.add(candidate)) {
          picked += combinations(candidate)
        }
      }
      picked.toIndexedSeq
    } else {
      Seq.fill(x)(combinations(rng.nextInt(total)))
    }
  }
}


/**
* Sample a single combination of K unordered items from the iterable of length N.
*
* This is [[AbstractCombinations]] with the number of combinations per draw fixed to 1;
* `apply` unwraps that one combination.
*
* @param iterable the finite collection of items to choose from
* @param k the number of items in the sampled combination
* @param replacement passed through unchanged to [[AbstractCombinations]]
* @tparam T the element type of the iterable
*/
case class Combination[T](
iterable: Iterable[T],
k: Int,
replacement: Boolean = false)
extends AbstractCombinations[T](iterable, k, 1, replacement) with IterableRandomSampler[T] {

// x is fixed to 1 in the parent constructor call above, so only the first element of combos is needed.
override def apply(rng: Random): Iterable[T] = combos(rng).head
}


/**
* Binomial coefficient sampler. Pick K unordered items from an Iterable of N items.
* Also known as N Choose K, where N is the size of an Iterable and K is the desired number
* of items to be chosen. The parent class computes all the possible choices up front; each
* call to `apply` returns `x` of them picked at random, not the full enumeration.
*
* @param iterable an iterable of finite length
* @param k the number of items to choose
* @param x the number of combinations to return per sample; when sampling without replacement
*          the result holds at most as many combinations as exist
* @param replacement whether the same combination may be returned more than once in one sample
* @tparam T the element type of the iterable
*/
case class Combinations[T](
iterable: Iterable[T],
k: Int,
x: Int = 1,
replacement: Boolean = false)
extends AbstractCombinations[T](iterable, k, x, replacement) with CombinatoricRandomSampler[T] {

// Returns the sampled combinations exactly as produced by the parent class.
override def apply(rng: Random): Iterable[Iterable[T]] = combos(rng)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import com.eharmony.spotz.optimizer.hyperparam.RandomSampler

import scala.util.Random

// NOTE(review): no `package` clause is visible before this file's imports, unlike the sibling
// sampler files, which declare `com.eharmony.spotz.optimizer.hyperparam` - confirm this is intended.
/**
 * Sampler that draws a `Double` from a normal (Gaussian) distribution.
 *
 * {{{
 * val hyperParamSpace = Map(
 *   ("x1", NormalDistribution(0, 0.1))
 * )
 * }}}
 *
 * @param mean the mean of the distribution
 * @param std  the standard deviation of the distribution
 */
case class NormalDistribution(mean: Double, std: Double) extends RandomSampler[Double] {
  // Shift and scale a standard normal draw to the requested mean and standard deviation.
  override def apply(rng: Random): Double = mean + rng.nextGaussian() * std
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package com.eharmony.spotz.optimizer.hyperparam

import scala.util.Random

/**
 * Pick one element, uniformly at random, from a finite iterable.
 *
 * {{{
 * val hyperParamSpace = Map(
 *   ("x1", RandomChoice(Seq("svm", "logistic")))
 * )
 * }}}
 *
 * Construction fails with an `IllegalArgumentException` when the iterable has no elements.
 *
 * @param iterable the candidate values to choose from
 * @tparam T       the type of the candidate values
 */
case class RandomChoice[T](iterable: Iterable[T]) extends RandomSampler[T] {
  // Snapshot the candidates once so each draw is a positional lookup.
  private val choices = iterable.toIndexedSeq

  if (choices.isEmpty) {
    throw new IllegalArgumentException("Empty iterable")
  }

  override def apply(rng: Random): T = {
    val position = rng.nextInt(choices.length)
    choices(position)
  }
}
Loading

0 comments on commit 24247ec

Please sign in to comment.