diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 17787f4..559616b 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -28,7 +28,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest]
-        scala: [3.2.2]
+        scala: [3.3.0]
         java: [temurin@8]
         project: [rootJS, rootJVM, rootNative]
     runs-on: ${{ matrix.os }}
@@ -94,11 +94,11 @@ jobs:
 
       - name: Make target directories
         if: github.event_name != 'pull_request' && (startsWith(github.ref, 'refs/tags/v'))
-        run: mkdir -p target unidocs/target .js/target site/target vector/js/target vector/native/target verification/target .jvm/target .native/target demo/jvm/target demo/native/target vector/jvm/target demo/js/target project/target
+        run: mkdir -p target unidocs/target .js/target site/target vector/js/target vector/native/target verification/target tests/js/target .jvm/target .native/target demo/jvm/target tests/jvm/target demo/native/target vector/jvm/target tests/native/target demo/js/target project/target
 
       - name: Compress target directories
         if: github.event_name != 'pull_request' && (startsWith(github.ref, 'refs/tags/v'))
-        run: tar cf targets.tar target unidocs/target .js/target site/target vector/js/target vector/native/target verification/target .jvm/target .native/target demo/jvm/target demo/native/target vector/jvm/target demo/js/target project/target
+        run: tar cf targets.tar target unidocs/target .js/target site/target vector/js/target vector/native/target verification/target tests/js/target .jvm/target .native/target demo/jvm/target tests/jvm/target demo/native/target vector/jvm/target tests/native/target demo/js/target project/target
 
       - name: Upload target directories
         if: github.event_name != 'pull_request' && (startsWith(github.ref, 'refs/tags/v'))
@@ -114,7 +114,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest]
-        scala: [3.2.2]
+        scala: [3.3.0]
         java: [temurin@8]
     runs-on: ${{ matrix.os }}
     steps:
@@ -151,32 +151,32 @@ jobs:
             ~/Library/Caches/Coursier/v1
           key: ${{ runner.os }}-sbt-cache-v2-${{ hashFiles('**/*.sbt') }}-${{ hashFiles('project/build.properties') }}
 
-      - name: Download target directories (3.2.2, rootJS)
+      - name: Download target directories (3.3.0, rootJS)
         uses: actions/download-artifact@v3
         with:
-          name: target-${{ matrix.os }}-${{ matrix.java }}-3.2.2-rootJS
+          name: target-${{ matrix.os }}-${{ matrix.java }}-3.3.0-rootJS
 
-      - name: Inflate target directories (3.2.2, rootJS)
+      - name: Inflate target directories (3.3.0, rootJS)
         run: |
           tar xf targets.tar
           rm targets.tar
 
-      - name: Download target directories (3.2.2, rootJVM)
+      - name: Download target directories (3.3.0, rootJVM)
         uses: actions/download-artifact@v3
         with:
-          name: target-${{ matrix.os }}-${{ matrix.java }}-3.2.2-rootJVM
+          name: target-${{ matrix.os }}-${{ matrix.java }}-3.3.0-rootJVM
 
-      - name: Inflate target directories (3.2.2, rootJVM)
+      - name: Inflate target directories (3.3.0, rootJVM)
         run: |
           tar xf targets.tar
           rm targets.tar
 
-      - name: Download target directories (3.2.2, rootNative)
+      - name: Download target directories (3.3.0, rootNative)
         uses: actions/download-artifact@v3
         with:
-          name: target-${{ matrix.os }}-${{ matrix.java }}-3.2.2-rootNative
+          name: target-${{ matrix.os }}-${{ matrix.java }}-3.3.0-rootNative
 
-      - name: Inflate target directories (3.2.2, rootNative)
+      - name: Inflate target directories (3.3.0, rootNative)
         run: |
           tar xf targets.tar
           rm targets.tar
@@ -200,7 +200,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest]
-        scala: [3.2.2]
+        scala: [3.3.0]
         java: [temurin@8]
     runs-on: ${{ matrix.os }}
     steps:
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..f58f162
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,11 @@
+.bloop
+.bsp
+.js
+.jvm
+.metals
+.native
+.vscode
+target
+project/build.sbt
+project/metals.sbt
+project/project
diff --git a/build.sbt b/build.sbt
index 85921c0..679b138 100644
--- a/build.sbt
+++ b/build.sbt
@@ -1,5 +1,5 @@
 val appVersion:String = "0.101"
-val globalScalaVersion = "3.2.2"
+val globalScalaVersion = "3.3.0"
 
 ThisBuild / organization := "ai.dragonfly"
 ThisBuild / organizationName := "dragonfly.ai"
@@ -19,7 +19,11 @@ ThisBuild / nativeConfig ~= {
 
 }
 
-lazy val vector = crossProject(JSPlatform, JVMPlatform, NativePlatform)
+lazy val vector = crossProject(
+  JSPlatform,
+  JVMPlatform,
+  NativePlatform
+)
   .crossType(CrossType.Full)
   .settings(
     description := "High performance, low footprint, cross platform, vector and statistics library!",
@@ -42,7 +46,11 @@ lazy val verification = project
     )
   )
 
-lazy val demo = crossProject(JSPlatform, JVMPlatform, NativePlatform)
+lazy val demo = crossProject(
+  JSPlatform,
+  JVMPlatform,
+  NativePlatform
+)
   .crossType(CrossType.Full)
   .enablePlugins(NoPublishPlugin)
   .dependsOn(vector)
@@ -61,7 +69,7 @@ lazy val demo = crossProject(JSPlatform, JVMPlatform, NativePlatform)
   .jvmSettings()
   .nativeSettings()
 
-lazy val root = tlCrossRootProject.aggregate(vector).settings(name := "vector")
+lazy val root = tlCrossRootProject.aggregate(vector, tests).settings(name := "vector")
 
 lazy val docs = project.in(file("site")).enablePlugins(TypelevelSitePlugin).settings(
   mdocVariables := Map(
@@ -76,5 +84,25 @@ lazy val unidocs = project
   .enablePlugins(TypelevelUnidocPlugin) // also enables the ScalaUnidocPlugin
   .settings(
     name := "vector-docs",
-    ScalaUnidoc / unidoc / unidocProjectFilter := inProjects(vector.jvm, vector.js, vector.native)
-  )
\ No newline at end of file
+    ScalaUnidoc / unidoc / unidocProjectFilter :=
+      inProjects(
+        vector.jvm,
+        vector.js,
+        vector.native
+      )
+  )
+
+// munit suites for the vector library, cross-built for JVM, JS and Native; never published.
+lazy val tests = crossProject(
+  JVMPlatform,
+  JSPlatform,
+  NativePlatform
+)
+  .crossType(CrossType.Full) // sources live under tests/shared, as in vector and demo
+  .in(file("tests"))
+  .enablePlugins(NoPublishPlugin)
+  .dependsOn(vector)
+  .settings(
+    name := "vector-tests",
+    libraryDependencies += "org.scalameta" %%% "munit" % "1.0.0-M8" % Test
+  )
diff --git a/project/build.properties b/project/build.properties
index fdb2429..3d7427d 100644
--- a/project/build.properties
+++ b/project/build.properties
@@ -1 +1 @@
-sbt.version=1.8.3
\ No newline at end of file
+sbt.version=1.9.2
\ No newline at end of file
diff --git a/project/plugins.sbt b/project/plugins.sbt
index a62ba47..6b760ec 100644
--- a/project/plugins.sbt
+++ b/project/plugins.sbt
@@ -1,6 +1,6 @@
-val crossVer = "1.2.0"
-val scalaJSVersion = "1.11.0"
-val scalaNativeVersion = "0.4.7"
+val crossVer = "1.3.2"
+val scalaJSVersion = "1.13.2"
+val scalaNativeVersion = "0.4.14"
 
 addDependencyTreePlugin
 
diff --git a/tests/shared/src/test/scala/Instantiate.scala b/tests/shared/src/test/scala/Instantiate.scala
new file mode 100644
index 0000000..88bb50b
--- /dev/null
+++ b/tests/shared/src/test/scala/Instantiate.scala
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2023 dragonfly.ai
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import ai.dragonfly.math.vector.Vec
+import narr.NArray
+
+class InstantiateTests extends munit.FunSuite:
+
+  test(" ways of making vecs ") {
+
+    val dim = 5 // every construction path below must yield a vector of this dimension
+
+    val v = Vec.fromTuple(1.0, 2.0, 3.0, 4.0, 5.0)
+    val v2 = Vec[5](1.0, 2.0, 3.0, 4.0, 5.0)
+    val v_fill = Vec.fill[5](1.0)
+
+    val v_zeros = Vec.zeros[5]
+    val v_ones = Vec.ones[5]
+
+    val v_rand = Vec.random[5]()
+    val v_rand_max_min = Vec.random[5](2.0, 0.5)
+
+    assertEquals(v.dimension, dim)
+    assertEquals(v2.dimension, dim)
+    assertEquals(v_fill.dimension, dim)
+    assertEquals(v_zeros.dimension, dim)
+    assertEquals(v_ones.dimension, dim)
+    assertEquals(v_rand.dimension, dim)
+    assertEquals(v_rand_max_min.dimension, dim)
+
+  }
+
+end InstantiateTests
diff --git a/tests/shared/src/test/scala/SimpleStats.scala b/tests/shared/src/test/scala/SimpleStats.scala
new file mode 100644
index 0000000..0984fac
--- /dev/null
+++ b/tests/shared/src/test/scala/SimpleStats.scala
@@ -0,0 +1,92 @@
+/*
+ * Copyright 2023 dragonfly.ai
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import ai.dragonfly.math.vector.Vec
+import narr.NArray
+
+class SimpleStatsTests extends munit.FunSuite:
+
+  test("Some basic properties") {
+
+    val v = Vec.fromTuple(1.0, 2.0, 3.0, 4.0, 5.0)
+    val v2 = Vec.fromTuple(1.0, 2.0, 3.0, 4.0, 5.0)
+
+    assertEquals(v.dimension, v2.dimension)
+
+    // Reference, not value equality!
+    assertNotEquals(v, v2)
+  }
+
+  test("sample mean") {
+    val v = Vec.fromTuple(2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0)
+    assertEquals(v.mean, 5.0)
+  }
+
+  test("sample variance and std") {
+    val v = Vec.fromTuple(2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0)
+    assertEqualsDouble(v.variance, 4.571429, 0.00001)
+    assertEqualsDouble(v.stdDev, 2.13809, 0.00001)
+  }
+
+  test("sample covariance") {
+    // Sample version
+    // https://corporatefinanceinstitute.com/resources/data-science/covariance/
+
+    val vector1 = Vec.fromTuple(1692.0, 1978.0, 1884.0, 2151.0, 2519.0)
+    val vector2 = Vec.fromTuple(68.0, 102.0, 110.0, 112.0, 154.0)
+
+    val result = vector1.covariance(vector2)
+    assertEqualsDouble(result, 9107.3, 0.001)
+
+  }
+
+  test("pearson correlation coefficient") {
+    // https://www.statisticshowto.com/probability-and-statistics/correlation-coefficient-formula/
+    val v1 = Vec.fromTuple(43.0, 21.0, 25.0, 42.0, 57.0, 59.0)
+    val v2 = Vec.fromTuple(99.0, 65.0, 79.0, 75.0, 87.0, 81.0)
+    assertEqualsDouble(v1.pearsonCorrelationCoefficient(v2), 0.529809, 0.0001)
+
+  }
+
+  test("element rank") {
+    val v = Vec.fromTuple(1.0, 5.0, 3.0, 6.0, 1.0, 5.0)
+    /*
+      1.0 is the smallest value but occurs twice, so both occurrences share the average of ranks 1 and 2: 1.5
+    */
+    assertEquals(v.elementRanks.csv(), Array[Double](1.5,4.5,3.0,6.0,1.5,4.5).mkString(","))
+  }
+
+  test("spearmans rank") {
+    // https://statistics.laerd.com/statistical-guides/spearmans-rank-order-correlation-statistical-guide-2.php
+    val v1 = Vec.fromTuple(56.0, 75.0, 45.0, 71.0, 62.0, 64.0, 58.0, 80.0, 76.0, 61.0)
+    val v2 = Vec.fromTuple(66.0, 70.0, 40.0, 60.0, 65.0, 56.0, 59.0, 77.0, 67.0, 63.0)
+    assertEqualsDouble(v1.spearmansRankCorrelation(v2), 0.6727, 0.001)
+
+    // https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient
+
+    val v3 = Vec[10](86.0, 97.0, 99.0, 100.0, 101.0, 103.0, 106.0, 110.0, 112.0, 113.0)
+    val v4 = Vec[10](2.0, 20.0, 28.0, 27.0, 50.0, 29.0, 7.0, 17.0, 6.0, 12.0)
+    assertEqualsDouble(v3.spearmansRankCorrelation(v4), -0.1757575, 0.000001)
+  }
+
+  test("spearmans rank with ties") {
+    // Tied values share an averaged rank, so rho is Pearson's r of the rank vectors:
+    // ranks (1, 2.5, 2.5, 4) against (1, 2, 3, 4) give 4.5 / sqrt(4.5 * 5.0) = 0.948683
+    val v1 = Vec.fromTuple(1.0, 2.0, 2.0, 4.0)
+    val v2 = Vec.fromTuple(1.0, 2.0, 3.0, 4.0)
+    assertEqualsDouble(v1.spearmansRankCorrelation(v2), 0.948683, 0.00001)
+  }
+end SimpleStatsTests
diff --git a/vector/shared/src/main/scala/ai/dragonfly/math/vector/package.scala b/vector/shared/src/main/scala/ai/dragonfly/math/vector/package.scala
index 1d58a3e..1531b1f 100644
--- a/vector/shared/src/main/scala/ai/dragonfly/math/vector/package.scala
+++ b/vector/shared/src/main/scala/ai/dragonfly/math/vector/package.scala
@@ -24,11 +24,11 @@ import narr.*
 import scala.compiletime.ops.any.==
 import scala.compiletime.ops.boolean.&&
 import scala.compiletime.ops.int.*
+import scala.collection.View
 
 package object vector {
 
   opaque type Vec[N <: Int] = NArray[Double]
-
   object Vec {
     inline def apply[N <: Int](a: NArray[Double]): Vec[N] = { // cast a NArray[Double] as Vec[N]
       dimensionCheck(a, valueOf[N])
@@ -37,6 +37,8 @@ package object vector {
 
     inline def zeros[N <: Int](using ValueOf[N]): Vec[N] = fill[N](0.0)
 
+    inline def ones[N <: Int](using ValueOf[N]): Vec[N] = fill[N](1.0)
+
     inline def random[N <: Int](
       MAX:Double = 1.0,
       min:Double = 0.0,
@@ -166,6 +168,138 @@ package object vector {
       copyOfThisVector
     }
 
+    /**
+     * Arithmetic mean of the components of this vector.
+     *
+     * @return the sum of all components divided by the dimension.
+     */
+    inline def mean: Double = {
+      var sum = 0.0
+      var i = 0
+      while (i < dimension) {
+        sum = sum + thisVector(i)
+        i = i + 1
+      }
+      sum / dimension
+    }
+
+    /**
+     * Sample variance of the components of this vector.
+     * The components are treated as a sample rather than a complete population,
+     * so the sum of squared deviations is divided by (dimension - 1).
+     * https://www.cuemath.com/sample-variance-formula/
+     *
+     * @return the sample variance; NaN for a 1-dimensional vector.
+     */
+    inline def variance: Double = {
+      val mu = thisVector.mean
+      var sumOfSquares = 0.0
+      var i = 0
+      while (i < dimension) {
+        sumOfSquares = sumOfSquares + squareInPlace(thisVector(i) - mu)
+        i = i + 1
+      }
+      sumOfSquares / (dimension - 1)
+    }
+
+    /**
+     * Sample standard deviation: the square root of the sample variance.
+     * https://www.cuemath.com/data/standard-deviation/
+     */
+    inline def stdDev: Double = Math.sqrt(thisVector.variance)
+
+    /**
+     * Sample covariance of this vector and `thatVector`.
+     *
+     * @param thatVector the vector to compare against, component by component.
+     * @return the sum of the products of paired deviations from the means, divided by (dimension - 1).
+     */
+    def covariance(thatVector: Vec[N]): Double = {
+      val n = thisVector.dimension
+      val muThis = thisVector.mean
+      val muThat = thatVector.mean
+      var sum = 0.0
+      var i = 0
+      while (i < n) {
+        sum = sum + (thisVector(i) - muThis) * (thatVector(i) - muThat)
+        i = i + 1
+      }
+      sum / (n - 1)
+    }
+
+    /**
+     * Pearson correlation coefficient of this vector and `thatVector`.
+     * Deviations from the means are accumulated rather than raw sums of squares,
+     * which avoids cancellation when the components are large relative to their spread.
+     *
+     * @param thatVector the vector to correlate with.
+     * @return the correlation coefficient, or NaN when either vector has zero variance.
+     */
+    def pearsonCorrelationCoefficient(thatVector: Vec[N]): Double = {
+      val n = thisVector.dimension
+      val muThis = thisVector.mean
+      val muThat = thatVector.mean
+      var sumXY = 0.0
+      var sumXX = 0.0
+      var sumYY = 0.0
+      var i = 0
+      while (i < n) {
+        val dx = thisVector(i) - muThis
+        val dy = thatVector(i) - muThat
+        sumXY = sumXY + dx * dy
+        sumXX = sumXX + dx * dx
+        sumYY = sumYY + dy * dy
+        i = i + 1
+      }
+      sumXY / Math.sqrt(sumXX * sumYY)
+    }
+
+    /**
+     * Spearman's rank correlation coefficient of this vector and `thatVector`,
+     * computed as the Pearson correlation of the two rank vectors.  Unlike the
+     * shortcut 1 - 6 * sum(d^2) / (n * (n^2 - 1)), this stays correct when
+     * elementRanks assigns averaged ranks to tied values.
+     *
+     * @param thatVector the vector to correlate with.
+     */
+    def spearmansRankCorrelation(thatVector: Vec[N]): Double =
+      thisVector.elementRanks.pearsonCorrelationCoefficient(thatVector.elementRanks)
+
+    /** Alias for pearsonCorrelationCoefficient, the most commonly requested type of correlation. */
+    def corr(thatVector: Vec[N]): Double = pearsonCorrelationCoefficient(thatVector)
+
+    /**
+     * Ranks of the components of this vector in ascending order, starting at 1.
+     * Tied components all receive the average of the ranks they occupy,
+     * e.g. (1.0, 5.0, 3.0, 6.0, 1.0, 5.0) ranks as (1.5, 4.5, 3.0, 6.0, 1.5, 4.5).
+     *
+     * @return a new vector holding, at each index, the rank of the component at that index.
+     */
+    def elementRanks: Vec[N] = {
+      val n = thisVector.dimension
+      // order(k) is the index in thisVector of the k-th smallest component.
+      val order: Array[Int] = Array.tabulate[Int](n)(i => i)
+        .sortBy(i => thisVector(i))(using Ordering.Double.TotalOrdering)
+      val ranks: Vec[N] = new NArray[Double](n)
+
+      var start = 0
+      while (start < n) {
+        // Extend [start, runEnd) over the run of components equal to the one at start.
+        val value = thisVector(order(start))
+        var runEnd = start + 1
+        while (runEnd < n && thisVector(order(runEnd)) == value) runEnd = runEnd + 1
+        // The run occupies the 1-based ranks start + 1 .. runEnd, whose average is:
+        val averageRank = (start + 1 + runEnd) / 2.0
+        var k = start
+        while (k < runEnd) {
+          ranks(order(k)) = averageRank
+          k = k + 1
+        }
+        start = runEnd
+      }
+      ranks
+    }
+
     inline def normSquared: Double = {
       var mag2 = 0.0
       var i = 0
@@ -390,4 +524,4 @@ case class ExtraDimensionalAccessException[N <: Int](v:Vec[N], ci: Int) extends
   import vector.*
   import Vec.*
   s"Index: $ci exceeds dimensionality of Euclidean object${v.dimension}: ${v.render()}"
-})
\ No newline at end of file
+})