From f7c72a87eb30c12984100b923578d2e094524a2f Mon Sep 17 00:00:00 2001 From: alia Date: Mon, 22 Jul 2019 17:17:15 +0200 Subject: [PATCH 01/47] First Part --- .../spark3d/spatialPartitioning/KDtree.scala | 172 ++++++++++++++++++ 1 file changed, 172 insertions(+) create mode 100644 src/main/scala/com/spark3d/spatialPartitioning/KDtree.scala diff --git a/src/main/scala/com/spark3d/spatialPartitioning/KDtree.scala b/src/main/scala/com/spark3d/spatialPartitioning/KDtree.scala new file mode 100644 index 0000000..4b86479 --- /dev/null +++ b/src/main/scala/com/spark3d/spatialPartitioning/KDtree.scala @@ -0,0 +1,172 @@ +/* + * Copyright 2018 AstroLab Software + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.astrolabsoftware.spark3d.spatialPartitioning + +import com.astrolabsoftware.spark3d.geometryObjects._ +import com.astrolabsoftware.spark3d.geometryObjects.Shape3D.Shape3D +import com.astrolabsoftware.spark3d.geometryObjects.Point3D + +import scala.util.control.Breaks._ +import scala.collection.mutable.ListBuffer +import scala.collection.mutable.Queue + +/**Description: This class is responsible for: + * building Balanced KDtree for 3D points + * Printing the B2DT + * */ + +class KDtree() extends Serializable { + var point:Point3D=null + var box: BoxEnvelope=null + var level: Int=0 + var parent: KDtree = null + var left: KDtree = null + var right: KDtree = null + var check:Int=0 + + /** + * Insert Element to build KDtree + */ + def insertElement(current:KDtree, value:Point3D, level:Int):KDtree={ + + var currentLevel=level%3 + if(current==null) + return null + // For x dimension + if(currentLevel==0){ + //left side + if(value.x Date: Tue, 23 Jul 2019 23:35:24 +0200 Subject: [PATCH 02/47] Second Update --- .../spark3d/spatialPartitioning/KDtree.scala | 61 +++++++++++++++---- 1 file changed, 50 insertions(+), 11 deletions(-) diff --git a/src/main/scala/com/spark3d/spatialPartitioning/KDtree.scala b/src/main/scala/com/spark3d/spatialPartitioning/KDtree.scala index 4b86479..e1d8f0d 100644 --- a/src/main/scala/com/spark3d/spatialPartitioning/KDtree.scala +++ b/src/main/scala/com/spark3d/spatialPartitioning/KDtree.scala @@ -15,6 +15,8 @@ */ package com.astrolabsoftware.spark3d.spatialPartitioning +import com.astrolabsoftware.spark3d.spatialPartitioning +import com.astrolabsoftware.spark3d.geometryObjects.BoxEnvelope import com.astrolabsoftware.spark3d.geometryObjects._ import com.astrolabsoftware.spark3d.geometryObjects.Shape3D.Shape3D import com.astrolabsoftware.spark3d.geometryObjects.Point3D @@ -42,7 +44,7 @@ class KDtree() extends Serializable { */ def insertElement(current:KDtree, value:Point3D, level:Int):KDtree={ - var currentLevel=level%3 + var 
currentLevel=level%2 if(current==null) return null // For x dimension @@ -57,7 +59,7 @@ class KDtree() extends Serializable { current.left.point=value current.left.parent=current current.left.level=level+1 - current.left.box=BoxEnvelope.apply(box.minX,point.x,box.minY,box.maxY,box.minZ,box.maxZ) + current.left.box=BoxEnvelope.apply(current.box.minX,current.point.x,current.box.minY,current.box.maxY,current.box.minZ,current.box.maxZ) } } //right side @@ -70,7 +72,7 @@ class KDtree() extends Serializable { current.right.point=value current.right.parent=current current.right.level=level+1 - current.right.box=BoxEnvelope.apply(point.x,box.maxX,box.minY,box.maxY,box.minZ,box.maxZ) + current.right.box=BoxEnvelope.apply(current.point.x,current.box.maxX,current.box.minY,current.box.maxY,current.box.minZ,current.box.maxZ) } } @@ -89,7 +91,7 @@ class KDtree() extends Serializable { current.left.point=value current.left.parent=current current.left.level=level+1 - current.left.box=BoxEnvelope.apply(box.minX,box.maxX,box.minY,point.y,box.minZ,box.maxZ) + current.left.box=BoxEnvelope.apply(current.box.minX,current.box.maxX,current.box.minY,current.point.y,current.box.minZ,current.box.maxZ) } } //right side @@ -102,7 +104,7 @@ class KDtree() extends Serializable { current.right.point=value current.right.parent=current current.right.level=level+1 - current.right.box=BoxEnvelope.apply(box.minX,box.maxX,point.y,box.maxX,box.minZ,box.maxZ) + current.right.box=BoxEnvelope.apply(current.box.minX,current.box.maxX,current.point.y,current.box.maxX,current.box.minZ,current.box.maxZ) } } @@ -120,7 +122,7 @@ class KDtree() extends Serializable { current.left.point=value current.left.parent=current current.left.level=level+1 - current.left.box=BoxEnvelope.apply(box.minX,box.maxX,box.minY,box.maxX,box.minZ,point.z) + current.left.box=BoxEnvelope.apply(current.box.minX,current.box.maxX,current.box.minY,current.box.maxX,current.box.minZ,current.point.z) } } //right side @@ -133,7 +135,7 @@ class 
KDtree() extends Serializable { current.right.point=value current.right.parent=current current.right.level=level+1 - current.right.box=BoxEnvelope.apply(box.minX,box.maxX,box.minY,box.maxX,point.z,box.maxZ) + current.right.box=BoxEnvelope.apply(current.box.minX,current.box.maxX,current.box.minY,current.box.maxX,current.point.z,current.box.maxZ) } } }// end of z dim @@ -144,11 +146,10 @@ class KDtree() extends Serializable { * This is used to insert one point to KDtree * */ - def insert(value:Point3D):Unit={ - - if(this.point==null){ + def insert(value:Point3D, initialBox:BoxEnvelope):Unit={ + if(this.point==null){ print ("first") this.point=value - this.box=BoxEnvelope.apply(0,4,0,3,0,7) + this.box=initialBox } else insertElement(this,value,0) @@ -156,6 +157,44 @@ class KDtree() extends Serializable { } + /** + * Insert list of 3D points + * + */ + def insertList (points: List[Point3D], level:Int, initialBox:BoxEnvelope ):Unit ={ + var currentLevel=level%2 + if( points!=null) + { + var sortedPoints:List[Point3D] =List() + if(currentLevel == 0) + { + sortedPoints=points.sortWith(_.x<_.x) + } + else if(currentLevel == 1){ + sortedPoints=points.sortWith(_.y<_.y) + } + else + sortedPoints=points.sortWith(_.z<_.z) + + var medianIndex:Int=(sortedPoints.length)/2 + insert(sortedPoints(medianIndex),initialBox) + var leftSubtree: List[Point3D]=List() + var rightSubtree: List[Point3D]=List() + leftSubtree =sortedPoints.take(medianIndex) + rightSubtree =sortedPoints.takeRight(sortedPoints.length-(medianIndex+1)) + + if(medianIndex-1>=0 &&leftSubtree.length>0){ + insertList(leftSubtree,level+1,initialBox) + } + + if(medianIndex-10){ + insertList(rightSubtree,level+1,initialBox) + } + + }//end points!=null + } + + /** * Print the content of the KDtree * From 9f486d113ed353164ef6a8f2c6f258154ef9a9c9 Mon Sep 17 00:00:00 2001 From: alia Date: Tue, 23 Jul 2019 23:53:24 +0200 Subject: [PATCH 03/47] Updated --- .../KDtreePartitioning.scala | 91 +++++++++++++++++++ 1 file changed, 91 
insertions(+) create mode 100644 src/main/scala/com/spark3d/spatialPartitioning/KDtreePartitioning.scala diff --git a/src/main/scala/com/spark3d/spatialPartitioning/KDtreePartitioning.scala b/src/main/scala/com/spark3d/spatialPartitioning/KDtreePartitioning.scala new file mode 100644 index 0000000..e3f465c --- /dev/null +++ b/src/main/scala/com/spark3d/spatialPartitioning/KDtreePartitioning.scala @@ -0,0 +1,91 @@ +/* + * Copyright 2018 AstroLab Software + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.astrolabsoftware.spark3d.spatialPartitioning + +import com.astrolabsoftware.spark3d.spatialPartitioning + + +import com.astrolabsoftware.spark3d.geometryObjects.BoxEnvelope +import com.astrolabsoftware.spark3d.geometryObjects._ +import com.astrolabsoftware.spark3d.geometryObjects.Shape3D.Shape3D +import com.astrolabsoftware.spark3d.geometryObjects.Point3D + + +import scala.collection.mutable.ListBuffer + +class KDtreePartitioning (private val octree: KDtree) + extends Serializable { + println("KDtreePartitioning") + /** + * @return the octree used for partitioning + */ + def getPartitionTree(): KDtree = { + //octree + null + } + + /** + * @return Leaf nodes of the partitioning tree + */ + def getGrids(): List[BoxEnvelope] = { + //octree.getLeafNodes.toList + null + } +} + +object KDtreePartitioning { + + def apply(data: List[Point3D], tree: KDtree): KDtreePartitioning = { + + //Initialize the boundary box + var min_X:Double=data(0).x + var max_X:Double=data(0).x + var min_Y:Double=data(0).y + var max_Y:Double=data(0).y + var min_Z:Double=data(0).z + var max_Z:Double=data(0).z + + for(i<-data){ + + if(i.xmax_X) + max_X=i.x + + if(i.ymax_Y) + max_Y=i.y + + if(i.zmax_Z) + max_Z=i.z + } + + val KDtreeBoundary:BoxEnvelope=BoxEnvelope.apply(min_X,max_X,min_Y,max_Y,min_Z,max_Z) + tree.insertList(data,0,KDtreeBoundary) + tree.printKDtree(tree) + //tree.assignPartitionIDs + //new OctreePartitioning(tree) + + + null + } +} From 1d282dc54ce8fe97e1e49b773c2cf913d18bf476 Mon Sep 17 00:00:00 2001 From: alia Date: Tue, 23 Jul 2019 23:58:27 +0200 Subject: [PATCH 04/47] Added --- .../KDtreePartitioner.scala | 105 ++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100644 src/main/scala/com/spark3d/spatialPartitioning/KDtreePartitioner.scala diff --git a/src/main/scala/com/spark3d/spatialPartitioning/KDtreePartitioner.scala b/src/main/scala/com/spark3d/spatialPartitioning/KDtreePartitioner.scala new file mode 100644 index 0000000..228a0ad --- /dev/null 
+++ b/src/main/scala/com/spark3d/spatialPartitioning/KDtreePartitioner.scala @@ -0,0 +1,105 @@ +/* + * Copyright 2018 AstroLab Software + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.astrolabsoftware.spark3d.spatialPartitioning + + + +import com.astrolabsoftware.spark3d.geometryObjects.BoxEnvelope +import com.astrolabsoftware.spark3d.geometryObjects.Point3D +import com.astrolabsoftware.spark3d.geometryObjects.Shape3D.Shape3D + +import scala.collection.mutable.{HashSet, ListBuffer} + +class KDtreePartitioner (kdtree: KDtree, grids : List[BoxEnvelope]) extends SpatialPartitioner(grids) { + println("KDtreePartitioner") + /** + * Get the number of partitions in this partitioning + * + * @return the number of partitions + */ + override def numPartitions: Int = { + // grids.size + 1 + } + + /** + * Gets the iterator on tuple leaf nodes (partitions) which intersects, contains or are contained + * by the input object. + * + * @param spatialObject : (T<:Shape3D) + * Shape3D instance (or any extension) representing objects to put on + * the grid. + * @return (Iterator[Tuple2[Int, T]) Iterable over a Tuple + * * of (Int, T) where Int is the partition index, and T the input object. + * + */ + override def placeObject[T <: Shape3D](spatialObject: T): Iterator[Tuple2[Int, T]] = { + + null + } + + /** + * Gets the iterator on tuple leaf nodes (partitions) which intersects, contains or are contained + * by the input object. 
+ * + * @param spatialObject : (T<:Shape3D) + * Shape3D instance (or any extension) representing objects to put on + * the grid. + * @return (Iterator[Tuple2[Int, T]) Iterable over a Tuple + * * of (Int, T) where Int is the partition index, and T the input object. + * + */ + override def placePoints(c0: Double, c1: Double, c2: Double, isSpherical: Boolean) : Int = { + + 1 + } + + /** + * Gets the partitions which contain the input object. + * + * @param spatialObject input object for which the containment is to be found + * @return list of Tuple of containing partitions and their index/partition ID's + */ + override def getPartitionNodes[T <: Shape3D](spatialObject: T): List[Tuple2[Int, Shape3D]] = { + println("getPartitionNodes") + null + } + + /** + * Gets the partitions which are the neighbors of the partitions which contain the input object. + * + * @param spatialObject input object for which the neighbors are to be found + * @param inclusive If true, includes the node of the spatialObject as well. Default is false. + * @return list of Tuple of neighbor partitions and their index/partition ID's + */ + override def getNeighborNodes[T <: Shape3D](spatialObject: T, inclusive: Boolean = false): List[Tuple2[Int, Shape3D]] = { + null + } + + /** + * Gets the partitions which are the neighbors to the input partition. Useful when getting + * secondary neighbors (neighbors to neighbor) of the queryObject. + * + * @param containingNode The boundary of the Node for which neighbors are to be found. 
+ * @param containingNodeID The index/partition ID of the containingNode + * @return list of Tuple of secondary neighbor partitions and their index/partition IDs + */ + override def getSecondaryNeighborNodes[T <: Shape3D](containingNode: T, containingNodeID: Int): List[Tuple2[Int, Shape3D]] = { + + null + } + +} From a6f1b749f36d3f5c4e53f1fde6bb955ea00dbd0b Mon Sep 17 00:00:00 2001 From: alia Date: Wed, 24 Jul 2019 10:04:17 +0200 Subject: [PATCH 05/47] Added --- src/main/scala/com/spark3d/spatialPartitioning/KDtree.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/scala/com/spark3d/spatialPartitioning/KDtree.scala b/src/main/scala/com/spark3d/spatialPartitioning/KDtree.scala index e1d8f0d..7c05de4 100644 --- a/src/main/scala/com/spark3d/spatialPartitioning/KDtree.scala +++ b/src/main/scala/com/spark3d/spatialPartitioning/KDtree.scala @@ -44,7 +44,7 @@ class KDtree() extends Serializable { */ def insertElement(current:KDtree, value:Point3D, level:Int):KDtree={ - var currentLevel=level%2 + var currentLevel=level%3 if(current==null) return null // For x dimension @@ -147,7 +147,7 @@ class KDtree() extends Serializable { * */ def insert(value:Point3D, initialBox:BoxEnvelope):Unit={ - if(this.point==null){ print ("first") + if(this.point==null){ this.point=value this.box=initialBox } @@ -162,7 +162,7 @@ class KDtree() extends Serializable { * */ def insertList (points: List[Point3D], level:Int, initialBox:BoxEnvelope ):Unit ={ - var currentLevel=level%2 + var currentLevel=level%3 if( points!=null) { var sortedPoints:List[Point3D] =List() From 4382b67c7d86a9b4c32548a32a737612f8f31fad Mon Sep 17 00:00:00 2001 From: alia Date: Wed, 24 Jul 2019 16:53:22 +0200 Subject: [PATCH 06/47] KDtree option is added --- src/main/scala/com/spark3d/Partitioners.scala | 45 ++++++++++++++----- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/src/main/scala/com/spark3d/Partitioners.scala b/src/main/scala/com/spark3d/Partitioners.scala index 
d33ec14..0d84107 100644 --- a/src/main/scala/com/spark3d/Partitioners.scala +++ b/src/main/scala/com/spark3d/Partitioners.scala @@ -21,9 +21,10 @@ import scala.util.Random import org.apache.spark.sql.DataFrame import org.apache.spark.sql.functions.col -import com.astrolabsoftware.spark3d.geometryObjects.Shape3D._ - import com.astrolabsoftware.spark3d.geometryObjects._ +import com.astrolabsoftware.spark3d.geometryObjects.Shape3D.Shape3D +import com.astrolabsoftware.spark3d.geometryObjects.Point3D + import com.astrolabsoftware.spark3d.spatialPartitioning.SpatialPartitioner import com.astrolabsoftware.spark3d.spatialPartitioning.OnionPartitioning import com.astrolabsoftware.spark3d.spatialPartitioning.OnionPartitioner @@ -31,12 +32,14 @@ import com.astrolabsoftware.spark3d.spatialPartitioning.Octree import com.astrolabsoftware.spark3d.spatialPartitioning.OctreePartitioning import com.astrolabsoftware.spark3d.spatialPartitioning.OctreePartitioner import com.astrolabsoftware.spark3d.utils.Utils.getSampleSize - +import com.astrolabsoftware.spark3d.spatialPartitioning.KDtree +import com.astrolabsoftware.spark3d.spatialPartitioning.KDtreePartitioning +import com.astrolabsoftware.spark3d.spatialPartitioning.KDtreePartitioner /** * Main object to retrieve SpatialPartitioner. */ class Partitioners(df : DataFrame, options: Map[String, String]) extends Serializable { - + println("Partitioners") // Definition of the coordinate system. Spherical or cartesian val isSpherical : Boolean = options("coordSys") match { case "spherical" => true @@ -80,10 +83,10 @@ class Partitioners(df : DataFrame, options: Map[String, String]) extends Seriali Geometry not understood! You must choose between: points or spheres """) - } - - - /** + } + + + /** * Define a spatial partitioning for rawRDD, and return the partitioner. * The list of available partitioning can be found in utils/GridType. 
* By default, the outgoing level of parallelism is the same as the incoming @@ -119,17 +122,20 @@ class Partitioners(df : DataFrame, options: Map[String, String]) extends Seriali // Initialise our space val partitioning = new OnionPartitioning + partitioning.LinearOnionPartitioning( numPartitionsRaw, partitioning.getMaxZ(rawRDD), isSpherical ) - + // Grab the grid elements val grids = partitioning.getGrids - + // Build our partitioner - new OnionPartitioner(grids) + var x= new OnionPartitioner(grids) + + x } case "octree" => { // taking 20% of the data as a sample @@ -153,6 +159,20 @@ class Partitioners(df : DataFrame, options: Map[String, String]) extends Seriali val grids = partitioning.getGrids new OctreePartitioner(octree, grids) } + //for KDtree + case "kdtree" => { + val dataCount = rawRDD.count + val sampleSize = getSampleSize(dataCount, numPartitionsRaw) + val samples:List[Point3D] = rawRDD.takeSample(false, sampleSize, + new Random(dataCount).nextInt(dataCount.asInstanceOf[Int])).toList.map(x => x.asInstanceOf[Point3D]) + + val kdtree=new KDtree( ) + val partitioning = KDtreePartitioning.apply(samples, kdtree) + // val grids = partitioning.getGrids + new KDtreePartitioner(kdtree,null) + + } + // Other cases not handled. RTree in prep. 
case _ => throw new AssertionError(""" @@ -161,6 +181,7 @@ class Partitioners(df : DataFrame, options: Map[String, String]) extends Seriali // Apply the partitioner and return the RDD partitioner + } /** @@ -203,5 +224,7 @@ class Partitioners(df : DataFrame, options: Map[String, String]) extends Seriali dataBoundary } + + } From 82e085a5d8b3fe519ff2c28a27851a41f788ce99 Mon Sep 17 00:00:00 2001 From: alia Date: Wed, 24 Jul 2019 16:54:24 +0200 Subject: [PATCH 07/47] KDtree option is added --- .../scala/com/spark3d/Repartitioning.scala | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/src/main/scala/com/spark3d/Repartitioning.scala b/src/main/scala/com/spark3d/Repartitioning.scala index 53dde6b..0d951b6 100644 --- a/src/main/scala/com/spark3d/Repartitioning.scala +++ b/src/main/scala/com/spark3d/Repartitioning.scala @@ -21,6 +21,11 @@ import org.apache.spark.sql.functions.col import org.apache.spark.sql.functions.lit import org.apache.spark.sql.SparkSession import org.apache.spark.sql.functions.spark_partition_id +import org.apache.spark.sql.Column + + +import org.apache.spark.sql.Row +import org.apache.spark.sql.Dataset import com.astrolabsoftware.spark3d.Partitioners import com.astrolabsoftware.spark3d.spatialPartitioning.KeyPartitioner @@ -56,7 +61,7 @@ object Repartitioning { * @return Input DataFrame plus an additional column `partition_id`. */ def prePartition(df : DataFrame, options: Map[String, String], numPartitions : Int = -1) : DataFrame = { - + println("prePartition Method") // Change the number of partitions if wanted val numOfPartitions = numPartitions match { case -1 => df.rdd.getNumPartitions @@ -80,7 +85,7 @@ object Repartitioning { } // Other implemented repartitioners - case grid @ ("onion" | "octree") => { + case grid @ ("onion" | "octree" |"kdtree") => { // Definition of the coordinate system. 
Spherical or cartesian val isSpherical : Boolean = options("coordSys") match { case "spherical" => true @@ -96,13 +101,13 @@ object Repartitioning { val P = new Partitioners(df, options) val partitioner = P.get(numOfPartitions) - + // Add a column with the new partition indices val dfExt = geometry match { case "points" => { // UDF for the repartitioning - val placePointsUDF = udf[Int, Double, Double, Double, Boolean](partitioner.placePoints) - + val placePointsUDF = udf[Int, Double, Double, Double, Boolean](partitioner.placePoints) + df.withColumn("partition_id", placePointsUDF( col(colnames(0)).cast("double"), @@ -111,11 +116,12 @@ object Repartitioning { lit(isSpherical) ) ) + } case "spheres" => { // UDF for the repartitioning val placePointsUDF = udf[Int, Double, Double, Double, Double, Boolean](partitioner.placeSpheres) - + df.withColumn("partition_id", placePointsUDF( col(colnames(0)).cast("double"), @@ -132,13 +138,15 @@ object Repartitioning { """) } dfExt + + } case _ => throw new AssertionError(""" Gridtype not understood! You must choose between: onion, octree, or current """) } - + dfout } @@ -186,7 +194,7 @@ object Repartitioning { * +-------------------+-------------------+------------------+------------+ */ def repartitionByCol(df: DataFrame, colname: String, preLabeled: Boolean, numPartitions: Int = -1): DataFrame = { - + println("repartitionByCol") // Build a Map (k=df.col -> v=partition_id) // to allow the use of standard (Int) partitioners (can be costly). 
val mapPart : Map[Any, Int] = preLabeled match { From a7a385dfe463815a993aa0f34c4237c078c7c5de Mon Sep 17 00:00:00 2001 From: alia Date: Wed, 24 Jul 2019 16:56:25 +0200 Subject: [PATCH 08/47] KDtree is added --- .../com/spark3d/spatialPartitioning/SpatialPartitioner.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/scala/com/spark3d/spatialPartitioning/SpatialPartitioner.scala b/src/main/scala/com/spark3d/spatialPartitioning/SpatialPartitioner.scala index 3d055b1..e7baf34 100644 --- a/src/main/scala/com/spark3d/spatialPartitioning/SpatialPartitioner.scala +++ b/src/main/scala/com/spark3d/spatialPartitioning/SpatialPartitioner.scala @@ -31,7 +31,6 @@ import com.astrolabsoftware.spark3d.geometryObjects.Shape3D._ * */ abstract class SpatialPartitioner(grids : List[Shape3D]) extends Partitioner with Serializable { - /** * Method to place a spatialObject (could a Point3D, a Sphere, and so on) on * a grid. In practice it will assign a key (Int) to the spatialObject From ff2086a8a768a0248a0f9d2fd9e1035ed4f3d9c9 Mon Sep 17 00:00:00 2001 From: alia Date: Wed, 24 Jul 2019 16:58:07 +0200 Subject: [PATCH 09/47] KDtree option has been added --- src/main/scala/com/spark3d/utils/GridType.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/scala/com/spark3d/utils/GridType.scala b/src/main/scala/com/spark3d/utils/GridType.scala index f8a9e0a..2934be7 100644 --- a/src/main/scala/com/spark3d/utils/GridType.scala +++ b/src/main/scala/com/spark3d/utils/GridType.scala @@ -24,4 +24,5 @@ object GridType { // Add new GridType name here once implementation done. 
val LINEARONIONGRID = "onion" val OCTREE = "octree" + val KDTREE = "kdtree" } From 1bf8b37df3df9b3cdd92c205b2bca5e89fc091e8 Mon Sep 17 00:00:00 2001 From: alia Date: Thu, 25 Jul 2019 16:00:20 +0200 Subject: [PATCH 10/47] Updated --- src/main/scala/com/spark3d/Partitioners.scala | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/main/scala/com/spark3d/Partitioners.scala b/src/main/scala/com/spark3d/Partitioners.scala index 0d84107..46fcbfd 100644 --- a/src/main/scala/com/spark3d/Partitioners.scala +++ b/src/main/scala/com/spark3d/Partitioners.scala @@ -115,7 +115,7 @@ class Partitioners(df : DataFrame, options: Map[String, String]) extends Seriali (You put: $numPartitions) """) } - + // Add here new cases. val partitioner = gridtype match { case "onion" => { @@ -165,9 +165,13 @@ class Partitioners(df : DataFrame, options: Map[String, String]) extends Seriali val sampleSize = getSampleSize(dataCount, numPartitionsRaw) val samples:List[Point3D] = rawRDD.takeSample(false, sampleSize, new Random(dataCount).nextInt(dataCount.asInstanceOf[Int])).toList.map(x => x.asInstanceOf[Point3D]) - + + // to determine the level which is used in partitioning, also the number of partitions is determined + val log2 = (x: Int) => floor(log10(x)/log10(2.0)).asInstanceOf[Int] + val levelPartitioning=log2(numPartitionsRaw) +1 + println("level is"+levelPartitioning) val kdtree=new KDtree( ) - val partitioning = KDtreePartitioning.apply(samples, kdtree) + val partitioning = KDtreePartitioning.apply(samples, kdtree, levelPartitioning) // val grids = partitioning.getGrids new KDtreePartitioner(kdtree,null) @@ -228,3 +232,4 @@ class Partitioners(df : DataFrame, options: Map[String, String]) extends Seriali } + From bf3a9ee4fa05811c950f9be59b0e2d12a2289460 Mon Sep 17 00:00:00 2001 From: alia Date: Thu, 25 Jul 2019 16:01:33 +0200 Subject: [PATCH 11/47] Updated --- .../spark3d/spatialPartitioning/KDtree.scala | 28 +++++++++++++++---- 1 file changed, 23 insertions(+), 
5 deletions(-) diff --git a/src/main/scala/com/spark3d/spatialPartitioning/KDtree.scala b/src/main/scala/com/spark3d/spatialPartitioning/KDtree.scala index 7c05de4..49c91ba 100644 --- a/src/main/scala/com/spark3d/spatialPartitioning/KDtree.scala +++ b/src/main/scala/com/spark3d/spatialPartitioning/KDtree.scala @@ -44,7 +44,7 @@ class KDtree() extends Serializable { */ def insertElement(current:KDtree, value:Point3D, level:Int):KDtree={ - var currentLevel=level%3 + var currentLevel=level%2 if(current==null) return null // For x dimension @@ -153,7 +153,6 @@ class KDtree() extends Serializable { } else insertElement(this,value,0) - } @@ -162,7 +161,7 @@ class KDtree() extends Serializable { * */ def insertList (points: List[Point3D], level:Int, initialBox:BoxEnvelope ):Unit ={ - var currentLevel=level%3 + var currentLevel=level%2 if( points!=null) { var sortedPoints:List[Point3D] =List() @@ -193,8 +192,6 @@ class KDtree() extends Serializable { }//end points!=null } - - /** * Print the content of the KDtree * @@ -207,5 +204,26 @@ class KDtree() extends Serializable { printKDtree(current.right) } } + + /** + * Breadth-First Search (BFS) + */ + + def BFS(current:KDtree,level:Int):ListBuffer[BoxEnvelope]={ + var partitionBoundary= new ListBuffer[BoxEnvelope] + if(current==null) + null + else if(level==1){ + println(partitionBoundary) + partitionBoundary += current.box + } + else if(level>1){ + BFS(current.left,level-1) + BFS(current.right,level-1) + + } + + partitionBoundary + } } From 195169e0f0240979fa5d3a27fdf931b0e370c19f Mon Sep 17 00:00:00 2001 From: alia Date: Thu, 25 Jul 2019 16:02:01 +0200 Subject: [PATCH 12/47] Updated --- .../spark3d/spatialPartitioning/KDtreePartitioning.scala | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/main/scala/com/spark3d/spatialPartitioning/KDtreePartitioning.scala b/src/main/scala/com/spark3d/spatialPartitioning/KDtreePartitioning.scala index e3f465c..00d5cf3 100644 --- 
a/src/main/scala/com/spark3d/spatialPartitioning/KDtreePartitioning.scala +++ b/src/main/scala/com/spark3d/spatialPartitioning/KDtreePartitioning.scala @@ -48,7 +48,7 @@ class KDtreePartitioning (private val octree: KDtree) object KDtreePartitioning { - def apply(data: List[Point3D], tree: KDtree): KDtreePartitioning = { + def apply(data: List[Point3D], tree: KDtree, levelPart:Int): KDtreePartitioning = { //Initialize the boundary box var min_X:Double=data(0).x @@ -82,6 +82,10 @@ object KDtreePartitioning { val KDtreeBoundary:BoxEnvelope=BoxEnvelope.apply(min_X,max_X,min_Y,max_Y,min_Z,max_Z) tree.insertList(data,0,KDtreeBoundary) tree.printKDtree(tree) + + val test1=tree.BFS(tree,levelPart) + for (a<-test1) + println(a.maxX) //tree.assignPartitionIDs //new OctreePartitioning(tree) From 1be165f6142d8522b7845c79fa373c7247ee73d6 Mon Sep 17 00:00:00 2001 From: alia Date: Mon, 29 Jul 2019 14:46:28 +0200 Subject: [PATCH 13/47] Boundary box problem is fixed --- .../spark3d/spatialPartitioning/KDtree.scala | 61 ++++++++++--------- 1 file changed, 33 insertions(+), 28 deletions(-) diff --git a/src/main/scala/com/spark3d/spatialPartitioning/KDtree.scala b/src/main/scala/com/spark3d/spatialPartitioning/KDtree.scala index 49c91ba..c62d405 100644 --- a/src/main/scala/com/spark3d/spatialPartitioning/KDtree.scala +++ b/src/main/scala/com/spark3d/spatialPartitioning/KDtree.scala @@ -44,17 +44,16 @@ class KDtree() extends Serializable { */ def insertElement(current:KDtree, value:Point3D, level:Int):KDtree={ - var currentLevel=level%2 - if(current==null) - return null + var currentLevel=level%3 + // For x dimension if(currentLevel==0){ //left side if(value.x1){ BFS(current.left,level-1) BFS(current.right,level-1) - } - partitionBoundary } From 973bb67ec549c2cd1352bdac1219c406d0c903da Mon Sep 17 00:00:00 2001 From: alia Date: Mon, 29 Jul 2019 14:49:28 +0200 Subject: [PATCH 14/47] The number of partitions is determined, the the boundary boxes for partitions are received --- 
.../KDtreePartitioning.scala | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/src/main/scala/com/spark3d/spatialPartitioning/KDtreePartitioning.scala b/src/main/scala/com/spark3d/spatialPartitioning/KDtreePartitioning.scala index 00d5cf3..4814c97 100644 --- a/src/main/scala/com/spark3d/spatialPartitioning/KDtreePartitioning.scala +++ b/src/main/scala/com/spark3d/spatialPartitioning/KDtreePartitioning.scala @@ -26,23 +26,23 @@ import com.astrolabsoftware.spark3d.geometryObjects.Point3D import scala.collection.mutable.ListBuffer -class KDtreePartitioning (private val octree: KDtree) +class KDtreePartitioning (private val kdtree: KDtree, grids:List[BoxEnvelope]) extends Serializable { println("KDtreePartitioning") /** * @return the octree used for partitioning */ def getPartitionTree(): KDtree = { - //octree - null + kdtree + } /** * @return Leaf nodes of the partitioning tree */ def getGrids(): List[BoxEnvelope] = { - //octree.getLeafNodes.toList - null + // grids.isInstanceOf[List[BoxEnvelope]] + grids } } @@ -79,17 +79,18 @@ object KDtreePartitioning { max_Z=i.z } - val KDtreeBoundary:BoxEnvelope=BoxEnvelope.apply(min_X,max_X,min_Y,max_Y,min_Z,max_Z) + val KDtreeBoundary:BoxEnvelope=BoxEnvelope.apply(min_X-1,max_X+1,min_Y-1,max_Y+1,min_Z-1,max_Z+1) + tree.insertList(data,0,KDtreeBoundary) - tree.printKDtree(tree) + //tree.printKDtree(tree) - val test1=tree.BFS(tree,levelPart) - for (a<-test1) - println(a.maxX) - //tree.assignPartitionIDs - //new OctreePartitioning(tree) + val grids=tree.BFS(tree,levelPart).toList + + for(i<-grids) + println(i) + new KDtreePartitioning(tree,grids) - null + } } From be83043766ed5a6af652988bbaa2c1545342a33d Mon Sep 17 00:00:00 2001 From: alia Date: Mon, 29 Jul 2019 14:52:54 +0200 Subject: [PATCH 15/47] placePoints method is ready --- .../KDtreePartitioner.scala | 32 +++++++++++++++---- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git 
a/src/main/scala/com/spark3d/spatialPartitioning/KDtreePartitioner.scala b/src/main/scala/com/spark3d/spatialPartitioning/KDtreePartitioner.scala index 228a0ad..d2d3312 100644 --- a/src/main/scala/com/spark3d/spatialPartitioning/KDtreePartitioner.scala +++ b/src/main/scala/com/spark3d/spatialPartitioning/KDtreePartitioner.scala @@ -24,15 +24,17 @@ import com.astrolabsoftware.spark3d.geometryObjects.Shape3D.Shape3D import scala.collection.mutable.{HashSet, ListBuffer} class KDtreePartitioner (kdtree: KDtree, grids : List[BoxEnvelope]) extends SpatialPartitioner(grids) { - println("KDtreePartitioner") + + + /** * Get the number of partitions in this partitioning * * @return the number of partitions */ override def numPartitions: Int = { - // grids.size - 1 + grids.size + } /** @@ -63,8 +65,25 @@ class KDtreePartitioner (kdtree: KDtree, grids : List[BoxEnvelope]) extends Spat * */ override def placePoints(c0: Double, c1: Double, c2: Double, isSpherical: Boolean) : Int = { - - 1 + val result = HashSet.empty[Int] + var partitionId:Int=0 + for(element<-grids){ + if(element.covers(c0,c1,c2)){ + return partitionId + } + partitionId+=1 + } + + partitionId=0 + for(element<-grids){ + if(element.coversKD(c0,c1,c2)){ + return partitionId + } + partitionId+=1 + } + + 64 + } /** @@ -74,8 +93,7 @@ class KDtreePartitioner (kdtree: KDtree, grids : List[BoxEnvelope]) extends Spat * @return list of Tuple of containing partitions and their index/partition ID's */ override def getPartitionNodes[T <: Shape3D](spatialObject: T): List[Tuple2[Int, Shape3D]] = { - println("getPartitionNodes") - null + null } /** From 3c217257d0ff4b8ef62626b53729df36c81940f6 Mon Sep 17 00:00:00 2001 From: alia Date: Mon, 29 Jul 2019 14:56:34 +0200 Subject: [PATCH 16/47] The level of KDtree which uses to find the boundary boxes for partitions is added --- src/main/scala/com/spark3d/Partitioners.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/src/main/scala/com/spark3d/Partitioners.scala b/src/main/scala/com/spark3d/Partitioners.scala index 46fcbfd..45db904 100644 --- a/src/main/scala/com/spark3d/Partitioners.scala +++ b/src/main/scala/com/spark3d/Partitioners.scala @@ -172,8 +172,8 @@ class Partitioners(df : DataFrame, options: Map[String, String]) extends Seriali println("level is"+levelPartitioning) val kdtree=new KDtree( ) val partitioning = KDtreePartitioning.apply(samples, kdtree, levelPartitioning) - // val grids = partitioning.getGrids - new KDtreePartitioner(kdtree,null) + val grids:List[BoxEnvelope] = partitioning.getGrids + new KDtreePartitioner(kdtree,grids) } @@ -184,7 +184,7 @@ class Partitioners(df : DataFrame, options: Map[String, String]) extends Seriali } // Apply the partitioner and return the RDD - partitioner + partitioner } From f27582b951028edfd21f11479e60b04b6dbae753 Mon Sep 17 00:00:00 2001 From: alia Date: Mon, 29 Jul 2019 14:58:08 +0200 Subject: [PATCH 17/47] it uses to check the KDtree --- .../scala/com/spark3d/examples/test.scala | 135 ++++++++++++++++++ 1 file changed, 135 insertions(+) create mode 100644 src/main/scala/com/spark3d/examples/test.scala diff --git a/src/main/scala/com/spark3d/examples/test.scala b/src/main/scala/com/spark3d/examples/test.scala new file mode 100644 index 0000000..a2d3292 --- /dev/null +++ b/src/main/scala/com/spark3d/examples/test.scala @@ -0,0 +1,135 @@ +/* + * Copyright 2018 Julien Peloton + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.astrolabsoftware.spark3d.examples + +// spark3D implicits +import com.astrolabsoftware.spark3d._ +// check +import org.apache.spark.SparkContext +import org.apache.spark.SparkContext._ +import org.apache.spark._ +import org.apache.spark.SparkConf +import com.astrolabsoftware.spark3d.spatialPartitioning.KDtree +import com.astrolabsoftware.spark3d.geometryObjects.Point3D +import com.astrolabsoftware.spark3d.geometryObjects.BoxEnvelope + +// Spark lib +import org.apache.spark.storage.StorageLevel +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.functions._ + +// Logger +import org.apache.log4j.Level +import org.apache.log4j.Logger + +/** + * Main app. + */ +object Test { + // Set to Level.WARN is you want verbosity + Logger.getLogger("org").setLevel(Level.WARN) + Logger.getLogger("akka").setLevel(Level.WARN) + + // Initialise our spark connector + val spark = SparkSession + .builder() + .appName("partitioning") + .getOrCreate() + + import spark.implicits._ + + /** + * Main + */ + def main(args : Array[String]) = { + + // Data file + val fn_fits = args(0).toString + println("arg0: "+ fn_fits) + // HDU + val hdu = args(1).toString + println("arg1: "+ hdu) + + // Column names + val columns = args(2).toString + println("arg2: "+columns) + + // isSpherical + val isSpherical : String = args(3).toBoolean match { + case true => "spherical" + case false => "cartesian" +} + // partitioning + val grid = args(4).toString + println("arg4 "+grid) + // partitions + val part = args(5).toInt + println("arg 5: "+ part) + // Mode + val mode = args(6).toString + println("Arg6: "+mode) + // Load the data + ///////////////////////////////spark.sparkContext + //val df = spark.read.format("csv").option("header", "true").load("hdfs://134.158.75.222:8020/user/alia/example3D.csv") + //val df = spark.read.format("csv").option("header", 
"true").load("hdfs://134.158.75.222:8020/user/alia/test2d.csv") + val df = spark.read.format("fits").option("hdu", 1).load(fn_fits) + + + ////////////////////////////// + + println( df.count()) + val options = Map( + "geometry" -> "points", + "colnames" -> columns, + "coordSys" -> isSpherical, + "gridtype" -> grid) + + + // val mypartitioner = new Partitioners(df, options) + // val myrawRDD = mypartitioner.rawRDD + + // println(myrawRDD.take(1)) + + + +//partition_id + val df_colid = df.prePartition(options, part) + // df_colid.show(100,false) + // df_colid.groupBy("partition_id").count().show(50,false) + // df_colid.printSchema() + //df_colid.filter($"partition_id">4).show() + //println(df_colid.filter($"DEC"< -0.001).count()) + //MC it to minimize flukes + + // val number= df_colid.repartitionByCol("partition_id", true, part) + + + val number1=df_colid.repartitionByCol("partition_id", true, part).mapPartitions(part => Iterator(part.size)).collect().toList + println("number1: "+ number1) + + // val kdtree=new KDtree( ) + // kdtree.printKDtree() + // val point=new Point3D(3,1,4,false) + // val point1=new Point3D(2,3,7,false) + // val point2=new Point3D(4,3,4,false) + + // kdtree.insert(point) + // kdtree.insert(point1) + // kdtree.insert(point2) + // kdtree.printKDtree(kdtree) + + } +} \ No newline at end of file From 6d76411d8492914a8351c859838d059738ac3146 Mon Sep 17 00:00:00 2001 From: alia Date: Tue, 30 Jul 2019 14:04:49 +0200 Subject: [PATCH 18/47] Validation is addet --- .../scala/com/spark3d/examples/test.scala | 63 +++++++++++-------- 1 file changed, 37 insertions(+), 26 deletions(-) diff --git a/src/main/scala/com/spark3d/examples/test.scala b/src/main/scala/com/spark3d/examples/test.scala index a2d3292..c19cbd3 100644 --- a/src/main/scala/com/spark3d/examples/test.scala +++ b/src/main/scala/com/spark3d/examples/test.scala @@ -14,6 +14,7 @@ * limitations under the License. 
*/ package com.astrolabsoftware.spark3d.examples +import scala.math._ // spark3D implicits import com.astrolabsoftware.spark3d._ @@ -29,7 +30,7 @@ import com.astrolabsoftware.spark3d.geometryObjects.BoxEnvelope // Spark lib import org.apache.spark.storage.StorageLevel import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.functions._ +//import org.apache.spark.sql.functions._ // Logger import org.apache.log4j.Level @@ -58,14 +59,14 @@ object Test { // Data file val fn_fits = args(0).toString - println("arg0: "+ fn_fits) + // HDU val hdu = args(1).toString - println("arg1: "+ hdu) + // Column names val columns = args(2).toString - println("arg2: "+columns) + // isSpherical val isSpherical : String = args(3).toBoolean match { @@ -74,23 +75,22 @@ object Test { } // partitioning val grid = args(4).toString - println("arg4 "+grid) + // partitions val part = args(5).toInt - println("arg 5: "+ part) + // Mode val mode = args(6).toString - println("Arg6: "+mode) + // Load the data ///////////////////////////////spark.sparkContext //val df = spark.read.format("csv").option("header", "true").load("hdfs://134.158.75.222:8020/user/alia/example3D.csv") //val df = spark.read.format("csv").option("header", "true").load("hdfs://134.158.75.222:8020/user/alia/test2d.csv") - val df = spark.read.format("fits").option("hdu", 1).load(fn_fits) + val df = spark.read.format("fits").option("hdu", 1).load(fn_fits) ////////////////////////////// - println( df.count()) val options = Map( "geometry" -> "points", "colnames" -> columns, @@ -98,15 +98,35 @@ object Test { "gridtype" -> grid) - // val mypartitioner = new Partitioners(df, options) - // val myrawRDD = mypartitioner.rawRDD + - // println(myrawRDD.take(1)) + /** + * Validation + */ + /** + * Find the number of levels + */ + val log2 = (x: Int) => log10(x)/log10(2.0) + val maximumLevel=(log2(df.count.asInstanceOf[Int]+1))-2 + //Find the number of partitions + val maximumPartitions=pow(maximumLevel,2) + + 
if(log2(part)%1==0) + if(part<=maximumPartitions){ + val df_colid = df.prePartition(options, part) + val number1=df_colid.repartitionByCol("partition_id", true, part).mapPartitions(part => Iterator(part.size)).collect().toList + println("number1: "+ number1) + } + else + println("Please, Maximum number of partitions is "+ maximumPartitions) + else + println("Please determine the number of partitions as 1, 2, 4, 8 and so on") + + + // val df_colid = df.prePartition(options, part) -//partition_id - val df_colid = df.prePartition(options, part) // df_colid.show(100,false) // df_colid.groupBy("partition_id").count().show(50,false) // df_colid.printSchema() @@ -117,19 +137,10 @@ object Test { // val number= df_colid.repartitionByCol("partition_id", true, part) - val number1=df_colid.repartitionByCol("partition_id", true, part).mapPartitions(part => Iterator(part.size)).collect().toList - println("number1: "+ number1) - - // val kdtree=new KDtree( ) - // kdtree.printKDtree() - // val point=new Point3D(3,1,4,false) - // val point1=new Point3D(2,3,7,false) - // val point2=new Point3D(4,3,4,false) + // val number1=df_colid.repartitionByCol("partition_id", true, part).mapPartitions(part => Iterator(part.size)).collect().toList + // println("number1: "+ number1) - // kdtree.insert(point) - // kdtree.insert(point1) - // kdtree.insert(point2) - // kdtree.printKDtree(kdtree) + } } \ No newline at end of file From 3de67ec71ebaecd25da1aa1a1a32b9df5f56dbee Mon Sep 17 00:00:00 2001 From: alia Date: Mon, 5 Aug 2019 12:06:01 +0200 Subject: [PATCH 19/47] updated --- run_part_scala.sh | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/run_part_scala.sh b/run_part_scala.sh index 9f0eb38..72dd8c2 100755 --- a/run_part_scala.sh +++ b/run_part_scala.sh @@ -10,8 +10,9 @@ PACK=com.github.astrolabsoftware:spark-fits_2.11:0.7.1 SF=target/scala-2.11/spark3d_2.11-${VERSION}.jar HP=lib/jhealpix.jar -MASTERURL= -fitsfn= +MASTERURL=spark://134.158.75.222:7077 
+fitsfn=hdfs://134.158.75.222:8020//lsst/LSST1Y/out_srcs_s1_0.fits + # "order" "col" "range" "rep" for method in "range" "col" @@ -22,6 +23,6 @@ do --jars ${SF},${HP} --packages ${PACK} \ --class com.astrolabsoftware.spark3d.examples.PartitioningDF \ target/scala-${SBT_VERSION_SPARK}/spark3d_${SBT_VERSION_SPARK}-${VERSION}.jar \ - $fitsfn 1 "Z_COSMO,RA,DEC" true "onion" 102 ${method} + $fitsfn 1 "Z_COSMO,RA,DEC" true "octree" 102 ${method} done From b2c83b11982417d934d907d58d70ad6bedf48c7f Mon Sep 17 00:00:00 2001 From: alia Date: Mon, 5 Aug 2019 12:06:54 +0200 Subject: [PATCH 20/47] Validation is added --- arun_scala.sh | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100755 arun_scala.sh diff --git a/arun_scala.sh b/arun_scala.sh new file mode 100755 index 0000000..af6ec34 --- /dev/null +++ b/arun_scala.sh @@ -0,0 +1,40 @@ +#!/bin/bash + +SBT_VERSION_SPARK=2.11 +VERSION=0.3.1 + +# Compile the code +sbt ++2.11.8 package + +PACK=com.github.astrolabsoftware:spark-fits_2.11:0.7.1 +SF=target/scala-2.11/spark3d_2.11-${VERSION}.jar +HP=lib/jhealpix.jar + +#MASTERURL=spark://134.158.75.222:7077 +MASTERURL=yarn +fitsfn=hdfs://134.158.75.222:8020//lsst/LSST1Y/out_srcs_s1_2.fits +# fitsfn=hdfs://134.158.75.222:8020/user/alia/a.fits + +# 4096 +sum=0 +for i in {1..10} +do + echo "Run number is $i" + start_time="$(date -u +%s)" + spark-submit \ + --master ${MASTERURL} \ + --driver-memory 4g --executor-memory 28g --executor-cores 17 --total-executor-cores 102 \ + --jars ${SF},${HP} --packages ${PACK} \ + --class com.astrolabsoftware.spark3d.examples.Test \ + target/scala-${SBT_VERSION_SPARK}/spark3d_${SBT_VERSION_SPARK}-${VERSION}.jar \ + $fitsfn 1 "Z_COSMO,RA,DEC" true "octree" 4096 "col" + end_time="$(date -u +%s)" + elapsed="$(($end_time-$start_time))" + echo "Total of $elapsed seconds elapsed for process" + sum="$(($sum+$elapsed))" + wait +done +avgT="$(($sum/10))" +echo "average elapsed time is $avgT " + + From 
e65baa31cdd4031ca212b964ed8b376ea4b5a4b8 Mon Sep 17 00:00:00 2001 From: alia Date: Mon, 5 Aug 2019 12:09:11 +0200 Subject: [PATCH 21/47] Updated --- src/main/scala/com/spark3d/Partitioners.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/scala/com/spark3d/Partitioners.scala b/src/main/scala/com/spark3d/Partitioners.scala index 45db904..65dc3c8 100644 --- a/src/main/scala/com/spark3d/Partitioners.scala +++ b/src/main/scala/com/spark3d/Partitioners.scala @@ -39,7 +39,7 @@ import com.astrolabsoftware.spark3d.spatialPartitioning.KDtreePartitioner * Main object to retrieve SpatialPartitioner. */ class Partitioners(df : DataFrame, options: Map[String, String]) extends Serializable { - println("Partitioners") + // Definition of the coordinate system. Spherical or cartesian val isSpherical : Boolean = options("coordSys") match { case "spherical" => true @@ -169,7 +169,7 @@ class Partitioners(df : DataFrame, options: Map[String, String]) extends Seriali // to determine the level which is used in partitioning, also the number of partitions is determined val log2 = (x: Int) => floor(log10(x)/log10(2.0)).asInstanceOf[Int] val levelPartitioning=log2(numPartitionsRaw) +1 - println("level is"+levelPartitioning) + val kdtree=new KDtree( ) val partitioning = KDtreePartitioning.apply(samples, kdtree, levelPartitioning) val grids:List[BoxEnvelope] = partitioning.getGrids From ac60b6ced97f0eaac1329d4374f99201b56b9011 Mon Sep 17 00:00:00 2001 From: alia Date: Mon, 5 Aug 2019 12:09:46 +0200 Subject: [PATCH 22/47] KDtree is added --- src/main/scala/com/spark3d/Repartitioning.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/scala/com/spark3d/Repartitioning.scala b/src/main/scala/com/spark3d/Repartitioning.scala index 0d951b6..7474c00 100644 --- a/src/main/scala/com/spark3d/Repartitioning.scala +++ b/src/main/scala/com/spark3d/Repartitioning.scala @@ -61,7 +61,6 @@ object Repartitioning { * @return Input DataFrame plus 
an additional column `partition_id`. */ def prePartition(df : DataFrame, options: Map[String, String], numPartitions : Int = -1) : DataFrame = { - println("prePartition Method") // Change the number of partitions if wanted val numOfPartitions = numPartitions match { case -1 => df.rdd.getNumPartitions @@ -194,7 +193,7 @@ object Repartitioning { * +-------------------+-------------------+------------------+------------+ */ def repartitionByCol(df: DataFrame, colname: String, preLabeled: Boolean, numPartitions: Int = -1): DataFrame = { - println("repartitionByCol") + // Build a Map (k=df.col -> v=partition_id) // to allow the use of standard (Int) partitioners (can be costly). val mapPart : Map[Any, Int] = preLabeled match { From f11978100fa49fedba66ec0ca067e69c23f192b0 Mon Sep 17 00:00:00 2001 From: alia Date: Mon, 5 Aug 2019 12:11:20 +0200 Subject: [PATCH 23/47] it used for develping KD-tree --- src/main/scala/com/spark3d/examples/test.scala | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/main/scala/com/spark3d/examples/test.scala b/src/main/scala/com/spark3d/examples/test.scala index c19cbd3..bcd9cc3 100644 --- a/src/main/scala/com/spark3d/examples/test.scala +++ b/src/main/scala/com/spark3d/examples/test.scala @@ -87,7 +87,7 @@ object Test { //val df = spark.read.format("csv").option("header", "true").load("hdfs://134.158.75.222:8020/user/alia/example3D.csv") //val df = spark.read.format("csv").option("header", "true").load("hdfs://134.158.75.222:8020/user/alia/test2d.csv") val df = spark.read.format("fits").option("hdu", 1).load(fn_fits) - + println("Number of pints:"+df.count) ////////////////////////////// @@ -112,16 +112,16 @@ object Test { //Find the number of partitions val maximumPartitions=pow(maximumLevel,2) - if(log2(part)%1==0) - if(part<=maximumPartitions){ +// if(log2(part)%1==0) + // if(part<=maximumPartitions){ val df_colid = df.prePartition(options, part) val number1=df_colid.repartitionByCol("partition_id", 
true, part).mapPartitions(part => Iterator(part.size)).collect().toList println("number1: "+ number1) - } - else - println("Please, Maximum number of partitions is "+ maximumPartitions) - else - println("Please determine the number of partitions as 1, 2, 4, 8 and so on") + // } + // else + // println("Please, Maximum number of partitions is "+ maximumPartitions) + // else + // println("Please determine the number of partitions as 1, 2, 4, 8 and so on") From e3f3a8f458c2469aa7bd8c4893be2295abcce54b Mon Sep 17 00:00:00 2001 From: alia Date: Mon, 5 Aug 2019 12:12:40 +0200 Subject: [PATCH 24/47] containKD methid is added --- .../spark3d/geometryObjects/BoxEnvelope.scala | 34 +++++++++++++++---- 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/src/main/scala/com/spark3d/geometryObjects/BoxEnvelope.scala b/src/main/scala/com/spark3d/geometryObjects/BoxEnvelope.scala index 3a1c98c..2c624b4 100644 --- a/src/main/scala/com/spark3d/geometryObjects/BoxEnvelope.scala +++ b/src/main/scala/com/spark3d/geometryObjects/BoxEnvelope.scala @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License. + * limitations under the License.env */ package com.astrolabsoftware.spark3d.geometryObjects @@ -67,7 +67,10 @@ class BoxEnvelope private( min(p1.y, min(p2.y, p3.y)), max(p1.y, max(p2.y, p3.y)), min(p1.z, min(p2.z, p3.z)), max(p1.z, max(p2.z, p3.z)) ) - + + + + // Trigger an AssertionError if Point3D is spherical. // Unfortunately, one cannot put the logic first in an // overloaded constructor in scala. Another option would @@ -628,19 +631,35 @@ class BoxEnvelope private( * on the boundary of this cube Envelope, false if the cube Envelope is null. 
*/ def covers(x: Double, y: Double, z: Double): Boolean = { + if (isNull) { return false } - - x >= minX && + + x >= minX && x < maxX && y >= minY && y < maxY && z >= minZ && - z < maxZ + z < maxZ + } + + def coversKD(x: Double, y: Double, z: Double): Boolean = { + + if (isNull) { + return false + } + + x >= minX && + x <= maxX && + y >= minY && + y <= maxY && + z >= minZ && + z <= maxZ + } /** * Tests if the cube Envelope other * lies completely inside this cube Envelope (inclusive of the boundary). @@ -653,7 +672,7 @@ class BoxEnvelope private( return false } - env.minX >= minX && + env.minX >= minX && env.maxX < maxX && env.minY >= minY && env.maxY < maxY && @@ -661,6 +680,7 @@ class BoxEnvelope private( env.maxZ < maxZ } + /** * Computes the distance between this and another cube Envelope * The distance between overlapping cube Envelopes is 0. Otherwise, the @@ -754,6 +774,8 @@ object BoxEnvelope { min(x1, x2), max(x1, x2), min(y1, y2), max(y1, y2), min(z1, z2), max(z1, z2) + + ) } } From 472264d06426f5fa2d68256c62a60fa530f43c78 Mon Sep 17 00:00:00 2001 From: alia Date: Mon, 5 Aug 2019 12:13:36 +0200 Subject: [PATCH 25/47] updated --- src/main/scala/com/spark3d/spatialPartitioning/KDtree.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/scala/com/spark3d/spatialPartitioning/KDtree.scala b/src/main/scala/com/spark3d/spatialPartitioning/KDtree.scala index c62d405..cd8506d 100644 --- a/src/main/scala/com/spark3d/spatialPartitioning/KDtree.scala +++ b/src/main/scala/com/spark3d/spatialPartitioning/KDtree.scala @@ -220,7 +220,6 @@ class KDtree() extends Serializable { null else if(level==1){ current.box.indexID=partitionID - println(current.box.indexID) partitionBoundary += current.box partitionID+=1 } From e02828eeb12045066f766755564de90c5f11276d Mon Sep 17 00:00:00 2001 From: alia Date: Mon, 5 Aug 2019 12:14:09 +0200 Subject: [PATCH 26/47] KDtree is added --- .../spark3d/spatialPartitioning/OctreePartitioner.scala | 9 +++------ 1 file changed, 3 
insertions(+), 6 deletions(-) diff --git a/src/main/scala/com/spark3d/spatialPartitioning/OctreePartitioner.scala b/src/main/scala/com/spark3d/spatialPartitioning/OctreePartitioner.scala index 07ccce2..701b19f 100644 --- a/src/main/scala/com/spark3d/spatialPartitioning/OctreePartitioner.scala +++ b/src/main/scala/com/spark3d/spatialPartitioning/OctreePartitioner.scala @@ -22,7 +22,6 @@ import com.astrolabsoftware.spark3d.geometryObjects.Shape3D.Shape3D import scala.collection.mutable.{HashSet, ListBuffer} class OctreePartitioner (octree: Octree, grids : List[BoxEnvelope]) extends SpatialPartitioner(grids) { - /** * Get the number of partitions in this partitioning * @@ -44,8 +43,7 @@ class OctreePartitioner (octree: Octree, grids : List[BoxEnvelope]) extends Spat * */ override def placeObject[T <: Shape3D](spatialObject: T): Iterator[Tuple2[Int, T]] = { - - val result = HashSet.empty[Tuple2[Int, T]] + val result = HashSet.empty[Tuple2[Int, T]] var matchedPartitions = new ListBuffer[BoxEnvelope] matchedPartitions ++= octree.getMatchedLeafBoxes(spatialObject.getEnvelope) for(partition <- matchedPartitions) { @@ -66,8 +64,7 @@ class OctreePartitioner (octree: Octree, grids : List[BoxEnvelope]) extends Spat * */ override def placePoints(c0: Double, c1: Double, c2: Double, isSpherical: Boolean) : Int = { - - val spatialObject = new Point3D(c0, c1, c2, isSpherical) + val spatialObject = new Point3D(c0, c1, c2, isSpherical) val result = HashSet.empty[Int] var matchedPartitions = new ListBuffer[BoxEnvelope] matchedPartitions ++= octree.getMatchedLeafBoxes(spatialObject.getEnvelope) @@ -84,7 +81,7 @@ class OctreePartitioner (octree: Octree, grids : List[BoxEnvelope]) extends Spat * @return list of Tuple of containing partitions and their index/partition ID's */ override def getPartitionNodes[T <: Shape3D](spatialObject: T): List[Tuple2[Int, Shape3D]] = { - + println("getPartitionNodes") var partitionNodes = new ListBuffer[Shape3D] partitionNodes ++= 
octree.getMatchedLeafBoxes(spatialObject.getEnvelope) var partitionNodesIDs = partitionNodes.map(x => new Tuple2(x.getEnvelope.indexID, x)) From 5ed897793175ebbeabf6deb14b5b8e06d611d6de Mon Sep 17 00:00:00 2001 From: alia Date: Mon, 5 Aug 2019 12:14:42 +0200 Subject: [PATCH 27/47] Bounding box is fixed --- .../com/spark3d/spatialPartitioning/OctreePartitioning.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/com/spark3d/spatialPartitioning/OctreePartitioning.scala b/src/main/scala/com/spark3d/spatialPartitioning/OctreePartitioning.scala index 41c62ae..5f5f9aa 100644 --- a/src/main/scala/com/spark3d/spatialPartitioning/OctreePartitioning.scala +++ b/src/main/scala/com/spark3d/spatialPartitioning/OctreePartitioning.scala @@ -22,7 +22,7 @@ import scala.collection.mutable.ListBuffer class OctreePartitioning (private val octree: Octree) extends Serializable { - + println("OctreePartitioning") /** * @return the octree used for partitioning */ From 1827df4ead3e6beaecdf2018ecedb71613dc1f63 Mon Sep 17 00:00:00 2001 From: alia Date: Mon, 5 Aug 2019 12:19:39 +0200 Subject: [PATCH 28/47] Updated --- .../spark3d/geometryObjects/BoundaryKD.scala | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 src/main/scala/com/spark3d/geometryObjects/BoundaryKD.scala diff --git a/src/main/scala/com/spark3d/geometryObjects/BoundaryKD.scala b/src/main/scala/com/spark3d/geometryObjects/BoundaryKD.scala new file mode 100644 index 0000000..b48e907 --- /dev/null +++ b/src/main/scala/com/spark3d/geometryObjects/BoundaryKD.scala @@ -0,0 +1,46 @@ +/* + * Copyright 2018 AstroLab Software + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.env + */ +package com.astrolabsoftware.spark3d.geometryObjects + +import com.astrolabsoftware.spark3d.geometryObjects.Shape3D._ + +import scala.math._ + +/** Defines a cubical region of 3D coordinate space. + * This can be used to define a bounding box of a geometryObject + * + * An cube Envelope can be uniquely defined based on minimum and maximum coordinates along all + * three axes. On creating the cube Envelope initially, the min's and max's are assigned automatically. + * + * Default constructor is kept private to avoid creating an instance of the cube Envelope class without initialising + * the min/max coordinates along axes incorrectly. 
+ * + * @param minX minimum coordinate of cube Envelope along X-axis + * @param maxX maximum coordinate of cube Envelope along X-axis + * @param minY minimum coordinate of cube Envelope along Y-axis + * @param maxY maximum coordinate of cube Envelope along Y-axis + * @param minZ minimum coordinate of cube Envelope along Z-axis + * @param maxZ maximum coordinate of cube Envelope along Z-axis + */ +class BoundaryKD private( + var minX: Double, var maxX: Double, + var minY: Double, var maxY: Double, + var minZ: Double, var maxZ: Double) + extends Serializable { + + + + } From ce2745e38cba8f6011555605dbdb696080adc853 Mon Sep 17 00:00:00 2001 From: alia Date: Mon, 5 Aug 2019 12:20:53 +0200 Subject: [PATCH 29/47] nothing --- .../spark3d/spatialPartitioning/learn.scala | 81 +++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 src/main/scala/com/spark3d/spatialPartitioning/learn.scala diff --git a/src/main/scala/com/spark3d/spatialPartitioning/learn.scala b/src/main/scala/com/spark3d/spatialPartitioning/learn.scala new file mode 100644 index 0000000..eaf26c9 --- /dev/null +++ b/src/main/scala/com/spark3d/spatialPartitioning/learn.scala @@ -0,0 +1,81 @@ +/* + * Copyright 2018 AstroLab Software + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.astrolabsoftware.spark3d.spatialPartitioning + +import com.astrolabsoftware.spark3d.geometryObjects._ +import com.astrolabsoftware.spark3d.geometryObjects.Shape3D.Shape3D + +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.functions.udf +import org.apache.spark.sql.functions.col +import org.apache.spark.sql.functions.lit +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.functions.spark_partition_id +import org.apache.spark.sql.Column +import org.apache.spark.sql._ + + + +import scala.util.control.Breaks._ +import scala.collection.mutable.ListBuffer +import scala.collection.mutable.Queue + +/** + * Octree is a 3D extension of Quadtree where in at each stage node (Cuboid) + * (instead of rectangle in Quadtree case) is split into 8 sub nodes. + * Each node can contain + * It is assumed that one can place an object only using its bounding box, a BoxEnvelope + * + * @param box root node of this octree + * @param maxItemsPerNode maximum number of items per Cuboid (box) + */ + +/** + * Octree is a 3D extension of Quadtree where in at each stage node (Cuboid) + * (instead of rectangle in Quadtree case) is split into 8 sub nodes. 
+ * + * @param box BoxEnvelope (boundary) of the tree rooted at this node + * @param level The depth of the node compared to the root of original tree + * @param maxItemsPerNode maximum number of elements per node + * @param maxLevel maximum level upto which tree can grow + */ +class learn(df:DataFrame){ + + + // df.printSchema() + // df.show(50,false) + //var df1=df.write.partitionBy("RA") +// val df1= df.repartition(4).withColumn("partition_id", spark_partition_id()) +// df1.show(50,false) + +def add1(s: String): String = { + + return "test" +} + + //val udf1 = udf[Int, String]{s: String => 5 } + val udf1 = udf[String,String](add1) + //val udf1=udf{s: String => s+"1"} +val x=df.withColumn("id", udf1(col("RA")) ) + x.printSchema + x.show + +//val placePointsUDF = udf[Int, Double, Double, Double, Boolean](partitioner.placePoints) + + + + +} From 0d9004eb5c9a20b9e295920c6aee81d3b12ac9bf Mon Sep 17 00:00:00 2001 From: alia Date: Mon, 5 Aug 2019 12:29:39 +0200 Subject: [PATCH 30/47] placePoint method for KD-tree is added --- .../com/spark3d/spatialPartitioning/KDtreePartitioner.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/com/spark3d/spatialPartitioning/KDtreePartitioner.scala b/src/main/scala/com/spark3d/spatialPartitioning/KDtreePartitioner.scala index d2d3312..b3fbe20 100644 --- a/src/main/scala/com/spark3d/spatialPartitioning/KDtreePartitioner.scala +++ b/src/main/scala/com/spark3d/spatialPartitioning/KDtreePartitioner.scala @@ -82,7 +82,7 @@ class KDtreePartitioner (kdtree: KDtree, grids : List[BoxEnvelope]) extends Spat partitionId+=1 } - 64 + 0 } From e7ded79fa432039a75deca40f1aa7805bc5c4fde Mon Sep 17 00:00:00 2001 From: alia Date: Mon, 5 Aug 2019 12:30:16 +0200 Subject: [PATCH 31/47] Bounding box is fixed --- .../com/spark3d/spatialPartitioning/KDtreePartitioning.scala | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/main/scala/com/spark3d/spatialPartitioning/KDtreePartitioning.scala 
b/src/main/scala/com/spark3d/spatialPartitioning/KDtreePartitioning.scala index 4814c97..ec898c3 100644 --- a/src/main/scala/com/spark3d/spatialPartitioning/KDtreePartitioning.scala +++ b/src/main/scala/com/spark3d/spatialPartitioning/KDtreePartitioning.scala @@ -28,7 +28,6 @@ import scala.collection.mutable.ListBuffer class KDtreePartitioning (private val kdtree: KDtree, grids:List[BoxEnvelope]) extends Serializable { - println("KDtreePartitioning") /** * @return the octree used for partitioning */ @@ -85,9 +84,6 @@ object KDtreePartitioning { //tree.printKDtree(tree) val grids=tree.BFS(tree,levelPart).toList - - for(i<-grids) - println(i) new KDtreePartitioning(tree,grids) From 1d54c58087a024e59cc661f1dd08fdeae8b56318 Mon Sep 17 00:00:00 2001 From: alia Date: Mon, 5 Aug 2019 12:58:14 +0200 Subject: [PATCH 32/47] Nothing --- src/main/scala/com/spark3d/geometryObjects/Point3D.scala | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/main/scala/com/spark3d/geometryObjects/Point3D.scala b/src/main/scala/com/spark3d/geometryObjects/Point3D.scala index 8c60552..03a385d 100644 --- a/src/main/scala/com/spark3d/geometryObjects/Point3D.scala +++ b/src/main/scala/com/spark3d/geometryObjects/Point3D.scala @@ -40,12 +40,15 @@ import com.astrolabsoftware.spark3d.utils.Utils.sphericalToCartesian class Point3D(val x: Double, val y: Double, val z: Double, val isSpherical: Boolean) extends Shape3D with Serializable { + + // The center of the point is the point val center: Point3D = this // Zero volume def getVolume: Double = 0.0 - + + /** * Methods to determine whether the Point3D is contained in another shape. * Implement different ways for different shapes (Point, Shell, Box available). 
From 0048ce410d2d7d3cb1612637ad733dae12d0343a Mon Sep 17 00:00:00 2001 From: alia Date: Mon, 5 Aug 2019 12:59:14 +0200 Subject: [PATCH 33/47] Nothin --- .../spatialPartitioning/OnionPartitioner.scala | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/main/scala/com/spark3d/spatialPartitioning/OnionPartitioner.scala b/src/main/scala/com/spark3d/spatialPartitioning/OnionPartitioner.scala index bfef5c1..985fa51 100644 --- a/src/main/scala/com/spark3d/spatialPartitioning/OnionPartitioner.scala +++ b/src/main/scala/com/spark3d/spatialPartitioning/OnionPartitioner.scala @@ -40,7 +40,7 @@ import com.astrolabsoftware.spark3d.geometryObjects.Point3D * */ class OnionPartitioner(grids : List[ShellEnvelope]) extends SpatialPartitioner(grids) { - + println("OnionPartitioner") /** * The number of partitions is the number of shells defined as the * difference between 2 concentric spheres. The n partitions @@ -122,32 +122,36 @@ class OnionPartitioner(grids : List[ShellEnvelope]) extends SpatialPartitioner(g * */ override def placePoints(c0: Double, c1: Double, c2: Double, isSpherical: Boolean) : Int = { + val center = new Point3D(c0, c1, c2, isSpherical) var containFlag : Boolean = false val notIncludedID = grids.size - 1 val result = HashSet.empty[Int] - + // Associate the object with one shell breakable { for (pos <- 0 to grids.size - 1) { val shell = grids(pos) - - if (shell.isPointInShell(center)) { + + if (shell.isPointInShell(center)) { result += pos containFlag = true break - } + } } } - + // Useless if Point3D if (!containFlag) { result += notIncludedID } - + // Return an iterator + result.toList(0) + + } // /** From 0f238053fd458d31b0bf49492df58cae7d47d5b4 Mon Sep 17 00:00:00 2001 From: alia Date: Mon, 5 Aug 2019 12:59:31 +0200 Subject: [PATCH 34/47] Tracing --- .../com/spark3d/spatialPartitioning/OnionPartitioning.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/src/main/scala/com/spark3d/spatialPartitioning/OnionPartitioning.scala b/src/main/scala/com/spark3d/spatialPartitioning/OnionPartitioning.scala index 1802932..a0cf4b4 100644 --- a/src/main/scala/com/spark3d/spatialPartitioning/OnionPartitioning.scala +++ b/src/main/scala/com/spark3d/spatialPartitioning/OnionPartitioning.scala @@ -31,7 +31,7 @@ import com.astrolabsoftware.spark3d.geometryObjects.Shape3D._ * */ class OnionPartitioning extends Serializable { - +println ("OnionPartitionong") /** * Elements of the Grid as List[ShellEnvelope] */ @@ -84,6 +84,7 @@ class OnionPartitioning extends Serializable { grids += shell } + } /** From 00a8b3a32d09f44aca58110058a2d41db35de215 Mon Sep 17 00:00:00 2001 From: alia Date: Tue, 6 Aug 2019 09:53:47 +0200 Subject: [PATCH 35/47] Simple commit test --- build.sbt | 1 + 1 file changed, 1 insertion(+) diff --git a/build.sbt b/build.sbt index ba43915..71f8469 100644 --- a/build.sbt +++ b/build.sbt @@ -57,6 +57,7 @@ lazy val root = (project in file(".")). ) ) + // POM settings for Sonatype homepage := Some( url("https://github.com/astrolabsoftware/spark3D") From 7e6dcae7d5c4be469121566cbcf0fdff7af6a0db Mon Sep 17 00:00:00 2001 From: abualia4 Date: Tue, 6 Aug 2019 10:03:46 +0200 Subject: [PATCH 36/47] Simple commit test --- build.sbt | 1 - 1 file changed, 1 deletion(-) diff --git a/build.sbt b/build.sbt index 71f8469..ba43915 100644 --- a/build.sbt +++ b/build.sbt @@ -57,7 +57,6 @@ lazy val root = (project in file(".")). 
) ) - // POM settings for Sonatype homepage := Some( url("https://github.com/astrolabsoftware/spark3D") From 97420ecf2d2fa80ca4e2e156d74f521cf774264d Mon Sep 17 00:00:00 2001 From: abualia4 Date: Tue, 6 Aug 2019 17:03:01 +0200 Subject: [PATCH 37/47] KDtree partitioner is added --- run_viz_scala.sh | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/run_viz_scala.sh b/run_viz_scala.sh index 5df1131..ce19c1b 100755 --- a/run_viz_scala.sh +++ b/run_viz_scala.sh @@ -11,21 +11,31 @@ SF=target/scala-2.11/spark3d_2.11-${VERSION}.jar HP=lib/jhealpix.jar,lib/plotly-assembly-0.3.0-SNAPSHOT.jar MASTERURL=yarn -inputfn="LSST1Y/out_srcs_s1_0.fits" +#inputfn="LSST1Y/out_srcs_s1_0.fits" + #inputfn=hdfs://134.158.75.222:8020//lsst/LSST1Y/out_srcs_s1_3.fits +#spark-submit \ + # --master ${MASTERURL} \ + # --driver-memory 4g --executor-memory 28g --executor-cores 17 --total-executor-cores 102 \ + # --jars ${SF},${HP} --packages ${PACK} \ + # --class com.astrolabsoftware.spark3d.examples.VisualizePart \ + #target/scala-${SCAlA_VERSION}/spark3d_${SCAlA_VERSION}-${VERSION}.jar \ + # $inputfn "Z_COSMO,RA,DEC" true "onion" 8 0.0001 "username" "api_key" -spark-submit \ - --master ${MASTERURL} \ - --driver-memory 4g --executor-memory 28g --executor-cores 17 --total-executor-cores 102 \ - --jars ${SF},${HP} --packages ${PACK} \ - --class com.astrolabsoftware.spark3d.examples.VisualizePart \ - target/scala-${SCAlA_VERSION}/spark3d_${SCAlA_VERSION}-${VERSION}.jar \ - $inputfn "Z_COSMO,RA,DEC" true "onion" 8 0.0001 "username" "api_key" + #inputfn=hdfs://134.158.75.222:8020/user/julien.peloton/dc2 + #spark-submit \ + # --master ${MASTERURL} \ + # --driver-memory 4g --executor-memory 28g --executor-cores 17 --total-executor-cores 102 \ + # --jars ${SP},${HP} --packages ${PACK} \ + # --class com.astrolabsoftware.spark3d.examples.VisualizePart \ + # target/scala-${SCAlA_VERSION}/spark3d_${SCAlA_VERSION}-${VERSION}.jar \ + # $inputfn 
"position_x_mock,position_y_mock,position_z_mock" false "octree" 512 0.001 "abualia4" "GFz0cUDumcKcnhpMd8qw" -inputfn="dc2" +inputfn=hdfs://134.158.75.222:8020/user/julien.peloton/dc2 spark-submit \ --master ${MASTERURL} \ - --driver-memory 4g --executor-memory 28g --executor-cores 17 --total-executor-cores 102 \ - --jars ${SP},${HP} --packages ${PACK} \ - --class com.astrolabsoftware.spark3d.examples.VisualizePart \ + --driver-memory 4g --executor-memory 28g --executor-cores 17 --total-executor-cores 102 \ + --jars ${SP},${HP} --packages ${PACK} \ + --class com.astrolabsoftware.spark3d.examples.VisualizePart \ target/scala-${SCAlA_VERSION}/spark3d_${SCAlA_VERSION}-${VERSION}.jar \ - $inputfn "position_x_mock,position_y_mock,position_z_mock" false "octree" 512 0.001 "username" "api_key" + $inputfn "position_x_mock,position_y_mock,position_z_mock" false "octree" 512 0.001 "abualia4" "GFz0cUDumcKcnhpMd8qw" + From 1775a8e2e47c9264357bab1d4e3dc4df7f8c5499 Mon Sep 17 00:00:00 2001 From: abualia4 Date: Thu, 8 Aug 2019 10:21:10 +0200 Subject: [PATCH 38/47] Documenting the code --- .../spark3d/spatialPartitioning/KDtree.scala | 80 ++++++++++++------- 1 file changed, 52 insertions(+), 28 deletions(-) diff --git a/src/main/scala/com/spark3d/spatialPartitioning/KDtree.scala b/src/main/scala/com/spark3d/spatialPartitioning/KDtree.scala index cd8506d..ca33b7d 100644 --- a/src/main/scala/com/spark3d/spatialPartitioning/KDtree.scala +++ b/src/main/scala/com/spark3d/spatialPartitioning/KDtree.scala @@ -27,33 +27,48 @@ import scala.collection.mutable.Queue /**Description: This class is responsible for: * building Balanced KDtree for 3D points - * Printing the B2DT + * Printing the tree + * Breadth-First Search to determine the bounding boxes for partitioning * */ class KDtree() extends Serializable { + /** + * @param point Value of Node's (x,y,z) + * @param box Boundary box of the tree rooted at this node + * @param level The level of the node compared to the root of original tree 
+ * @param parent KDtree reference to the parent node of the current node
+ * @param left KDtree reference to the left node of the current node
+ * @param right KDtree reference to the right node of the current node
+ */
+
+ //Initialization
 var point:Point3D=null
 var box: BoxEnvelope=null
 var level: Int=0
 var parent: KDtree = null
 var left: KDtree = null
 var right: KDtree = null
- var check:Int=0

 /**
- * Insert Element to build KDtree
+ * Insert an input object/3D point into the KDtree to build it
+ * @param current KDtree root
+ * @param value Input object/node (x,y,z)
+ * @param level The depth of the node compared to the root of original tree
+ * @return KDtree
 */
 def insertElement(current:KDtree, value:Point3D, level:Int):KDtree={
+ //To determine the dimension (x,y or z)
 var currentLevel=level%3
 // For x dimension
 if(currentLevel==0){
- //left side
+ //left subtree
 if(value.x Date: Thu, 8 Aug 2019 10:33:07 +0200
Subject: [PATCH 39/47] Documenting the code

---
 .../KDtreePartitioning.scala | 22 +++++++++++--------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/src/main/scala/com/spark3d/spatialPartitioning/KDtreePartitioning.scala b/src/main/scala/com/spark3d/spatialPartitioning/KDtreePartitioning.scala
index ec898c3..19635cd 100644
--- a/src/main/scala/com/spark3d/spatialPartitioning/KDtreePartitioning.scala
+++ b/src/main/scala/com/spark3d/spatialPartitioning/KDtreePartitioning.scala
@@ -29,7 +29,7 @@ import scala.collection.mutable.ListBuffer
 class KDtreePartitioning (private val kdtree: KDtree, grids:List[BoxEnvelope]) extends Serializable {

 /**
- * @return the octree used for partitioning
+ * @return the KDtree used for partitioning
 */
 def getPartitionTree(): KDtree = {
 kdtree
@@ -37,19 +37,24 @@ class KDtreePartitioning (private val kdtree: KDtree, grids:List[BoxEnvelope])
 }

 /**
- * @return Leaf nodes of the partitioning tree
+ * @return List of boundary boxes that will be used in the partitioning
 */
 def getGrids(): List[BoxEnvelope] = {
- 
// grids.isInstanceOf[List[BoxEnvelope]] + grids } } object KDtreePartitioning { - + /** + * @param data List of the input objects/3D points + * @param tree KDtree tree + * @param levelPart The level which is used to determine the boundary boxes for partitioning + * @return KDtreePartitioning + */ def apply(data: List[Point3D], tree: KDtree, levelPart:Int): KDtreePartitioning = { - //Initialize the boundary box + //Initialize the boundary box for the KDtree root var min_X:Double=data(0).x var max_X:Double=data(0).x var min_Y:Double=data(0).y @@ -77,12 +82,11 @@ object KDtreePartitioning { if(i.z>max_Z) max_Z=i.z } - + //Expand by 1 the boundary box val KDtreeBoundary:BoxEnvelope=BoxEnvelope.apply(min_X-1,max_X+1,min_Y-1,max_Y+1,min_Z-1,max_Z+1) - + //Sending the list of data to build the balanced KDtree tree.insertList(data,0,KDtreeBoundary) - //tree.printKDtree(tree) - + //Finding the list of boundary boxes for partitioning val grids=tree.BFS(tree,levelPart).toList new KDtreePartitioning(tree,grids) From d9c8b576e6d6ea2ac09378d6c373b6c28363885b Mon Sep 17 00:00:00 2001 From: abualia4 Date: Thu, 8 Aug 2019 10:46:25 +0200 Subject: [PATCH 40/47] Documenting the code --- src/main/scala/com/spark3d/spatialPartitioning/KDtree.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/com/spark3d/spatialPartitioning/KDtree.scala b/src/main/scala/com/spark3d/spatialPartitioning/KDtree.scala index ca33b7d..2c271ce 100644 --- a/src/main/scala/com/spark3d/spatialPartitioning/KDtree.scala +++ b/src/main/scala/com/spark3d/spatialPartitioning/KDtree.scala @@ -1,5 +1,5 @@ /* - * Copyright 2018 AstroLab Software + * Copyright 2019 AstroLab Software * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
From 71f15e5a4b0e355f8f78e860c2cfcacc47d30fd8 Mon Sep 17 00:00:00 2001 From: abualia4 Date: Thu, 8 Aug 2019 10:46:37 +0200 Subject: [PATCH 41/47] Documenting the code --- .../com/spark3d/spatialPartitioning/KDtreePartitioning.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/com/spark3d/spatialPartitioning/KDtreePartitioning.scala b/src/main/scala/com/spark3d/spatialPartitioning/KDtreePartitioning.scala index 19635cd..5a3f920 100644 --- a/src/main/scala/com/spark3d/spatialPartitioning/KDtreePartitioning.scala +++ b/src/main/scala/com/spark3d/spatialPartitioning/KDtreePartitioning.scala @@ -1,5 +1,5 @@ /* - * Copyright 2018 AstroLab Software + * Copyright 2019 AstroLab Software * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From e4b541eac8059502f21ba6f9f90afbc6a34946bb Mon Sep 17 00:00:00 2001 From: abualia4 Date: Thu, 8 Aug 2019 10:46:46 +0200 Subject: [PATCH 42/47] Documenting the code --- .../KDtreePartitioner.scala | 52 +++++++------------ 1 file changed, 19 insertions(+), 33 deletions(-) diff --git a/src/main/scala/com/spark3d/spatialPartitioning/KDtreePartitioner.scala b/src/main/scala/com/spark3d/spatialPartitioning/KDtreePartitioner.scala index b3fbe20..9f59c02 100644 --- a/src/main/scala/com/spark3d/spatialPartitioning/KDtreePartitioner.scala +++ b/src/main/scala/com/spark3d/spatialPartitioning/KDtreePartitioner.scala @@ -1,5 +1,5 @@ /* - * Copyright 2018 AstroLab Software + * Copyright 2019 AstroLab Software * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -23,6 +23,12 @@ import com.astrolabsoftware.spark3d.geometryObjects.Shape3D.Shape3D

 import scala.collection.mutable.{HashSet, ListBuffer}

+/**
+ * @param kdtree KDtree tree
+ * @param grids the list of boundary boxes/partitions in this partitioning
+ *
+ */
+
 class KDtreePartitioner (kdtree: KDtree, grids : List[BoxEnvelope]) extends SpatialPartitioner(grids) {

@@ -38,14 +44,7 @@ class KDtreePartitioner (kdtree: KDtree, grids : List[BoxEnvelope]) extends Spat
 }

 /**
- * Gets the iterator on tuple leaf nodes (partitions) which intersects, contains or are contained
- * by the input object.
- *
- * @param spatialObject : (T<:Shape3D)
- * Shape3D instance (or any extension) representing objects to put on
- * the grid.
- * @return (Iterator[Tuple2[Int, T]) Iterable over a Tuple
- * * of (Int, T) where Int is the partition index, and T the input object.
+ *
 *
 */
 override def placeObject[T <: Shape3D](spatialObject: T): Iterator[Tuple2[Int, T]] = {

@@ -54,16 +53,15 @@ class KDtreePartitioner (kdtree: KDtree, grids : List[BoxEnvelope]) extends Spat
 }

 /**
- * Gets the iterator on tuple leaf nodes (partitions) which intersects, contains or are contained
- * by the input object.
- *
+ *Assigning the tuple to its partition
+ *@param c0 the x-coordinate of the point
+ *@param c1 the y-coordinate of the point
+ *@param c2 the z-coordinate of the point
+ @param isSpherical Definition of the coordinate system. Spherical or cartesian
+ @return The partition number of the tuple/3D point
 * @param spatialObject : (T<:Shape3D)
- * Shape3D instance (or any extension) representing objects to put on
- * the grid.
- * @return (Iterator[Tuple2[Int, T]) Iterable over a Tuple
- * * of (Int, T) where Int is the partition index, and T the input object. 
- * */ + override def placePoints(c0: Double, c1: Double, c2: Double, isSpherical: Boolean) : Int = { val result = HashSet.empty[Int] var partitionId:Int=0 @@ -87,34 +85,22 @@ class KDtreePartitioner (kdtree: KDtree, grids : List[BoxEnvelope]) extends Spat } /** - * Gets the partitions which contain the input object. - * - * @param spatialObject input object for which the containment is to be found - * @return list of Tuple of containing partitions and their index/partition ID's + * */ override def getPartitionNodes[T <: Shape3D](spatialObject: T): List[Tuple2[Int, Shape3D]] = { null } /** - * Gets the partitions which are the neighbors of the partitions which contain the input object. - * - * @param spatialObject input object for which the neighbors are to be found - * @param inclusive If true, includes the node of the spatialObject as well. Default is false. - * @return list of Tuple of neighbor partitions and their index/partition ID's + * */ override def getNeighborNodes[T <: Shape3D](spatialObject: T, inclusive: Boolean = false): List[Tuple2[Int, Shape3D]] = { null } /** - * Gets the partitions which are the neighbors to the input partition. Useful when getting - * secondary neighbors (neighbors to neighbor) of the queryObject. - * - * @param containingNode The boundary of the Node for which neighbors are to be found. 
- * @param containingNodeID The index/partition ID of the containingNode - * @return list of Tuple of secondary neighbor partitions and their index/partition IDs - */ + * + */ override def getSecondaryNeighborNodes[T <: Shape3D](containingNode: T, containingNodeID: Int): List[Tuple2[Int, Shape3D]] = { null From 1eb16655ddfcb1dc12294a5d82d619b3ad8c172a Mon Sep 17 00:00:00 2001 From: abualia4 Date: Thu, 8 Aug 2019 10:53:06 +0200 Subject: [PATCH 43/47] Documenting the code --- src/main/scala/com/spark3d/Partitioners.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/scala/com/spark3d/Partitioners.scala b/src/main/scala/com/spark3d/Partitioners.scala index caf80ab..9511314 100644 --- a/src/main/scala/com/spark3d/Partitioners.scala +++ b/src/main/scala/com/spark3d/Partitioners.scala @@ -166,7 +166,8 @@ class Partitioners(df : DataFrame, options: Map[String, String]) extends Seriali val samples:List[Point3D] = rawRDD.takeSample(false, sampleSize, new Random(dataCount).nextInt(dataCount.asInstanceOf[Int])).toList.map(x => x.asInstanceOf[Point3D]) - // to determine the level which is used in partitioning, also the number of partitions is determined + // To determine the level which is used in partitioning, also the number of partitions is determined + // Number of partitions is power of 2. 
1, 2, 4, 8, 16, 32 and so on val log2 = (x: Int) => floor(log10(x)/log10(2.0)).asInstanceOf[Int] val levelPartitioning=log2(numPartitionsRaw) +1 From 3feb9f7aae50a770488677b93ca757b66330797d Mon Sep 17 00:00:00 2001 From: abualia4 Date: Thu, 8 Aug 2019 11:11:44 +0200 Subject: [PATCH 44/47] Documenting the code --- .../scala/com/spark3d/examples/test.scala | 63 +++++++++---------- 1 file changed, 29 insertions(+), 34 deletions(-) diff --git a/src/main/scala/com/spark3d/examples/test.scala b/src/main/scala/com/spark3d/examples/test.scala index bcd9cc3..c6ea1d8 100644 --- a/src/main/scala/com/spark3d/examples/test.scala +++ b/src/main/scala/com/spark3d/examples/test.scala @@ -1,5 +1,5 @@ /* - * Copyright 2018 Julien Peloton + * Copyright 2019 AstroLab Software * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -37,7 +37,7 @@ import org.apache.log4j.Level import org.apache.log4j.Logger /** - * Main app. + * Main app. 
This is used by Ahmed Alia to run the Spark3D, especially KDtree */ object Test { // Set to Level.WARN is you want verbosity @@ -82,11 +82,15 @@ object Test { // Mode val mode = args(6).toString - // Load the data - ///////////////////////////////spark.sparkContext - //val df = spark.read.format("csv").option("header", "true").load("hdfs://134.158.75.222:8020/user/alia/example3D.csv") - //val df = spark.read.format("csv").option("header", "true").load("hdfs://134.158.75.222:8020/user/alia/test2d.csv") - val df = spark.read.format("fits").option("hdu", 1).load(fn_fits) + + + // Load the data + val df = if (fn_fits.endsWith("fits")) { + spark.read.format("fits").option("hdu", 1).load(fn_fits) + } else { + spark.read.format("parquet").load(fn_fits) + } + println("Number of pints:"+df.count) ////////////////////////////// @@ -112,35 +116,26 @@ object Test { //Find the number of partitions val maximumPartitions=pow(maximumLevel,2) -// if(log2(part)%1==0) - // if(part<=maximumPartitions){ + if(part=="kdtree"){// For kdtree partitioner + //Validation + if(log2(part)%1==0) + if(part<=maximumPartitions){ val df_colid = df.prePartition(options, part) val number1=df_colid.repartitionByCol("partition_id", true, part).mapPartitions(part => Iterator(part.size)).collect().toList - println("number1: "+ number1) - // } - // else - // println("Please, Maximum number of partitions is "+ maximumPartitions) - // else - // println("Please determine the number of partitions as 1, 2, 4, 8 and so on") - - - - // val df_colid = df.prePartition(options, part) - - // df_colid.show(100,false) - // df_colid.groupBy("partition_id").count().show(50,false) - // df_colid.printSchema() - //df_colid.filter($"partition_id">4).show() - //println(df_colid.filter($"DEC"< -0.001).count()) - //MC it to minimize flukes - - // val number= df_colid.repartitionByCol("partition_id", true, part) - - - // val number1=df_colid.repartitionByCol("partition_id", true, part).mapPartitions(part => 
Iterator(part.size)).collect().toList - // println("number1: "+ number1) - - + println("Partitions: "+ number1) + } + else + println("Please, Maximum number of partitions is "+ maximumPartitions) + else + println("Please determine the number of partitions as 1, 2, 4, 8 and so on") + } + else For others + { + val df_colid = df.prePartition(options, part) + val number1=df_colid.repartitionByCol("partition_id", true, part).mapPartitions(part => Iterator(part.size)).collect().toList + println("Partitions: "+ number1) + } + } } \ No newline at end of file From 9065432747b6f6d66420148ff7d67326d9e35105 Mon Sep 17 00:00:00 2001 From: abualia4 Date: Thu, 8 Aug 2019 11:18:04 +0200 Subject: [PATCH 45/47] Documenting the code --- .../spark3d/geometryObjects/BoxEnvelope.scala | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/main/scala/com/spark3d/geometryObjects/BoxEnvelope.scala b/src/main/scala/com/spark3d/geometryObjects/BoxEnvelope.scala index 2c624b4..0d812b1 100644 --- a/src/main/scala/com/spark3d/geometryObjects/BoxEnvelope.scala +++ b/src/main/scala/com/spark3d/geometryObjects/BoxEnvelope.scala @@ -627,8 +627,7 @@ class BoxEnvelope private( * being checked for containment * @param z the z-coordinate of the point for which this cube Envelope is * being checked for containment - * @return true if (x, y, z) lies in the interior or - * on the boundary of this cube Envelope, false if the cube Envelope is null. + * @return true if (x, y, z) lies in the interior of this cube Envelope, false if the cube Envelope is null. */ def covers(x: Double, y: Double, z: Double): Boolean = { @@ -646,7 +645,18 @@ class BoxEnvelope private( } - + /** + * Tests if the given tuple/point lies inside or in the border of the envelope. 
This method is used to assign the tuple to its partitioner in KD tree partitioner + * @param x the x-coordinate of the point for which this cube Envelope is + * being checked for containment + * @param y the y-coordinate of the point for which this cube Envelope is + * being checked for containment + * @param z the z-coordinate of the point for which this cube Envelope is + * being checked for containment + * @return true if (x, y, z) lies in the interior or + * on the boundary of this cube Envelope, false if the cube Envelope is null. + * + */ def coversKD(x: Double, y: Double, z: Double): Boolean = { if (isNull) { From 57b67050bcc8a97a2ec0b08f2405704a48ffa873 Mon Sep 17 00:00:00 2001 From: abualia4 Date: Thu, 8 Aug 2019 11:22:55 +0200 Subject: [PATCH 46/47] This bash code is used by Ahmed Alia to evaluate his work --- arun_scala.sh | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arun_scala.sh b/arun_scala.sh index af6ec34..83181c8 100755 --- a/arun_scala.sh +++ b/arun_scala.sh @@ -12,10 +12,11 @@ HP=lib/jhealpix.jar #MASTERURL=spark://134.158.75.222:7077 MASTERURL=yarn -fitsfn=hdfs://134.158.75.222:8020//lsst/LSST1Y/out_srcs_s1_2.fits + # fitsfn=hdfs://134.158.75.222:8020//lsst/LSST1Y/out_srcs_s1_3.fits # fitsfn=hdfs://134.158.75.222:8020/user/alia/a.fits + fitsfn=hdfs://134.158.75.222:8020/user/julien.peloton/dc2 -# 4096 +#This is used in evaluation of the KDtree partitioner sum=0 for i in {1..10} do @@ -27,7 +28,7 @@ do --jars ${SF},${HP} --packages ${PACK} \ --class com.astrolabsoftware.spark3d.examples.Test \ target/scala-${SBT_VERSION_SPARK}/spark3d_${SBT_VERSION_SPARK}-${VERSION}.jar \ - $fitsfn 1 "Z_COSMO,RA,DEC" true "octree" 4096 "col" + $fitsfn 1 "position_x_mock,position_y_mock,position_z_mock" true "kdtree" 8 "col" end_time="$(date -u +%s)" elapsed="$(($end_time-$start_time))" echo "Total of $elapsed seconds elapsed for process" From 3927c4cbab3eb26b01301fdb39ee975c3a8a0da7 Mon Sep 17 00:00:00 2001 From: abualia4 Date: Thu, 8 
Aug 2019 11:25:59 +0200 Subject: [PATCH 47/47] To perform some visual inspection of the KDtree partitioner --- run_viz_scala.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/run_viz_scala.sh b/run_viz_scala.sh index ce19c1b..97ab015 100755 --- a/run_viz_scala.sh +++ b/run_viz_scala.sh @@ -30,12 +30,12 @@ MASTERURL=yarn # target/scala-${SCAlA_VERSION}/spark3d_${SCAlA_VERSION}-${VERSION}.jar \ # $inputfn "position_x_mock,position_y_mock,position_z_mock" false "octree" 512 0.001 "abualia4" "GFz0cUDumcKcnhpMd8qw" -inputfn=hdfs://134.158.75.222:8020/user/julien.peloton/dc2 +inputfn=hdfs://134.158.75.222:8020/user/julien.peloton/dc2_rand spark-submit \ --master ${MASTERURL} \ --driver-memory 4g --executor-memory 28g --executor-cores 17 --total-executor-cores 102 \ --jars ${SP},${HP} --packages ${PACK} \ --class com.astrolabsoftware.spark3d.examples.VisualizePart \ target/scala-${SCAlA_VERSION}/spark3d_${SCAlA_VERSION}-${VERSION}.jar \ - $inputfn "position_x_mock,position_y_mock,position_z_mock" false "octree" 512 0.001 "abualia4" "GFz0cUDumcKcnhpMd8qw" + $inputfn "position_x_mock,position_y_mock,position_z_mock" false "kdtree" 512 0.001 "abualia4" "GFz0cUDumcKcnhpMd8qw"