From 8ca4a1c0c0f645f39a11d5ef0c03d3d88678799b Mon Sep 17 00:00:00 2001
From: Ahmed Alia <52276877+abualia4@users.noreply.github.com>
Date: Thu, 8 Aug 2019 13:07:49 +0200
Subject: [PATCH] Update Spark4Physicists.scala

---
 Spark4Physicists.scala | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/Spark4Physicists.scala b/Spark4Physicists.scala
index 87772a4..317825c 100644
--- a/Spark4Physicists.scala
+++ b/Spark4Physicists.scala
@@ -67,3 +67,34 @@ start = System.nanoTime()
 gal.describe( ).show()
 finish = System.nanoTime()
 elapsedTime(start,finish)
+
+// Minimum and maximum of the "z" column, fetched with a single aggregate query.
+start = System.nanoTime()
+val minMax = gal.select(min("z"), max("z")).first()
+// NOTE(review): the casts assume "z" is stored as FloatType -- confirm against the schema of gal.
+val zMin = minMax(0).asInstanceOf[Float]
+val zMax = minMax(1).asInstanceOf[Float]
+println("Minimum Value:"+ zMin+ "\t"+ "Maximum Value:"+ zMax)
+finish = System.nanoTime()
+elapsedTime(start, finish)
+
+// Histogram of z: tag every row with the index of the bin it falls into (column "bin").
+import org.apache.spark.sql.functions.{least, lit}
+start = System.nanoTime()
+val Nbins = 100
+val dz = (zMax - zMin) / Nbins
+// Bin k covers [zMin + k*dz, zMin + (k+1)*dz). Since z >= zMin the quotient is never negative,
+// so the truncating integer cast acts as a floor; least(...) keeps z == zMax in the last bin.
+// Do not subtract dz/2 before the cast: truncation toward zero would then widen bin 0 to
+// 1.5*dz and shrink the last bin to 0.5*dz.
+val zBin = gal.select("z").withColumn(
+  "bin", least(((col("z") - zMin) / dz).cast(IntegerType), lit(Nbins - 1)))
+
+// Group by bin, count the members of each bin and sort by ascending bin index.
+var h = zBin.groupBy("bin").count.orderBy("bin")
+// Replace the bin index with the location of the bin centre.
+h = h.withColumn("loc", col("bin") * dz + zMin + dz / 2).drop("bin")
+h = h.select("loc", "count")
+h.show()
+finish = System.nanoTime()
+elapsedTime(start, finish)