Skip to content

Latest commit

 

History

History
46 lines (38 loc) · 1.21 KB

spark-sql-dataframe-structtype.adoc

File metadata and controls

46 lines (38 loc) · 1.21 KB

Explicit Schema using StructType and StructField

A schema is a description of the structure of a DataFrame (which is the result of a query over semi-structured data). It can be inferred (implied) or explicit.

An explicit schema is described using StructType, which is a collection of StructField objects. A StructField describes a single column — its name, its type, and whether or not it accepts null values (i.e. whether it is nullable).

import org.apache.spark.sql.types.StructType

val schema1 = new StructType()
  .add("a", "int")
  .add("b", "string")

scala> schema1.printTreeString
root
 |-- a: integer (nullable = true)
 |-- b: string (nullable = true)

// it is equivalent to the above expression
import org.apache.spark.sql.types.{IntegerType, StringType}
val schema2 = new StructType()
  .add("a", IntegerType)
  .add("b", StringType)

scala> schema2.printTreeString
root
 |-- a: integer (nullable = true)
 |-- b: string (nullable = true)

scala> println(schema1.prettyJson)
{
 "type" : "struct",
 "fields" : [ {
   "name" : "a",
   "type" : "integer",
   "nullable" : true,
   "metadata" : { }
 }, {
   "name" : "b",
   "type" : "string",
   "nullable" : true,
   "metadata" : { }
 } ]
}