-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
Add support for importing Avro formatted files
- Loading branch information
Showing
32 changed files
with
726 additions
and
293 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
sbt.version=1.2.6 | ||
sbt.version=1.2.8 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
65 changes: 65 additions & 0 deletions
65
src/main/scala/com/exasol/cloudetl/avro/AvroRowIterator.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
package com.exasol.cloudetl.avro | ||
|
||
import com.exasol.cloudetl.data.Row | ||
|
||
import org.apache.avro.file.DataFileReader | ||
import org.apache.avro.generic.GenericRecord | ||
import org.apache.avro.util.Utf8 | ||
|
||
/**
 * An object that creates a [[com.exasol.cloudetl.data.Row]] iterator
 * given the Avro [[org.apache.avro.file.DataFileReader]] with
 * [[org.apache.avro.generic.GenericRecord]].
 *
 * Each next record is converted into an internal Row class.
 */
object AvroRowIterator {

  /**
   * Wraps an open Avro data file reader into a [[Row]] iterator.
   *
   * The underlying reader is closed automatically as soon as it is
   * exhausted, so callers do not need to close it themselves.
   *
   * @param reader an open Avro reader of generic records
   * @return an iterator producing one [[Row]] per Avro record
   */
  def apply(reader: DataFileReader[GenericRecord]): Iterator[Row] =
    new Iterator[Row] {
      // Remembers that the reader was drained and closed, so hasNext stays
      // cheap and safe to call repeatedly after exhaustion.
      @SuppressWarnings(Array("org.wartremover.warts.Var"))
      private[this] var isCompleted = false

      override def hasNext: Boolean =
        if (isCompleted) {
          false
        } else {
          val hasNext = reader.hasNext
          if (!hasNext) {
            // Eagerly close the reader the moment it is exhausted to avoid
            // leaking the underlying file handle.
            reader.close()
            isCompleted = true
          }
          hasNext
        }

      override def next(): Row = {
        if (!hasNext) {
          throw new NoSuchElementException("Avro reader next on empty iterator")
        }
        recordToRow(reader.next())
      }
    }

  /** Converts a single Avro generic record into an internal [[Row]]. */
  private[this] def recordToRow(record: GenericRecord): Row = {
    val size = record.getSchema.getFields.size
    // Seq.tabulate replaces the original mutable Array + index loop with an
    // immutable, single-expression build of the row values.
    Row(Seq.tabulate(size)(index => convertRecordValue(record.get(index))))
  }

  /**
   * Converts an Avro field value into a plain Scala / Java value.
   *
   * Complex Avro types (nested records, collections and maps) are not
   * supported yet and raise an [[IllegalArgumentException]].
   */
  private[this] def convertRecordValue(value: Any): Any = value match {
    case _: GenericRecord =>
      throw new IllegalArgumentException("Avro nested record type is not supported yet!")
    case _: java.util.Collection[_] =>
      throw new IllegalArgumentException("Avro collection type is not supported yet!")
    case _: java.util.Map[_, _] =>
      throw new IllegalArgumentException("Avro map type is not supported yet!")
    // Binding the matched value removes the asInstanceOf cast (and the
    // wartremover AsInstanceOf suppression the original needed for it).
    case utf8: Utf8   => utf8.toString
    case primitive    => primitive
  }

}
40 changes: 40 additions & 0 deletions
40
src/main/scala/com/exasol/cloudetl/bucket/AzureBlobBucket.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
package com.exasol.cloudetl.bucket | ||
|
||
import org.apache.hadoop.conf.Configuration | ||
|
||
/** A [[Bucket]] implementation for the Azure Blob Storage */
final case class AzureBlobBucket(path: String, params: Map[String, String]) extends Bucket {

  /** @inheritdoc */
  override val bucketPath: String = path

  /** @inheritdoc */
  override def validate(): Unit =
    Bucket.validate(params, Bucket.AZURE_PARAMETERS)

  /**
   * @inheritdoc
   *
   * Additionally validates that all required parameters are available
   * in order to create a configuration.
   */
  override def createConfiguration(): Configuration = {
    validate()

    val accountName = Bucket.requiredParam(params, "AZURE_ACCOUNT_NAME")
    val accountSecretKey = Bucket.requiredParam(params, "AZURE_SECRET_KEY")

    // Name each file system class literal once so every mapping below
    // fits on a single readable line.
    val nativeAzureFs = classOf[org.apache.hadoop.fs.azure.NativeAzureFileSystem].getName
    val wasbFs = classOf[org.apache.hadoop.fs.azure.Wasb].getName
    val wasbsFs = classOf[org.apache.hadoop.fs.azure.Wasbs].getName

    val conf = new Configuration()
    conf.set("fs.azure", nativeAzureFs)
    conf.set("fs.wasb.impl", nativeAzureFs)
    conf.set("fs.wasbs.impl", nativeAzureFs)
    conf.set("fs.AbstractFileSystem.wasb.impl", wasbFs)
    conf.set("fs.AbstractFileSystem.wasbs.impl", wasbsFs)
    conf.set(s"fs.azure.account.key.$accountName.blob.core.windows.net", accountSecretKey)

    conf
  }

}
Oops, something went wrong.