1. Enable ORC format data view.
2. Refactor code for better extensibility. Adding other data formats such as Avro will no longer be a chore (a hypothetical sketch of an Avro parser appears after the ParquetDataParser diff below).
Eugene committed Feb 9, 2020
1 parent 129396a commit fbee071
Showing 15 changed files with 273 additions and 84 deletions.
2 changes: 1 addition & 1 deletion src/main/java/org/eugene/controller/DashboardRenderer.java
@@ -12,7 +12,7 @@ public void setDashboard(Dashboard dashboard){
this.dashboard = dashboard;
}

public void refreshMetaInfo(Schema schema, File selectedFile, int rowNumber, int columnNumber){
public void refreshMetaInfo(String schema, File selectedFile, int rowNumber, int columnNumber){
dashboard.refresh(schema, selectedFile, rowNumber, columnNumber);
}
}
7 changes: 7 additions & 0 deletions src/main/java/org/eugene/controller/DataParser.java
@@ -0,0 +1,7 @@
package org.eugene.controller;

import org.apache.hadoop.fs.Path;

public abstract class DataParser {
public abstract boolean parseData(Path path);
}
12 changes: 12 additions & 0 deletions src/main/java/org/eugene/controller/ORCDataParser.java
@@ -0,0 +1,12 @@
package org.eugene.controller;

import org.apache.hadoop.fs.Path;
import org.eugene.core.orc.ORCReader;

public class ORCDataParser extends DataParser {
@Override
public boolean parseData(Path path) {
ORCReader reader = new ORCReader();
return reader.read(path);
}
}
68 changes: 68 additions & 0 deletions src/main/java/org/eugene/controller/ParquetDataParser.java
@@ -0,0 +1,68 @@
package org.eugene.controller;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.hadoop.fs.Path;
import org.eugene.core.parquet.ParquetReader;
import org.eugene.model.CommonData;
import org.eugene.model.TableMeta;
import org.eugene.persistent.VirtualDB;
import org.eugene.ui.Notifier;

import java.util.ArrayList;
import java.util.List;

public class ParquetDataParser extends DataParser{
@Override
public boolean parseData(Path path) {
ParquetReader reader = new ParquetReader();
List<GenericData.Record> originalData = reader.read(path);
if(originalData == null)
{
return false;
}
if (originalData.isEmpty()) {
Notifier.info("The file is empty");
return false;
}

GenericData.Record firstRecord = originalData.get(0);
Schema schema = firstRecord.getSchema();

int rowNumber = originalData.size();
List<String> propertyList = new ArrayList<>();
for (Schema.Field field: schema.getFields())
{
String property = field.name();
propertyList.add(property);
}
int columnNumber = propertyList.size();
TableMeta tableMeta = new TableMeta();
tableMeta.setRow(rowNumber);
tableMeta.setColumn(columnNumber);

List<List<String>> data = new ArrayList<>();
for (int i = 0; i < originalData.size(); i++) {
GenericData.Record record = originalData.get(i);
List<String> commonRecord = new ArrayList<>();
for (int j = 0; j < columnNumber; j++) {
if (record.get(j) == null){
commonRecord.add("NULL");
}else{
commonRecord.add(String.valueOf(record.get(j)));
}
}
data.add(commonRecord);
}

CommonData commonData = new CommonData();
commonData.setSchema(schema.toString());
commonData.setData(data);
commonData.setPropertyList(propertyList);

VirtualDB.getInstance().setCommonData(commonData);
VirtualDB.getInstance().setTableMeta(tableMeta);

return true;
}
}
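
To make the extensibility point from the commit message concrete: a new format only needs another DataParser subclass that fills VirtualDB. What follows is a hypothetical sketch of an Avro parser, not part of this commit — the Avro DataFileReader usage and the assumption that the Path points at a local .avro file are mine, and the flattening logic simply mirrors ParquetDataParser above.

package org.eugene.controller;

import java.io.File;
import java.util.ArrayList;
import java.util.List;

import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.fs.Path;
import org.eugene.model.CommonData;
import org.eugene.model.TableMeta;
import org.eugene.persistent.VirtualDB;

// Hypothetical sketch, not part of this commit: an Avro parser following the
// same contract as ORCDataParser and ParquetDataParser.
public class AvroDataParser extends DataParser {
    @Override
    public boolean parseData(Path path) {
        // Assumes the Path refers to a local .avro file, as the other parsers do.
        try (DataFileReader<GenericRecord> reader = new DataFileReader<>(
                new File(path.toString()), new GenericDatumReader<GenericRecord>())) {
            Schema schema = reader.getSchema();
            List<String> propertyList = new ArrayList<>();
            for (Schema.Field field : schema.getFields()) {
                propertyList.add(field.name());
            }
            // Flatten every record into a list of strings, mirroring ParquetDataParser.
            List<List<String>> data = new ArrayList<>();
            for (GenericRecord record : reader) {
                List<String> row = new ArrayList<>();
                for (int i = 0; i < propertyList.size(); i++) {
                    Object value = record.get(i);
                    row.add(value == null ? "NULL" : String.valueOf(value));
                }
                data.add(row);
            }
            CommonData commonData = new CommonData();
            commonData.setSchema(schema.toString());
            commonData.setData(data);
            commonData.setPropertyList(propertyList);
            TableMeta tableMeta = new TableMeta();
            tableMeta.setRow(data.size());
            tableMeta.setColumn(propertyList.size());
            VirtualDB.getInstance().setCommonData(commonData);
            VirtualDB.getInstance().setTableMeta(tableMeta);
            return true;
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }
}

With a class like this in place, Renderer.prepareData() would only need one more endsWith(".avro") branch to dispatch to it.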
74 changes: 23 additions & 51 deletions src/main/java/org/eugene/controller/Renderer.java
@@ -6,8 +6,10 @@
import org.apache.avro.generic.GenericData;
import org.apache.hadoop.fs.Path;
import org.eugene.core.parquet.ParquetReader;
import org.eugene.model.CommonData;
import org.eugene.model.Parquet;
import org.eugene.model.TableMeta;
import org.eugene.persistent.VirtualDB;
import org.eugene.ui.*;

import java.io.File;
@@ -21,15 +23,9 @@ public class Renderer {
private DashboardRenderer dashboardRenderer;

private List<String> showingList;
private List<String> propertyList;

private File selectedFile;

private Parquet parquet;
private TableMeta tableMeta;

private Schema schema;

public Renderer(Stage stage){
this.stage = stage;
tableRenderer = new TableRenderer();
@@ -46,64 +42,40 @@ public void initUI(){
}

public boolean loadAndShow(){
boolean status = prepareData();
if (status) {
tableRenderer.init();
showingList = propertyList;
dashboardRenderer.refreshMetaInfo(parquet.getSchema(), selectedFile, tableMeta.getRow(), tableMeta.getColumn());
tableRenderer.refresh(showingList, propertyList, tableMeta.getRow(), tableMeta.getColumn(), parquet.getData());
}
return status;
}

private boolean prepareData(){
FileChooser filechooser = new FileChooser();
selectedFile = filechooser.showOpenDialog(stage);
Path path = new Path(selectedFile.getAbsolutePath());
ParquetReader reader = new ParquetReader();
List<GenericData.Record> data = reader.read(path);
if(data == null)
{
return false;
}
if (data.isEmpty()) {
Notifier.info("The file is empty");
String absolutePath = selectedFile.getAbsolutePath();
Path path = new Path(absolutePath);
DataParser dataParser;
if (absolutePath.endsWith(".orc")){
dataParser = new ORCDataParser();
}else {
dataParser = new ParquetDataParser();
}
parquet = new Parquet();
parquet.setData(data);
GenericData.Record record = data.get(0);
schema = record.getSchema();
parquet.setSchema(schema);
int rowNumber = data.size();
showingList = new ArrayList<>();
propertyList = new ArrayList<>();
for (Schema.Field field: schema.getFields())
{
String property = field.name();
showingList.add(property);
propertyList.add(property);
boolean status = dataParser.parseData(path);
if (status) {
tableRenderer.init();
CommonData commonData = VirtualDB.getInstance().getCommonData();
TableMeta tableMeta = VirtualDB.getInstance().getTableMeta();
showingList = commonData.getPropertyList();
dashboardRenderer.refreshMetaInfo(commonData.getSchema(), selectedFile, tableMeta.getRow(), tableMeta.getColumn());
tableRenderer.refresh(showingList, commonData.getPropertyList(), tableMeta.getRow(), tableMeta.getColumn(), commonData.getData());
}
int columnNumber = propertyList.size();
tableMeta = new TableMeta();
tableMeta.setRow(rowNumber);
tableMeta.setColumn(columnNumber);

return true;
return status;
}

public List<GenericData.Record> getData(){
return parquet.getData();
public List<List<String>> getData(){
return VirtualDB.getInstance().getCommonData().getData();
}

public void refreshTable(){
refreshTable(showingList);
}

public void refreshTable(List<String> showingList){
tableRenderer.refresh(showingList, propertyList, tableMeta.getRow(), tableMeta.getColumn(), parquet.getData());
CommonData commonData = VirtualDB.getInstance().getCommonData();
TableMeta tableMeta = VirtualDB.getInstance().getTableMeta();
tableRenderer.refresh(showingList, commonData.getPropertyList(), tableMeta.getRow(), tableMeta.getColumn(), commonData.getData());
}

public Schema getSchema() {
return schema;
}
}
2 changes: 1 addition & 1 deletion src/main/java/org/eugene/controller/TableRenderer.java
@@ -18,7 +18,7 @@ public void setTable(Table table){
this.table = table;
}

public void refresh(List<String> showingList, List<String> propertyList, int rowNumber, int columnNumber, List<GenericData.Record> data){
public void refresh(List<String> showingList, List<String> propertyList, int rowNumber, int columnNumber, List<List<String>> data){
table.refresh(showingList, propertyList, rowNumber, columnNumber, data);
}

73 changes: 73 additions & 0 deletions src/main/java/org/eugene/core/orc/ORCReader.java
@@ -0,0 +1,73 @@
package org.eugene.core.orc;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.Reader;
import org.apache.hadoop.hive.ql.io.orc.RecordReader;

import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.eugene.model.CommonData;
import org.eugene.model.TableMeta;
import org.eugene.persistent.VirtualDB;
import org.eugene.ui.Notifier;

import java.util.ArrayList;
import java.util.List;

public class ORCReader {
public boolean read(Path path){
try{
Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(new Configuration()));
StructObjectInspector inspector = (StructObjectInspector)reader.getObjectInspector();
String schema = reader.getSchema().toJson();
// The JSON schema provided by the reader is illegal, so it needs to be made valid first
schema = schema.replaceAll("(\"[\\w]+\"):([\\s]+[{]+)", "$1,$2");
RecordReader records = reader.rows();
//These objects are the metadata for each column. They give you the type of each column and can parse it unless you
//want to parse each column yourself
List fields = inspector.getAllStructFieldRefs();
List<String> propertyList = new ArrayList<>();
int columnNumber = fields.size();
for(int i = 0; i < fields.size(); ++i) {
propertyList.add(((StructField)fields.get(i)).getFieldObjectInspector().getTypeName());
}

Object row = null;
List<List<String>> data = new ArrayList<>();
while(records.hasNext())
{
row = records.next(row);
List list = inspector.getStructFieldsDataAsList(row);
StringBuilder builder = new StringBuilder();
List<String> record = new ArrayList<>();
for(Object field : list) {
if(field != null){
record.add(field.toString());
}
else{
record.add("NULL");
}
}
data.add(record);
}
CommonData commonData = new CommonData();
commonData.setPropertyList(propertyList);
commonData.setSchema(schema);
commonData.setData(data);
TableMeta tableMeta = new TableMeta();
tableMeta.setColumn(columnNumber);
tableMeta.setRow(data.size());
VirtualDB.getInstance().setCommonData(commonData);
VirtualDB.getInstance().setTableMeta(tableMeta);
return true;
}catch(Exception e){
e.printStackTrace();
Notifier.error("Failed to load the file! The exception throws is: " + e.getMessage());
return false;
}

}

}
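
A note on the replaceAll above: the schema string returned by reader.getSchema().toJson() is not valid JSON (as the in-code comment says), and the regex rewrites each quoted field name that is followed by a colon and an opening brace into the name followed by a comma. Below is a small standalone illustration; the input string is an assumed shape of the raw output, not captured from a real ORC file.

public class OrcSchemaFixDemo {
    public static void main(String[] args) {
        // Assumed shape of the illegal toJson() output: "name": { ... } pairs inside an array.
        String raw = "{\"category\": \"struct\", \"fields\": [\"name\": {\"category\": \"string\"}]}";
        // Same regex as in ORCReader: drop the ':' after a quoted name that precedes '{' and insert ','.
        String fixed = raw.replaceAll("(\"[\\w]+\"):([\\s]+[{]+)", "$1,$2");
        System.out.println(fixed);
        // Prints: {"category": "struct", "fields": ["name", {"category": "string"}]}
    }
}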
33 changes: 33 additions & 0 deletions src/main/java/org/eugene/model/CommonData.java
@@ -0,0 +1,33 @@
package org.eugene.model;

import java.util.List;

public class CommonData {
private String schema;
private List<List<String>> data;
private List<String> propertyList;

public void setSchema(String schema){
this.schema = schema;
}

public String getSchema(){
return schema;
}

public void setData(List<List<String>> data){
this.data = data;
}

public List<List<String>> getData(){
return data;
}

public void setPropertyList(List<String> propertyList){
this.propertyList = propertyList;
}

public List<String> getPropertyList(){
return propertyList;
}
}
2 changes: 2 additions & 0 deletions src/main/java/org/eugene/model/Parquet.java
@@ -6,6 +6,7 @@
import java.util.List;

public class Parquet {
/**
private Schema schema;
private List<GenericData.Record> data;
@@ -24,4 +25,5 @@ public List<GenericData.Record> getData(){
public void setData(List<GenericData.Record> data){
this.data = data;
}
**/
}
35 changes: 35 additions & 0 deletions src/main/java/org/eugene/persistent/VirtualDB.java
@@ -0,0 +1,35 @@
package org.eugene.persistent;

import org.apache.calcite.avatica.proto.Common;
import org.eugene.model.CommonData;
import org.eugene.model.TableMeta;

public class VirtualDB {
private static VirtualDB instance = new VirtualDB();
private CommonData commonData;
private TableMeta tableMeta;

private VirtualDB(){

}

public static VirtualDB getInstance(){
return instance;
}

public void setCommonData(CommonData commonData){
this.commonData = commonData;
}

public CommonData getCommonData(){
return commonData;
}

public void setTableMeta(TableMeta tableMeta){
this.tableMeta = tableMeta;
}

public TableMeta getTableMeta(){
return tableMeta;
}
}
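
VirtualDB is a plain in-memory singleton that the parsers write to and the renderers read from. A minimal usage sketch, with placeholder values that are not taken from any real file:

import java.util.Arrays;
import java.util.List;
import org.eugene.model.CommonData;
import org.eugene.persistent.VirtualDB;

// Usage sketch only; the schema string and rows below are made-up placeholders.
public class VirtualDBDemo {
    public static void main(String[] args) {
        CommonData commonData = new CommonData();
        commonData.setSchema("{\"type\": \"record\"}");
        commonData.setPropertyList(Arrays.asList("id", "name"));
        commonData.setData(Arrays.asList(Arrays.asList("1", "Alice")));
        VirtualDB.getInstance().setCommonData(commonData);
        // A parser writes once; any renderer can then read the same data.
        List<List<String>> rows = VirtualDB.getInstance().getCommonData().getData();
        System.out.println(rows);
    }
}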
2 changes: 1 addition & 1 deletion src/main/java/org/eugene/ui/CustomizedMenuBar.java
@@ -47,7 +47,7 @@ public CustomizedMenuBar(Stage stage){
FileChooser fileChooser = new FileChooser();
File csvFile = fileChooser.showSaveDialog(stage);
Path path = new Path(csvFile.getAbsolutePath());
ArrayList<GenericData.Record> list = (ArrayList<GenericData.Record>) renderer.getData();
ArrayList<List<String>> list = (ArrayList<List<String>>) renderer.getData();
CSVWriter.write(new Path(csvFile.getAbsolutePath()), list);
});
MenuItem close = new MenuItem("Close");