From bc6c967f2a63e35baa5c47345f2c868bc44199ea Mon Sep 17 00:00:00 2001 From: mozhenghua Date: Thu, 23 Feb 2023 12:17:58 +0800 Subject: [PATCH] introduce IJobContainerContext into DataX to enable fetching CoreContainer resources in Writer or Reader Job/Task --- .../writer/adbpgwriter/AdbpgWriter.java | 31 +- .../writer/adbpgwriter/util/Adb4pgUtil.java | 16 +- .../plugin/writer/adswriter/AdsWriter.java | 8 +- .../adswriter/insert/AdsInsertProxy.java | 18 +- .../plugin/writer/adswriter/util/AdsUtil.java | 5 +- .../clickhousewriter/ClickhouseWriter.java | 4 +- .../datax/common/element/BoolColumn.java | 6 + .../datax/common/element/BytesColumn.java | 6 + .../alibaba/datax/common/element/Column.java | 2 + .../datax/common/element/ColumnCast.java | 362 +++--- .../datax/common/element/DateColumn.java | 7 +- .../datax/common/element/DoubleColumn.java | 12 +- .../datax/common/element/LongColumn.java | 9 +- .../datax/common/element/StringColumn.java | 10 + .../common/exception/DataXException.java | 70 ++ .../common/plugin/AbstractJobPlugin.java | 29 +- .../datax/common/plugin/AbstractPlugin.java | 68 +- .../common/plugin/AbstractTaskPlugin.java | 4 + .../datax/common/plugin/Pluginable.java | 9 +- .../alibaba/datax/common/util/DESCipher.java | 229 ++++ .../common/util/IdAndKeyRollingUtil.java | 62 + .../alibaba/datax/common/util/ListUtil.java | 176 ++- .../datax/core/job/IJobContainerContext.java | 12 + .../alibaba/datax/core/job/JobContainer.java | 35 +- .../plugin/task/util/DirtyRecord.java | 6 + .../writer/doriswriter/DorisWriter.java | 2 +- .../plugin/reader/drdsreader/DrdsReader.java | 8 +- .../plugin/writer/drdswriter/DrdsWriter.java | 4 +- .../kingbaseesreader/KingbaseesReader.java | 6 +- .../kingbaseeswriter/KingbaseesWriter.java | 168 +-- .../writer/mongodbwriter/MongoDBWriter.java | 2 +- .../reader/mysqlreader/MysqlReader.java | 7 +- .../writer/mysqlwriter/MysqlWriter.java | 8 +- .../OceanBaseV10Writer.java | 12 +- odpswriter/pom.xml | 44 +- odpswriter/src/main/assembly/package.xml | 7 - .../plugin/writer/odpswriter/Constant.java | 30 + .../writer/odpswriter/DateTransForm.java | 57 + .../datax/plugin/writer/odpswriter/Key.java | 56 + .../writer/odpswriter/LocalStrings.properties | 34 + .../plugin/writer/odpswriter/OdpsWriter.java | 604 ++++++++-- .../odpswriter/OdpsWriterErrorCode.java | 45 +- .../writer/odpswriter/OdpsWriterProxy.java | 1043 +++++++++++++++-- .../odpswriter/model/PartitionInfo.java | 87 ++ .../odpswriter/model/UserDefinedFunction.java | 44 + .../model/UserDefinedFunctionRule.java | 26 + .../odpswriter/util/CustomPartitionUtils.java | 54 + .../writer/odpswriter/util/IdAndKeyUtil.java | 42 +- .../odpswriter/util/LocalStrings.properties | 39 + .../odpswriter/util/OdpsExceptionMsg.java | 3 - .../writer/odpswriter/util/OdpsUtil.java | 605 ++++++++-- .../reader/oraclereader/OracleReader.java | 140 ++- .../writer/oraclewriter/OracleWriter.java | 155 ++- .../writer/oscarwriter/OscarWriter.java | 137 ++- .../rdbms/reader/CommonRdbmsReader.java | 17 +- .../datax/plugin/rdbms/util/DBUtil.java | 32 +- .../plugin/rdbms/util/DataXResourceName.java | 26 + .../rdbms/writer/CommonRdbmsWriter.java | 15 +- .../postgresqlreader/PostgresqlReader.java | 5 +- .../postgresqlwriter/PostgresqlWriter.java | 168 +-- .../reader/rdbmsreader/RdbmsReader.java | 12 +- .../rdbmsreader/SubCommonRdbmsReader.java | 9 +- .../reader/rdbmswriter/RdbmsWriter.java | 12 +- .../rdbmswriter/SubCommonRdbmsWriter.java | 9 +- .../sqlserverreader/SqlServerReader.java | 146 +-- .../sqlserverwriter/SqlServerWriter.java | 
4 +- .../starrockswriter/StarRocksWriter.java | 4 +- 67 files changed, 3926 insertions(+), 1198 deletions(-) create mode 100755 common/src/main/java/com/alibaba/datax/common/exception/DataXException.java create mode 100755 common/src/main/java/com/alibaba/datax/common/util/DESCipher.java create mode 100644 common/src/main/java/com/alibaba/datax/common/util/IdAndKeyRollingUtil.java create mode 100644 common/src/main/java/com/alibaba/datax/core/job/IJobContainerContext.java create mode 100644 odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/DateTransForm.java create mode 100644 odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/LocalStrings.properties create mode 100644 odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/model/PartitionInfo.java create mode 100644 odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/model/UserDefinedFunction.java create mode 100644 odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/model/UserDefinedFunctionRule.java create mode 100644 odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/CustomPartitionUtils.java create mode 100644 odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/LocalStrings.properties create mode 100644 plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/util/DataXResourceName.java diff --git a/adbpgwriter/src/main/java/com/alibaba/datax/plugin/writer/adbpgwriter/AdbpgWriter.java b/adbpgwriter/src/main/java/com/alibaba/datax/plugin/writer/adbpgwriter/AdbpgWriter.java index 9e5b548736..321bae6b28 100644 --- a/adbpgwriter/src/main/java/com/alibaba/datax/plugin/writer/adbpgwriter/AdbpgWriter.java +++ b/adbpgwriter/src/main/java/com/alibaba/datax/plugin/writer/adbpgwriter/AdbpgWriter.java @@ -1,24 +1,17 @@ package com.alibaba.datax.plugin.writer.adbpgwriter; import com.alibaba.datax.common.plugin.RecordReceiver; -import com.alibaba.datax.common.plugin.RecordSender; import com.alibaba.datax.common.spi.Writer; import com.alibaba.datax.common.util.Configuration; - -import java.util.ArrayList; -import java.util.List; - import com.alibaba.datax.plugin.rdbms.util.DataBaseType; import com.alibaba.datax.plugin.rdbms.writer.CommonRdbmsWriter; -import com.alibaba.datax.plugin.rdbms.writer.Key; -import com.alibaba.datax.plugin.rdbms.writer.util.OriginalConfPretreatmentUtil; import com.alibaba.datax.plugin.writer.adbpgwriter.copy.Adb4pgClientProxy; import com.alibaba.datax.plugin.writer.adbpgwriter.util.Adb4pgUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode.*; -import static com.alibaba.datax.plugin.rdbms.util.DataBaseType.PostgreSQL; +import java.util.ArrayList; +import java.util.List; /** * @author yuncheng @@ -36,7 +29,7 @@ public static class Job extends Writer.Job { public void init() { this.originalConfig = super.getPluginJobConf(); LOG.info("in Job.init(), config is:[\n{}\n]", originalConfig.toJSON()); - this.commonRdbmsWriterMaster = new CommonRdbmsWriter.Job(DATABASE_TYPE); + this.commonRdbmsWriterMaster = new CommonRdbmsWriter.Job(DATABASE_TYPE, this.containerContext); //convert to DatabaseConfig, use DatabaseConfig to check user configuration Adb4pgUtil.checkConfig(originalConfig); } @@ -44,13 +37,13 @@ public void init() { @Override public void prepare() { - Adb4pgUtil.prepare(originalConfig); + Adb4pgUtil.prepare(originalConfig, this.containerContext); } @Override public List split(int adviceNumber) { List splitResult = new 
ArrayList(); - for(int i = 0; i < adviceNumber; i++) { + for (int i = 0; i < adviceNumber; i++) { splitResult.add(this.originalConfig.clone()); } return splitResult; @@ -59,7 +52,7 @@ public List split(int adviceNumber) { @Override public void post() { - Adb4pgUtil.post(originalConfig); + Adb4pgUtil.post(originalConfig, this.containerContext); } @Override @@ -68,24 +61,24 @@ public void destroy() { } - } public static class Task extends Writer.Task { private Configuration writerSliceConfig; private CommonRdbmsWriter.Task commonRdbmsWriterSlave; private Adb4pgClientProxy adb4pgClientProxy; + //Adb4pgClient client; @Override public void init() { this.writerSliceConfig = super.getPluginJobConf(); this.adb4pgClientProxy = new Adb4pgClientProxy(writerSliceConfig, super.getTaskPluginCollector()); - this.commonRdbmsWriterSlave = new CommonRdbmsWriter.Task(DATABASE_TYPE){ + this.commonRdbmsWriterSlave = new CommonRdbmsWriter.Task(DATABASE_TYPE, this.containerContext) { @Override - public String calcValueHolder(String columnType){ - if("serial".equalsIgnoreCase(columnType)){ + public String calcValueHolder(String columnType) { + if ("serial".equalsIgnoreCase(columnType)) { return "?::int"; - }else if("bit".equalsIgnoreCase(columnType)){ + } else if ("bit".equalsIgnoreCase(columnType)) { return "?::bit varying"; } return "?::" + columnType; @@ -100,7 +93,7 @@ public void prepare() { @Override public void startWrite(RecordReceiver recordReceiver) { - this.adb4pgClientProxy.startWriteWithConnection(recordReceiver, Adb4pgUtil.getAdbpgConnect(writerSliceConfig)); + this.adb4pgClientProxy.startWriteWithConnection(recordReceiver, Adb4pgUtil.getAdbpgConnect(writerSliceConfig, this.containerContext)); } @Override diff --git a/adbpgwriter/src/main/java/com/alibaba/datax/plugin/writer/adbpgwriter/util/Adb4pgUtil.java b/adbpgwriter/src/main/java/com/alibaba/datax/plugin/writer/adbpgwriter/util/Adb4pgUtil.java index e555048395..75c7c852bb 100644 --- a/adbpgwriter/src/main/java/com/alibaba/datax/plugin/writer/adbpgwriter/util/Adb4pgUtil.java +++ b/adbpgwriter/src/main/java/com/alibaba/datax/plugin/writer/adbpgwriter/util/Adb4pgUtil.java @@ -4,6 +4,7 @@ import com.alibaba.cloud.analyticdb.adb4pgclient.Adb4pgClientException; import com.alibaba.cloud.analyticdb.adb4pgclient.DatabaseConfig; import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.core.job.IJobContainerContext; import com.alibaba.datax.plugin.rdbms.util.DBUtil; import com.alibaba.datax.plugin.rdbms.util.DataBaseType; import com.alibaba.datax.plugin.rdbms.writer.Key; @@ -35,6 +36,7 @@ public static void checkConfig(Configuration originalConfig) { throw new Adb4pgClientException(Adb4pgClientException.CONFIG_ERROR, "Check config exception: " + e.getMessage(), null); } } + public static DatabaseConfig convertConfiguration(Configuration originalConfig) { originalConfig.getNecessaryValue(Key.USERNAME, COLUMN_SPLIT_ERROR); originalConfig.getNecessaryValue(Key.PASSWORD, COLUMN_SPLIT_ERROR); @@ -80,11 +82,11 @@ private static Map> splitBySchemaName(List tables) return res; } - public static Connection getAdbpgConnect(Configuration conf) { + public static Connection getAdbpgConnect(Configuration conf, IJobContainerContext containerContext) { String userName = conf.getString(Key.USERNAME); String passWord = conf.getString(Key.PASSWORD); - com.qlangtech.tis.plugin.ds.IDataSourceFactoryGetter dsFactoryGetter = DBUtil.getReaderDataSourceFactoryGetter(conf); + com.qlangtech.tis.plugin.ds.IDataSourceFactoryGetter dsFactoryGetter = 
DBUtil.getReaderDataSourceFactoryGetter(conf, containerContext); return DBUtil.getConnection(dsFactoryGetter, generateJdbcUrl(conf), userName, passWord); @@ -99,7 +101,7 @@ private static String generateJdbcUrl(Configuration configuration) { } - public static void prepare(Configuration originalConfig) { + public static void prepare(Configuration originalConfig, IJobContainerContext containerContext) { List preSqls = originalConfig.getList(Key.PRE_SQL, String.class); @@ -113,17 +115,17 @@ public static void prepare(Configuration originalConfig) { originalConfig.remove(Key.PRE_SQL); - Connection conn = getAdbpgConnect(originalConfig); + Connection conn = getAdbpgConnect(originalConfig, containerContext); WriterUtil.executeSqls(conn, renderedPreSqls, generateJdbcUrl(originalConfig), DATABASE_TYPE); DBUtil.closeDBResources(null, null, conn); } - public static void post(Configuration configuration) { + public static void post(Configuration configuration, IJobContainerContext containerContext) { List postSqls = configuration.getList(Key.POST_SQL, String.class); - SelectTable tableName = SelectTable.createInTask( configuration);//.getString(Key.TABLE); + SelectTable tableName = SelectTable.createInTask(configuration);//.getString(Key.TABLE); List renderedPostSqls = WriterUtil.renderPreOrPostSqls( postSqls, tableName); @@ -133,7 +135,7 @@ public static void post(Configuration configuration) { configuration.remove(Key.POST_SQL); - Connection conn = getAdbpgConnect(configuration); + Connection conn = getAdbpgConnect(configuration, containerContext); WriterUtil.executeSqls(conn, renderedPostSqls, generateJdbcUrl(configuration), DATABASE_TYPE); DBUtil.closeDBResources(null, null, conn); diff --git a/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/AdsWriter.java b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/AdsWriter.java index 4a7b247074..e959f6319c 100644 --- a/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/AdsWriter.java +++ b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/AdsWriter.java @@ -198,7 +198,7 @@ public void prepare() { if (null != renderedPreSqls && !renderedPreSqls.isEmpty()) { // 说明有 preSql 配置,则此处删除掉 this.originalConfig.remove(Key.PRE_SQL); - Connection preConn = AdsUtil.getAdsConnect(this.originalConfig); + Connection preConn = AdsUtil.getAdsConnect(this.originalConfig, this.containerContext); LOG.info("Begin to execute preSqls:[{}]. context info:{}.", StringUtils.join(renderedPreSqls, ";"), this.originalConfig.getString(Key.ADS_URL)); @@ -238,7 +238,7 @@ public void post() { if (null != renderedPostSqls && !renderedPostSqls.isEmpty()) { // 说明有 preSql 配置,则此处删除掉 this.originalConfig.remove(Key.POST_SQL); - Connection postConn = AdsUtil.getAdsConnect(this.originalConfig); + Connection postConn = AdsUtil.getAdsConnect(this.originalConfig, this.containerContext); LOG.info( "Begin to execute postSqls:[{}]. 
context info:{}.", StringUtils.join(renderedPostSqls, ";"), @@ -372,13 +372,13 @@ public void startWrite(RecordReceiver recordReceiver) { } else { // insert 模式 List columns = writerSliceConfig.getList(Key.COLUMN, String.class); - Connection connection = AdsUtil.getAdsConnect(this.writerSliceConfig); + Connection connection = AdsUtil.getAdsConnect(this.writerSliceConfig, this.containerContext); TaskPluginCollector taskPluginCollector = super.getTaskPluginCollector(); if (StringUtils.equalsIgnoreCase(this.writeProxy, "adbClient")) { this.proxy = new AdsClientProxy(table, columns, writerSliceConfig, taskPluginCollector, this.tableInfo); } else { - this.proxy = new AdsInsertProxy(schema + "." + table, columns, writerSliceConfig, taskPluginCollector, this.tableInfo); + this.proxy = new AdsInsertProxy(schema + "." + table, columns, writerSliceConfig, taskPluginCollector, this.tableInfo, this.containerContext); } proxy.startWriteWithConnection(recordReceiver, connection, columnNumber); } diff --git a/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/insert/AdsInsertProxy.java b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/insert/AdsInsertProxy.java index 49abda40b1..d3bc9117ce 100644 --- a/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/insert/AdsInsertProxy.java +++ b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/insert/AdsInsertProxy.java @@ -7,6 +7,7 @@ import com.alibaba.datax.common.plugin.TaskPluginCollector; import com.alibaba.datax.common.util.Configuration; import com.alibaba.datax.common.util.RetryUtil; +import com.alibaba.datax.core.job.IJobContainerContext; import com.alibaba.datax.plugin.rdbms.util.DBUtil; import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode; import com.alibaba.datax.plugin.writer.adswriter.ads.TableInfo; @@ -14,21 +15,14 @@ import com.alibaba.datax.plugin.writer.adswriter.util.Constant; import com.alibaba.datax.plugin.writer.adswriter.util.Key; import com.mysql.jdbc.JDBC4PreparedStatement; - import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.tuple.Pair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.sql.*; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; import java.util.Map.Entry; -import java.util.Set; import java.util.concurrent.Callable; import java.util.zip.CRC32; import java.util.zip.Checksum; @@ -65,10 +59,12 @@ public class AdsInsertProxy implements AdsProxy { private String partitionColumn; private int partitionColumnIndex = -1; private int partitionCount; + private IJobContainerContext containerContext; - public AdsInsertProxy(String table, List columns, Configuration configuration, TaskPluginCollector taskPluginCollector, TableInfo tableInfo) { + public AdsInsertProxy(String table, List columns, Configuration configuration, TaskPluginCollector taskPluginCollector, TableInfo tableInfo, IJobContainerContext containerContext) { this.table = table; this.columns = columns; + this.containerContext = containerContext; this.configuration = configuration; this.taskPluginCollector = taskPluginCollector; this.emptyAsNull = configuration.getBool(Key.EMPTY_AS_NULL, false); @@ -308,7 +304,7 @@ private void doBatchRecordDml(List buffer, String mode) throws Exception while (null != eachException && maxIter < AdsInsertProxy.MAX_EXCEPTION_CAUSE_ITER) { if (this.isRetryable(eachException)) { 
LOG.warn("doBatchRecordDml meet a retry exception: " + e.getMessage()); - this.currentConnection = AdsUtil.getAdsConnect(this.configuration); + this.currentConnection = AdsUtil.getAdsConnect(this.configuration, containerContext); throw eachException; } else { try { @@ -375,7 +371,7 @@ private void doOneRecordDml(Record record, String mode) throws Exception { while (null != eachException && maxIter < AdsInsertProxy.MAX_EXCEPTION_CAUSE_ITER) { if (this.isRetryable(eachException)) { LOG.warn("doOneDml meet a retry exception: " + e.getMessage()); - this.currentConnection = AdsUtil.getAdsConnect(this.configuration); + this.currentConnection = AdsUtil.getAdsConnect(this.configuration, this.containerContext); throw eachException; } else { try { diff --git a/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/util/AdsUtil.java b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/util/AdsUtil.java index bea28dd33e..179f17ee89 100644 --- a/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/util/AdsUtil.java +++ b/adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/util/AdsUtil.java @@ -2,6 +2,7 @@ import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.core.job.IJobContainerContext; import com.alibaba.datax.plugin.rdbms.util.DBUtil; import com.alibaba.datax.plugin.writer.adswriter.AdsWriterErrorCode; import com.alibaba.datax.plugin.writer.adswriter.load.AdsHelper; @@ -167,13 +168,13 @@ public static String prepareJdbcUrl(String adsURL, String schema, private static IDataSourceFactoryGetter dataSourceFactoryGetter; - public static Connection getAdsConnect(Configuration conf) { + public static Connection getAdsConnect(Configuration conf, IJobContainerContext containerContext) { String userName = conf.getString(Key.USERNAME); String passWord = conf.getString(Key.PASSWORD); String jdbcUrl = AdsUtil.prepareJdbcUrl(conf); if (dataSourceFactoryGetter == null) { - dataSourceFactoryGetter = DBUtil.getReaderDataSourceFactoryGetter(conf); + dataSourceFactoryGetter = DBUtil.getReaderDataSourceFactoryGetter(conf, containerContext); } Connection connection = DBUtil.getConnection(dataSourceFactoryGetter, jdbcUrl, userName, passWord); return connection; diff --git a/clickhousewriter/src/main/java/com/alibaba/datax/plugin/writer/clickhousewriter/ClickhouseWriter.java b/clickhousewriter/src/main/java/com/alibaba/datax/plugin/writer/clickhousewriter/ClickhouseWriter.java index e051455fc5..63991730b3 100644 --- a/clickhousewriter/src/main/java/com/alibaba/datax/plugin/writer/clickhousewriter/ClickhouseWriter.java +++ b/clickhousewriter/src/main/java/com/alibaba/datax/plugin/writer/clickhousewriter/ClickhouseWriter.java @@ -30,7 +30,7 @@ public static class Job extends Writer.Job { @Override public void init() { this.originalConfig = super.getPluginJobConf(); - this.commonRdbmsWriterMaster = new CommonRdbmsWriter.Job(DATABASE_TYPE); + this.commonRdbmsWriterMaster = new CommonRdbmsWriter.Job(DATABASE_TYPE,this.containerContext); this.commonRdbmsWriterMaster.init(this.originalConfig); } @@ -64,7 +64,7 @@ public static class Task extends Writer.Task { public void init() { this.writerSliceConfig = super.getPluginJobConf(); - this.commonRdbmsWriterSlave = new CommonRdbmsWriter.Task(DATABASE_TYPE) { + this.commonRdbmsWriterSlave = new CommonRdbmsWriter.Task(DATABASE_TYPE,this.containerContext) { // @Override // protected PreparedStatement fillPreparedStatementColumnType( diff --git 
a/common/src/main/java/com/alibaba/datax/common/element/BoolColumn.java b/common/src/main/java/com/alibaba/datax/common/element/BoolColumn.java index 7699e152ae..8a62ff38c1 100755 --- a/common/src/main/java/com/alibaba/datax/common/element/BoolColumn.java +++ b/common/src/main/java/com/alibaba/datax/common/element/BoolColumn.java @@ -93,6 +93,12 @@ public Date asDate() { CommonErrorCode.CONVERT_NOT_SUPPORT, "Bool类型不能转为Date ."); } + @Override + public Date asDate(String dateFormat) { + throw DataXException.asDataXException( + CommonErrorCode.CONVERT_NOT_SUPPORT, "Bool类型不能转为Date ."); + } + @Override public byte[] asBytes() { throw DataXException.asDataXException( diff --git a/common/src/main/java/com/alibaba/datax/common/element/BytesColumn.java b/common/src/main/java/com/alibaba/datax/common/element/BytesColumn.java index d3cc599361..6d653a9576 100755 --- a/common/src/main/java/com/alibaba/datax/common/element/BytesColumn.java +++ b/common/src/main/java/com/alibaba/datax/common/element/BytesColumn.java @@ -76,6 +76,12 @@ public Date asDate() { CommonErrorCode.CONVERT_NOT_SUPPORT, "Bytes类型不能转为Date ."); } + @Override + public Date asDate(String dateFormat) { + throw DataXException.asDataXException( + CommonErrorCode.CONVERT_NOT_SUPPORT, "Bytes类型不能转为Date ."); + } + @Override public Boolean asBoolean() { throw DataXException.asDataXException( diff --git a/common/src/main/java/com/alibaba/datax/common/element/Column.java b/common/src/main/java/com/alibaba/datax/common/element/Column.java index ed68e88d6b..29b6ee11f8 100755 --- a/common/src/main/java/com/alibaba/datax/common/element/Column.java +++ b/common/src/main/java/com/alibaba/datax/common/element/Column.java @@ -56,6 +56,8 @@ protected void setByteSize(int byteSize) { public abstract Date asDate(); + public abstract Date asDate(String dateFormat); + public abstract byte[] asBytes(); public abstract Boolean asBoolean(); diff --git a/common/src/main/java/com/alibaba/datax/common/element/ColumnCast.java b/common/src/main/java/com/alibaba/datax/common/element/ColumnCast.java index 89d0a7c627..f7952c79cd 100755 --- a/common/src/main/java/com/alibaba/datax/common/element/ColumnCast.java +++ b/common/src/main/java/com/alibaba/datax/common/element/ColumnCast.java @@ -8,192 +8,210 @@ import java.io.UnsupportedEncodingException; import java.text.ParseException; -import java.util.*; +import java.util.Collections; +import java.util.Date; +import java.util.List; +import java.util.TimeZone; public final class ColumnCast { - public static void bind(final Configuration configuration) { - StringCast.init(configuration); - DateCast.init(configuration); - BytesCast.init(configuration); - } - - public static Date string2Date(final StringColumn column) - throws ParseException { - return StringCast.asDate(column); - } - - public static byte[] string2Bytes(final StringColumn column) - throws UnsupportedEncodingException { - return StringCast.asBytes(column); - } - - public static String date2String(final DateColumn column) { - return DateCast.asString(column); - } - - public static String bytes2String(final BytesColumn column) - throws UnsupportedEncodingException { - return BytesCast.asString(column); - } + public static void bind(final Configuration configuration) { + StringCast.init(configuration); + DateCast.init(configuration); + BytesCast.init(configuration); + } + + public static Date string2Date(final StringColumn column) + throws ParseException { + return StringCast.asDate(column); + } + + public static Date string2Date(final StringColumn column, String 
dateFormat) + throws ParseException { + return StringCast.asDate(column, dateFormat); + } + + public static byte[] string2Bytes(final StringColumn column) + throws UnsupportedEncodingException { + return StringCast.asBytes(column); + } + + public static String date2String(final DateColumn column) { + return DateCast.asString(column); + } + + public static String bytes2String(final BytesColumn column) + throws UnsupportedEncodingException { + return BytesCast.asString(column); + } } class StringCast { - static String datetimeFormat = "yyyy-MM-dd HH:mm:ss"; - - static String dateFormat = "yyyy-MM-dd"; - - static String timeFormat = "HH:mm:ss"; - - static List extraFormats = Collections.emptyList(); - - static String timeZone = "GMT+8"; - - static FastDateFormat dateFormatter; - - static FastDateFormat timeFormatter; - - static FastDateFormat datetimeFormatter; - - static TimeZone timeZoner; - - static String encoding = "UTF-8"; - - static void init(final Configuration configuration) { - StringCast.datetimeFormat = configuration.getString( - "common.column.datetimeFormat", StringCast.datetimeFormat); - StringCast.dateFormat = configuration.getString( - "common.column.dateFormat", StringCast.dateFormat); - StringCast.timeFormat = configuration.getString( - "common.column.timeFormat", StringCast.timeFormat); - StringCast.extraFormats = configuration.getList( - "common.column.extraFormats", Collections.emptyList(), String.class); - - StringCast.timeZone = configuration.getString("common.column.timeZone", - StringCast.timeZone); - StringCast.timeZoner = TimeZone.getTimeZone(StringCast.timeZone); - - StringCast.datetimeFormatter = FastDateFormat.getInstance( - StringCast.datetimeFormat, StringCast.timeZoner); - StringCast.dateFormatter = FastDateFormat.getInstance( - StringCast.dateFormat, StringCast.timeZoner); - StringCast.timeFormatter = FastDateFormat.getInstance( - StringCast.timeFormat, StringCast.timeZoner); - - StringCast.encoding = configuration.getString("common.column.encoding", - StringCast.encoding); - } - - static Date asDate(final StringColumn column) throws ParseException { - if (null == column.asString()) { - return null; - } - - try { - return StringCast.datetimeFormatter.parse(column.asString()); - } catch (ParseException ignored) { - } - - try { - return StringCast.dateFormatter.parse(column.asString()); - } catch (ParseException ignored) { - } - - ParseException e; - try { - return StringCast.timeFormatter.parse(column.asString()); - } catch (ParseException ignored) { - e = ignored; - } - - for (String format : StringCast.extraFormats) { - try{ - return FastDateFormat.getInstance(format, StringCast.timeZoner).parse(column.asString()); - } catch (ParseException ignored){ - e = ignored; - } - } - throw e; - } - - static byte[] asBytes(final StringColumn column) - throws UnsupportedEncodingException { - if (null == column.asString()) { - return null; - } - - return column.asString().getBytes(StringCast.encoding); - } + static String datetimeFormat = "yyyy-MM-dd HH:mm:ss"; + + static String dateFormat = "yyyy-MM-dd"; + + static String timeFormat = "HH:mm:ss"; + + static List extraFormats = Collections.emptyList(); + + static String timeZone = "GMT+8"; + + static FastDateFormat dateFormatter; + + static FastDateFormat timeFormatter; + + static FastDateFormat datetimeFormatter; + + static TimeZone timeZoner; + + static String encoding = "UTF-8"; + + static void init(final Configuration configuration) { + StringCast.datetimeFormat = configuration.getString( + 
"common.column.datetimeFormat", StringCast.datetimeFormat); + StringCast.dateFormat = configuration.getString( + "common.column.dateFormat", StringCast.dateFormat); + StringCast.timeFormat = configuration.getString( + "common.column.timeFormat", StringCast.timeFormat); + StringCast.extraFormats = configuration.getList( + "common.column.extraFormats", Collections.emptyList(), String.class); + + StringCast.timeZone = configuration.getString("common.column.timeZone", + StringCast.timeZone); + StringCast.timeZoner = TimeZone.getTimeZone(StringCast.timeZone); + + StringCast.datetimeFormatter = FastDateFormat.getInstance( + StringCast.datetimeFormat, StringCast.timeZoner); + StringCast.dateFormatter = FastDateFormat.getInstance( + StringCast.dateFormat, StringCast.timeZoner); + StringCast.timeFormatter = FastDateFormat.getInstance( + StringCast.timeFormat, StringCast.timeZoner); + + StringCast.encoding = configuration.getString("common.column.encoding", + StringCast.encoding); + } + + static Date asDate(final StringColumn column) throws ParseException { + if (null == column.asString()) { + return null; + } + + try { + return StringCast.datetimeFormatter.parse(column.asString()); + } catch (ParseException ignored) { + } + + try { + return StringCast.dateFormatter.parse(column.asString()); + } catch (ParseException ignored) { + } + + ParseException e; + try { + return StringCast.timeFormatter.parse(column.asString()); + } catch (ParseException ignored) { + e = ignored; + } + + for (String format : StringCast.extraFormats) { + try { + return FastDateFormat.getInstance(format, StringCast.timeZoner).parse(column.asString()); + } catch (ParseException ignored) { + e = ignored; + } + } + throw e; + } + + static Date asDate(final StringColumn column, String dateFormat) throws ParseException { + ParseException e; + try { + return FastDateFormat.getInstance(dateFormat, StringCast.timeZoner).parse(column.asString()); + } catch (ParseException ignored) { + e = ignored; + } + throw e; + } + + static byte[] asBytes(final StringColumn column) + throws UnsupportedEncodingException { + if (null == column.asString()) { + return null; + } + + return column.asString().getBytes(StringCast.encoding); + } } /** * 后续为了可维护性,可以考虑直接使用 apache 的DateFormatUtils. - * + *

* 迟南已经修复了该问题,但是为了维护性,还是直接使用apache的内置函数 */ class DateCast { - static String datetimeFormat = "yyyy-MM-dd HH:mm:ss"; - - static String dateFormat = "yyyy-MM-dd"; - - static String timeFormat = "HH:mm:ss"; - - static String timeZone = "GMT+8"; - - static TimeZone timeZoner = TimeZone.getTimeZone(DateCast.timeZone); - - static void init(final Configuration configuration) { - DateCast.datetimeFormat = configuration.getString( - "common.column.datetimeFormat", datetimeFormat); - DateCast.timeFormat = configuration.getString( - "common.column.timeFormat", timeFormat); - DateCast.dateFormat = configuration.getString( - "common.column.dateFormat", dateFormat); - DateCast.timeZone = configuration.getString("common.column.timeZone", - DateCast.timeZone); - DateCast.timeZoner = TimeZone.getTimeZone(DateCast.timeZone); - return; - } - - static String asString(final DateColumn column) { - if (null == column.asDate()) { - return null; - } - - switch (column.getSubType()) { - case DATE: - return DateFormatUtils.format(column.asDate(), DateCast.dateFormat, - DateCast.timeZoner); - case TIME: - return DateFormatUtils.format(column.asDate(), DateCast.timeFormat, - DateCast.timeZoner); - case DATETIME: - return DateFormatUtils.format(column.asDate(), - DateCast.datetimeFormat, DateCast.timeZoner); - default: - throw DataXException - .asDataXException(CommonErrorCode.CONVERT_NOT_SUPPORT, - "时间类型出现不支持类型,目前仅支持DATE/TIME/DATETIME。该类型属于编程错误,请反馈给DataX开发团队 ."); - } - } + static String datetimeFormat = "yyyy-MM-dd HH:mm:ss"; + + static String dateFormat = "yyyy-MM-dd"; + + static String timeFormat = "HH:mm:ss"; + + static String timeZone = "GMT+8"; + + static TimeZone timeZoner = TimeZone.getTimeZone(DateCast.timeZone); + + static void init(final Configuration configuration) { + DateCast.datetimeFormat = configuration.getString( + "common.column.datetimeFormat", datetimeFormat); + DateCast.timeFormat = configuration.getString( + "common.column.timeFormat", timeFormat); + DateCast.dateFormat = configuration.getString( + "common.column.dateFormat", dateFormat); + DateCast.timeZone = configuration.getString("common.column.timeZone", + DateCast.timeZone); + DateCast.timeZoner = TimeZone.getTimeZone(DateCast.timeZone); + return; + } + + static String asString(final DateColumn column) { + if (null == column.asDate()) { + return null; + } + + switch (column.getSubType()) { + case DATE: + return DateFormatUtils.format(column.asDate(), DateCast.dateFormat, + DateCast.timeZoner); + case TIME: + return DateFormatUtils.format(column.asDate(), DateCast.timeFormat, + DateCast.timeZoner); + case DATETIME: + return DateFormatUtils.format(column.asDate(), + DateCast.datetimeFormat, DateCast.timeZoner); + default: + throw DataXException + .asDataXException(CommonErrorCode.CONVERT_NOT_SUPPORT, + "时间类型出现不支持类型,目前仅支持DATE/TIME/DATETIME。该类型属于编程错误,请反馈给DataX开发团队 ."); + } + } } class BytesCast { - static String encoding = "utf-8"; - - static void init(final Configuration configuration) { - BytesCast.encoding = configuration.getString("common.column.encoding", - BytesCast.encoding); - return; - } - - static String asString(final BytesColumn column) - throws UnsupportedEncodingException { - if (null == column.asBytes()) { - return null; - } - - return new String(column.asBytes(), encoding); - } -} \ No newline at end of file + static String encoding = "utf-8"; + + static void init(final Configuration configuration) { + BytesCast.encoding = configuration.getString("common.column.encoding", + BytesCast.encoding); + return; + } + + static String 
asString(final BytesColumn column) + throws UnsupportedEncodingException { + if (null == column.asBytes()) { + return null; + } + + return new String(column.asBytes(), encoding); + } +} diff --git a/common/src/main/java/com/alibaba/datax/common/element/DateColumn.java b/common/src/main/java/com/alibaba/datax/common/element/DateColumn.java index 6626a6fbdd..404332f460 100755 --- a/common/src/main/java/com/alibaba/datax/common/element/DateColumn.java +++ b/common/src/main/java/com/alibaba/datax/common/element/DateColumn.java @@ -90,6 +90,11 @@ public Date asDate() { return new Date((Long)this.getRawData()); } + @Override + public Date asDate(String dateFormat) { + return asDate(); + } + @Override public byte[] asBytes() { throw DataXException.asDataXException( @@ -127,4 +132,4 @@ public DateType getSubType() { public void setSubType(DateType subType) { this.subType = subType; } -} \ No newline at end of file +} diff --git a/common/src/main/java/com/alibaba/datax/common/element/DoubleColumn.java b/common/src/main/java/com/alibaba/datax/common/element/DoubleColumn.java index 17170ea6c4..217f4d4b13 100755 --- a/common/src/main/java/com/alibaba/datax/common/element/DoubleColumn.java +++ b/common/src/main/java/com/alibaba/datax/common/element/DoubleColumn.java @@ -24,7 +24,7 @@ public DoubleColumn(Integer data) { /** * Double无法表示准确的小数数据,我们不推荐使用该方法保存Double数据,建议使用String作为构造入参 - * + * * */ public DoubleColumn(final Double data) { this(data == null ? (String) null @@ -33,7 +33,7 @@ public DoubleColumn(final Double data) { /** * Float无法表示准确的小数数据,我们不推荐使用该方法保存Float数据,建议使用String作为构造入参 - * + * * */ public DoubleColumn(final Float data) { this(data == null ? (String) null @@ -133,6 +133,12 @@ public Date asDate() { CommonErrorCode.CONVERT_NOT_SUPPORT, "Double类型无法转为Date类型 ."); } + @Override + public Date asDate(String dateFormat) { + throw DataXException.asDataXException( + CommonErrorCode.CONVERT_NOT_SUPPORT, "Double类型无法转为Date类型 ."); + } + @Override public byte[] asBytes() { throw DataXException.asDataXException( @@ -158,4 +164,4 @@ private void validate(final String data) { } } -} \ No newline at end of file +} diff --git a/common/src/main/java/com/alibaba/datax/common/element/LongColumn.java b/common/src/main/java/com/alibaba/datax/common/element/LongColumn.java index d8113f7c05..0b6f1e4867 100755 --- a/common/src/main/java/com/alibaba/datax/common/element/LongColumn.java +++ b/common/src/main/java/com/alibaba/datax/common/element/LongColumn.java @@ -12,10 +12,10 @@ public class LongColumn extends Column { /** * 从整形字符串表示转为LongColumn,支持Java科学计数法 - * + * * NOTE:
* 如果data为浮点类型的字符串表示,数据将会失真,请使用DoubleColumn对接浮点字符串 - * + * * */ public LongColumn(final String data) { super(null, Column.Type.LONG, 0); @@ -126,6 +126,11 @@ public Date asDate() { return new Date(this.asLong()); } + @Override + public Date asDate(String dateFormat) { + return this.asDate(); + } + @Override public byte[] asBytes() { throw DataXException.asDataXException( diff --git a/common/src/main/java/com/alibaba/datax/common/element/StringColumn.java b/common/src/main/java/com/alibaba/datax/common/element/StringColumn.java index 11209f4688..7fc68cc7b5 100755 --- a/common/src/main/java/com/alibaba/datax/common/element/StringColumn.java +++ b/common/src/main/java/com/alibaba/datax/common/element/StringColumn.java @@ -150,6 +150,16 @@ public Date asDate() { } } + @Override + public Date asDate(String dateFormat) { + try { + return ColumnCast.string2Date(this, dateFormat); + } catch (Exception e) { + throw DataXException.asDataXException(CommonErrorCode.CONVERT_NOT_SUPPORT, + String.format("String[\"%s\"]不能转为Date .", this.asString())); + } + } + @Override public byte[] asBytes() { try { diff --git a/common/src/main/java/com/alibaba/datax/common/exception/DataXException.java b/common/src/main/java/com/alibaba/datax/common/exception/DataXException.java new file mode 100755 index 0000000000..09d00adcf1 --- /dev/null +++ b/common/src/main/java/com/alibaba/datax/common/exception/DataXException.java @@ -0,0 +1,70 @@ +package com.alibaba.datax.common.exception; + +import com.alibaba.datax.common.spi.ErrorCode; + +import java.io.PrintWriter; +import java.io.StringWriter; + +public class DataXException extends RuntimeException { + + private static final long serialVersionUID = 1L; + + private ErrorCode errorCode; + + public DataXException(ErrorCode errorCode, String errorMessage) { + super(errorCode.toString() + " - " + errorMessage); + this.errorCode = errorCode; + } + + public DataXException(String errorMessage) { + super(errorMessage); + } + + private DataXException(ErrorCode errorCode, String errorMessage, Throwable cause) { + super(errorCode.toString() + " - " + getMessage(errorMessage) + " - " + getMessage(cause), cause); + + this.errorCode = errorCode; + } + + public static DataXException asDataXException(ErrorCode errorCode, String message) { + return new DataXException(errorCode, message); + } + + public static DataXException asDataXException(String message) { + return new DataXException(message); + } + + public static DataXException asDataXException(ErrorCode errorCode, String message, Throwable cause) { + if (cause instanceof DataXException) { + return (DataXException) cause; + } + return new DataXException(errorCode, message, cause); + } + + public static DataXException asDataXException(ErrorCode errorCode, Throwable cause) { + if (cause instanceof DataXException) { + return (DataXException) cause; + } + return new DataXException(errorCode, getMessage(cause), cause); + } + + public ErrorCode getErrorCode() { + return this.errorCode; + } + + private static String getMessage(Object obj) { + if (obj == null) { + return ""; + } + + if (obj instanceof Throwable) { + StringWriter str = new StringWriter(); + PrintWriter pw = new PrintWriter(str); + ((Throwable) obj).printStackTrace(pw); + return str.toString(); + // return ((Throwable) obj).getMessage(); + } else { + return obj.toString(); + } + } +} diff --git a/common/src/main/java/com/alibaba/datax/common/plugin/AbstractJobPlugin.java b/common/src/main/java/com/alibaba/datax/common/plugin/AbstractJobPlugin.java index 946adfd0e4..ef8eb509f0 
100755 --- a/common/src/main/java/com/alibaba/datax/common/plugin/AbstractJobPlugin.java +++ b/common/src/main/java/com/alibaba/datax/common/plugin/AbstractJobPlugin.java @@ -4,22 +4,23 @@ * Created by jingxing on 14-8-24. */ public abstract class AbstractJobPlugin extends AbstractPlugin { - /** - * @return the jobPluginCollector - */ - public JobPluginCollector getJobPluginCollector() { - return jobPluginCollector; - } - /** - * @param jobPluginCollector - * the jobPluginCollector to set - */ - public void setJobPluginCollector( + + /** + * @return the jobPluginCollector + */ + public JobPluginCollector getJobPluginCollector() { + return jobPluginCollector; + } + + /** + * @param jobPluginCollector the jobPluginCollector to set + */ + public void setJobPluginCollector( JobPluginCollector jobPluginCollector) { - this.jobPluginCollector = jobPluginCollector; - } + this.jobPluginCollector = jobPluginCollector; + } - private JobPluginCollector jobPluginCollector; + private JobPluginCollector jobPluginCollector; } diff --git a/common/src/main/java/com/alibaba/datax/common/plugin/AbstractPlugin.java b/common/src/main/java/com/alibaba/datax/common/plugin/AbstractPlugin.java index 184ee89ece..ef375c994f 100755 --- a/common/src/main/java/com/alibaba/datax/common/plugin/AbstractPlugin.java +++ b/common/src/main/java/com/alibaba/datax/common/plugin/AbstractPlugin.java @@ -2,51 +2,61 @@ import com.alibaba.datax.common.base.BaseObject; import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.core.job.IJobContainerContext; public abstract class AbstractPlugin extends BaseObject implements Pluginable { - //作业的config + //作业的config private Configuration pluginJobConf; //插件本身的plugin - private Configuration pluginConf; + private Configuration pluginConf; // by qiangsi.lq。 修改为对端的作业configuration private Configuration peerPluginJobConf; private String peerPluginName; + protected IJobContainerContext containerContext; + + @Override - public String getPluginName() { - assert null != this.pluginConf; - return this.pluginConf.getString("name"); - } + public final void setContainerContext(IJobContainerContext containerContext) { + this.containerContext = containerContext; + } + + + @Override + public String getPluginName() { + assert null != this.pluginConf; + return this.pluginConf.getString("name"); + } @Override - public String getDeveloper() { - assert null != this.pluginConf; - return this.pluginConf.getString("developer"); - } + public String getDeveloper() { + assert null != this.pluginConf; + return this.pluginConf.getString("developer"); + } @Override - public String getDescription() { - assert null != this.pluginConf; - return this.pluginConf.getString("description"); - } + public String getDescription() { + assert null != this.pluginConf; + return this.pluginConf.getString("description"); + } @Override - public Configuration getPluginJobConf() { - return pluginJobConf; - } + public Configuration getPluginJobConf() { + return pluginJobConf; + } @Override - public void setPluginJobConf(Configuration pluginJobConf) { - this.pluginJobConf = pluginJobConf; - } + public void setPluginJobConf(Configuration pluginJobConf) { + this.pluginJobConf = pluginJobConf; + } @Override - public void setPluginConf(Configuration pluginConf) { - this.pluginConf = pluginConf; - } + public void setPluginConf(Configuration pluginConf) { + this.pluginConf = pluginConf; + } @Override public Configuration getPeerPluginJobConf() { @@ -71,17 +81,17 @@ public void setPeerPluginName(String peerPluginName) { public 
void preCheck() { } - public void prepare() { - } + public void prepare() { + } - public void post() { - } + public void post() { + } - public void preHandler(Configuration jobConfiguration){ + public void preHandler(Configuration jobConfiguration) { } - public void postHandler(Configuration jobConfiguration){ + public void postHandler(Configuration jobConfiguration) { } } diff --git a/common/src/main/java/com/alibaba/datax/common/plugin/AbstractTaskPlugin.java b/common/src/main/java/com/alibaba/datax/common/plugin/AbstractTaskPlugin.java index 39fbbe9b52..9a3f54e34a 100755 --- a/common/src/main/java/com/alibaba/datax/common/plugin/AbstractTaskPlugin.java +++ b/common/src/main/java/com/alibaba/datax/common/plugin/AbstractTaskPlugin.java @@ -1,5 +1,7 @@ package com.alibaba.datax.common.plugin; +import com.alibaba.datax.core.job.IJobContainerContext; + /** * Created by jingxing on 14-8-24. */ @@ -19,6 +21,8 @@ public void setTaskPluginCollector( this.taskPluginCollector = taskPluginCollector; } + + public int getTaskId() { return taskId; } diff --git a/common/src/main/java/com/alibaba/datax/common/plugin/Pluginable.java b/common/src/main/java/com/alibaba/datax/common/plugin/Pluginable.java index ac28f6a294..69bd34f992 100755 --- a/common/src/main/java/com/alibaba/datax/common/plugin/Pluginable.java +++ b/common/src/main/java/com/alibaba/datax/common/plugin/Pluginable.java @@ -1,17 +1,18 @@ package com.alibaba.datax.common.plugin; import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.core.job.IJobContainerContext; public interface Pluginable { - String getDeveloper(); + String getDeveloper(); String getDescription(); void setPluginConf(Configuration pluginConf); - void init(); + void init(); - void destroy(); + void destroy(); String getPluginName(); @@ -21,6 +22,8 @@ public interface Pluginable { public String getPeerPluginName(); + void setContainerContext(IJobContainerContext containerContext); + void setPluginJobConf(Configuration jobConf); void setPeerPluginJobConf(Configuration peerPluginJobConf); diff --git a/common/src/main/java/com/alibaba/datax/common/util/DESCipher.java b/common/src/main/java/com/alibaba/datax/common/util/DESCipher.java new file mode 100755 index 0000000000..0692a7b3e3 --- /dev/null +++ b/common/src/main/java/com/alibaba/datax/common/util/DESCipher.java @@ -0,0 +1,229 @@ +/** + * (C) 2010-2022 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.alibaba.datax.common.util; + +import javax.crypto.Cipher; +import javax.crypto.SecretKey; +import javax.crypto.SecretKeyFactory; +import javax.crypto.spec.DESKeySpec; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.security.SecureRandom; + +/** + * DES加解密,支持与delphi交互(字符串编码需统一为UTF-8) + * 将这个工具类抽取到 common 中,方便后续代码复用 + */ +public class DESCipher { + private static Logger LOGGER = LoggerFactory.getLogger(DESCipher.class); + /** + * 密钥 + */ + public static final String KEY = ""; + private final static String DES = "DES"; + + /** + * 加密 + * @param src 明文(字节) + * @param key 密钥,长度必须是8的倍数 + * @return 密文(字节) + * @throws Exception + */ + public static byte[] encrypt(byte[] src, byte[] key) throws Exception { + // DES算法要求有一个可信任的随机数源 + SecureRandom sr = new SecureRandom(); + + // 从原始密匙数据创建DESKeySpec对象 + DESKeySpec dks = new DESKeySpec(key); + + // 创建一个密匙工厂,然后用它把DESKeySpec转换成 + // 一个SecretKey对象 + SecretKeyFactory keyFactory = SecretKeyFactory.getInstance(DES); + SecretKey securekey = keyFactory.generateSecret(dks); + + // Cipher对象实际完成加密操作 + Cipher cipher = Cipher.getInstance(DES); + + // 用密匙初始化Cipher对象 + cipher.init(Cipher.ENCRYPT_MODE, securekey, sr); + + // 现在,获取数据并加密 + // 正式执行加密操作 + return cipher.doFinal(src); + } + + /** + * * 解密 + * * @param src + * * 密文(字节) + * * @param key + * * 密钥,长度必须是8的倍数 + * * @return 明文(字节) + * * @throws Exception + */ + public static byte[] decrypt(byte[] src, byte[] key) throws Exception { + // DES算法要求有一个可信任的随机数源 + SecureRandom sr = new SecureRandom(); + + // 从原始密匙数据创建一个DESKeySpec对象 + DESKeySpec dks = new DESKeySpec(key); + + // 创建一个密匙工厂,然后用它把DESKeySpec对象转换成 + // 一个SecretKey对象 + SecretKeyFactory keyFactory = SecretKeyFactory.getInstance(DES); + SecretKey securekey = keyFactory.generateSecret(dks); + + // Cipher对象实际完成解密操作 + Cipher cipher = Cipher.getInstance(DES); + + // 用密匙初始化Cipher对象 + cipher.init(Cipher.DECRYPT_MODE, securekey, sr); + + // 现在,获取数据并解密 + // 正式执行解密操作 + return cipher.doFinal(src); + } + + /** + * 加密 + * @param src * 明文(字节) + * @return 密文(字节) + * @throws Exception + */ + public static byte[] encrypt(byte[] src) throws Exception { + return encrypt(src, KEY.getBytes()); + } + + /** + * 解密 + * @param src 密文(字节) + * @return 明文(字节) + * @throws Exception + */ + public static byte[] decrypt(byte[] src) throws Exception { + return decrypt(src, KEY.getBytes()); + } + + /** + * 加密 + * @param src 明文(字符串) + * @return 密文(16进制字符串) + * @throws Exception + */ + public final static String encrypt(String src) { + try { + return byte2hex(encrypt(src.getBytes(), KEY.getBytes())); + } catch (Exception e) { + LOGGER.warn(e.getMessage(), e); + } + return null; + } + + /** + * 加密 + * @param src 明文(字符串) + * @param encryptKey 加密用的秘钥 + * @return 密文(16进制字符串) + * @throws Exception + */ + public final static String encrypt(String src, String encryptKey) { + try { + return byte2hex(encrypt(src.getBytes(), encryptKey.getBytes())); + } catch (Exception e) { + LOGGER.warn(e.getMessage(), e); + } + return null; + } + + /** + * 解密 + * @param src 密文(字符串) + * @return 明文(字符串) + * @throws Exception + */ + public final static String decrypt(String src) { + try { + return new String(decrypt(hex2byte(src.getBytes()), KEY.getBytes())); + } catch (Exception e) { + LOGGER.warn(e.getMessage(), e); + } + return null; + } + + /** + * 解密 + * @param src 密文(字符串) + * @param decryptKey 解密用的秘钥 + * @return 明文(字符串) + * @throws Exception + */ + public final static String decrypt(String src, String decryptKey) { + try { + return new 
String(decrypt(hex2byte(src.getBytes()), decryptKey.getBytes())); + } catch (Exception e) { + LOGGER.warn(e.getMessage(), e); + } + return null; + } + + /** + * 加密 + * @param src + * 明文(字节) + * @return 密文(16进制字符串) + * @throws Exception + */ + public static String encryptToString(byte[] src) throws Exception { + return encrypt(new String(src)); + } + + /** + * 解密 + * @param src 密文(字节) + * @return 明文(字符串) + * @throws Exception + */ + public static String decryptToString(byte[] src) throws Exception { + return decrypt(new String(src)); + } + + public static String byte2hex(byte[] b) { + String hs = ""; + String stmp = ""; + for (int n = 0; n < b.length; n++) { + stmp = (Integer.toHexString(b[n] & 0XFF)); + if (stmp.length() == 1) + hs = hs + "0" + stmp; + else + hs = hs + stmp; + } + return hs.toUpperCase(); + } + + public static byte[] hex2byte(byte[] b) { + if ((b.length % 2) != 0) + throw new IllegalArgumentException("The length is not an even number"); + byte[] b2 = new byte[b.length / 2]; + for (int n = 0; n < b.length; n += 2) { + String item = new String(b, n, 2); + b2[n / 2] = (byte) Integer.parseInt(item, 16); + } + return b2; + } +} diff --git a/common/src/main/java/com/alibaba/datax/common/util/IdAndKeyRollingUtil.java b/common/src/main/java/com/alibaba/datax/common/util/IdAndKeyRollingUtil.java new file mode 100644 index 0000000000..8bab301e6f --- /dev/null +++ b/common/src/main/java/com/alibaba/datax/common/util/IdAndKeyRollingUtil.java @@ -0,0 +1,62 @@ +package com.alibaba.datax.common.util; + +import java.util.Map; + +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.alibaba.datax.common.exception.DataXException; + +public class IdAndKeyRollingUtil { + private static Logger LOGGER = LoggerFactory.getLogger(IdAndKeyRollingUtil.class); + public static final String SKYNET_ACCESSID = "SKYNET_ACCESSID"; + public static final String SKYNET_ACCESSKEY = "SKYNET_ACCESSKEY"; + + public final static String ACCESS_ID = "accessId"; + public final static String ACCESS_KEY = "accessKey"; + + public static String parseAkFromSkynetAccessKey() { + Map envProp = System.getenv(); + String skynetAccessID = envProp.get(IdAndKeyRollingUtil.SKYNET_ACCESSID); + String skynetAccessKey = envProp.get(IdAndKeyRollingUtil.SKYNET_ACCESSKEY); + String accessKey = null; + // follow 原有的判断条件 + // 环境变量中,如果存在SKYNET_ACCESSID/SKYNET_ACCESSKEy(只要有其中一个变量,则认为一定是两个都存在的! + // if (StringUtils.isNotBlank(skynetAccessID) || + // StringUtils.isNotBlank(skynetAccessKey)) { + // 检查严格,只有加密串不为空的时候才进去,不过 之前能跑的加密串都不应该为空 + if (StringUtils.isNotBlank(skynetAccessKey)) { + LOGGER.info("Try to get accessId/accessKey from environment SKYNET_ACCESSKEY."); + accessKey = DESCipher.decrypt(skynetAccessKey); + if (StringUtils.isBlank(accessKey)) { + // 环境变量里面有,但是解析不到 + throw DataXException.asDataXException(String.format( + "Failed to get the [accessId]/[accessKey] from the environment variable. 
The [accessId]=[%s]", + skynetAccessID)); + } + } + if (StringUtils.isNotBlank(accessKey)) { + LOGGER.info("Get accessId/accessKey from environment variables SKYNET_ACCESSKEY successfully."); + } + return accessKey; + } + + public static String getAccessIdAndKeyFromEnv(Configuration originalConfig) { + String accessId = null; + Map envProp = System.getenv(); + accessId = envProp.get(IdAndKeyRollingUtil.SKYNET_ACCESSID); + String accessKey = null; + if (StringUtils.isBlank(accessKey)) { + // 老的没有出异常,只是获取不到ak + accessKey = IdAndKeyRollingUtil.parseAkFromSkynetAccessKey(); + } + + if (StringUtils.isNotBlank(accessKey)) { + // 确认使用这个的都是 accessId、accessKey的命名习惯 + originalConfig.set(IdAndKeyRollingUtil.ACCESS_ID, accessId); + originalConfig.set(IdAndKeyRollingUtil.ACCESS_KEY, accessKey); + } + return accessKey; + } +} diff --git a/common/src/main/java/com/alibaba/datax/common/util/ListUtil.java b/common/src/main/java/com/alibaba/datax/common/util/ListUtil.java index ac0e421ce6..c4482ecdf3 100755 --- a/common/src/main/java/com/alibaba/datax/common/util/ListUtil.java +++ b/common/src/main/java/com/alibaba/datax/common/util/ListUtil.java @@ -2,10 +2,11 @@ import com.alibaba.datax.common.exception.CommonErrorCode; import com.alibaba.datax.common.exception.DataXException; -import com.alibaba.datax.plugin.rdbms.writer.util.SelectCols; import org.apache.commons.lang3.StringUtils; import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; import java.util.List; /** @@ -14,66 +15,98 @@ */ public final class ListUtil { -// public static boolean checkIfValueDuplicate(List aList, -// boolean caseSensitive) { -// if (null == aList || aList.isEmpty()) { -// throw DataXException.asDataXException(CommonErrorCode.CONFIG_ERROR, -// "您提供的作业配置有误,List不能为空."); -// } -// -// try { -// SelectCols.makeSureNoValueDuplicate(aList, caseSensitive); -// } catch (Exception e) { -// return true; -// } -// return false; -// } - -// public static boolean checkIfBInA(List aList, List bList, -// boolean caseSensitive) { -// if (null == aList || aList.isEmpty() || null == bList -// || bList.isEmpty()) { -// throw new IllegalArgumentException("您提供的作业配置有误, List不能为空."); -// } -// -// try { -// makeSureBInA(aList, bList, caseSensitive); -// } catch (Exception e) { -// return false; -// } -// return true; -// } - -// public static void makeSureBInA(List aList, SelectCols bList, -// boolean caseSensitive) { -// if (null == aList || aList.isEmpty() || null == bList -// || bList.isEmpty()) { -// throw new IllegalArgumentException("您提供的作业配置有误, List不能为空."); -// } -// -// List all = null; -// List part = null; -// -// if (!caseSensitive) { -// all = valueToLowerCase(aList); -// part = valueToLowerCase(bList); -// } else { -// all = new ArrayList(aList); -// part = new ArrayList(bList); -// } -// -// for (String oneValue : part) { -// if (!all.contains(oneValue)) { -// throw DataXException -// .asDataXException( -// CommonErrorCode.CONFIG_ERROR, -// String.format( -// "您提供的作业配置信息有误, String:[%s] 不存在于列表中:[%s].", -// oneValue, StringUtils.join(aList, ","))); -// } -// } -// -// } + public static boolean checkIfValueDuplicate(List aList, + boolean caseSensitive) { + if (null == aList || aList.isEmpty()) { + throw DataXException.asDataXException(CommonErrorCode.CONFIG_ERROR, + "您提供的作业配置有误,List不能为空."); + } + + try { + makeSureNoValueDuplicate(aList, caseSensitive); + } catch (Exception e) { + return true; + } + return false; + } + + public static void makeSureNoValueDuplicate(List aList, + boolean caseSensitive) { + if 
(null == aList || aList.isEmpty()) { + throw new IllegalArgumentException("您提供的作业配置有误, List不能为空."); + } + + if (1 == aList.size()) { + return; + } else { + List list = null; + if (!caseSensitive) { + list = valueToLowerCase(aList); + } else { + list = new ArrayList(aList); + } + + Collections.sort(list); + + for (int i = 0, len = list.size() - 1; i < len; i++) { + if (list.get(i).equals(list.get(i + 1))) { + throw DataXException + .asDataXException( + CommonErrorCode.CONFIG_ERROR, + String.format( + "您提供的作业配置信息有误, String:[%s] 不允许重复出现在列表中: [%s].", + list.get(i), + StringUtils.join(aList, ","))); + } + } + } + } + + public static boolean checkIfBInA(List aList, List bList, + boolean caseSensitive) { + if (null == aList || aList.isEmpty() || null == bList + || bList.isEmpty()) { + throw new IllegalArgumentException("您提供的作业配置有误, List不能为空."); + } + + try { + makeSureBInA(aList, bList, caseSensitive); + } catch (Exception e) { + return false; + } + return true; + } + + public static void makeSureBInA(List aList, List bList, + boolean caseSensitive) { + if (null == aList || aList.isEmpty() || null == bList + || bList.isEmpty()) { + throw new IllegalArgumentException("您提供的作业配置有误, List不能为空."); + } + + List all = null; + List part = null; + + if (!caseSensitive) { + all = valueToLowerCase(aList); + part = valueToLowerCase(bList); + } else { + all = new ArrayList(aList); + part = new ArrayList(bList); + } + + for (String oneValue : part) { + if (!all.contains(oneValue)) { + throw DataXException + .asDataXException( + CommonErrorCode.CONFIG_ERROR, + String.format( + "您提供的作业配置信息有误, String:[%s] 不存在于列表中:[%s].", + oneValue, StringUtils.join(aList, ","))); + } + } + + } public static boolean checkIfValueSame(List aList) { if (null == aList || aList.isEmpty()) { @@ -104,4 +137,25 @@ public static List valueToLowerCase(List aList) { return result; } + + public static Boolean checkIfHasSameValue(List listA, List listB) { + if (null == listA || listA.isEmpty() || null == listB || listB.isEmpty()) { + return false; + } + + for (String oneValue : listA) { + if (listB.contains(oneValue)) { + return true; + } + } + + return false; + } + + public static boolean checkIfAllSameValue(List listA, List listB) { + if (null == listA || listA.isEmpty() || null == listB || listB.isEmpty() || listA.size() != listB.size()) { + return false; + } + return new HashSet<>(listA).containsAll(new HashSet<>(listB)); + } } diff --git a/common/src/main/java/com/alibaba/datax/core/job/IJobContainerContext.java b/common/src/main/java/com/alibaba/datax/core/job/IJobContainerContext.java new file mode 100644 index 0000000000..a149f11182 --- /dev/null +++ b/common/src/main/java/com/alibaba/datax/core/job/IJobContainerContext.java @@ -0,0 +1,12 @@ +package com.alibaba.datax.core.job; + +import com.qlangtech.tis.datax.IDataXNameAware; +import com.qlangtech.tis.datax.IDataXTaskSerializeNum; + +/** + * @author: 百岁(baisui@qlangtech.com) + * @create: 2023-02-23 10:06 + **/ +public interface IJobContainerContext extends IDataXTaskSerializeNum, IDataXNameAware { + +} diff --git a/core/src/main/java/com/alibaba/datax/core/job/JobContainer.java b/core/src/main/java/com/alibaba/datax/core/job/JobContainer.java index 8b523644dc..2b05529c12 100755 --- a/core/src/main/java/com/alibaba/datax/core/job/JobContainer.java +++ b/core/src/main/java/com/alibaba/datax/core/job/JobContainer.java @@ -43,7 +43,7 @@ * job实例运行在jobContainer容器中,它是所有任务的master,负责初始化、拆分、调度、运行、回收、监控和汇报 * 但它并不做实际的数据同步操作 */ -public class JobContainer extends AbstractContainer { +public class 
diff --git a/core/src/main/java/com/alibaba/datax/core/job/JobContainer.java b/core/src/main/java/com/alibaba/datax/core/job/JobContainer.java
index 8b523644dc..2b05529c12 100755
--- a/core/src/main/java/com/alibaba/datax/core/job/JobContainer.java
+++ b/core/src/main/java/com/alibaba/datax/core/job/JobContainer.java
@@ -43,7 +43,7 @@
  * job实例运行在jobContainer容器中,它是所有任务的master,负责初始化、拆分、调度、运行、回收、监控和汇报
  * 但它并不做实际的数据同步操作
  */
-public class JobContainer extends AbstractContainer {
+public class JobContainer extends AbstractContainer implements IJobContainerContext {
     private static final Logger LOG = LoggerFactory
             .getLogger(JobContainer.class);

@@ -87,6 +87,15 @@ public JobContainer(Configuration configuration) {
         errorLimit = new ErrorRecordChecker(configuration);
     }

+    @Override
+    public int getTaskSerializeNum() {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public String getTISDataXName() {
+        throw new UnsupportedOperationException();
+    }

     /**
      * jobContainer主要负责的工作全部在start()里面,包括init、prepare、split、scheduler、
@@ -101,7 +110,7 @@ public void start() {
         try {
             this.startTimeStamp = System.currentTimeMillis();
             isDryRun = configuration.getBool(CoreConstant.DATAX_JOB_SETTING_DRYRUN, false);
-            if(isDryRun) {
+            if (isDryRun) {
                 LOG.info("jobContainer starts to do preCheck ...");
                 this.preCheck();
             } else {
@@ -162,7 +171,7 @@ public void start() {
             throw DataXException.asDataXException(
                     FrameworkErrorCode.RUNTIME_ERROR, e);
         } finally {
-            if(!isDryRun) {
+            if (!isDryRun) {

                 this.destroy();
                 this.endTimeStamp = System.currentTimeMillis();
@@ -222,7 +231,7 @@ private Reader.Job preCheckReaderInit(JobPluginCollector jobPluginCollector) {
                 PluginType.READER, this.readerPluginName);

         this.configuration.set(CoreConstant.DATAX_JOB_CONTENT_READER_PARAMETER + ".dryRun", true);
-
+        jobReader.setContainerContext(this);
         // 设置reader的jobConfig
         jobReader.setPluginJobConf(this.configuration.getConfiguration(
                 CoreConstant.DATAX_JOB_CONTENT_READER_PARAMETER));
@@ -247,7 +256,7 @@ private Writer.Job preCheckWriterInit(JobPluginCollector jobPluginCollector) {
                 PluginType.WRITER, this.writerPluginName);

         this.configuration.set(CoreConstant.DATAX_JOB_CONTENT_WRITER_PARAMETER + ".dryRun", true);
-
+        jobWriter.setContainerContext(this);
         // 设置writer的jobConfig
         jobWriter.setPluginJobConf(this.configuration.getConfiguration(
                 CoreConstant.DATAX_JOB_CONTENT_WRITER_PARAMETER));
@@ -312,7 +321,7 @@ private void prepare() {
     private void preHandle() {
         String handlerPluginTypeStr = this.configuration.getString(
                 CoreConstant.DATAX_JOB_PREHANDLER_PLUGINTYPE);
-        if(!StringUtils.isNotEmpty(handlerPluginTypeStr)){
+        if (!StringUtils.isNotEmpty(handlerPluginTypeStr)) {
             return;
         }
         PluginType handlerPluginType;
@@ -348,7 +357,7 @@ private void postHandle() {
         String handlerPluginTypeStr = this.configuration.getString(
                 CoreConstant.DATAX_JOB_POSTHANDLER_PLUGINTYPE);

-        if(!StringUtils.isNotEmpty(handlerPluginTypeStr)){
+        if (!StringUtils.isNotEmpty(handlerPluginTypeStr)) {
             return;
         }
         PluginType handlerPluginType;
@@ -398,7 +407,7 @@ private int split() {
         List<Configuration> transformerList = this.configuration.getListConfiguration(CoreConstant.DATAX_JOB_CONTENT_TRANSFORMER);

-        LOG.debug("transformer configuration: "+ JSON.toJSONString(transformerList));
+        LOG.debug("transformer configuration: " + JSON.toJSONString(transformerList));
         /**
          * 输入是reader和writer的parameter list,输出是content下面元素的list
          */
@@ -406,7 +415,7 @@ private int split() {
                 readerTaskConfigs, writerTaskConfigs, transformerList);

-        LOG.debug("contentConfig configuration: "+ JSON.toJSONString(contentConfig));
+        LOG.debug("contentConfig configuration: " + JSON.toJSONString(contentConfig));

         this.configuration.set(CoreConstant.DATAX_JOB_CONTENT, contentConfig);

@@ -513,7 +522,7 @@ private void schedule() {
         ExecuteMode executeMode = null;
         AbstractScheduler scheduler;
         try {
-        	executeMode = ExecuteMode.STANDALONE;
+            executeMode = ExecuteMode.STANDALONE;
             scheduler = initStandaloneScheduler(this.configuration);

             //设置 executeMode
@@ -663,7 +672,7 @@ private Reader.Job initJobReader(
         Reader.Job 
jobReader = (Reader.Job) LoadUtil.loadJobPlugin( PluginType.READER, this.readerPluginName); - + jobReader.setContainerContext(this); // 设置reader的jobConfig jobReader.setPluginJobConf(this.configuration.getConfiguration( CoreConstant.DATAX_JOB_CONTENT_READER_PARAMETER)); @@ -693,7 +702,7 @@ private Writer.Job initJobWriter( Writer.Job jobWriter = (Writer.Job) LoadUtil.loadJobPlugin( PluginType.WRITER, this.writerPluginName); - + jobWriter.setContainerContext(this); // 设置writer的jobConfig jobWriter.setPluginJobConf(this.configuration.getConfiguration( CoreConstant.DATAX_JOB_CONTENT_WRITER_PARAMETER)); @@ -800,7 +809,7 @@ private List mergeReaderAndWriterTaskConfigs( taskConfig.set(CoreConstant.JOB_WRITER_PARAMETER, writerTasksConfigs.get(i)); - if(transformerConfigs!=null && transformerConfigs.size()>0){ + if (transformerConfigs != null && transformerConfigs.size() > 0) { taskConfig.set(CoreConstant.JOB_TRANSFORMER, transformerConfigs); } diff --git a/core/src/main/java/com/alibaba/datax/core/statistics/plugin/task/util/DirtyRecord.java b/core/src/main/java/com/alibaba/datax/core/statistics/plugin/task/util/DirtyRecord.java index fdc5d8215d..d590dc2785 100755 --- a/core/src/main/java/com/alibaba/datax/core/statistics/plugin/task/util/DirtyRecord.java +++ b/core/src/main/java/com/alibaba/datax/core/statistics/plugin/task/util/DirtyRecord.java @@ -120,6 +120,12 @@ public Date asDate() { "该方法不支持!"); } + @Override + public Date asDate(String dateFormat) { + throw DataXException.asDataXException(FrameworkErrorCode.RUNTIME_ERROR, + "该方法不支持!"); + } + @Override public byte[] asBytes() { throw DataXException.asDataXException(FrameworkErrorCode.RUNTIME_ERROR, diff --git a/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisWriter.java b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisWriter.java index 558ec8014d..ecbdfaf15e 100755 --- a/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisWriter.java +++ b/doriswriter/src/main/java/com/alibaba/datax/plugin/writer/doriswriter/DorisWriter.java @@ -51,7 +51,7 @@ public void init() { this.originalConfig = super.getPluginJobConf(); options = new Keys(super.getPluginJobConf()); options.doPretreatment(); - this.dsGetter = DBUtil.getWriterDataSourceFactoryGetter(originalConfig); + this.dsGetter = DBUtil.getWriterDataSourceFactoryGetter(originalConfig,this.containerContext); } @Override diff --git a/drdsreader/src/main/java/com/alibaba/datax/plugin/reader/drdsreader/DrdsReader.java b/drdsreader/src/main/java/com/alibaba/datax/plugin/reader/drdsreader/DrdsReader.java index bf4467db06..8c7e0028b6 100755 --- a/drdsreader/src/main/java/com/alibaba/datax/plugin/reader/drdsreader/DrdsReader.java +++ b/drdsreader/src/main/java/com/alibaba/datax/plugin/reader/drdsreader/DrdsReader.java @@ -34,7 +34,7 @@ public void init() { this.validateConfiguration(); this.commonRdbmsReaderJob = new CommonRdbmsReader.Job( - DATABASE_TYPE); + DATABASE_TYPE, this.containerContext); this.commonRdbmsReaderJob.init(this.originalConfig); } @@ -117,19 +117,19 @@ public static class Task extends Reader.Task { private Configuration readerSliceConfig; private CommonRdbmsReader.Task commonRdbmsReaderTask; + @Override public void init() { this.readerSliceConfig = super.getPluginJobConf(); this.commonRdbmsReaderTask = new CommonRdbmsReader.Task( - DATABASE_TYPE, super.getTaskGroupId(), super.getTaskId()); + DATABASE_TYPE, this.containerContext ,super.getTaskGroupId(), super.getTaskId()); 
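+            // containerContext: the job-level container resources (TIS DataX name,
+            // task serialize number) are now threaded into the common rdbms task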
this.commonRdbmsReaderTask.init(this.readerSliceConfig); } + @Override public void startRead(RecordSender recordSender) { - // int fetchSize = this.readerSliceConfig.getInt(Constant.FETCH_SIZE); - this.commonRdbmsReaderTask.startRead(this.readerSliceConfig, recordSender, super.getTaskPluginCollector()); } diff --git a/drdswriter/src/main/java/com/alibaba/datax/plugin/writer/drdswriter/DrdsWriter.java b/drdswriter/src/main/java/com/alibaba/datax/plugin/writer/drdswriter/DrdsWriter.java index b2bf0ac4e4..61b17d1e74 100755 --- a/drdswriter/src/main/java/com/alibaba/datax/plugin/writer/drdswriter/DrdsWriter.java +++ b/drdswriter/src/main/java/com/alibaba/datax/plugin/writer/drdswriter/DrdsWriter.java @@ -33,7 +33,7 @@ public void init() { } this.originalConfig.set(Key.WRITE_MODE, writeMode); - this.commonRdbmsWriterJob = new CommonRdbmsWriter.Job(DATABASE_TYPE); + this.commonRdbmsWriterJob = new CommonRdbmsWriter.Job(DATABASE_TYPE,this.containerContext); this.commonRdbmsWriterJob.init(this.originalConfig); } @@ -68,7 +68,7 @@ public static class Task extends Writer.Task { @Override public void init() { this.writerSliceConfig = super.getPluginJobConf(); - this.commonRdbmsWriterTask = new CommonRdbmsWriter.Task(DATABASE_TYPE); + this.commonRdbmsWriterTask = new CommonRdbmsWriter.Task(DATABASE_TYPE,this.containerContext); this.commonRdbmsWriterTask.init(this.writerSliceConfig); } diff --git a/kingbaseesreader/src/main/java/com/alibaba/datax/plugin/reader/kingbaseesreader/KingbaseesReader.java b/kingbaseesreader/src/main/java/com/alibaba/datax/plugin/reader/kingbaseesreader/KingbaseesReader.java index ff8e030133..09daab1360 100644 --- a/kingbaseesreader/src/main/java/com/alibaba/datax/plugin/reader/kingbaseesreader/KingbaseesReader.java +++ b/kingbaseesreader/src/main/java/com/alibaba/datax/plugin/reader/kingbaseesreader/KingbaseesReader.java @@ -1,11 +1,9 @@ package com.alibaba.datax.plugin.reader.kingbaseesreader; -import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.common.plugin.RecordSender; import com.alibaba.datax.common.spi.Reader; import com.alibaba.datax.common.util.Configuration; import com.alibaba.datax.plugin.rdbms.reader.CommonRdbmsReader; -import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode; import com.alibaba.datax.plugin.rdbms.util.DataBaseType; import java.util.List; @@ -30,7 +28,7 @@ public void init() { // } // this.originalConfig.set(com.alibaba.datax.plugin.rdbms.reader.Constant.FETCH_SIZE, fetchSize); - this.commonRdbmsReaderMaster = new CommonRdbmsReader.Job(DATABASE_TYPE); + this.commonRdbmsReaderMaster = new CommonRdbmsReader.Job(DATABASE_TYPE, this.containerContext); this.commonRdbmsReaderMaster.init(this.originalConfig); } @@ -59,7 +57,7 @@ public static class Task extends Reader.Task { @Override public void init() { this.readerSliceConfig = super.getPluginJobConf(); - this.commonRdbmsReaderSlave = new CommonRdbmsReader.Task(DATABASE_TYPE, super.getTaskGroupId(), super.getTaskId()); + this.commonRdbmsReaderSlave = new CommonRdbmsReader.Task(DATABASE_TYPE, containerContext, super.getTaskGroupId(), super.getTaskId()); this.commonRdbmsReaderSlave.init(this.readerSliceConfig); } diff --git a/kingbaseeswriter/src/main/java/com/alibaba/datax/plugin/writer/kingbaseeswriter/KingbaseesWriter.java b/kingbaseeswriter/src/main/java/com/alibaba/datax/plugin/writer/kingbaseeswriter/KingbaseesWriter.java index dec5ff9505..1760c132c3 100644 --- a/kingbaseeswriter/src/main/java/com/alibaba/datax/plugin/writer/kingbaseeswriter/KingbaseesWriter.java 
+++ b/kingbaseeswriter/src/main/java/com/alibaba/datax/plugin/writer/kingbaseeswriter/KingbaseesWriter.java @@ -12,89 +12,89 @@ import java.util.List; public class KingbaseesWriter extends Writer { - private static final DataBaseType DATABASE_TYPE = DataBaseType.KingbaseES; - - public static class Job extends Writer.Job { - private Configuration originalConfig = null; - private CommonRdbmsWriter.Job commonRdbmsWriterMaster; - - @Override - public void init() { - this.originalConfig = super.getPluginJobConf(); - - // warn:not like mysql, KingbaseES only support insert mode, don't use - String writeMode = this.originalConfig.getString(Key.WRITE_MODE); - if (null != writeMode) { - throw DataXException.asDataXException(DBUtilErrorCode.CONF_ERROR, - String.format("写入模式(writeMode)配置有误. 因为KingbaseES不支持配置参数项 writeMode: %s, KingbaseES仅使用insert sql 插入数据. 请检查您的配置并作出修改.", writeMode)); - } - - this.commonRdbmsWriterMaster = new CommonRdbmsWriter.Job(DATABASE_TYPE); - this.commonRdbmsWriterMaster.init(this.originalConfig); - } - - @Override - public void prepare() { - this.commonRdbmsWriterMaster.prepare(this.originalConfig); - } - - @Override - public List split(int mandatoryNumber) { - return this.commonRdbmsWriterMaster.split(this.originalConfig, mandatoryNumber); - } - - @Override - public void post() { - this.commonRdbmsWriterMaster.post(this.originalConfig); - } - - @Override - public void destroy() { - this.commonRdbmsWriterMaster.destroy(this.originalConfig); - } - - } - - public static class Task extends Writer.Task { - private Configuration writerSliceConfig; - private CommonRdbmsWriter.Task commonRdbmsWriterSlave; - - @Override - public void init() { - this.writerSliceConfig = super.getPluginJobConf(); - this.commonRdbmsWriterSlave = new CommonRdbmsWriter.Task(DATABASE_TYPE){ - @Override - public String calcValueHolder(String columnType){ - if("serial".equalsIgnoreCase(columnType)){ - return "?::int"; - }else if("bit".equalsIgnoreCase(columnType)){ - return "?::bit varying"; - } - return "?::" + columnType; - } - }; - this.commonRdbmsWriterSlave.init(this.writerSliceConfig); - } - - @Override - public void prepare() { - this.commonRdbmsWriterSlave.prepare(this.writerSliceConfig); - } - - public void startWrite(RecordReceiver recordReceiver) { - this.commonRdbmsWriterSlave.startWrite(recordReceiver, this.writerSliceConfig, super.getTaskPluginCollector()); - } - - @Override - public void post() { - this.commonRdbmsWriterSlave.post(this.writerSliceConfig); - } - - @Override - public void destroy() { - this.commonRdbmsWriterSlave.destroy(this.writerSliceConfig); - } - - } + private static final DataBaseType DATABASE_TYPE = DataBaseType.KingbaseES; + + public static class Job extends Writer.Job { + private Configuration originalConfig = null; + private CommonRdbmsWriter.Job commonRdbmsWriterMaster; + + @Override + public void init() { + this.originalConfig = super.getPluginJobConf(); + + // warn:not like mysql, KingbaseES only support insert mode, don't use + String writeMode = this.originalConfig.getString(Key.WRITE_MODE); + if (null != writeMode) { + throw DataXException.asDataXException(DBUtilErrorCode.CONF_ERROR, + String.format("写入模式(writeMode)配置有误. 因为KingbaseES不支持配置参数项 writeMode: %s, KingbaseES仅使用insert sql 插入数据. 
请检查您的配置并作出修改.", writeMode)); + } + + this.commonRdbmsWriterMaster = new CommonRdbmsWriter.Job(DATABASE_TYPE, this.containerContext); + this.commonRdbmsWriterMaster.init(this.originalConfig); + } + + @Override + public void prepare() { + this.commonRdbmsWriterMaster.prepare(this.originalConfig); + } + + @Override + public List split(int mandatoryNumber) { + return this.commonRdbmsWriterMaster.split(this.originalConfig, mandatoryNumber); + } + + @Override + public void post() { + this.commonRdbmsWriterMaster.post(this.originalConfig); + } + + @Override + public void destroy() { + this.commonRdbmsWriterMaster.destroy(this.originalConfig); + } + + } + + public static class Task extends Writer.Task { + private Configuration writerSliceConfig; + private CommonRdbmsWriter.Task commonRdbmsWriterSlave; + + @Override + public void init() { + this.writerSliceConfig = super.getPluginJobConf(); + this.commonRdbmsWriterSlave = new CommonRdbmsWriter.Task(DATABASE_TYPE, this.containerContext) { + @Override + public String calcValueHolder(String columnType) { + if ("serial".equalsIgnoreCase(columnType)) { + return "?::int"; + } else if ("bit".equalsIgnoreCase(columnType)) { + return "?::bit varying"; + } + return "?::" + columnType; + } + }; + this.commonRdbmsWriterSlave.init(this.writerSliceConfig); + } + + @Override + public void prepare() { + this.commonRdbmsWriterSlave.prepare(this.writerSliceConfig); + } + + public void startWrite(RecordReceiver recordReceiver) { + this.commonRdbmsWriterSlave.startWrite(recordReceiver, this.writerSliceConfig, super.getTaskPluginCollector()); + } + + @Override + public void post() { + this.commonRdbmsWriterSlave.post(this.writerSliceConfig); + } + + @Override + public void destroy() { + this.commonRdbmsWriterSlave.destroy(this.writerSliceConfig); + } + + } } diff --git a/mongodbwriter/src/main/java/com/alibaba/datax/plugin/writer/mongodbwriter/MongoDBWriter.java b/mongodbwriter/src/main/java/com/alibaba/datax/plugin/writer/mongodbwriter/MongoDBWriter.java index 68c254cfcd..d37d10f14b 100644 --- a/mongodbwriter/src/main/java/com/alibaba/datax/plugin/writer/mongodbwriter/MongoDBWriter.java +++ b/mongodbwriter/src/main/java/com/alibaba/datax/plugin/writer/mongodbwriter/MongoDBWriter.java @@ -331,7 +331,7 @@ public void init() { // } try { - IDataSourceFactoryGetter dsGetter = DBUtil.getWriterDataSourceFactoryGetter(this.writerSliceConfig); + IDataSourceFactoryGetter dsGetter = DBUtil.getWriterDataSourceFactoryGetter(this.writerSliceConfig, this.containerContext); DataSourceFactory dataSourceFactory = dsGetter.getDataSourceFactory(); this.mongoClient = dataSourceFactory.unwrap(MongoClient.class); diff --git a/mysqlreader/src/main/java/com/alibaba/datax/plugin/reader/mysqlreader/MysqlReader.java b/mysqlreader/src/main/java/com/alibaba/datax/plugin/reader/mysqlreader/MysqlReader.java index c0800aa9cc..e0ecf9b191 100755 --- a/mysqlreader/src/main/java/com/alibaba/datax/plugin/reader/mysqlreader/MysqlReader.java +++ b/mysqlreader/src/main/java/com/alibaba/datax/plugin/reader/mysqlreader/MysqlReader.java @@ -35,7 +35,7 @@ public void init() { this.originalConfig.set(Constant.FETCH_SIZE, Integer.MIN_VALUE); } - this.commonRdbmsReaderJob = new CommonRdbmsReader.Job(DATABASE_TYPE); + this.commonRdbmsReaderJob = new CommonRdbmsReader.Job(DATABASE_TYPE, containerContext); this.commonRdbmsReaderJob.init(this.originalConfig); } @@ -71,14 +71,14 @@ public static class Task extends Reader.Task { @Override public void init() { this.readerSliceConfig = super.getPluginJobConf(); - 
this.commonRdbmsReaderTask = new CommonRdbmsReader.Task(DATABASE_TYPE, super.getTaskGroupId(), super.getTaskId()); + this.commonRdbmsReaderTask = new CommonRdbmsReader.Task(DATABASE_TYPE, containerContext, super.getTaskGroupId(), super.getTaskId()); this.commonRdbmsReaderTask.init(this.readerSliceConfig); } @Override public void startRead(RecordSender recordSender) { - // int fetchSize = this.readerSliceConfig.getInt(Constant.FETCH_SIZE); + // int fetchSize = this.readerSliceConfig.getInt(Constant.FETCH_SIZE); this.commonRdbmsReaderTask.startRead(this.readerSliceConfig, recordSender, super.getTaskPluginCollector()); @@ -94,6 +94,7 @@ public void destroy() { this.commonRdbmsReaderTask.destroy(this.readerSliceConfig); } + } } diff --git a/mysqlwriter/src/main/java/com/alibaba/datax/plugin/writer/mysqlwriter/MysqlWriter.java b/mysqlwriter/src/main/java/com/alibaba/datax/plugin/writer/mysqlwriter/MysqlWriter.java index 9d2c82ee7c..ae56f25fb0 100755 --- a/mysqlwriter/src/main/java/com/alibaba/datax/plugin/writer/mysqlwriter/MysqlWriter.java +++ b/mysqlwriter/src/main/java/com/alibaba/datax/plugin/writer/mysqlwriter/MysqlWriter.java @@ -19,7 +19,7 @@ public static class Job extends Writer.Job { private CommonRdbmsWriter.Job commonRdbmsWriterJob; @Override - public void preCheck(){ + public void preCheck() { this.init(); this.commonRdbmsWriterJob.writerPreCheck(this.originalConfig, DATABASE_TYPE); } @@ -27,7 +27,7 @@ public void preCheck(){ @Override public void init() { this.originalConfig = super.getPluginJobConf(); - this.commonRdbmsWriterJob = new CommonRdbmsWriter.Job(DATABASE_TYPE); + this.commonRdbmsWriterJob = new CommonRdbmsWriter.Job(DATABASE_TYPE, this.containerContext); this.commonRdbmsWriterJob.init(this.originalConfig); } @@ -64,7 +64,7 @@ public static class Task extends Writer.Task { @Override public void init() { this.writerSliceConfig = super.getPluginJobConf(); - this.commonRdbmsWriterTask = new CommonRdbmsWriter.Task(DATABASE_TYPE); + this.commonRdbmsWriterTask = new CommonRdbmsWriter.Task(DATABASE_TYPE, containerContext); this.commonRdbmsWriterTask.init(this.writerSliceConfig); } @@ -90,7 +90,7 @@ public void destroy() { } @Override - public boolean supportFailOver(){ + public boolean supportFailOver() { String writeMode = writerSliceConfig.getString(Key.WRITE_MODE); return "replace".equalsIgnoreCase(writeMode); } diff --git a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/OceanBaseV10Writer.java b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/OceanBaseV10Writer.java index 89ef1c52aa..607bba2b8a 100644 --- a/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/OceanBaseV10Writer.java +++ b/oceanbasev10writer/src/main/java/com/alibaba/datax/plugin/writer/oceanbasev10writer/OceanBaseV10Writer.java @@ -27,7 +27,7 @@ * 2016-04-07 *

* 专门针对OceanBase1.0的Writer - * + * * @author biliang.wbl * */ @@ -38,13 +38,13 @@ public class OceanBaseV10Writer extends Writer { * Job 中的方法仅执行一次,Task 中方法会由框架启动多个 Task 线程并行执行。 *

* 整个 Writer 执行流程是: - * + * *

 	 * Job类init-->prepare-->split
-	 * 
+	 *
 	 *                          Task类init-->prepare-->startWrite-->post-->destroy
 	 *                          Task类init-->prepare-->startWrite-->post-->destroy
-	 * 
+	 *
 	 *                                                                            Job类post-->destroy
 	 * 
*/
@@ -61,7 +61,7 @@ public static class Job extends Writer.Job {
 		public void init() {
 			this.originalConfig = super.getPluginJobConf();
 			checkCompatibleMode(originalConfig);
-			this.commonJob = new CommonRdbmsWriter.Job(DATABASE_TYPE);
+			this.commonJob = new CommonRdbmsWriter.Job(DATABASE_TYPE, this.containerContext);
 			this.commonJob.init(this.originalConfig);
 		}
@@ -166,7 +166,7 @@ public void post() {
 			}
 			originalConfig.remove(Key.POST_SQL);
 		}
-
+
 		/**
 		 * 注意:此方法仅执行一次。 最佳实践:通常配合 Job 中的 post() 方法一起完成 Job 的资源释放。
 		 */
diff --git a/odpswriter/pom.xml b/odpswriter/pom.xml
index cad0d01860..7460fdfa07 100755
--- a/odpswriter/pom.xml
+++ b/odpswriter/pom.xml
@@ -30,18 +30,11 @@
         <dependency>
             <groupId>ch.qos.logback</groupId>
             <artifactId>logback-classic</artifactId>
         </dependency>
-
-
-
-
-
-
-
-
-        <dependency>
-            <groupId>com.aliyun.odps</groupId>
-            <artifactId>odps-sdk-core</artifactId>
-            <version>0.20.7-public</version>
-        </dependency>
+        <dependency>
+            <groupId>com.aliyun.odps</groupId>
+            <artifactId>odps-sdk-core</artifactId>
+            <version>0.38.4-public</version>
+        </dependency>
@@ -51,6 +44,14 @@
+
+
+
+
         <dependency>
             <groupId>org.mockito</groupId>
             <artifactId>mockito-core</artifactId>
@@ -70,9 +71,30 @@
             <scope>test</scope>
         </dependency>
+
+        <dependency>
+            <groupId>org.aspectj</groupId>
+            <artifactId>aspectjweaver</artifactId>
+            <version>1.8.10</version>
+        </dependency>
+
+        <dependency>
+            <groupId>commons-codec</groupId>
+            <artifactId>commons-codec</artifactId>
+            <version>1.8</version>
+        </dependency>
+
+        <resource>
+            <directory>src/main/java</directory>
+            <includes>
+                <include>**/*.properties</include>
+            </includes>
+        </resource>
diff --git a/odpswriter/src/main/assembly/package.xml b/odpswriter/src/main/assembly/package.xml
index 7d3c91b51b..0ef0b43b18 100755
--- a/odpswriter/src/main/assembly/package.xml
+++ b/odpswriter/src/main/assembly/package.xml
@@ -23,13 +23,6 @@
             <outputDirectory>plugin/writer/odpswriter</outputDirectory>
         </fileSet>
-        <fileSet>
-            <directory>src/main/libs</directory>
-            <includes>
-                <include>*.*</include>
-            </includes>
-            <outputDirectory>plugin/writer/odpswriter/libs</outputDirectory>
-        </fileSet>
diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/Constant.java b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/Constant.java
index 22bcc16cb3..f4d9734b9c 100755
--- a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/Constant.java
+++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/Constant.java
@@ -12,4 +12,34 @@ public class Constant {

     public static final String COLUMN_POSITION = "columnPosition";

+    /*
+     * 每个task独立维护一个proxy列表,一共会生成 task并发量 * 分区数量 的proxy,每个proxy会创建 blocksizeInMB(一般是64M) 大小的数组
+     * 因此极易OOM,
+     * 假设默认情况下768M的内存,实际最多只能创建 12 个proxy,8G内存最多只能创建126个proxy,所以最多只允许创建一定数量的proxy,对应到分区数量 1:1
+     *
+     * blockSizeInMB 减小可以减少内存消耗,但是意味着更高频率的网络请求,会对odps服务器造成较大压力
+     *
+     * 另外,可以考虑proxy不用常驻内存,但是需要增加复杂的控制逻辑
+     * 但是一般情况下用户作为分区值的数据是有规律的,比如按照时间,2020-08的数据已经同步完成了,并且后面没有这个分区的数据了,对应的proxy还放在内存中,
+     * 会造成很大的内存浪费。所以有必要对某些proxy进行回收。
+     *
+     * 这里采用是否回收某个proxy的标准是:在最近时间内是否有过数据传输。
+     *
+     *
+     * 需要注意的问题!
+ * 多个任务公用一个proxy,写入时需要抢锁,多并发的性能会受到很大影响,相当于单个分区时串行写入 + * 这个对性能影响很大,需要避免这种方式,还是尽量各个task有独立的proxy,只是需要去控制内存的使用,只能是控制每个task保有的proxy数量了 + * + * 还可以考虑修改proxy的数组大小,但是设置太小不确定会不会影响性能。可以测试一下 + */ + + public static final Long PROXY_MAX_IDLE_TIME_MS =60 * 1000L; // 60s没有动作就回收 + + public static final Long MAX_PARTITION_CNT = 200L; + + public static final int UTF8_ENCODED_CHAR_MAX_SIZE = 6; + + public static final int DEFAULT_FIELD_MAX_SIZE = 8 * 1024 * 1024; + + } diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/DateTransForm.java b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/DateTransForm.java new file mode 100644 index 0000000000..dedc9eccda --- /dev/null +++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/DateTransForm.java @@ -0,0 +1,57 @@ +package com.alibaba.datax.plugin.writer.odpswriter; + +public class DateTransForm { + /** + * 列名称 + */ + private String colName; + + /** + * 之前是什么格式 + */ + private String fromFormat; + + /** + * 要转换成什么格式 + */ + private String toFormat; + + public DateTransForm(String colName, String fromFormat, String toFormat) { + this.colName = colName; + this.fromFormat = fromFormat; + this.toFormat = toFormat; + } + + public String getColName() { + return colName; + } + + public void setColName(String colName) { + this.colName = colName; + } + + public String getFromFormat() { + return fromFormat; + } + + public void setFromFormat(String fromFormat) { + this.fromFormat = fromFormat; + } + + public String getToFormat() { + return toFormat; + } + + public void setToFormat(String toFormat) { + this.toFormat = toFormat; + } + + @Override + public String toString() { + return "DateTransForm{" + + "colName='" + colName + '\'' + + ", fromFormat='" + fromFormat + '\'' + + ", toFormat='" + toFormat + '\'' + + '}'; + } +} diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/Key.java b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/Key.java index f578d72d9a..2c8af6cc65 100755 --- a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/Key.java +++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/Key.java @@ -11,6 +11,8 @@ public final class Key { public final static String ACCESS_KEY = "accessKey"; + public final static String SECURITY_TOKEN = "securityToken"; + public final static String PROJECT = "project"; public final static String TABLE = "table"; @@ -31,4 +33,58 @@ public final class Key { public final static String ACCOUNT_TYPE = "accountType"; public final static String IS_COMPRESS = "isCompress"; + + // preSql + public final static String PRE_SQL="preSql"; + + // postSql + public final static String POST_SQL="postSql"; + + public final static String CONSISTENCY_COMMIT = "consistencyCommit"; + + public final static String UPLOAD_ID = "uploadId"; + + public final static String TASK_COUNT = "taskCount"; + + /** + * support dynamic partition,支持动态分区,即根据读取到的record的某一列或几列来确定该record应该存入哪个分区 + * 1. 如何确定根据哪些列:根据目的表哪几列是分区列,再根据对应的column来路由 + * 2. 何时创建upload session:由于是动态分区,因此无法在初始化时确定分区,也就无法在初始化时创建 upload session,只有再读取到具体record之后才能创建 + * 3. 缓存 upload sesseion:每当出现新的分区,则创建新的session,同时将该分区对应的session缓存下来,以备下次又有需要存入该分区的记录 + * 4. 
参数检查:不必要检查分区是否配置 + */ + public final static String SUPPORT_DYNAMIC_PARTITION = "supportDynamicPartition"; + + /** + * 动态分区下,用户如果将源表的某一个时间列映射到分区列,存在如下需求场景:源表的该时间列精确到秒,当时同步到odps表时,只想保留到天,并存入对应的天分区 + * 格式: + * "partitionColumnMapping":[ + * { + * "name":"pt", // 必填 + * "srcDateFormat":"YYYY-MM-dd hh:mm:ss", // 可选,可能源表中的时间列是 String 类型,此时必须通过 fromDateFormat 来指定源表中该列的日期格式 + * "dateFormat":"YYYY-MM-dd" // 必填 + * }, + * { + * ... + * }, + * + * ... + * ] + */ + public final static String PARTITION_COL_MAPPING = "partitionColumnMapping"; + public final static String PARTITION_COL_MAPPING_NAME = "name"; + public final static String PARTITION_COL_MAPPING_SRC_COL_DATEFORMAT = "srcDateFormat"; + public final static String PARTITION_COL_MAPPING_DATEFORMAT = "dateFormat"; + public final static String WRITE_TIMEOUT_IN_MS = "writeTimeoutInMs"; + + public static final String OVER_LENGTH_RULE = "overLengthRule"; + //截断后保留的最大长度 + public static final String MAX_FIELD_LENGTH = "maxFieldLength"; + //odps本身支持的最大长度 + public static final String MAX_ODPS_FIELD_LENGTH = "maxOdpsFieldLength"; + public static final String ENABLE_OVER_LENGTH_OUTPUT = "enableOverLengthOutput"; + public static final String MAX_OVER_LENGTH_OUTPUT_COUNT = "maxOverLengthOutputCount"; + + //动态分区写入模式下,内存使用率达到80%则flush时间间隔,单位分钟 + public static final String DYNAMIC_PARTITION_MEM_USAGE_FLUSH_INTERVAL_IN_MINUTE = "dynamicPartitionMemUsageFlushIntervalInMinute"; } diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/LocalStrings.properties b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/LocalStrings.properties new file mode 100644 index 0000000000..be7862af38 --- /dev/null +++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/LocalStrings.properties @@ -0,0 +1,34 @@ +errorcode.required_value=\u60a8\u7f3a\u5931\u4e86\u5fc5\u987b\u586b\u5199\u7684\u53c2\u6570\u503c. +errorcode.illegal_value=\u60a8\u914d\u7f6e\u7684\u503c\u4e0d\u5408\u6cd5. +errorcode.unsupported_column_type=DataX \u4e0d\u652f\u6301\u5199\u5165 ODPS \u7684\u76ee\u7684\u8868\u7684\u6b64\u79cd\u6570\u636e\u7c7b\u578b. +errorcode.table_truncate_error=\u6e05\u7a7a ODPS \u76ee\u7684\u8868\u65f6\u51fa\u9519. +errorcode.create_master_upload_fail=\u521b\u5efa ODPS \u7684 uploadSession \u5931\u8d25. +errorcode.get_slave_upload_fail=\u83b7\u53d6 ODPS \u7684 uploadSession \u5931\u8d25. +errorcode.get_id_key_fail=\u83b7\u53d6 accessId/accessKey \u5931\u8d25. +errorcode.get_partition_fail=\u83b7\u53d6 ODPS \u76ee\u7684\u8868\u7684\u6240\u6709\u5206\u533a\u5931\u8d25. +errorcode.add_partition_failed=\u6dfb\u52a0\u5206\u533a\u5230 ODPS \u76ee\u7684\u8868\u5931\u8d25. +errorcode.writer_record_fail=\u5199\u5165\u6570\u636e\u5230 ODPS \u76ee\u7684\u8868\u5931\u8d25. +errorcode.commit_block_fail=\u63d0\u4ea4 block \u5230 ODPS \u76ee\u7684\u8868\u5931\u8d25. +errorcode.run_sql_failed=\u6267\u884c ODPS Sql \u5931\u8d25. +errorcode.check_if_partitioned_table_failed=\u68c0\u67e5 ODPS \u76ee\u7684\u8868:%s \u662f\u5426\u4e3a\u5206\u533a\u8868\u5931\u8d25. +errorcode.run_sql_odps_exception=\u6267\u884c ODPS Sql \u65f6\u629b\u51fa\u5f02\u5e38, \u53ef\u91cd\u8bd5 +errorcode.account_type_error=\u8d26\u53f7\u7c7b\u578b\u9519\u8bef. +errorcode.partition_error=\u5206\u533a\u914d\u7f6e\u9519\u8bef. +errorcode.column_not_exist=\u7528\u6237\u914d\u7f6e\u7684\u5217\u4e0d\u5b58\u5728. +errorcode.odps_project_not_fount=\u60a8\u914d\u7f6e\u7684\u503c\u4e0d\u5408\u6cd5, odps project \u4e0d\u5b58\u5728. 
+errorcode.odps_table_not_fount=\u60a8\u914d\u7f6e\u7684\u503c\u4e0d\u5408\u6cd5, odps table \u4e0d\u5b58\u5728 +errorcode.odps_access_key_id_not_found=\u60a8\u914d\u7f6e\u7684\u503c\u4e0d\u5408\u6cd5, odps accessId,accessKey \u4e0d\u5b58\u5728 +errorcode.odps_access_key_invalid=\u60a8\u914d\u7f6e\u7684\u503c\u4e0d\u5408\u6cd5, odps accessKey \u9519\u8bef +errorcode.odps_access_deny=\u62d2\u7edd\u8bbf\u95ee, \u60a8\u4e0d\u5728 \u60a8\u914d\u7f6e\u7684 project \u4e2d + + +odpswriter.1=\u8d26\u53f7\u7c7b\u578b\u9519\u8bef\uff0c\u56e0\u4e3a\u4f60\u7684\u8d26\u53f7 [{0}] \u4e0d\u662fdatax\u76ee\u524d\u652f\u6301\u7684\u8d26\u53f7\u7c7b\u578b\uff0c\u76ee\u524d\u4ec5\u652f\u6301aliyun, taobao\u8d26\u53f7\uff0c\u8bf7\u4fee\u6539\u60a8\u7684\u8d26\u53f7\u4fe1\u606f. +odpswriter.2=\u8fd9\u662f\u4e00\u6761\u9700\u8981\u6ce8\u610f\u7684\u4fe1\u606f \u7531\u4e8e\u60a8\u7684\u4f5c\u4e1a\u914d\u7f6e\u4e86\u5199\u5165 ODPS \u7684\u76ee\u7684\u8868\u65f6emptyAsNull=true, \u6240\u4ee5 DataX\u5c06\u4f1a\u628a\u957f\u5ea6\u4e3a0\u7684\u7a7a\u5b57\u7b26\u4e32\u4f5c\u4e3a java \u7684 null \u5199\u5165 ODPS. +odpswriter.3=\u60a8\u914d\u7f6e\u7684blockSizeInMB:{0} \u53c2\u6570\u9519\u8bef. \u6b63\u786e\u7684\u914d\u7f6e\u662f[1-512]\u4e4b\u95f4\u7684\u6574\u6570. \u8bf7\u4fee\u6539\u6b64\u53c2\u6570\u7684\u503c\u4e3a\u8be5\u533a\u95f4\u5185\u7684\u6570\u503c +odpswriter.4=\u5199\u5165 ODPS \u76ee\u7684\u8868\u5931\u8d25. \u8bf7\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5904\u7406. + + +odpswriterproxy.1=\u4eb2\uff0c\u914d\u7f6e\u4e2d\u7684\u6e90\u8868\u7684\u5217\u4e2a\u6570\u548c\u76ee\u7684\u7aef\u8868\u4e0d\u4e00\u81f4\uff0c\u6e90\u8868\u4e2d\u60a8\u914d\u7f6e\u7684\u5217\u6570\u662f:{0} \u5927\u4e8e\u76ee\u7684\u7aef\u7684\u5217\u6570\u662f:{1} , \u8fd9\u6837\u4f1a\u5bfc\u81f4\u6e90\u5934\u6570\u636e\u65e0\u6cd5\u6b63\u786e\u5bfc\u5165\u76ee\u7684\u7aef, \u8bf7\u68c0\u67e5\u60a8\u7684\u914d\u7f6e\u5e76\u4fee\u6539. +odpswriterproxy.2=\u6e90\u8868\u7684\u5217\u4e2a\u6570\u5c0f\u4e8e\u76ee\u7684\u8868\u7684\u5217\u4e2a\u6570\uff0c\u6e90\u8868\u5217\u6570\u662f:{0} \u76ee\u7684\u8868\u5217\u6570\u662f:{1} , \u6570\u76ee\u4e0d\u5339\u914d. DataX \u4f1a\u628a\u76ee\u7684\u7aef\u591a\u51fa\u7684\u5217\u7684\u503c\u8bbe\u7f6e\u4e3a\u7a7a\u503c. \u5982\u679c\u8fd9\u4e2a\u9ed8\u8ba4\u914d\u7f6e\u4e0d\u7b26\u5408\u60a8\u7684\u671f\u671b\uff0c\u8bf7\u4fdd\u6301\u6e90\u8868\u548c\u76ee\u7684\u8868\u914d\u7f6e\u7684\u5217\u6570\u76ee\u4fdd\u6301\u4e00\u81f4. +odpswriterproxy.3=Odps decimal \u7c7b\u578b\u7684\u6574\u6570\u4f4d\u4e2a\u6570\u4e0d\u80fd\u8d85\u8fc735 +odpswriterproxy.4=\u5199\u5165 ODPS \u76ee\u7684\u8868\u65f6\u9047\u5230\u4e86\u810f\u6570\u636e: \u7b2c[{0}]\u4e2a\u5b57\u6bb5 {1} \u7684\u6570\u636e\u51fa\u73b0\u9519\u8bef\uff0c\u8bf7\u68c0\u67e5\u8be5\u6570\u636e\u5e76\u4f5c\u51fa\u4fee\u6539 \u6216\u8005\u60a8\u53ef\u4ee5\u589e\u5927\u9600\u503c\uff0c\u5ffd\u7565\u8fd9\u6761\u8bb0\u5f55. 
\ No newline at end of file diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/OdpsWriter.java b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/OdpsWriter.java index 4637ddab5b..a87be9b042 100755 --- a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/OdpsWriter.java +++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/OdpsWriter.java @@ -7,29 +7,50 @@ import com.alibaba.datax.common.spi.Writer; import com.alibaba.datax.common.statistics.PerfRecord; import com.alibaba.datax.common.util.Configuration; -import com.alibaba.datax.plugin.rdbms.writer.util.SelectCols; -import com.alibaba.datax.plugin.writer.odpswriter.util.IdAndKeyUtil; -import com.alibaba.datax.plugin.writer.odpswriter.util.OdpsUtil; +import com.alibaba.datax.common.util.ListUtil; +import com.alibaba.datax.common.util.MessageSource; +import com.alibaba.datax.plugin.writer.odpswriter.model.PartitionInfo; +import com.alibaba.datax.plugin.writer.odpswriter.model.UserDefinedFunction; +import com.alibaba.datax.plugin.writer.odpswriter.util.*; + +import com.alibaba.fastjson.JSONArray; +import com.alibaba.fastjson.JSONObject; import com.aliyun.odps.Odps; import com.aliyun.odps.Table; import com.aliyun.odps.TableSchema; import com.aliyun.odps.tunnel.TableTunnel; import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.tuple.MutablePair; +import org.apache.commons.lang3.tuple.Pair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.ArrayList; -import java.util.List; +import java.lang.management.ManagementFactory; +import java.lang.management.MemoryUsage; +import java.util.*; +import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; +import java.util.stream.Collectors; + +import static com.alibaba.datax.plugin.writer.odpswriter.util.CustomPartitionUtils.getListWithJson; /** * 已修改为:每个 task 各自创建自己的 upload,拥有自己的 uploadId,并在 task 中完成对对应 block 的提交。 */ public class OdpsWriter extends Writer { + public static HashSet partitionsDealedTruncate = new HashSet<>(); + static final Object lockForPartitionDealedTruncate = new Object(); + public static AtomicInteger partitionCnt = new AtomicInteger(0); + public static Long maxPartitionCnt; + public static AtomicLong globalTotalTruncatedRecordNumber = new AtomicLong(0); + public static Long maxOutputOverLengthRecord; + public static int maxOdpsFieldLength = Constant.DEFAULT_FIELD_MAX_SIZE; + public static class Job extends Writer.Job { private static final Logger LOG = LoggerFactory .getLogger(Job.class); + private static final MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(OdpsWriter.class); private static final boolean IS_DEBUG = LOG.isDebugEnabled(); @@ -46,6 +67,8 @@ public static class Job extends Writer.Job { private String uploadId; private TableTunnel.UploadSession masterUpload; private int blockSizeInMB; + private boolean consistencyCommit; + private boolean supportDynamicPartition; public void preCheck() { this.init(); @@ -53,66 +76,63 @@ public void preCheck() { } public void doPreCheck() { - //检查accessId,accessKey配置 - if (Constant.DEFAULT_ACCOUNT_TYPE - .equalsIgnoreCase(this.accountType)) { - this.originalConfig = IdAndKeyUtil.parseAccessIdAndKey(this.originalConfig); - String accessId = this.originalConfig.getString(Key.ACCESS_ID); - String accessKey = this.originalConfig.getString(Key.ACCESS_KEY); - if (IS_DEBUG) { - LOG.debug("accessId:[{}], accessKey:[{}] .", accessId, - accessKey); - } - 
LOG.info("accessId:[{}] .", accessId); - } - // init odps config - this.odps = OdpsUtil.initOdpsProject(this.originalConfig); - - //检查表等配置是否正确 - this.table = OdpsUtil.getTable(odps, this.projectName, this.tableName); //检查列信息是否正确 List allColumns = OdpsUtil.getAllColumns(this.table.getSchema()); LOG.info("allColumnList: {} .", StringUtils.join(allColumns, ',')); - dealColumn(this.originalConfig, allColumns); + List allPartColumns = OdpsUtil.getAllPartColumns(this.table.getSchema()); + LOG.info("allPartColumnsList: {} .", StringUtils.join(allPartColumns, ',')); + dealColumn(this.originalConfig, allColumns, allPartColumns); //检查分区信息是否正确 - OdpsUtil.preCheckPartition(this.odps, this.table, this.partition, this.truncate); + if (!supportDynamicPartition) { + OdpsUtil.preCheckPartition(this.odps, this.table, this.partition, this.truncate); + } } @Override public void init() { this.originalConfig = super.getPluginJobConf(); + OdpsUtil.checkNecessaryConfig(this.originalConfig); OdpsUtil.dealMaxRetryTime(this.originalConfig); + + this.projectName = this.originalConfig.getString(Key.PROJECT); this.tableName = this.originalConfig.getString(Key.TABLE); this.tunnelServer = this.originalConfig.getString(Key.TUNNEL_SERVER, null); + this.dealAK(); + + // init odps config + this.odps = OdpsUtil.initOdpsProject(this.originalConfig); + + //检查表等配置是否正确 + this.table = OdpsUtil.getTable(odps, this.projectName, this.tableName); + + // 处理动态分区参数,以及动态分区相关配置是否合法,如果没有配置动态分区,则根据列映射配置决定是否启用 + this.dealDynamicPartition(); + //check isCompress this.originalConfig.getBool(Key.IS_COMPRESS, false); - this.partition = OdpsUtil.formatPartition(this.originalConfig - .getString(Key.PARTITION, "")); - this.originalConfig.set(Key.PARTITION, this.partition); - - this.accountType = this.originalConfig.getString(Key.ACCOUNT_TYPE, - Constant.DEFAULT_ACCOUNT_TYPE); - if (!Constant.DEFAULT_ACCOUNT_TYPE.equalsIgnoreCase(this.accountType) && - !Constant.TAOBAO_ACCOUNT_TYPE.equalsIgnoreCase(this.accountType)) { - throw DataXException.asDataXException(OdpsWriterErrorCode.ACCOUNT_TYPE_ERROR, - String.format("账号类型错误,因为你的账号 [%s] 不是datax目前支持的账号类型,目前仅支持aliyun, taobao账号,请修改您的账号信息.", accountType)); + // 如果不是动态分区写入,则检查分区配置,动态分区写入不用检查 + if (!this.supportDynamicPartition) { + this.partition = OdpsUtil.formatPartition(this.originalConfig + .getString(Key.PARTITION, ""), true); + this.originalConfig.set(Key.PARTITION, this.partition); } - this.originalConfig.set(Key.ACCOUNT_TYPE, this.accountType); this.truncate = this.originalConfig.getBool(Key.TRUNCATE); + this.consistencyCommit = this.originalConfig.getBool(Key.CONSISTENCY_COMMIT, false); + boolean emptyAsNull = this.originalConfig.getBool(Key.EMPTY_AS_NULL, false); this.originalConfig.set(Key.EMPTY_AS_NULL, emptyAsNull); if (emptyAsNull) { - LOG.warn("这是一条需要注意的信息 由于您的作业配置了写入 ODPS 的目的表时emptyAsNull=true, 所以 DataX将会把长度为0的空字符串作为 java 的 null 写入 ODPS."); + LOG.warn(MESSAGE_SOURCE.message("odpswriter.2")); } this.blockSizeInMB = this.originalConfig.getInt(Key.BLOCK_SIZE_IN_MB, 64); @@ -121,6 +141,11 @@ public void init() { } this.originalConfig.set(Key.BLOCK_SIZE_IN_MB, this.blockSizeInMB); LOG.info("blockSizeInMB={}.", this.blockSizeInMB); + maxPartitionCnt = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax() / 1024 / 1024 / this.blockSizeInMB; + if (maxPartitionCnt < Constant.MAX_PARTITION_CNT) { + maxPartitionCnt = Constant.MAX_PARTITION_CNT; + } + LOG.info("maxPartitionCnt={}", maxPartitionCnt); if (IS_DEBUG) { LOG.debug("After master init(), job config now is: [\n{}\n] .", @@ -128,6 
+153,92 @@ public void init() {
         }
     }

+    private void dealAK() {
+        this.accountType = this.originalConfig.getString(Key.ACCOUNT_TYPE,
+                Constant.DEFAULT_ACCOUNT_TYPE);
+
+        if (!Constant.DEFAULT_ACCOUNT_TYPE.equalsIgnoreCase(this.accountType) &&
+                !Constant.TAOBAO_ACCOUNT_TYPE.equalsIgnoreCase(this.accountType)) {
+            throw DataXException.asDataXException(OdpsWriterErrorCode.ACCOUNT_TYPE_ERROR,
+                    MESSAGE_SOURCE.message("odpswriter.1", accountType));
+        }
+        this.originalConfig.set(Key.ACCOUNT_TYPE, this.accountType);
+
+        //检查accessId,accessKey配置
+        if (Constant.DEFAULT_ACCOUNT_TYPE
+                .equalsIgnoreCase(this.accountType)) {
+            this.originalConfig = IdAndKeyUtil.parseAccessIdAndKey(this.originalConfig);
+            String accessId = this.originalConfig.getString(Key.ACCESS_ID);
+            String accessKey = this.originalConfig.getString(Key.ACCESS_KEY);
+            if (IS_DEBUG) {
+                LOG.debug("accessId:[{}], accessKey:[{}] .", accessId,
+                        accessKey);
+            }
+            LOG.info("accessId:[{}] .", accessId);
+        }
+    }
+
+    private void dealDynamicPartition() {
+        /*
+         * 如果显式配置了 supportDynamicPartition,则以配置为准;
+         * 如果没有配置,则当表为分区表且列映射中包含所有分区列时启用
+         */
+        List<String> partitionCols = OdpsUtil.getAllPartColumns(this.table.getSchema());
+        List<String> configCols = this.originalConfig.getList(Key.COLUMN, String.class);
+        LOG.info("partition columns:{}", partitionCols);
+        LOG.info("config columns:{}", configCols);
+        LOG.info("support dynamic partition:{}", this.originalConfig.getBool(Key.SUPPORT_DYNAMIC_PARTITION));
+        LOG.info("partition format type:{}", this.originalConfig.getString("partitionFormatType"));
+        if (this.originalConfig.getKeys().contains(Key.SUPPORT_DYNAMIC_PARTITION)) {
+            this.supportDynamicPartition = this.originalConfig.getBool(Key.SUPPORT_DYNAMIC_PARTITION);
+            if (supportDynamicPartition) {
+                // 自定义分区
+                if ("custom".equalsIgnoreCase(originalConfig.getString("partitionFormatType"))) {
+                    List<PartitionInfo> partitions = getListWithJson(originalConfig, "customPartitionColumns", PartitionInfo.class);
+                    // 自定义分区配置必须与实际分区列完全一致
+                    if (!ListUtil.checkIfAllSameValue(partitions.stream().map(item -> item.getName()).collect(Collectors.toList()), partitionCols)) {
+                        throw DataXException.asDataXException("custom partition config is not the same as the real partition info.");
+                    }
+                } else {
+                    // 设置动态分区写入为真--检查是否所有分区列都配置在了列映射中,不满足则抛出异常
+                    if (!ListUtil.checkIfBInA(configCols, partitionCols, false)) {
+                        throw DataXException.asDataXException("You configured supportDynamicPartition as true, but didn't configure all partition columns");
+                    }
+                }
+            } else {
+                // 设置动态分区写入为假--确保列映射中没有配置分区列,配置则抛出异常
+                if (ListUtil.checkIfHasSameValue(configCols, partitionCols)) {
+                    throw DataXException.asDataXException("You should config all partition columns in column param, or you can specify a static partition param");
+                }
+            }
+        } else {
+            if (OdpsUtil.isPartitionedTable(table)) {
+                // 分区表,列映射配置了分区,同时检查所有分区列要么都被配置,要么都没有配置
+                if (ListUtil.checkIfBInA(configCols, partitionCols, false)) {
+                    // 所有的partition 列都配置在了column中
+                    this.supportDynamicPartition = true;
+                } else {
+                    // 并非所有partition列都配置在了column中,此时还需检查是否只配置了部分,如果只配置了部分,则报错
+                    if (ListUtil.checkIfHasSameValue(configCols, partitionCols)) {
+                        throw DataXException.asDataXException("You should config all partition columns in column param, or you can specify a static partition param");
+                    }
+                    // 列映射中没有配置任何分区列,则设置为 false
+                    this.supportDynamicPartition = false;
+                }
+            } else {
+                LOG.info("{} is not a partitioned table, set supportDynamicPartition as false", this.tableName);
+                this.supportDynamicPartition = false;
+            }
+        }
+
+        // 分布式下不支持动态分区写入,如果是分布式模式则报错
+        LOG.info("current run mode: {}", System.getProperty("datax.executeMode"));
+        if (supportDynamicPartition && StringUtils.equalsIgnoreCase("distribute", System.getProperty("datax.executeMode"))) {
+            LOG.error("Distribute mode doesn't support dynamic partition writing");
+            System.exit(1);
+        }
+    }
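+
+    /*
+     * How supportDynamicPartition is resolved, in short:
+     *   - explicitly true: every partition column must appear in the column
+     *     mapping (or a matching customPartitionColumns config must be given);
+     *   - explicitly false: the column mapping must not contain any partition column;
+     *   - not configured: inferred as true only when the table is partitioned and
+     *     the column mapping covers all of its partition columns.
+     */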

     @Override
     public void prepare() {
         String accessId = null;
@@ -147,10 +258,29 @@ public void prepare() {
         // init odps config
         this.odps = OdpsUtil.initOdpsProject(this.originalConfig);

+        List<String> preSqls = this.originalConfig.getList(Key.PRE_SQL, String.class);
+        if (preSqls != null && !preSqls.isEmpty()) {
+            LOG.info(String.format("Begin to execute preSql : %s. \n Attention: these preSqls must be idempotent!!!",
+                    JSONObject.toJSONString(preSqls)));
+            long beginTime = System.currentTimeMillis();
+            for (String preSql : preSqls) {
+                preSql = preSql.trim();
+                if (!preSql.endsWith(";")) {
+                    preSql = String.format("%s;", preSql);
+                }
+                OdpsUtil.runSqlTaskWithRetry(this.odps, preSql, "preSql");
+            }
+            long endTime = System.currentTimeMillis();
+            LOG.info(String.format("Execute odpswriter preSql successfully! cost time: %s ms.", (endTime - beginTime)));
+        }
+
         //检查表等配置是否正确
         this.table = OdpsUtil.getTable(odps, this.projectName, this.tableName);

-        OdpsUtil.dealTruncate(this.odps, this.table, this.partition, this.truncate);
+        // 如果是动态分区写入,因为无需配置分区信息,因此也无法在任务初始化时进行 truncate
+        if (!supportDynamicPartition) {
+            OdpsUtil.dealTruncate(this.odps, this.table, this.partition, this.truncate);
+        }
     }

     /**
@@ -168,20 +298,34 @@ public List<Configuration> split(int mandatoryNumber) {
             tableTunnel.setEndpoint(tunnelServer);
         }

-        this.masterUpload = OdpsUtil.createMasterTunnelUpload(
-                tableTunnel, this.projectName, this.tableName, this.partition);
-        this.uploadId = this.masterUpload.getId();
-        LOG.info("Master uploadId:[{}].", this.uploadId);
-
-        TableSchema schema = this.masterUpload.getSchema();
+        TableSchema schema = this.table.getSchema();
         List<String> allColumns = OdpsUtil.getAllColumns(schema);
         LOG.info("allColumnList: {} .", StringUtils.join(allColumns, ','));
-
-        dealColumn(this.originalConfig, allColumns);
+        List<String> allPartColumns = OdpsUtil.getAllPartColumns(this.table.getSchema());
+        LOG.info("allPartColumnsList: {} .", StringUtils.join(allPartColumns, ','));
+        dealColumn(this.originalConfig, allColumns, allPartColumns);
+        this.originalConfig.set("allColumns", allColumns);
+
+        // 动态分区模式下,无法事先根据分区创建好 session,
+        if (!supportDynamicPartition) {
+            this.masterUpload = OdpsUtil.createMasterTunnelUpload(
+                    tableTunnel, this.projectName, this.tableName, this.partition);
+            this.uploadId = this.masterUpload.getId();
+            LOG.info("Master uploadId:[{}].", this.uploadId);
+        }

         for (int i = 0; i < mandatoryNumber; i++) {
             Configuration tempConfig = this.originalConfig.clone();
+            // 非动态分区模式下,设置了统一提交,则需要克隆主 upload session,否则各个 task "各自为战"
+            if (!supportDynamicPartition && this.consistencyCommit) {
+                tempConfig.set(Key.UPLOAD_ID, uploadId);
+                tempConfig.set(Key.TASK_COUNT, mandatoryNumber);
+            }
+
+            // 设置task的supportDynamicPartition属性
+            tempConfig.set(Key.SUPPORT_DYNAMIC_PARTITION, this.supportDynamicPartition);
+
             configurations.add(tempConfig);
         }

@@ -189,31 +333,71 @@
             LOG.debug("After master split, the job config now is:[\n{}\n].",
                     this.originalConfig);
         }
-        this.masterUpload = null;
-
         return configurations;
     }
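+
+    /*
+     * consistencyCommit (non-dynamic mode), in short: split() hands the master
+     * session's uploadId and the task count to every task configuration, each
+     * task attaches to that shared session, and the job commits all blocks at
+     * once in post().
+     */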
-    private void dealColumn(Configuration originalConfig, List<String> allColumns) {
+    private void dealColumn(Configuration originalConfig, List<String> allColumns, List<String> allPartColumns) {
         //之前已经检查了userConfiguredColumns 一定不为空
-        SelectCols userConfiguredColumns = SelectCols.createSelectCols(allColumns);
-        if (userConfiguredColumns.isSelectAllCols()) {
-            userConfiguredColumns = SelectCols.createSelectCols(allColumns);
+        List<String> userConfiguredColumns = originalConfig.getList(Key.COLUMN, String.class);
+
+        // 动态分区下column不支持配置*
+        if (supportDynamicPartition && userConfiguredColumns.contains("*")) {
+            throw DataXException.asDataXException(OdpsWriterErrorCode.ILLEGAL_VALUE,
+                    "In dynamic partition write mode you can't specify column with *.");
+        }
+        if (1 == userConfiguredColumns.size() && "*".equals(userConfiguredColumns.get(0))) {
+            userConfiguredColumns = allColumns;
             originalConfig.set(Key.COLUMN, allColumns);
         } else {
             //检查列是否重复,大小写不敏感(所有写入,都是不允许写入端的列重复的)
-            userConfiguredColumns.makeSureNoValueDuplicate(false);
-
-            // 检查列是否存在,大小写不敏感
-            // ListUtil.makeSureBInA(allColumns, userConfiguredColumns, false);
+            ListUtil.makeSureNoValueDuplicate(userConfiguredColumns, false);
+
+            //检查列是否存在,大小写不敏感
+            if (supportDynamicPartition) {
+                List<String> allColumnList = new ArrayList<String>();
+                allColumnList.addAll(allColumns);
+                allColumnList.addAll(allPartColumns);
+                ListUtil.makeSureBInA(allColumnList, userConfiguredColumns, false);
+            } else {
+                ListUtil.makeSureBInA(allColumns, userConfiguredColumns, false);
+            }
         }

-        List<Integer> columnPositions = OdpsUtil.parsePosition(allColumns, userConfiguredColumns);
+        // 获取配置的所有数据列在目标表中所有数据列中的真正位置, -1 代表该列为分区列
+        List<Integer> columnPositions = OdpsUtil.parsePosition(allColumns, allPartColumns, userConfiguredColumns);
         originalConfig.set(Constant.COLUMN_POSITION, columnPositions);
     }

     @Override
     public void post() {
+
+        if (supportDynamicPartition) {
+            LOG.info("Total created partition cnt: {}", partitionCnt);
+        }
+
+        if (!supportDynamicPartition && this.consistencyCommit) {
+            LOG.info("Master which uploadId=[{}] begin to commit blocks.", this.uploadId);
+            OdpsUtil.masterComplete(this.masterUpload);
+            LOG.info("Master which uploadId=[{}] commit blocks ok.", this.uploadId);
+        }
+
+        List<String> postSqls = this.originalConfig.getList(Key.POST_SQL, String.class);
+        if (postSqls != null && !postSqls.isEmpty()) {
+            LOG.info(String.format("Begin to execute postSql : %s. \n Attention: these postSqls must be idempotent!!!",
+                    JSONObject.toJSONString(postSqls)));
+            long beginTime = System.currentTimeMillis();
+            for (String postSql : postSqls) {
+                postSql = postSql.trim();
+                if (!postSql.endsWith(";")) {
+                    postSql = String.format("%s;", postSql);
+                }
+                OdpsUtil.runSqlTaskWithRetry(this.odps, postSql, "postSql");
+            }
+            long endTime = System.currentTimeMillis();
+            LOG.info(String.format("Execute odpswriter postSql successfully! cost time: %s ms.", (endTime - beginTime)));
+        }
+
+        LOG.info("truncated record count: {}", globalTotalTruncatedRecordNumber.intValue());
     }

     @Override
@@ -225,6 +409,7 @@ public void destroy() {
     public static class Task extends Writer.Task {
         private static final Logger LOG = LoggerFactory
                 .getLogger(Task.class);
+        private static final MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(OdpsWriter.class);

         private static final boolean IS_DEBUG = LOG.isDebugEnabled();

@@ -245,18 +430,54 @@ public static class Task extends Writer.Task {
         private List<Long> blocks;
         private int blockSizeInMB;
+        private boolean consistencyCommit;
+
+        private int taskId;
+        private int taskCount;
+
         private Integer failoverState = 0; //0 未failover 1准备failover 2已提交,不能failover
         private byte[] lock = new byte[0];
+        private List<String> allColumns;
+
+        /*
+         * Partition 和 session 的对应关系,处理 record 时,路由到哪个分区,则通过对应的 proxy 上传
+         * Key 为 所有分区列的值按配置顺序拼接
+         */
+        private HashMap<String, Pair<OdpsWriterProxy, List<Long>>> partitionUploadSessionHashMap;
+        private Boolean supportDynamicPartition;
+        private TableTunnel tableTunnel;
+        private Table table;
+
+        /**
+         * 保存分区列格式转换规则,只支持源表是 Date 列,或者内容为日期的 String 列
+         */
+        private HashMap<String, DateTransForm> dateTransFormMap;
+
+        private Long writeTimeOutInMs;
+
+        private String overLengthRule;
+        private int maxFieldLength;
+        private Boolean enableOverLengthOutput;
+
+        /**
+         * 动态分区写入模式下,内存使用率达到80%时的flush时间间隔,单位分钟
+         * 默认1分钟做flush, 避免出现频繁的flush导致小文件问题
+         */
+        private int dynamicPartitionMemUsageFlushIntervalInMinute = 1;
+
+        private long latestFlushTime = 0;

         @Override
         public void init() {
             this.sliceConfig = super.getPluginJobConf();
+            // 默认十分钟超时时间
+            this.writeTimeOutInMs = this.sliceConfig.getLong(Key.WRITE_TIMEOUT_IN_MS, 10 * 60 * 1000);
             this.projectName = this.sliceConfig.getString(Key.PROJECT);
             this.tableName = this.sliceConfig.getString(Key.TABLE);
             this.tunnelServer = this.sliceConfig.getString(Key.TUNNEL_SERVER, null);

             this.partition = OdpsUtil.formatPartition(this.sliceConfig
-                    .getString(Key.PARTITION, ""));
+                    .getString(Key.PARTITION, ""), true);
             this.sliceConfig.set(Key.PARTITION, this.partition);

             this.emptyAsNull = this.sliceConfig.getBool(Key.EMPTY_AS_NULL);
@@ -264,9 +485,49 @@ public void init() {
             this.isCompress = this.sliceConfig.getBool(Key.IS_COMPRESS, false);
             if (this.blockSizeInMB < 1 || this.blockSizeInMB > 512) {
                 throw DataXException.asDataXException(OdpsWriterErrorCode.ILLEGAL_VALUE,
-                        String.format("您配置的blockSizeInMB:%s 参数错误. 正确的配置是[1-512]之间的整数. 请修改此参数的值为该区间内的数值", this.blockSizeInMB));
+                        MESSAGE_SOURCE.message("odpswriter.3", this.blockSizeInMB));
             }

+            this.taskId = this.getTaskId();
+            this.taskCount = this.sliceConfig.getInt(Key.TASK_COUNT, 0);
+
+            this.supportDynamicPartition = this.sliceConfig.getBool(Key.SUPPORT_DYNAMIC_PARTITION, false);
+
+            if (!supportDynamicPartition) {
+                this.consistencyCommit = this.sliceConfig.getBool(Key.CONSISTENCY_COMMIT, false);
+                if (consistencyCommit) {
+                    this.uploadId = this.sliceConfig.getString(Key.UPLOAD_ID);
+                    if (this.uploadId == null || this.uploadId.isEmpty()) {
+                        throw DataXException.asDataXException(OdpsWriterErrorCode.ILLEGAL_VALUE,
+                                String.format("consistencyCommit 开启时必须提供合法的 uploadId, 当前 uploadId:[%s].", this.uploadId));
+                    }
+                }
+            } else {
+                this.partitionUploadSessionHashMap = new HashMap<>();
+
+                // 根据 partitionColumnMapping 参数初始化 dateTransFormMap
+                String dateTransListStr = this.sliceConfig.getString(Key.PARTITION_COL_MAPPING);
+                if (StringUtils.isNotBlank(dateTransListStr)) {
+                    this.dateTransFormMap = new HashMap<>();
+                    JSONArray dateTransFormJsonArray = JSONArray.parseArray(dateTransListStr);
+                    for (Object dateTransFormJson : dateTransFormJsonArray) {
+                        DateTransForm dateTransForm = new DateTransForm(
+                                ((JSONObject) dateTransFormJson).getString(Key.PARTITION_COL_MAPPING_NAME),
+                                ((JSONObject) dateTransFormJson).getString(Key.PARTITION_COL_MAPPING_SRC_COL_DATEFORMAT),
+                                ((JSONObject) dateTransFormJson).getString(Key.PARTITION_COL_MAPPING_DATEFORMAT));
+                        this.dateTransFormMap.put(((JSONObject) dateTransFormJson).getString(Key.PARTITION_COL_MAPPING_NAME), dateTransForm);
+                    }
+                }
+            }
+
+            this.allColumns = this.sliceConfig.getList("allColumns", String.class);
+            this.overLengthRule = this.sliceConfig.getString(Key.OVER_LENGTH_RULE, "keepOn").toUpperCase();
+            this.maxFieldLength = this.sliceConfig.getInt(Key.MAX_FIELD_LENGTH, Constant.DEFAULT_FIELD_MAX_SIZE);
+            this.enableOverLengthOutput = this.sliceConfig.getBool(Key.ENABLE_OVER_LENGTH_OUTPUT, true);
+            maxOutputOverLengthRecord = this.sliceConfig.getLong(Key.MAX_OVER_LENGTH_OUTPUT_COUNT);
+            maxOdpsFieldLength = this.sliceConfig.getInt(Key.MAX_ODPS_FIELD_LENGTH, Constant.DEFAULT_FIELD_MAX_SIZE);
+
+            this.dynamicPartitionMemUsageFlushIntervalInMinute = this.sliceConfig.getInt(Key.DYNAMIC_PARTITION_MEM_USAGE_FLUSH_INTERVAL_IN_MINUTE,
+                    1);

             if (IS_DEBUG) {
                 LOG.debug("After init in task, sliceConfig now is:[\n{}\n].",
                         this.sliceConfig);
             }
         }

@@ -276,24 +537,32 @@
         @Override
         public void prepare() {
             this.odps = OdpsUtil.initOdpsProject(this.sliceConfig);
+            this.tableTunnel = new TableTunnel(this.odps);

-            TableTunnel tableTunnel = new TableTunnel(this.odps);
-            if (StringUtils.isNoneBlank(tunnelServer)) {
-                tableTunnel.setEndpoint(tunnelServer);
+            if (! 
supportDynamicPartition ) { + if (StringUtils.isNoneBlank(tunnelServer)) { + tableTunnel.setEndpoint(tunnelServer); + } + if (this.consistencyCommit) { + this.managerUpload = OdpsUtil.getSlaveTunnelUpload(this.tableTunnel, this.projectName, this.tableName, + this.partition, this.uploadId); + } else { + this.managerUpload = OdpsUtil.createMasterTunnelUpload(this.tableTunnel, this.projectName, + this.tableName, this.partition); + this.uploadId = this.managerUpload.getId(); + } + LOG.info("task uploadId:[{}].", this.uploadId); + this.workerUpload = OdpsUtil.getSlaveTunnelUpload(this.tableTunnel, this.projectName, + this.tableName, this.partition, uploadId); + } else { + this.table = OdpsUtil.getTable(this.odps, this.projectName, this.tableName); } - - this.managerUpload = OdpsUtil.createMasterTunnelUpload(tableTunnel, this.projectName, - this.tableName, this.partition); - this.uploadId = this.managerUpload.getId(); - LOG.info("task uploadId:[{}].", this.uploadId); - - this.workerUpload = OdpsUtil.getSlaveTunnelUpload(tableTunnel, this.projectName, - this.tableName, this.partition, uploadId); } @Override public void startWrite(RecordReceiver recordReceiver) { blocks = new ArrayList(); + List currentWriteBlocks; AtomicLong blockId = new AtomicLong(0); @@ -303,23 +572,180 @@ public void startWrite(RecordReceiver recordReceiver) { try { TaskPluginCollector taskPluginCollector = super.getTaskPluginCollector(); - OdpsWriterProxy proxy = new OdpsWriterProxy(this.workerUpload, this.blockSizeInMB, blockId, - columnPositions, taskPluginCollector, this.emptyAsNull, this.isCompress); + OdpsWriterProxy proxy; + // 可以配置化,保平安 + boolean checkWithGetSize = this.sliceConfig.getBool("checkWithGetSize", true); + if (!supportDynamicPartition) { + if (this.consistencyCommit) { + proxy = new OdpsWriterProxy(this.workerUpload, this.blockSizeInMB, blockId, taskId, taskCount, + columnPositions, taskPluginCollector, this.emptyAsNull, this.isCompress, checkWithGetSize, this.allColumns, this.writeTimeOutInMs, this.sliceConfig, this.overLengthRule, this.maxFieldLength, this.enableOverLengthOutput); + } else { + proxy = new OdpsWriterProxy(this.workerUpload, this.blockSizeInMB, blockId, + columnPositions, taskPluginCollector, this.emptyAsNull, this.isCompress, checkWithGetSize, this.allColumns, false, this.writeTimeOutInMs, this.sliceConfig, this.overLengthRule, this.maxFieldLength, this.enableOverLengthOutput); + } + currentWriteBlocks = blocks; + } else { + proxy = null; + currentWriteBlocks = null; + } com.alibaba.datax.common.element.Record dataXRecord = null; PerfRecord blockClose = new PerfRecord(super.getTaskGroupId(), super.getTaskId(), PerfRecord.PHASE.ODPS_BLOCK_CLOSE); blockClose.start(); long blockCloseUsedTime = 0; + boolean columnCntChecked = false; while ((dataXRecord = recordReceiver.getFromReader()) != null) { - blockCloseUsedTime += proxy.writeOneRecord(dataXRecord, blocks); + if (supportDynamicPartition) { + if (!columnCntChecked) { + // 动态分区模式下,读写两端的column数量必须相同 + if (dataXRecord.getColumnNumber() != this.sliceConfig.getList(Key.COLUMN).size()) { + throw DataXException.asDataXException(OdpsWriterErrorCode.ILLEGAL_VALUE, + "In dynamic partition write mode you must make sure reader and writer has same column count."); + } + columnCntChecked = true; + } + + // 如果是动态分区模式,则需要根据record内容来选择proxy + + String partitionFormatType = sliceConfig.getString("partitionFormatType"); + String partition; + if("custom".equalsIgnoreCase(partitionFormatType)){ + List partitions = 
getListWithJson(sliceConfig,"customPartitionColumns",PartitionInfo.class); + List<UserDefinedFunction> functions = getListWithJson(sliceConfig,"customPartitionFunctions",UserDefinedFunction.class); + + partition = CustomPartitionUtils.generate(dataXRecord,functions, + partitions,sliceConfig.getList(Key.COLUMN, String.class)); + }else{ + partition = OdpsUtil.getPartColValFromDataXRecord(dataXRecord, columnPositions, + this.sliceConfig.getList(Key.COLUMN, String.class), + this.dateTransFormMap); + partition = OdpsUtil.formatPartition(partition, false); + } + + Pair<OdpsWriterProxy, List<Long>> proxyBlocksPair = this.partitionUploadSessionHashMap.get(partition); + if (null != proxyBlocksPair) { + proxy = proxyBlocksPair.getLeft(); + currentWriteBlocks = proxyBlocksPair.getRight(); + if (null == proxy || null == currentWriteBlocks) { + throw DataXException.asDataXException("Get OdpsWriterProxy failed."); + } + } else { + /* + * First write into this target partition: handle truncate here. + * If truncate is true and this partition has not been truncated yet, truncate it under a mutex. + */ + Boolean truncate = this.sliceConfig.getBool(Key.TRUNCATE); + if (truncate && !partitionsDealedTruncate.contains(partition)) { + synchronized (lockForPartitionDealedTruncate) { + if (!partitionsDealedTruncate.contains(partition)) { + LOG.info("Start to truncate partition {}", partition); + OdpsUtil.dealTruncate(this.odps, this.table, partition, truncate); + partitionsDealedTruncate.add(partition); + } + /* + * Fail fast if too many partitions have been created. + */ + if (partitionCnt.addAndGet(1) > maxPartitionCnt) { + throw new DataXException("Created too many partitions. Please make sure you configured the right partition column"); + } + } + } + TableTunnel.UploadSession uploadSession = OdpsUtil.createMasterTunnelUpload(tableTunnel, this.projectName, + this.tableName, partition); + proxy = new OdpsWriterProxy(uploadSession, this.blockSizeInMB, blockId, + columnPositions, taskPluginCollector, this.emptyAsNull, this.isCompress, checkWithGetSize, this.allColumns, true, this.writeTimeOutInMs, this.sliceConfig, this.overLengthRule, this.maxFieldLength, this.enableOverLengthOutput); + currentWriteBlocks = new ArrayList<>(); + partitionUploadSessionHashMap.put(partition, new MutablePair<>(proxy, currentWriteBlocks)); + } + } + blockCloseUsedTime += proxy.writeOneRecord(dataXRecord, currentWriteBlocks); + + // In dynamic-partition mode, once heap usage reaches roughly 80%, flush partitions that have been idle for a while or that buffer too much data + if (supportDynamicPartition) { + boolean isNeedFlush = checkIfNeedFlush(); + if (isNeedFlush) { + LOG.info("====Memory usage exceeds 80%, starting to clear...==="); + int releaseCnt = 0; + int remainCnt = 0; + for (String onePartition : partitionUploadSessionHashMap.keySet()) { + OdpsWriterProxy oneIdleProxy = partitionUploadSessionHashMap.get(onePartition) == null ? 
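/*
 * A compact restatement of the per-partition routing built above, under the same layout
 * assumption (one upload proxy plus one block list per partition spec); computeIfAbsent is
 * used here only for clarity, and the Session type is a hypothetical stand-in for the
 * (OdpsWriterProxy, List<Long>) pair.
 *
 *     import java.util.ArrayList;
 *     import java.util.HashMap;
 *     import java.util.List;
 *     import java.util.Map;
 *     import java.util.function.Function;
 *
 *     public class PartitionRouterSketch {
 *         static class Session { final List<Long> blocks = new ArrayList<>(); }
 *         private final Map<String, Session> byPartition = new HashMap<>();
 *
 *         Session route(String partitionSpec, Function<String, Session> open) {
 *             // first record of a partition opens (and possibly truncates) it; later records reuse it
 *             return byPartition.computeIfAbsent(partitionSpec, open);
 *         }
 *     }
 */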
null : partitionUploadSessionHashMap.get(onePartition).getLeft(); + if (oneIdleProxy == null) { + continue; + } + + Long idleTime = System.currentTimeMillis() - oneIdleProxy.getLastActiveTime(); + if (idleTime > Constant.PROXY_MAX_IDLE_TIME_MS || oneIdleProxy.getCurrentTotalBytes() > (this.blockSizeInMB * 1024 * 1024 / 2)) { + // idle too long or buffering too much: flush its pending data first + LOG.info("partition {} has had no data for the last {} seconds, so release its uploadSession", onePartition, Constant.PROXY_MAX_IDLE_TIME_MS / 1000); + currentWriteBlocks = partitionUploadSessionHashMap.get(onePartition).getRight(); + blockCloseUsedTime += oneIdleProxy.writeRemainingRecord(currentWriteBlocks); + // then release the session + partitionUploadSessionHashMap.put(onePartition, null); + releaseCnt++; + } else { + remainCnt++; + } + } + + // not enough sessions released: do another pass, releasing arbitrarily until about half are gone + for (String onePartition : partitionUploadSessionHashMap.keySet()) { + if (releaseCnt >= remainCnt) { + break; + } + + if (partitionUploadSessionHashMap.get(onePartition) != null) { + OdpsWriterProxy oneIdleProxy = partitionUploadSessionHashMap.get(onePartition).getLeft(); + currentWriteBlocks = partitionUploadSessionHashMap.get(onePartition).getRight(); + blockCloseUsedTime += oneIdleProxy.writeRemainingRecord(currentWriteBlocks); + partitionUploadSessionHashMap.put(onePartition, null); + + releaseCnt++; + remainCnt--; + } + + } + + this.latestFlushTime = System.currentTimeMillis(); + LOG.info("===complete==="); + } + + } } - blockCloseUsedTime += proxy.writeRemainingRecord(blocks); - blockClose.end(blockCloseUsedTime); + // flush the remaining records of every partition + if (supportDynamicPartition) { + for (String partition : partitionUploadSessionHashMap.keySet()) { + if (partitionUploadSessionHashMap.get(partition) == null) { + continue; + } + proxy = partitionUploadSessionHashMap.get(partition).getLeft(); + currentWriteBlocks = partitionUploadSessionHashMap.get(partition).getRight(); + blockCloseUsedTime += proxy.writeRemainingRecord(currentWriteBlocks); + blockClose.end(blockCloseUsedTime); + } + } + else { + blockCloseUsedTime += proxy.writeRemainingRecord(blocks); + blockClose.end(blockCloseUsedTime); + } } catch (Exception e) { - throw DataXException.asDataXException(OdpsWriterErrorCode.WRITER_RECORD_FAIL, "写入 ODPS 目的表失败. 请联系 ODPS 管理员处理.", e); + throw DataXException.asDataXException(OdpsWriterErrorCode.WRITER_RECORD_FAIL, MESSAGE_SOURCE.message("odpswriter.4"), e); + } + } + + private boolean checkIfNeedFlush() { + + // check whether the flush interval has elapsed since the last flush + boolean isArriveFlushTime = (System.currentTimeMillis() - this.latestFlushTime) > this.dynamicPartitionMemUsageFlushIntervalInMinute * 60 * 1000; + if (!isArriveFlushTime) { + // interval not reached yet: return immediately + return false; } + + MemoryUsage memoryUsage = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage(); + boolean isMemUsageExceed = (double)memoryUsage.getUsed() / memoryUsage.getMax() > 0.8f; + return isMemUsageExceed; } @Override @@ -327,10 +753,30 @@ public void post() { synchronized (lock) { if (failoverState == 0) { failoverState = 2; - LOG.info("Slave which uploadId=[{}] begin to commit blocks:[\n{}\n].", this.uploadId, - StringUtils.join(blocks, ",")); - OdpsUtil.masterCompleteBlocks(this.managerUpload, blocks.toArray(new Long[0])); - LOG.info("Slave which uploadId=[{}] commit blocks ok.", this.uploadId); + if (! supportDynamicPartition) { + if (! 
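/*
 * The flush guard above combines a wall-clock interval with a heap-usage ratio. A
 * standalone sketch of the same check, using the same JMX calls (threshold and interval
 * are parameters here; the code assumes -Xmx is set so getMax() is defined):
 *
 *     import java.lang.management.ManagementFactory;
 *     import java.lang.management.MemoryUsage;
 *
 *     public class HeapPressureSketch {
 *         static boolean shouldFlush(long lastFlushMs, long intervalMs) {
 *             if (System.currentTimeMillis() - lastFlushMs <= intervalMs) {
 *                 return false; // rate-limit the release pass
 *             }
 *             MemoryUsage heap = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage();
 *             return (double) heap.getUsed() / heap.getMax() > 0.8; // more than 80% of max heap in use
 *         }
 *     }
 */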
this.consistencyCommit) { + LOG.info("Slave which uploadId=[{}] begin to commit blocks:[\n{}\n].", this.uploadId, + StringUtils.join(blocks, ",")); + OdpsUtil.masterCompleteBlocks(this.managerUpload, blocks.toArray(new Long[0])); + LOG.info("Slave which uploadId=[{}] commit blocks ok.", this.uploadId); + } else { + LOG.info("Slave which uploadId=[{}] begin to check blocks:[\n{}\n].", this.uploadId, + StringUtils.join(blocks, ",")); + OdpsUtil.checkBlockComplete(this.managerUpload, blocks.toArray(new Long[0])); + LOG.info("Slave which uploadId=[{}] check blocks ok.", this.uploadId); + } + } else { + for (String partition : partitionUploadSessionHashMap.keySet()) { + OdpsWriterProxy proxy = partitionUploadSessionHashMap.get(partition).getLeft(); + List<Long> blocks = partitionUploadSessionHashMap.get(partition).getRight(); + TableTunnel.UploadSession uploadSession = proxy.getSlaveUpload(); + LOG.info("Slave which uploadId=[{}] begin to commit blocks:[\n{}\n].", uploadSession.getId(), + StringUtils.join(blocks, ",")); + OdpsUtil.masterCompleteBlocks(uploadSession, blocks.toArray(new Long[0])); + LOG.info("Slave which uploadId=[{}] commit blocks ok.", uploadSession.getId()); + } + } + } else { throw DataXException.asDataXException(CommonErrorCode.SHUT_DOWN_TASK, ""); } diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/OdpsWriterErrorCode.java b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/OdpsWriterErrorCode.java index 02020c046e..35f2ed155b 100755 --- a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/OdpsWriterErrorCode.java +++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/OdpsWriterErrorCode.java @@ -1,42 +1,43 @@ package com.alibaba.datax.plugin.writer.odpswriter; import com.alibaba.datax.common.spi.ErrorCode; +import com.alibaba.datax.common.util.MessageSource; public enum OdpsWriterErrorCode implements ErrorCode { - REQUIRED_VALUE("OdpsWriter-00", "您缺失了必须填写的参数值."), - ILLEGAL_VALUE("OdpsWriter-01", "您配置的值不合法."), - UNSUPPORTED_COLUMN_TYPE("OdpsWriter-02", "DataX 不支持写入 ODPS 的目的表的此种数据类型."), + REQUIRED_VALUE("OdpsWriter-00", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.required_value")), + ILLEGAL_VALUE("OdpsWriter-01", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.illegal_value")), + UNSUPPORTED_COLUMN_TYPE("OdpsWriter-02", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.unsupported_column_type")), - TABLE_TRUNCATE_ERROR("OdpsWriter-03", "清空 ODPS 目的表时出错."), - CREATE_MASTER_UPLOAD_FAIL("OdpsWriter-04", "创建 ODPS 的 uploadSession 失败."), - GET_SLAVE_UPLOAD_FAIL("OdpsWriter-05", "获取 ODPS 的 uploadSession 失败."), - GET_ID_KEY_FAIL("OdpsWriter-06", "获取 accessId/accessKey 失败."), - GET_PARTITION_FAIL("OdpsWriter-07", "获取 ODPS 目的表的所有分区失败."), + TABLE_TRUNCATE_ERROR("OdpsWriter-03", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.table_truncate_error")), + CREATE_MASTER_UPLOAD_FAIL("OdpsWriter-04", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.create_master_upload_fail")), + GET_SLAVE_UPLOAD_FAIL("OdpsWriter-05", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.get_slave_upload_fail")), + GET_ID_KEY_FAIL("OdpsWriter-06", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.get_id_key_fail")), + GET_PARTITION_FAIL("OdpsWriter-07", 
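/*
 * Every hard-coded literal below moves into a per-class LocalStrings bundle resolved via
 * DataX's MessageSource wrapper. A sketch of the JDK mechanism underneath (bundle base
 * name and key are illustrative; MessageSource adds class-relative lookup on top):
 *
 *     import java.text.MessageFormat;
 *     import java.util.Locale;
 *     import java.util.ResourceBundle;
 *
 *     public class BundleSketch {
 *         public static void main(String[] args) {
 *             // resolves LocalStrings.properties (or a locale-specific variant) from the classpath
 *             ResourceBundle rb = ResourceBundle.getBundle("LocalStrings", Locale.getDefault());
 *             // patterns carry MessageFormat placeholders such as {0}
 *             System.out.println(MessageFormat.format(rb.getString("errorcode.illegal_value")));
 *         }
 *     }
 */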
MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.get_partition_fail")), - ADD_PARTITION_FAILED("OdpsWriter-08", "添加分区到 ODPS 目的表失败."), - WRITER_RECORD_FAIL("OdpsWriter-09", "写入数据到 ODPS 目的表失败."), + ADD_PARTITION_FAILED("OdpsWriter-08", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.add_partition_failed")), + WRITER_RECORD_FAIL("OdpsWriter-09", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.writer_record_fail")), - COMMIT_BLOCK_FAIL("OdpsWriter-10", "提交 block 到 ODPS 目的表失败."), - RUN_SQL_FAILED("OdpsWriter-11", "执行 ODPS Sql 失败."), - CHECK_IF_PARTITIONED_TABLE_FAILED("OdpsWriter-12", "检查 ODPS 目的表:%s 是否为分区表失败."), + COMMIT_BLOCK_FAIL("OdpsWriter-10", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.commit_block_fail")), + RUN_SQL_FAILED("OdpsWriter-11", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.run_sql_failed")), + CHECK_IF_PARTITIONED_TABLE_FAILED("OdpsWriter-12", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.check_if_partitioned_table_failed")), - RUN_SQL_ODPS_EXCEPTION("OdpsWriter-13", "执行 ODPS Sql 时抛出异常, 可重试"), + RUN_SQL_ODPS_EXCEPTION("OdpsWriter-13", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.run_sql_odps_exception")), - ACCOUNT_TYPE_ERROR("OdpsWriter-30", "账号类型错误."), + ACCOUNT_TYPE_ERROR("OdpsWriter-30", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.account_type_error")), - PARTITION_ERROR("OdpsWriter-31", "分区配置错误."), + PARTITION_ERROR("OdpsWriter-31", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.partition_error")), - COLUMN_NOT_EXIST("OdpsWriter-32", "用户配置的列不存在."), + COLUMN_NOT_EXIST("OdpsWriter-32", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.column_not_exist")), - ODPS_PROJECT_NOT_FOUNT("OdpsWriter-100", "您配置的值不合法, odps project 不存在."), //ODPS-0420111: Project not found + ODPS_PROJECT_NOT_FOUNT("OdpsWriter-100", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.odps_project_not_fount")), //ODPS-0420111: Project not found - ODPS_TABLE_NOT_FOUNT("OdpsWriter-101", "您配置的值不合法, odps table 不存在"), // ODPS-0130131:Table not found + ODPS_TABLE_NOT_FOUNT("OdpsWriter-101", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.odps_table_not_fount")), // ODPS-0130131:Table not found - ODPS_ACCESS_KEY_ID_NOT_FOUND("OdpsWriter-102", "您配置的值不合法, odps accessId,accessKey 不存在"), //ODPS-0410051:Invalid credentials - accessKeyId not found + ODPS_ACCESS_KEY_ID_NOT_FOUND("OdpsWriter-102", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.odps_access_key_id_not_found")), //ODPS-0410051:Invalid credentials - accessKeyId not found - ODPS_ACCESS_KEY_INVALID("OdpsWriter-103", "您配置的值不合法, odps accessKey 错误"), //ODPS-0410042:Invalid signature value - User signature dose not match; + ODPS_ACCESS_KEY_INVALID("OdpsWriter-103", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.odps_access_key_invalid")), //ODPS-0410042:Invalid signature value - User signature dose not match; - ODPS_ACCESS_DENY("OdpsWriter-104", "拒绝访问, 您不在 您配置的 project 中") //ODPS-0420095: Access Denied - Authorization Failed [4002], You doesn't exist in project + ODPS_ACCESS_DENY("OdpsWriter-104", MessageSource.loadResourceBundle(OdpsWriterErrorCode.class).message("errorcode.odps_access_deny")) //ODPS-0420095: Access 
Denied - Authorization Failed [4002], You doesn't exist in project ; diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/OdpsWriterProxy.java b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/OdpsWriterProxy.java index 9833616c5d..75527b229e 100755 --- a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/OdpsWriterProxy.java +++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/OdpsWriterProxy.java @@ -3,29 +3,57 @@ import com.alibaba.datax.common.element.StringColumn; import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.common.plugin.TaskPluginCollector; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.common.util.MessageSource; import com.alibaba.datax.plugin.writer.odpswriter.util.OdpsUtil; - -import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson.JSONArray; +import com.alibaba.fastjson.JSONObject; import com.aliyun.odps.OdpsType; import com.aliyun.odps.TableSchema; - +import com.aliyun.odps.data.ArrayRecord; +import com.aliyun.odps.data.Binary; +import com.aliyun.odps.data.Char; +import com.aliyun.odps.data.IntervalDayTime; +import com.aliyun.odps.data.IntervalYearMonth; import com.aliyun.odps.data.Record; - +import com.aliyun.odps.data.SimpleStruct; +import com.aliyun.odps.data.Struct; +import com.aliyun.odps.data.Varchar; import com.aliyun.odps.tunnel.TableTunnel; - import com.aliyun.odps.tunnel.TunnelException; import com.aliyun.odps.tunnel.io.ProtobufRecordPack; +import com.aliyun.odps.type.ArrayTypeInfo; +import com.aliyun.odps.type.CharTypeInfo; +import com.aliyun.odps.type.MapTypeInfo; +import com.aliyun.odps.type.StructTypeInfo; +import com.aliyun.odps.type.TypeInfo; +import com.aliyun.odps.type.VarcharTypeInfo; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; +import java.math.BigDecimal; +import java.sql.Timestamp; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.Date; + +import org.apache.commons.codec.binary.Base64; +import org.apache.commons.lang3.StringUtils; + +import java.util.HashMap; import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TimeZone; import java.util.concurrent.atomic.AtomicLong; public class OdpsWriterProxy { - private static final Logger LOG = LoggerFactory - .getLogger(OdpsWriterProxy.class); + private static final Logger LOG = LoggerFactory.getLogger(OdpsWriterProxy.class); + private static final MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(OdpsWriterProxy.class); private volatile boolean printColumnLess;// 是否打印对于源头字段数小于 ODPS 目的表的行的日志 @@ -39,18 +67,98 @@ public class OdpsWriterProxy { private AtomicLong blockId; private List columnPositions; - private List tableOriginalColumnTypeList; + private List tableOriginalColumnTypeList; private boolean emptyAsNull; private boolean isCompress; - public OdpsWriterProxy(TableTunnel.UploadSession slaveUpload, int blockSizeInMB, - AtomicLong blockId, List columnPositions, - TaskPluginCollector taskPluginCollector, boolean emptyAsNull, boolean isCompress) - throws IOException, TunnelException { + private int taskId; + private int taskCOUNT; + private boolean consistencyCommit = false; + private boolean checkWithGetSize = true; + private List allColumns; + private String overLengthRule; + private int maxFieldLength; + private Boolean enableOverLengthOutput; + + /** + * 
记录最近一次活动时间,动态分区写入模式下,超过一定时间不活动,则关闭这个proxy + */ + private Long lastActiveTime; + + /** + * 写block超时时间 + */ + private Long writeTimeoutInMs; + + private SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + + // 读取 jvm 默认时区 + private Calendar calendarForDate = null; + private boolean useDateWithCalendar = true; + + private Calendar initCalendar(Configuration config) { + // 理论上不会有其他选择,有配置化可以随时应急 + String calendarType = config.getString("calendarType", "iso8601"); + Boolean lenient = config.getBool("calendarLenient", true); + + // 默认jvm时区 + TimeZone timeZone = TimeZone.getDefault(); + String timeZoneStr = config.getString("calendarTimeZone"); + if (StringUtils.isNotBlank(timeZoneStr)) { + // 如果用户明确指定使用用户指定的 + timeZone = TimeZone.getTimeZone(timeZoneStr); + } + + Calendar calendarForDate = new Calendar.Builder().setCalendarType(calendarType).setLenient(lenient) + .setTimeZone(timeZone).build(); + return calendarForDate; + } + + public OdpsWriterProxy(TableTunnel.UploadSession slaveUpload, int blockSizeInMB, AtomicLong blockId, + List columnPositions, TaskPluginCollector taskPluginCollector, boolean emptyAsNull, + boolean isCompress, boolean checkWithGetSize, List allColumns, boolean initBufSizeZero, + Long writeTimeoutInMs, Configuration taskConfig, String overLengthRule, int maxFieldLength, + Boolean enableOverLengthOutput) throws IOException, TunnelException { this.slaveUpload = slaveUpload; this.schema = this.slaveUpload.getSchema(); - this.tableOriginalColumnTypeList = OdpsUtil - .getTableOriginalColumnTypeList(this.schema); + this.tableOriginalColumnTypeList = OdpsUtil.getTableOriginalColumnTypeList(this.schema); + + this.blockId = blockId; + this.columnPositions = columnPositions; + this.taskPluginCollector = taskPluginCollector; + this.emptyAsNull = emptyAsNull; + this.isCompress = isCompress; + + // 初始化与 buffer 区相关的值 + this.maxBufferSize = (blockSizeInMB - 4) * 1024 * 1024; + if (initBufSizeZero) { + // 动态分区下初始化为0,随着写入的reord变多慢慢增加 + this.protobufCapacity = 0; + } else { + this.protobufCapacity = blockSizeInMB * 1024 * 1024; + } + this.protobufRecordPack = new ProtobufRecordPack(this.schema, null, this.protobufCapacity); + this.printColumnLess = true; + this.checkWithGetSize = checkWithGetSize; + + this.allColumns = allColumns; + this.overLengthRule = overLengthRule; + this.maxFieldLength = maxFieldLength; + this.enableOverLengthOutput = enableOverLengthOutput; + + this.writeTimeoutInMs = writeTimeoutInMs; + + this.calendarForDate = this.initCalendar(taskConfig); + this.useDateWithCalendar = taskConfig.getBool("useDateWithCalendar", true); + } + + public OdpsWriterProxy(TableTunnel.UploadSession slaveUpload, int blockSizeInMB, AtomicLong blockId, int taskId, + int taskCount, List columnPositions, TaskPluginCollector taskPluginCollector, boolean emptyAsNull, + boolean isCompress, boolean checkWithGetSize, List allColumns, Long writeTimeoutInMs, Configuration taskConfig, + String overLengthRule, int maxFieldLength, Boolean enableOverLengthOutput) throws IOException, TunnelException { + this.slaveUpload = slaveUpload; + this.schema = this.slaveUpload.getSchema(); + this.tableOriginalColumnTypeList = OdpsUtil.getTableOriginalColumnTypeList(this.schema); this.blockId = blockId; this.columnPositions = columnPositions; @@ -64,11 +172,37 @@ public OdpsWriterProxy(TableTunnel.UploadSession slaveUpload, int blockSizeInMB, this.protobufRecordPack = new ProtobufRecordPack(this.schema, null, this.protobufCapacity); printColumnLess = true; + this.taskId = taskId; + 
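/*
 * The Calendar assembled in initCalendar() above decides how DATE column values are
 * interpreted. A minimal sketch of the same Builder usage; the defaults mirror the code
 * (iso8601, lenient, JVM default zone) and the zone override is an example value:
 *
 *     import java.util.Calendar;
 *     import java.util.TimeZone;
 *
 *     public class CalendarSketch {
 *         public static void main(String[] args) {
 *             Calendar cal = new Calendar.Builder()
 *                     .setCalendarType("iso8601")   // proleptic Gregorian, no Julian cutover surprises
 *                     .setLenient(true)
 *                     .setTimeZone(TimeZone.getTimeZone("Asia/Shanghai"))
 *                     .build();
 *             System.out.println(cal.getCalendarType()); // iso8601
 *         }
 *     }
 */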
this.taskCOUNT = taskCount; + this.consistencyCommit = true; + this.checkWithGetSize = checkWithGetSize; + this.allColumns = allColumns; + this.overLengthRule = overLengthRule; + this.maxFieldLength = maxFieldLength; + this.enableOverLengthOutput = enableOverLengthOutput; + + this.writeTimeoutInMs = writeTimeoutInMs; + + this.calendarForDate = this.initCalendar(taskConfig); + this.useDateWithCalendar = taskConfig.getBool("useDateWithCalendar", true); + } + + public long getCurrentBlockId() { + if (this.consistencyCommit) { + return this.taskId + this.taskCOUNT * (this.blockId.get()); + } else { + return this.blockId.get(); + } + } + + public TableTunnel.UploadSession getSlaveUpload() { + return this.slaveUpload; } - public long writeOneRecord( - com.alibaba.datax.common.element.Record dataXRecord, - List<Long> blocks) throws Exception { + public long writeOneRecord(com.alibaba.datax.common.element.Record dataXRecord, List<Long> blocks) + throws Exception { + + this.lastActiveTime = System.currentTimeMillis(); Record record = dataxRecordToOdpsRecord(dataXRecord); @@ -77,12 +211,11 @@ public long writeOneRecord( } protobufRecordPack.append(record); - if (protobufRecordPack.getTotalBytes() >= maxBufferSize) { + if (protobufRecordPack.getProtobufStream().size() >= maxBufferSize) { long startTimeInNs = System.nanoTime(); - OdpsUtil.slaveWriteOneBlock(this.slaveUpload, - protobufRecordPack, blockId.get(), this.isCompress); - LOG.info("write block {} ok.", blockId.get()); - blocks.add(blockId.get()); + OdpsUtil.slaveWriteOneBlock(this.slaveUpload, protobufRecordPack, getCurrentBlockId(), this.writeTimeoutInMs); + LOG.info("write block {} ok.", getCurrentBlockId()); + blocks.add(getCurrentBlockId()); protobufRecordPack.reset(); this.blockId.incrementAndGet(); return System.nanoTime() - startTimeInNs; @@ -92,13 +225,20 @@ public long writeOneRecord( public long writeRemainingRecord(List<Long> blocks) throws Exception { // complete protobuf stream, then write to http - if (protobufRecordPack.getTotalBytes() != 0) { + // Review note (Muming): getTotalBytes does not necessarily return the number of bytes actually written; by this logic getTotalBytesWritten should be used + // if (protobufRecordPack.getTotalBytes() != 0) { + boolean hasRemainingData = false; + if (this.checkWithGetSize) { + hasRemainingData = protobufRecordPack.getSize() != 0; + } else { + hasRemainingData = protobufRecordPack.getTotalBytes() != 0; + } + if (hasRemainingData) { long startTimeInNs = System.nanoTime(); - OdpsUtil.slaveWriteOneBlock(this.slaveUpload, - protobufRecordPack, blockId.get(), this.isCompress); - LOG.info("write block {} ok.", blockId.get()); + OdpsUtil.slaveWriteOneBlock(this.slaveUpload, protobufRecordPack, getCurrentBlockId(), this.writeTimeoutInMs); + LOG.info("write block {} ok.", getCurrentBlockId()); - blocks.add(blockId.get()); + blocks.add(getCurrentBlockId()); // reset the buffer for next block protobufRecordPack.reset(); return System.nanoTime() - startTimeInNs; @@ -106,85 +246,846 @@ public long writeRemainingRecord(List<Long> blocks) throws Exception { return 0; } - public Record dataxRecordToOdpsRecord( - com.alibaba.datax.common.element.Record dataXRecord) throws Exception { + public Record dataxRecordToOdpsRecord(com.alibaba.datax.common.element.Record dataXRecord) throws Exception { int sourceColumnCount = dataXRecord.getColumnNumber(); - Record odpsRecord = slaveUpload.newRecord(); + ArrayRecord odpsRecord = (ArrayRecord) slaveUpload.newRecord(); int userConfiguredColumnNumber = this.columnPositions.size(); + + if (sourceColumnCount > userConfiguredColumnNumber) { + throw 
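/*
 * getCurrentBlockId() above is the heart of consistencyCommit: tasks share one upload
 * session, so block ids are striped as taskId + taskCount * localBlockId to stay globally
 * unique without coordination. A tiny worked example (taskCount = 3):
 *
 *     public class BlockIdSketch {
 *         static long globalBlockId(int taskId, int taskCount, long localBlockId) {
 *             return taskId + (long) taskCount * localBlockId;
 *         }
 *         public static void main(String[] args) {
 *             // task 0 -> 0, 3, 6 ...; task 1 -> 1, 4, 7 ...; task 2 -> 2, 5, 8 ... (no collisions)
 *             for (int task = 0; task < 3; task++) {
 *                 System.out.println(task + ": " + globalBlockId(task, 3, 0) + ", "
 *                         + globalBlockId(task, 3, 1) + ", " + globalBlockId(task, 3, 2));
 *             }
 *         }
 *     }
 */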
DataXException - .asDataXException( - OdpsWriterErrorCode.ILLEGAL_VALUE, - String.format( - "亲,配置中的源表的列个数和目的端表不一致,源表中您配置的列数是:%s 大于目的端的列数是:%s , 这样会导致源头数据无法正确导入目的端, 请检查您的配置并修改.", - sourceColumnCount, - userConfiguredColumnNumber)); + throw DataXException.asDataXException(OdpsWriterErrorCode.ILLEGAL_VALUE, + MESSAGE_SOURCE.message("odpswriterproxy.1", sourceColumnCount, userConfiguredColumnNumber)); } else if (sourceColumnCount < userConfiguredColumnNumber) { if (printColumnLess) { - LOG.warn( - "源表的列个数小于目的表的列个数,源表列数是:{} 目的表列数是:{} , 数目不匹配. DataX 会把目的端多出的列的值设置为空值. 如果这个默认配置不符合您的期望,请保持源表和目的表配置的列数目保持一致.", - sourceColumnCount, userConfiguredColumnNumber); + LOG.warn(MESSAGE_SOURCE.message("odpswriterproxy.2", sourceColumnCount, userConfiguredColumnNumber)); } printColumnLess = false; } - int currentIndex; + int currentIndex = 0; int sourceIndex = 0; try { com.alibaba.datax.common.element.Column columnValue; for (; sourceIndex < sourceColumnCount; sourceIndex++) { + // 跳过分区列 + if (this.columnPositions.get(sourceIndex) == -1) { + continue; + } currentIndex = columnPositions.get(sourceIndex); - OdpsType type = this.tableOriginalColumnTypeList - .get(currentIndex); + TypeInfo typeInfo = this.tableOriginalColumnTypeList.get(currentIndex); + OdpsType type = typeInfo.getOdpsType(); + String typeName = typeInfo.getTypeName(); columnValue = dataXRecord.getColumn(sourceIndex); if (columnValue == null) { continue; } // for compatible dt lib, "" as null - if(this.emptyAsNull && columnValue instanceof StringColumn && "".equals(columnValue.asString())){ + if (this.emptyAsNull && columnValue instanceof StringColumn && "".equals(columnValue.asString())) { continue; } switch (type) { - case STRING: - odpsRecord.setString(currentIndex, columnValue.asString()); - break; - case BIGINT: - odpsRecord.setBigint(currentIndex, columnValue.asLong()); - break; - case BOOLEAN: - odpsRecord.setBoolean(currentIndex, columnValue.asBoolean()); - break; - case DATETIME: - odpsRecord.setDatetime(currentIndex, columnValue.asDate()); - break; - case DOUBLE: - odpsRecord.setDouble(currentIndex, columnValue.asDouble()); - break; - case DECIMAL: - odpsRecord.setDecimal(currentIndex, columnValue.asBigDecimal()); - String columnStr = columnValue.asString(); - if(columnStr != null && columnStr.indexOf(".") >= 36) { - throw new Exception("Odps decimal 类型的整数位个数不能超过35"); + case STRING: + String newValue = (String)OdpsUtil.processOverLengthData(columnValue.asString(), OdpsType.STRING, this.overLengthRule, this.maxFieldLength, this.enableOverLengthOutput); + odpsRecord.setString(currentIndex, newValue); + break; + case BIGINT: + odpsRecord.setBigint(currentIndex, columnValue.asLong()); + break; + case BOOLEAN: + odpsRecord.setBoolean(currentIndex, columnValue.asBoolean()); + break; + case DATETIME: + odpsRecord.setDatetime(currentIndex, columnValue.asDate()); +// Date datetimeData = columnValue.asDate(); +// if (null == datetimeData) { +// odpsRecord.setDatetime(currentIndex, null); +// } else { +// Timestamp dateDataForOdps = new Timestamp(datetimeData.getTime()); +// if (datetimeData instanceof java.sql.Timestamp) { +// dateDataForOdps.setNanos(((java.sql.Timestamp)datetimeData).getNanos()); +// } +// odpsRecord.setDatetime(currentIndex, dateDataForOdps); +// } + break; + case DATE: + Date dateData = columnValue.asDate(); + if (null == dateData) { + odpsRecord.setDatetime(currentIndex, null); + } else { + if (this.useDateWithCalendar) { + odpsRecord.setDate(currentIndex, new java.sql.Date(dateData.getTime()), this.calendarForDate); + } else 
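/*
 * The continue on columnPositions.get(sourceIndex) == -1 above encodes the mapping
 * contract for dynamic partitions: a source field either lands in a destination column or
 * only feeds the partition spec. A compact sketch of that contract (the positions list is
 * an assumption shaped like the code: destination index, or -1 to skip):
 *
 *     import java.util.Arrays;
 *     import java.util.List;
 *
 *     public class ColumnMappingSketch {
 *         public static void main(String[] args) {
 *             List<Integer> columnPositions = Arrays.asList(0, -1, 1); // src[1] only routes the partition
 *             for (int src = 0; src < columnPositions.size(); src++) {
 *                 int dest = columnPositions.get(src);
 *                 if (dest == -1) {
 *                     continue; // partition column: consumed by routing, never written as table data
 *                 }
 *                 System.out.println("src " + src + " -> dest " + dest);
 *             }
 *         }
 *     }
 */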
{ + odpsRecord.setDatetime(currentIndex, new java.sql.Date(dateData.getTime())); + } + } + break; + case DOUBLE: + odpsRecord.setDouble(currentIndex, columnValue.asDouble()); + break; + case FLOAT: + Double floatValue = columnValue.asDouble(); + if (null == floatValue) { + ((ArrayRecord) odpsRecord).setFloat(currentIndex, null); + } else { + ((ArrayRecord) odpsRecord).setFloat(currentIndex, floatValue.floatValue()); + } + break; + case DECIMAL: + odpsRecord.setDecimal(currentIndex, columnValue.asBigDecimal()); + String columnStr = columnValue.asString(); + if (columnStr != null && columnStr.indexOf(".") >= 36) { + throw new Exception(MESSAGE_SOURCE.message("odpswriterproxy.3")); + } + break; + case TINYINT: + Long tinyintValueStr = columnValue.asLong(); + if (null == tinyintValueStr) { + ((ArrayRecord) odpsRecord).setTinyint(currentIndex, null); + } else { + ((ArrayRecord) odpsRecord).setTinyint(currentIndex, + Byte.valueOf(String.valueOf(tinyintValueStr))); + } + break; + case SMALLINT: + Long smallIntValue = columnValue.asLong(); + if (null == smallIntValue) { + ((ArrayRecord) odpsRecord).setSmallint(currentIndex, null); + } else { + ((ArrayRecord) odpsRecord).setSmallint(currentIndex, smallIntValue.shortValue()); + } + break; + case INT: + Long intValue = columnValue.asLong(); + if (null == intValue) { + ((ArrayRecord) odpsRecord).setInt(currentIndex, null); + } else { + ((ArrayRecord) odpsRecord).setInt(currentIndex, intValue.intValue()); + } + break; + case VARCHAR: + // warn: columnValue.asString() 为 null 时 , odps sdk 有 BUG + // 不能用 Varchar 的默认构造函数,不然有 NPE + String varcharValueStr = columnValue.asString(); + Varchar varcharData = null; + if (varcharValueStr != null){ + varcharData = new Varchar(columnValue.asString()); + } + ((ArrayRecord) odpsRecord).setVarchar(currentIndex, varcharData); + break; + case CHAR: + String charValueStr = columnValue.asString(); + Char charData = null; + if (charValueStr != null ){ + charData = new Char(charValueStr); + } + ((ArrayRecord) odpsRecord).setChar(currentIndex, charData); + break; + case TIMESTAMP: + Date timestampData = columnValue.asDate(); + if (null == timestampData) { + ((ArrayRecord) odpsRecord).setTimestamp(currentIndex, null); + } else { + Timestamp timestampDataForOdps = new Timestamp(timestampData.getTime()); + if (timestampData instanceof java.sql.Timestamp) { + // 纳秒 + timestampDataForOdps.setNanos(((java.sql.Timestamp)timestampData).getNanos()); } - default: - break; + // warn优化:如果原来类型就是Timestamp,直接使用就少创建了一个对象 + ((ArrayRecord) odpsRecord).setTimestamp(currentIndex, timestampDataForOdps); + } + break; + case BINARY: + Binary newBinaryData = (Binary)OdpsUtil.processOverLengthData(new Binary(columnValue.asBytes()), OdpsType.BINARY, this.overLengthRule, this.maxFieldLength, this.enableOverLengthOutput); + ((ArrayRecord) odpsRecord).setBinary(currentIndex,columnValue.asBytes() == null ? 
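/*
 * The TIMESTAMP branch above copies nanos through explicitly because getTime() only
 * carries millisecond precision. A sketch of why that copy matters:
 *
 *     import java.sql.Timestamp;
 *     import java.util.Date;
 *
 *     public class NanosSketch {
 *         static Timestamp toOdpsTimestamp(Date src) {
 *             Timestamp out = new Timestamp(src.getTime()); // milliseconds only
 *             if (src instanceof Timestamp) {
 *                 out.setNanos(((Timestamp) src).getNanos()); // restore sub-millisecond digits
 *             }
 *             return out;
 *         }
 *         public static void main(String[] args) {
 *             Timestamp in = Timestamp.valueOf("2023-02-23 12:17:58.123456789");
 *             System.out.println(toOdpsTimestamp(in)); // keeps .123456789, not just .123
 *         }
 *     }
 */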
null : newBinaryData); + break; + case ARRAY: + JSONArray arrayJson = JSONObject.parseArray(columnValue.asString()); + ((ArrayRecord) odpsRecord).setArray(currentIndex, parseArray(arrayJson, (ArrayTypeInfo) typeInfo)); + break; + case MAP: + JSONObject mapJson = JSONObject.parseObject(columnValue.asString()); + ((ArrayRecord) odpsRecord).setMap(currentIndex, parseMap(mapJson, (MapTypeInfo) typeInfo)); + break; + case STRUCT: + JSONObject structJson = JSONObject.parseObject(columnValue.asString()); + ((ArrayRecord) odpsRecord).setStruct(currentIndex, + parseStruct(structJson, (StructTypeInfo) typeInfo)); + break; + default: + break; } } return odpsRecord; } catch (Exception e) { - String message = String.format( - "写入 ODPS 目的表时遇到了脏数据: 第[%s]个字段的数据出现错误,请检查该数据并作出修改 或者您可以增大阀值,忽略这条记录.", sourceIndex); - this.taskPluginCollector.collectDirtyRecord(dataXRecord, e, - message); + String dirtyColumnName = ""; + try { + dirtyColumnName = this.allColumns.get(currentIndex); + } catch (Exception ignoreEx) { + // ignore + } + String message = MESSAGE_SOURCE.message("odpswriterproxy.4", sourceIndex, dirtyColumnName); + this.taskPluginCollector.collectDirtyRecord(dataXRecord, e, message); + return null; + } + + } + + private List parseArray(JSONArray jsonArray, ArrayTypeInfo arrayTypeInfo) throws ParseException { + if (null == jsonArray) { + return null; + } + List result = new ArrayList(); + switch (arrayTypeInfo.getElementTypeInfo().getOdpsType()) { + case BIGINT: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(jsonArray.getLong(i)); + } + return result; + /** + * 双精度浮点 + */ + case DOUBLE: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(jsonArray.getDouble(i)); + } + return result; + /** + * 布尔型 + */ + case BOOLEAN: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(jsonArray.getBoolean(i)); + } + return result; + /** + * 日期类型 + */ + case DATETIME: + // TODO 精度 + for (int i = 0; i < jsonArray.size(); i++) { + result.add(dateFormat.parse(jsonArray.getString(i))); + } + return result; + /** + * 字符串类型 + */ + case STRING: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(jsonArray.getString(i)); + } + return result; + /** + * 精确小数类型 + */ + case DECIMAL: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(jsonArray.getBigDecimal(i)); + } + return result; + /** + * 1字节有符号整型 + */ + case TINYINT: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(jsonArray.getByte(i)); + } + return result; + /** + * 2字节有符号整型 + */ + case SMALLINT: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(jsonArray.getShort(i)); + } + return result; + /** + * 4字节有符号整型 + */ + case INT: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(jsonArray.getInteger(i)); + } + return result; + /** + * 单精度浮点 + */ + case FLOAT: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(jsonArray.getFloat(i)); + } + return result; + /** + * 固定长度字符串 + */ + case CHAR: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(new Char(jsonArray.getString(i), + ((CharTypeInfo) arrayTypeInfo.getElementTypeInfo()).getLength())); + } + return result; + /** + * 可变长度字符串 + */ + case VARCHAR: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(new Varchar(jsonArray.getString(i), + ((VarcharTypeInfo) arrayTypeInfo.getElementTypeInfo()).getLength())); + } + return result; + /** + * 时间类型 + */ + case DATE: + // TODO string -> date need timezone + // TODO how to use odps Record + for (int i = 0; i < jsonArray.size(); i++) { + 
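/*
 * parseArray above walks a fastjson JSONArray and emits elements of the declared ODPS
 * element type, recursing for nested ARRAY/MAP/STRUCT. A self-contained example of the
 * same per-case pattern for array<bigint>:
 *
 *     import com.alibaba.fastjson.JSON;
 *     import com.alibaba.fastjson.JSONArray;
 *     import java.util.ArrayList;
 *     import java.util.List;
 *
 *     public class ParseArraySketch {
 *         public static void main(String[] args) {
 *             JSONArray arr = JSON.parseArray("[1, 2, 3]");
 *             List<Long> out = new ArrayList<>();
 *             for (int i = 0; i < arr.size(); i++) {
 *                 out.add(arr.getLong(i)); // one typed accessor per ODPS element type
 *             }
 *             System.out.println(out); // [1, 2, 3]
 *         }
 *     }
 */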
result.add(java.sql.Date.valueOf(jsonArray.getString(i))); + } + return result; + /** + * 时间戳 + */ + case TIMESTAMP: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(Timestamp.valueOf(jsonArray.getString(i))); + } + return result; + /** + * 字节数组 + */ + case BINARY: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(Base64.decodeBase64(jsonArray.getString(i))); + } + return result; + /** + * 日期间隔 + */ + case INTERVAL_DAY_TIME: + for (int i = 0; i < jsonArray.size(); i++) { + JSONObject json = jsonArray.getJSONObject(i); + result.add(new IntervalDayTime(json.getInteger("totalSeconds"), json.getInteger("nanos"))); + } + return result; + /** + * 年份间隔 + */ + case INTERVAL_YEAR_MONTH: + for (int i = 0; i < jsonArray.size(); i++) { + JSONObject json = jsonArray.getJSONObject(i); + result.add(new IntervalYearMonth(json.getInteger("years"), json.getInteger("months"))); + } + return result; + /** + * 结构体 + */ + case STRUCT: + for (int i = 0; i < jsonArray.size(); i++) { + result.add( + parseStruct(jsonArray.getJSONObject(i), (StructTypeInfo) arrayTypeInfo.getElementTypeInfo())); + } + return result; + /** + * MAP类型 + */ + case MAP: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(parseMap(jsonArray.getJSONObject(i), (MapTypeInfo) arrayTypeInfo.getElementTypeInfo())); + } + return result; + /** + * ARRAY类型 + */ + case ARRAY: + for (int i = 0; i < jsonArray.size(); i++) { + result.add(parseArray(jsonArray.getJSONArray(i), (ArrayTypeInfo) arrayTypeInfo.getElementTypeInfo())); + } + return result; + + default: + return result; + } + } + private Map parseMap(JSONObject json, MapTypeInfo typeInfo) throws ParseException { + if (json == null) { return null; } + Map keyMap = new HashMap(); + Set keys = json.keySet(); + switch (typeInfo.getKeyTypeInfo().getOdpsType()) { + case BIGINT: + for (String item : keys) { + keyMap.put(Long.parseLong(item), item); + } + break; + /** + * 双精度浮点 + */ + case DOUBLE: + for (String item : keys) { + keyMap.put(Double.parseDouble(item), item); + } + break; + /** + * 布尔型 + */ + case BOOLEAN: + for (String item : keys) { + keyMap.put(Boolean.parseBoolean(item), item); + } + break; + /** + * 日期类型 + */ + case DATETIME: + // TODO 精度 + for (String item : keys) { + keyMap.put(dateFormat.parse(item), item); + } + break; + /** + * 字符串类型 + */ + case STRING: + for (String item : keys) { + keyMap.put(item, item); + } + break; + /** + * 精确小数类型 + */ + case DECIMAL: + for (String item : keys) { + keyMap.put(new BigDecimal(item), item); + } + break; + /** + * 1字节有符号整型 + */ + case TINYINT: + for (String item : keys) { + keyMap.put(Byte.parseByte(item), item); + } + break; + /** + * 2字节有符号整型 + */ + case SMALLINT: + for (String item : keys) { + keyMap.put(Short.parseShort(item), item); + } + break; + /** + * 4字节有符号整型 + */ + case INT: + for (String item : keys) { + keyMap.put(Integer.parseInt(item), item); + } + break; + /** + * 单精度浮点 + */ + case FLOAT: + for (String item : keys) { + keyMap.put(Float.parseFloat(item), item); + } + break; + /** + * 固定长度字符串 + */ + case CHAR: + for (String item : keys) { + keyMap.put(new Char(item, ((CharTypeInfo) typeInfo.getKeyTypeInfo()).getLength()), item); + } + break; + /** + * 可变长度字符串 + */ + case VARCHAR: + for (String item : keys) { + keyMap.put(new Varchar(item, ((VarcharTypeInfo) typeInfo.getKeyTypeInfo()).getLength()), item); + } + break; + /** + * 时间类型 + */ + case DATE: + // TODO string -> date need timezone + // TODO how to use odps Record + for (String item : keys) { + keyMap.put(java.sql.Date.valueOf(item), item); + } + 
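/*
 * parseMap above is two-pass because JSON object keys are always strings: pass one parses
 * each raw string key into the ODPS key type while remembering the original string, pass
 * two uses that string to look up and convert the value. A compact single-pass sketch for
 * the simple map<bigint,string> case:
 *
 *     import com.alibaba.fastjson.JSON;
 *     import com.alibaba.fastjson.JSONObject;
 *     import java.util.HashMap;
 *     import java.util.Map;
 *
 *     public class ParseMapSketch {
 *         public static void main(String[] args) {
 *             JSONObject json = JSON.parseObject("{\"1\":\"a\",\"2\":\"b\"}");
 *             Map<Long, String> out = new HashMap<>();
 *             for (String rawKey : json.keySet()) {
 *                 out.put(Long.parseLong(rawKey), json.getString(rawKey)); // typed key, typed value
 *             }
 *             System.out.println(out); // {1=a, 2=b}
 *         }
 *     }
 */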
break; + /** + * 时间戳 + */ + case TIMESTAMP: + for (String item : keys) { + keyMap.put(Timestamp.valueOf(item), item); + } + break; + /** + * 字节数组 + */ + case BINARY: + for (String item : keys) { + keyMap.put(new Binary(Base64.decodeBase64(item)), item); + } + break; + /** + * 日期间隔 + */ + case INTERVAL_DAY_TIME: + for (String item : keys) { + JSONObject jsonObject = JSONObject.parseObject(item); + keyMap.put(new IntervalDayTime(jsonObject.getInteger("totalSeconds"), jsonObject.getInteger("nanos")), + item); + } + break; + /** + * 年份间隔 + */ + case INTERVAL_YEAR_MONTH: + for (String item : keys) { + JSONObject jsonObject = JSONObject.parseObject(item); + keyMap.put(new IntervalYearMonth(jsonObject.getInteger("years"), jsonObject.getInteger("months")), + item); + } + break; + default: + break; + // TODO throw an exception + } + Map result = new HashMap(); + // process map value + switch (typeInfo.getValueTypeInfo().getOdpsType()) { + case BIGINT: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), json.getLong(item.getValue())); + } + return result; + /** + * 双精度浮点 + */ + case DOUBLE: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), json.getDouble(item.getValue())); + } + return result; + /** + * 布尔型 + */ + case BOOLEAN: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), json.getBoolean(item.getValue())); + } + return result; + /** + * 日期类型 + */ + case DATETIME: + // TODO 精度 + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), dateFormat.parse(json.getString(item.getValue()))); + } + return result; + /** + * 字符串类型 + */ + case STRING: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), json.getString(item.getValue())); + } + return result; + /** + * 精确小数类型 + */ + case DECIMAL: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), json.getBigDecimal(item.getValue())); + } + return result; + /** + * 1字节有符号整型 + */ + case TINYINT: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), json.getByte(item.getValue())); + } + return result; + /** + * 2字节有符号整型 + */ + case SMALLINT: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), json.getShort(item.getValue())); + } + return result; + /** + * 4字节有符号整型 + */ + case INT: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), json.getInteger(item.getValue())); + } + return result; + /** + * 单精度浮点 + */ + case FLOAT: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), json.getFloat(item.getValue())); + } + return result; + /** + * 固定长度字符串 + */ + case CHAR: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), new Char(json.getString(item.getValue()), + ((CharTypeInfo) typeInfo.getValueTypeInfo()).getLength())); + } + return result; + /** + * 可变长度字符串 + */ + case VARCHAR: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), new Varchar(json.getString(item.getValue()), + ((VarcharTypeInfo) typeInfo.getValueTypeInfo()).getLength())); + } + return result; + /** + * 时间类型 + */ + case DATE: + // TODO string -> date need timezone + // TODO how to use odps Record + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), java.sql.Date.valueOf(json.getString(item.getValue()))); + } + return result; + /** + * 时间戳 + */ + case TIMESTAMP: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), Timestamp.valueOf(json.getString(item.getValue()))); + } + return result; + /** + * 字节数组 + */ + case BINARY: 
+ for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), new Binary(Base64.decodeBase64(json.getString(item.getValue())))); + } + return result; + /** + * 日期间隔 + */ + case INTERVAL_DAY_TIME: + for (Map.Entry item : keyMap.entrySet()) { + JSONObject jsonObject = json.getJSONObject(item.getValue()); + result.put(item.getKey(), + new IntervalDayTime(jsonObject.getInteger("totalSeconds"), jsonObject.getInteger("nanos"))); + } + return result; + /** + * 年份间隔 + */ + case INTERVAL_YEAR_MONTH: + for (Map.Entry item : keyMap.entrySet()) { + JSONObject jsonObject = json.getJSONObject(item.getValue()); + result.put(item.getKey(), + new IntervalYearMonth(jsonObject.getInteger("years"), jsonObject.getInteger("months"))); + } + return result; + /** + * 结构体 + */ + case STRUCT: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), + parseStruct(json.getJSONObject(item.getValue()), (StructTypeInfo) typeInfo.getValueTypeInfo())); + } + return result; + /** + * MAP类型 + */ + case MAP: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), + parseMap(json.getJSONObject(item.getValue()), (MapTypeInfo) typeInfo.getValueTypeInfo())); + } + return result; + /** + * ARRAY类型 + */ + case ARRAY: + for (Map.Entry item : keyMap.entrySet()) { + result.put(item.getKey(), + parseArray(json.getJSONArray(item.getValue()), (ArrayTypeInfo) typeInfo.getValueTypeInfo())); + } + return result; + + default: + throw new IllegalArgumentException("decode record failed. column type: " + typeInfo.getTypeName()); + } + } + + public Struct parseStruct(JSONObject json, StructTypeInfo struct) throws ParseException { + if (null == json) { + return null; + } + List fieldNames = struct.getFieldNames(); + List typeInfos = struct.getFieldTypeInfos(); + List structValues = new ArrayList(); + for (int i = 0; i < fieldNames.size(); i++) { + String fieldName = fieldNames.get(i); + switch (typeInfos.get(i).getOdpsType()) { + case BIGINT: + structValues.add(json.getLong(fieldName)); + break; + /** + * 双精度浮点 + */ + case DOUBLE: + structValues.add(json.getDouble(fieldName)); + break; + /** + * 布尔型 + */ + case BOOLEAN: + structValues.add(json.getBoolean(fieldName)); + break; + /** + * 日期类型 + */ + case DATETIME: + // TODO 精度 + structValues.add(dateFormat.parse(json.getString(fieldName))); + break; + /** + * 字符串类型 + */ + case STRING: + structValues.add(json.getString(fieldName)); + break; + /** + * 精确小数类型 + */ + case DECIMAL: + structValues.add(json.getBigDecimal(fieldName)); + break; + /** + * 1字节有符号整型 + */ + case TINYINT: + structValues.add(json.getByte(fieldName)); + break; + /** + * 2字节有符号整型 + */ + case SMALLINT: + structValues.add(json.getShort(fieldName)); + break; + /** + * 4字节有符号整型 + */ + case INT: + structValues.add(json.getInteger(fieldName)); + break; + /** + * 单精度浮点 + */ + case FLOAT: + structValues.add(json.getFloat(fieldName)); + break; + /** + * 固定长度字符串 + */ + case CHAR: + structValues.add(new Char(json.getString(fieldName), ((CharTypeInfo) typeInfos.get(i)).getLength())); + break; + /** + * 可变长度字符串 + */ + case VARCHAR: + structValues + .add(new Varchar(json.getString(fieldName), ((VarcharTypeInfo) typeInfos.get(i)).getLength())); + break; + /** + * 时间类型 + */ + case DATE: + // TODO string -> date need timezone + // TODO how to use odps Record + structValues.add(java.sql.Date.valueOf(json.getString(fieldName))); + break; + /** + * 时间戳 + */ + case TIMESTAMP: + structValues.add(Timestamp.valueOf(json.getString(fieldName))); + break; + /** + * 字节数组 + */ + case BINARY: + 
structValues.add(Base64.decodeBase64(json.getString(fieldName))); + break; + /** + * Day-time interval + */ + case INTERVAL_DAY_TIME: + // read the interval from the field's own JSON object, as the MAP/ARRAY paths do + JSONObject dayTimeJson = json.getJSONObject(fieldName); + structValues.add(new IntervalDayTime(dayTimeJson.getInteger("totalSeconds"), dayTimeJson.getInteger("nanos"))); + break; + /** + * Year-month interval + */ + case INTERVAL_YEAR_MONTH: + JSONObject yearMonthJson = json.getJSONObject(fieldName); + structValues.add(new IntervalYearMonth(yearMonthJson.getInteger("years"), yearMonthJson.getInteger("months"))); + break; + /** + * Struct + */ + case STRUCT: + structValues.add(parseStruct(json.getJSONObject(fieldName), (StructTypeInfo) typeInfos.get(i))); + break; + /** + * MAP type + */ + case MAP: + structValues.add(parseMap(json.getJSONObject(fieldName), (MapTypeInfo) typeInfos.get(i))); + break; + /** + * ARRAY type + */ + case ARRAY: + structValues.add(parseArray(json.getJSONArray(fieldName), (ArrayTypeInfo) typeInfos.get(i))); + break; + } + } + + SimpleStruct simpleStruct = new SimpleStruct(struct, structValues); + return simpleStruct; + } + + public Long getLastActiveTime() { + return lastActiveTime; + } + + public void setLastActiveTime(Long lastActiveTime) { + this.lastActiveTime = lastActiveTime; + } + public Long getCurrentTotalBytes() throws IOException { + return this.protobufRecordPack.getTotalBytes(); } } diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/model/PartitionInfo.java b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/model/PartitionInfo.java new file mode 100644 index 0000000000..f293d8ccbe --- /dev/null +++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/model/PartitionInfo.java @@ -0,0 +1,87 @@ +package com.alibaba.datax.plugin.writer.odpswriter.model; + +public class PartitionInfo { + /** + * Field name + */ + private String name; + /** + * String + */ + private String type; + /** + * eventTime or function + * yyyy/MM/dd/HH/mm + * patterns can be combined freely + */ + private String valueMode; + private String value; + private String comment; + /** + * Only effective for custom partitions + * eventTime / constant + * function + */ + private String category; + /** + * When partitionType is function, + * functionExpression is the expression backing valueMode + */ + private String functionExpression; + + public String getFunctionExpression() { + return functionExpression; + } + + public void setFunctionExpression(String functionExpression) { + this.functionExpression = functionExpression; + } + + public String getCategory() { + return category; + } + + public void setCategory(String category) { + this.category = category; + } + + public String getComment() { + return comment; + } + + public void setComment(String comment) { + this.comment = comment; + } + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = type; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public String getValueMode() { + return valueMode; + } + + public void setValueMode(String valueMode) { + this.valueMode = valueMode; + } + + public String getValue() { + return value; + } + + public void setValue(String value) { + this.value = value; + } +} diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/model/UserDefinedFunction.java b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/model/UserDefinedFunction.java new file mode 100644 index 0000000000..55c8a1145c --- /dev/null +++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/model/UserDefinedFunction.java @@ -0,0 +1,44 @@ +package com.alibaba.datax.plugin.writer.odpswriter.model; + +import 
java.io.Serializable; +import java.util.List; + +public class UserDefinedFunction implements Serializable { + private static final long serialVersionUID = 1L; + private String name; + private String expression; + private String inputColumn; + private List variableRule; + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public String getExpression() { + return expression; + } + + public void setExpression(String expression) { + this.expression = expression; + } + + public String getInputColumn() { + return inputColumn; + } + + public void setInputColumn(String inputColumn) { + this.inputColumn = inputColumn; + } + + public List getVariableRule() { + return variableRule; + } + + public void setVariableRule(List variableRule) { + this.variableRule = variableRule; + } +} diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/model/UserDefinedFunctionRule.java b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/model/UserDefinedFunctionRule.java new file mode 100644 index 0000000000..5676eb4574 --- /dev/null +++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/model/UserDefinedFunctionRule.java @@ -0,0 +1,26 @@ +package com.alibaba.datax.plugin.writer.odpswriter.model; + +import java.io.Serializable; +import java.util.List; + +public class UserDefinedFunctionRule implements Serializable { + private static final long serialVersionUID = 1L; + private String type; + private List params; + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = type; + } + + public List getParams() { + return params; + } + + public void setParams(List params) { + this.params = params; + } +} diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/CustomPartitionUtils.java b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/CustomPartitionUtils.java new file mode 100644 index 0000000000..9fca6ee015 --- /dev/null +++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/CustomPartitionUtils.java @@ -0,0 +1,54 @@ +package com.alibaba.datax.plugin.writer.odpswriter.util; + +import com.alibaba.datax.common.element.Record; +import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.plugin.writer.odpswriter.model.PartitionInfo; +import com.alibaba.datax.plugin.writer.odpswriter.model.UserDefinedFunction; +import com.alibaba.fastjson.JSONObject; +import com.google.common.base.Joiner; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Serializable; +import java.util.List; +import java.util.stream.Collectors; + +public class CustomPartitionUtils implements Serializable { + private static final long serialVersionUID = 1L; + protected static Logger logger = LoggerFactory.getLogger(CustomPartitionUtils.class); + + public static List getListWithJson(Configuration config, String path, Class clazz) { + Object object = config.get(path, List.class); + if (null == object) { + return null; + } + + return JSONObject.parseArray(JSONObject.toJSONString(object), clazz); + } + + public static String generate(Record record, List functions, List partitions, + List allColumns) { + for (PartitionInfo partitionInfo : partitions) { + partitionInfo.setValue(buildPartitionValue(partitionInfo, functions, record, allColumns)); + } + List partitionList = partitions.stream() + .map(item -> String.format("%s='%s'", 
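/*
 * generate(...) above renders each PartitionInfo as name='value' and joins the pieces
 * with commas, which is the ODPS partition-spec syntax. A worked example of the output
 * shape (column names and values are illustrative):
 *
 *     import com.google.common.base.Joiner;
 *     import java.util.Arrays;
 *     import java.util.List;
 *
 *     public class PartitionSpecSketch {
 *         public static void main(String[] args) {
 *             List<String> parts = Arrays.asList(
 *                     String.format("%s='%s'", "ds", "20230223"),
 *                     String.format("%s='%s'", "hh", "12"));
 *             System.out.println(Joiner.on(",").join(parts)); // ds='20230223',hh='12'
 *         }
 *     }
 */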
item.getName(), item.getValue())) + .collect(Collectors.toList()); + return Joiner.on(",").join(partitionList); + } + + private static String buildPartitionValue(PartitionInfo partitionInfo, List functions, Record record, + List allColumns) { +// logger.info("try build partition value:partitionInfo:\n{},functions:\n{}", +// JSON.toJSONString(partitionInfo), JSON.toJSONString(functions)); + if (StringUtils.isBlank(partitionInfo.getCategory()) + || "eventTime".equalsIgnoreCase(partitionInfo.getCategory()) + || "constant".equalsIgnoreCase(partitionInfo.getCategory())) { + // 直接输出原样字符串 + return partitionInfo.getValueMode(); +// throw new RuntimeException("not support partition category:" + partitionInfo.getCategory()); + } + throw new RuntimeException("un support partition info type:" + partitionInfo.getCategory()); + } +} diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/IdAndKeyUtil.java b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/IdAndKeyUtil.java index 95e4b56b54..98c9afdd95 100755 --- a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/IdAndKeyUtil.java +++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/IdAndKeyUtil.java @@ -1,5 +1,5 @@ /** - * (C) 2010-2014 Alibaba Group Holding Limited. + * (C) 2010-2022 Alibaba Group Holding Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,9 +18,11 @@ import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.common.util.Configuration; -import com.alibaba.datax.plugin.writer.odpswriter.Constant; +import com.alibaba.datax.common.util.IdAndKeyRollingUtil; +import com.alibaba.datax.common.util.MessageSource; import com.alibaba.datax.plugin.writer.odpswriter.Key; import com.alibaba.datax.plugin.writer.odpswriter.OdpsWriterErrorCode; + import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -29,6 +31,7 @@ public class IdAndKeyUtil { private static Logger LOG = LoggerFactory.getLogger(IdAndKeyUtil.class); + private static final MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(IdAndKeyUtil.class); public static Configuration parseAccessIdAndKey(Configuration originalConfig) { String accessId = originalConfig.getString(Key.ACCESS_ID); @@ -50,36 +53,13 @@ public static Configuration parseAccessIdAndKey(Configuration originalConfig) { private static Configuration getAccessIdAndKeyFromEnv(Configuration originalConfig, Map envProp) { - String accessId = null; - String accessKey = null; - - String skynetAccessID = envProp.get(Constant.SKYNET_ACCESSID); - String skynetAccessKey = envProp.get(Constant.SKYNET_ACCESSKEY); - - if (StringUtils.isNotBlank(skynetAccessID) - || StringUtils.isNotBlank(skynetAccessKey)) { - /** - * 环境变量中,如果存在SKYNET_ACCESSID/SKYNET_ACCESSKEy(只要有其中一个变量,则认为一定是两个都存在的!), - * 则使用其值作为odps的accessId/accessKey(会解密) - */ - - LOG.info("Try to get accessId/accessKey from environment."); - accessId = skynetAccessID; - accessKey = DESCipher.decrypt(skynetAccessKey); - if (StringUtils.isNotBlank(accessKey)) { - originalConfig.set(Key.ACCESS_ID, accessId); - originalConfig.set(Key.ACCESS_KEY, accessKey); - LOG.info("Get accessId/accessKey from environment variables successfully."); - } else { - throw DataXException.asDataXException(OdpsWriterErrorCode.GET_ID_KEY_FAIL, - String.format("从环境变量中获取accessId/accessKey 失败, accessId=[%s]", accessId)); - } - } else { 
- // 无处获取(既没有配置在作业中,也没用在环境变量中) + // 如果获取到ak,在getAccessIdAndKeyFromEnv中已经设置到originalConfig了 + String accessKey = IdAndKeyRollingUtil.getAccessIdAndKeyFromEnv(originalConfig); + if (StringUtils.isBlank(accessKey)) { + // 无处获取(既没有配置在作业中,也没用在环境变量中) throw DataXException.asDataXException(OdpsWriterErrorCode.GET_ID_KEY_FAIL, - "无法获取到accessId/accessKey. 它们既不存在于您的配置中,也不存在于环境变量中."); - } - + MESSAGE_SOURCE.message("idandkeyutil.2")); + } return originalConfig; } } diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/LocalStrings.properties b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/LocalStrings.properties new file mode 100644 index 0000000000..289c70fa05 --- /dev/null +++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/LocalStrings.properties @@ -0,0 +1,39 @@ +descipher.1=\u957f\u5ea6\u4e0d\u662f\u5076\u6570 + +idandkeyutil.1=\u4ece\u73af\u5883\u53d8\u91cf\u4e2d\u83b7\u53d6accessId/accessKey \u5931\u8d25, accessId=[{0}] +idandkeyutil.2=\u65e0\u6cd5\u83b7\u53d6\u5230accessId/accessKey. \u5b83\u4eec\u65e2\u4e0d\u5b58\u5728\u4e8e\u60a8\u7684\u914d\u7f6e\u4e2d\uff0c\u4e5f\u4e0d\u5b58\u5728\u4e8e\u73af\u5883\u53d8\u91cf\u4e2d. + +odpsutil.1=\u60a8\u672a\u914d\u7f6e\u5199\u5165 ODPS \u76ee\u7684\u8868\u7684\u5217\u4fe1\u606f. \u6b63\u786e\u7684\u914d\u7f6e\u65b9\u5f0f\u662f\u7ed9datax\u7684 column \u9879\u914d\u7f6e\u4e0a\u60a8\u9700\u8981\u8bfb\u53d6\u7684\u5217\u540d\u79f0,\u7528\u82f1\u6587\u9017\u53f7\u5206\u9694 \u4f8b\u5982: \"column\": [\"id\",\"name\"]. +odpsutil.2=[truncate]\u662f\u5fc5\u586b\u914d\u7f6e\u9879, \u610f\u601d\u662f\u5199\u5165 ODPS \u76ee\u7684\u8868\u524d\u662f\u5426\u6e05\u7a7a\u8868/\u5206\u533a. \u8bf7\u60a8\u589e\u52a0 truncate \u7684\u914d\u7f6e\uff0c\u6839\u636e\u4e1a\u52a1\u9700\u8981\u9009\u62e9\u4e0atrue \u6216\u8005 false. +odpsutil.3=\u60a8\u6240\u914d\u7f6e\u7684maxRetryTime \u503c\u9519\u8bef. \u8be5\u503c\u4e0d\u80fd\u5c0f\u4e8e1, \u4e14\u4e0d\u80fd\u5927\u4e8e {0}. \u63a8\u8350\u7684\u914d\u7f6e\u65b9\u5f0f\u662f\u7ed9maxRetryTime \u914d\u7f6e1-11\u4e4b\u95f4\u7684\u67d0\u4e2a\u503c. \u8bf7\u60a8\u68c0\u67e5\u914d\u7f6e\u5e76\u505a\u51fa\u76f8\u5e94\u4fee\u6539. +odpsutil.4=\u4e0d\u652f\u6301\u7684\u8d26\u53f7\u7c7b\u578b:[{0}]. \u8d26\u53f7\u7c7b\u578b\u76ee\u524d\u4ec5\u652f\u6301aliyun, taobao. +odpsutil.5=\u83b7\u53d6 ODPS \u76ee\u7684\u8868:{0} \u7684\u6240\u6709\u5206\u533a\u5931\u8d25. \u8bf7\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5904\u7406. +odpsutil.6=\u68c0\u67e5 ODPS \u76ee\u7684\u8868:{0} \u662f\u5426\u4e3a\u5206\u533a\u8868\u5931\u8d25, \u8bf7\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5904\u7406. +odpsutil.7=\u6e05\u7a7a ODPS \u76ee\u7684\u8868:{0} \u5931\u8d25, \u8bf7\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5904\u7406. +odpsutil.8=\u6dfb\u52a0 ODPS \u76ee\u7684\u8868\u7684\u5206\u533a\u5931\u8d25. \u9519\u8bef\u53d1\u751f\u5728\u6dfb\u52a0 ODPS \u7684\u9879\u76ee:{0} \u7684\u8868:{1} \u7684\u5206\u533a:{2}. \u8bf7\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5904\u7406. +odpsutil.9=\u521b\u5efaTunnelUpload\u5931\u8d25. \u8bf7\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5904\u7406. +odpsutil.10=\u521b\u5efaTunnelUpload\u5931\u8d25. \u8bf7\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5904\u7406. +odpsutil.11=\u83b7\u53d6TunnelUpload\u5931\u8d25. \u8bf7\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5904\u7406. +odpsutil.12=\u83b7\u53d6TunnelUpload\u5931\u8d25. \u8bf7\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5904\u7406. +odpsutil.13=Drop ODPS \u76ee\u7684\u8868\u5206\u533a\u5931\u8d25. 
\u9519\u8bef\u53d1\u751f\u5728\u9879\u76ee:{0} \u7684\u8868:{1} \u7684\u5206\u533a:{2} .\u8bf7\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5904\u7406. +odpsutil.14=ODPS \u76ee\u7684\u8868\u81ea\u8eab\u7684 partition:{0} \u683c\u5f0f\u4e0d\u5bf9. \u6b63\u786e\u7684\u683c\u5f0f\u5f62\u5982: pt=1,ds=hangzhou +odpsutil.15=ODPS \u76ee\u7684\u8868\u5728\u8fd0\u884c ODPS SQL\u5931\u8d25, \u8fd4\u56de\u503c\u4e3a:{0}. \u8bf7\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5904\u7406. SQL \u5185\u5bb9\u4e3a:[\n{1}\n]. +odpsutil.16=ODPS \u76ee\u7684\u8868\u5728\u8fd0\u884c ODPS SQL \u65f6\u629b\u51fa\u5f02\u5e38, \u8bf7\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5904\u7406. SQL \u5185\u5bb9\u4e3a:[\n{0}\n]. +odpsutil.17=ODPS \u76ee\u7684\u8868\u5728\u63d0\u4ea4 block:[\n{0}\n] \u65f6\u5931\u8d25, uploadId=[{1}]. \u8bf7\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5904\u7406. +odpsutil.18=ODPS \u76ee\u7684\u8868\u5199 block:{0} \u5931\u8d25\uff0c uploadId=[{1}]. \u8bf7\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5904\u7406. +odpsutil.19=ODPS \u76ee\u7684\u8868\u7684\u5217\u914d\u7f6e\u9519\u8bef. \u7531\u4e8e\u60a8\u6240\u914d\u7f6e\u7684\u5217:{0} \u4e0d\u5b58\u5728\uff0c\u4f1a\u5bfc\u81f4datax\u65e0\u6cd5\u6b63\u5e38\u63d2\u5165\u6570\u636e\uff0c\u8bf7\u68c0\u67e5\u8be5\u5217\u662f\u5426\u5b58\u5728\uff0c\u5982\u679c\u5b58\u5728\u8bf7\u68c0\u67e5\u5927\u5c0f\u5199\u7b49\u914d\u7f6e. +odpsutil.20=DataX \u5199\u5165 ODPS \u8868\u4e0d\u652f\u6301\u8be5\u5b57\u6bb5\u7c7b\u578b:[{0}]. \u76ee\u524d\u652f\u6301\u62bd\u53d6\u7684\u5b57\u6bb5\u7c7b\u578b\u6709\uff1abigint, boolean, datetime, double, string. \u60a8\u53ef\u4ee5\u9009\u62e9\u4e0d\u62bd\u53d6 DataX \u4e0d\u652f\u6301\u7684\u5b57\u6bb5\u6216\u8005\u8054\u7cfb ODPS \u7ba1\u7406\u5458\u5bfb\u6c42\u5e2e\u52a9. +odpsutil.21=\u60a8\u6ca1\u6709\u914d\u7f6e\u5206\u533a\u4fe1\u606f\uff0c\u56e0\u4e3a\u4f60\u914d\u7f6e\u7684\u8868\u662f\u5206\u533a\u8868:{0} \u5982\u679c\u9700\u8981\u8fdb\u884c truncate \u64cd\u4f5c\uff0c\u5fc5\u987b\u6307\u5b9a\u9700\u8981\u6e05\u7a7a\u7684\u5177\u4f53\u5206\u533a. \u8bf7\u4fee\u6539\u5206\u533a\u914d\u7f6e\uff0c\u683c\u5f0f\u5f62\u5982 pt=$'{bizdate'} . +odpsutil.22=\u5206\u533a\u4fe1\u606f\u914d\u7f6e\u9519\u8bef\uff0c\u4f60\u7684ODPS\u8868\u662f\u975e\u5206\u533a\u8868:{0} \u8fdb\u884c truncate \u64cd\u4f5c\u65f6\u4e0d\u9700\u8981\u6307\u5b9a\u5177\u4f53\u5206\u533a\u503c. \u8bf7\u68c0\u67e5\u60a8\u7684\u5206\u533a\u914d\u7f6e\uff0c\u5220\u9664\u8be5\u914d\u7f6e\u9879\u7684\u503c. +odpsutil.23=\u60a8\u7684\u76ee\u7684\u8868\u662f\u5206\u533a\u8868\uff0c\u5199\u5165\u5206\u533a\u8868:{0} \u65f6\u5fc5\u987b\u6307\u5b9a\u5177\u4f53\u5206\u533a\u503c. \u8bf7\u4fee\u6539\u60a8\u7684\u5206\u533a\u914d\u7f6e\u4fe1\u606f\uff0c\u683c\u5f0f\u5f62\u5982 \u683c\u5f0f\u5f62\u5982 pt=$'{bizdate'}. +odpsutil.24=\u60a8\u7684\u76ee\u7684\u8868\u662f\u975e\u5206\u533a\u8868\uff0c\u5199\u5165\u975e\u5206\u533a\u8868:{0} \u65f6\u4e0d\u9700\u8981\u6307\u5b9a\u5177\u4f53\u5206\u533a\u503c. \u8bf7\u5220\u9664\u5206\u533a\u914d\u7f6e\u4fe1\u606f +odpsutil.25=\u60a8\u6ca1\u6709\u914d\u7f6e\u5206\u533a\u4fe1\u606f\uff0c\u56e0\u4e3a\u4f60\u914d\u7f6e\u7684\u8868\u662f\u5206\u533a\u8868:{0} \u5982\u679c\u9700\u8981\u8fdb\u884c truncate \u64cd\u4f5c\uff0c\u5fc5\u987b\u6307\u5b9a\u9700\u8981\u6e05\u7a7a\u7684\u5177\u4f53\u5206\u533a. \u8bf7\u4fee\u6539\u5206\u533a\u914d\u7f6e\uff0c\u683c\u5f0f\u5f62\u5982 pt=$'{bizdate'} . 
+odpsutil.26=\u5206\u533a\u4fe1\u606f\u914d\u7f6e\u9519\u8bef\uff0c\u4f60\u7684ODPS\u8868\u662f\u975e\u5206\u533a\u8868:{0} \u8fdb\u884c truncate \u64cd\u4f5c\u65f6\u4e0d\u9700\u8981\u6307\u5b9a\u5177\u4f53\u5206\u533a\u503c. \u8bf7\u68c0\u67e5\u60a8\u7684\u5206\u533a\u914d\u7f6e\uff0c\u5220\u9664\u8be5\u914d\u7f6e\u9879\u7684\u503c. +odpsutil.27=\u60a8\u7684\u76ee\u7684\u8868\u662f\u5206\u533a\u8868\uff0c\u5199\u5165\u5206\u533a\u8868:{0} \u65f6\u5fc5\u987b\u6307\u5b9a\u5177\u4f53\u5206\u533a\u503c. \u8bf7\u4fee\u6539\u60a8\u7684\u5206\u533a\u914d\u7f6e\u4fe1\u606f\uff0c\u683c\u5f0f\u5f62\u5982 \u683c\u5f0f\u5f62\u5982 pt=$'{bizdate'}. +odpsutil.28=\u60a8\u7684\u76ee\u7684\u8868\u662f\u975e\u5206\u533a\u8868\uff0c\u5199\u5165\u975e\u5206\u533a\u8868:{0} \u65f6\u4e0d\u9700\u8981\u6307\u5b9a\u5177\u4f53\u5206\u533a\u503c. \u8bf7\u5220\u9664\u5206\u533a\u914d\u7f6e\u4fe1\u606f +odpsutil.29=\u52a0\u8f7d ODPS \u76ee\u7684\u8868:{0} \u5931\u8d25. \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684 ODPS \u76ee\u7684\u8868\u7684 [project] \u662f\u5426\u6b63\u786e. +odpsutil.30=\u52a0\u8f7d ODPS \u76ee\u7684\u8868:{0} \u5931\u8d25. \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684 ODPS \u76ee\u7684\u8868\u7684 [table] \u662f\u5426\u6b63\u786e. +odpsutil.31=\u52a0\u8f7d ODPS \u76ee\u7684\u8868:{0} \u5931\u8d25. \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684 ODPS \u76ee\u7684\u8868\u7684 [accessId] [accessKey]\u662f\u5426\u6b63\u786e. +odpsutil.32=\u52a0\u8f7d ODPS \u76ee\u7684\u8868:{0} \u5931\u8d25. \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684 ODPS \u76ee\u7684\u8868\u7684 [accessKey] \u662f\u5426\u6b63\u786e. +odpsutil.33=\u52a0\u8f7d ODPS \u76ee\u7684\u8868:{0} \u5931\u8d25. \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684 ODPS \u76ee\u7684\u8868\u7684 [accessId] [accessKey] [project]\u662f\u5426\u5339\u914d. +odpsutil.34=\u52a0\u8f7d ODPS \u76ee\u7684\u8868:{0} \u5931\u8d25. \u8bf7\u68c0\u67e5\u60a8\u914d\u7f6e\u7684 ODPS \u76ee\u7684\u8868\u7684 project,table,accessId,accessKey,odpsServer\u7b49\u503c. \ No newline at end of file diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/OdpsExceptionMsg.java b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/OdpsExceptionMsg.java index d613eefda9..ae6f275c19 100644 --- a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/OdpsExceptionMsg.java +++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/OdpsExceptionMsg.java @@ -1,8 +1,5 @@ package com.alibaba.datax.plugin.writer.odpswriter.util; -/** - * Created by hongjiao.hj on 2015/6/9. 
- */ public class OdpsExceptionMsg { public static final String ODPS_PROJECT_NOT_FOUNT = "ODPS-0420111: Project not found"; diff --git a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/OdpsUtil.java b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/OdpsUtil.java index d39c015cfa..2e501cf079 100755 --- a/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/OdpsUtil.java +++ b/odpswriter/src/main/java/com/alibaba/datax/plugin/writer/odpswriter/util/OdpsUtil.java @@ -2,21 +2,22 @@ import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.common.util.Configuration; +import com.alibaba.datax.common.util.MessageSource; import com.alibaba.datax.common.util.RetryUtil; -import com.alibaba.datax.plugin.rdbms.writer.util.SelectCols; -import com.alibaba.datax.plugin.writer.odpswriter.Constant; -import com.alibaba.datax.plugin.writer.odpswriter.Key; - -import com.alibaba.datax.plugin.writer.odpswriter.OdpsWriterErrorCode; +import com.alibaba.datax.plugin.writer.odpswriter.*; import com.aliyun.odps.*; +import com.aliyun.odps.Column; import com.aliyun.odps.account.Account; import com.aliyun.odps.account.AliyunAccount; +import com.aliyun.odps.data.ResultSet; +import com.aliyun.odps.data.Binary; import com.aliyun.odps.task.SQLTask; import com.aliyun.odps.tunnel.TableTunnel; - import com.aliyun.odps.tunnel.io.ProtobufRecordPack; -import com.aliyun.odps.tunnel.io.TunnelRecordWriter; +import com.aliyun.odps.type.TypeInfo; + import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.time.DateFormatUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -25,6 +26,7 @@ public class OdpsUtil { private static final Logger LOG = LoggerFactory.getLogger(OdpsUtil.class); + private static final MessageSource MESSAGE_SOURCE = MessageSource.loadResourceBundle(OdpsUtil.class); public static int MAX_RETRY_TIME = 10; @@ -39,15 +41,14 @@ public static void checkNecessaryConfig(Configuration originalConfig) { if (null == originalConfig.getList(Key.COLUMN) || originalConfig.getList(Key.COLUMN, String.class).isEmpty()) { - throw DataXException.asDataXException(OdpsWriterErrorCode.REQUIRED_VALUE, "您未配置写入 ODPS 目的表的列信息. " + - "正确的配置方式是给datax的 column 项配置上您需要读取的列名称,用英文逗号分隔 例如: \"column\": [\"id\",\"name\"]."); + throw DataXException.asDataXException(OdpsWriterErrorCode.REQUIRED_VALUE, MESSAGE_SOURCE.message("odpsutil.1")); } // getBool 内部要求,值只能为 true,false 的字符串(大小写不敏感),其他一律报错,不再有默认配置 + // 如果是动态分区写入,不进行truncate Boolean truncate = originalConfig.getBool(Key.TRUNCATE); if (null == truncate) { - throw DataXException.asDataXException(OdpsWriterErrorCode.REQUIRED_VALUE, "[truncate]是必填配置项, 意思是写入 ODPS 目的表前是否清空表/分区. " + - "请您增加 truncate 的配置,根据业务需要选择上true 或者 false."); + throw DataXException.asDataXException(OdpsWriterErrorCode.REQUIRED_VALUE, MESSAGE_SOURCE.message("odpsutil.2")); } } @@ -55,19 +56,22 @@ public static void dealMaxRetryTime(Configuration originalConfig) { int maxRetryTime = originalConfig.getInt(Key.MAX_RETRY_TIME, OdpsUtil.MAX_RETRY_TIME); if (maxRetryTime < 1 || maxRetryTime > OdpsUtil.MAX_RETRY_TIME) { - throw DataXException.asDataXException(OdpsWriterErrorCode.ILLEGAL_VALUE, "您所配置的maxRetryTime 值错误. 该值不能小于1, 且不能大于 " + OdpsUtil.MAX_RETRY_TIME + - ". 推荐的配置方式是给maxRetryTime 配置1-11之间的某个值. 
请您检查配置并做出相应修改.");
+ throw DataXException.asDataXException(OdpsWriterErrorCode.ILLEGAL_VALUE, MESSAGE_SOURCE.message("odpsutil.3", OdpsUtil.MAX_RETRY_TIME));
 }
 MAX_RETRY_TIME = maxRetryTime;
 }
- public static String formatPartition(String partitionString) {
+ public static String formatPartition(String partitionString, Boolean printLog) {
 if (null == partitionString) {
 return null;
 }
-
- return partitionString.trim().replaceAll(" *= *", "=").replaceAll(" */ *", ",")
+ String parsedPartition = partitionString.trim().replaceAll(" *= *", "=").replaceAll(" */ *", ",")
 .replaceAll(" *, *", ",").replaceAll("'", "");
+ if (printLog) {
+ LOG.info("format partition with rules: remove all spaces; remove all single quotes; replace / with ,");
+ LOG.info("original partition {}, parsed partition {}", partitionString, parsedPartition);
+ }
+ return parsedPartition;
 }
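A minimal sketch (not part of the patch) of what the normalization above produces; the demo class and sample partition string are illustrative:

public class FormatPartitionDemo {
    public static void main(String[] args) {
        // input mixes spaces, single quotes and '/' separators
        String raw = "pt = '20230223' / ds = 'hangzhou'";
        // prints: pt=20230223,ds=hangzhou
        System.out.println(com.alibaba.datax.plugin.writer.odpswriter.util.OdpsUtil.formatPartition(raw, false));
    }
}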
@@ -78,13 +82,18 @@ public static Odps initOdpsProject(Configuration originalConfig) {
 String odpsServer = originalConfig.getString(Key.ODPS_SERVER);
 String project = originalConfig.getString(Key.PROJECT);
+ String securityToken = originalConfig.getString(Key.SECURITY_TOKEN);
 Account account;
 if (accountType.equalsIgnoreCase(Constant.DEFAULT_ACCOUNT_TYPE)) {
- account = new AliyunAccount(accessId, accessKey);
+ if (StringUtils.isNotBlank(securityToken)) {
+ account = new com.aliyun.odps.account.StsAccount(accessId, accessKey, securityToken);
+ } else {
+ account = new AliyunAccount(accessId, accessKey);
+ }
 } else {
 throw DataXException.asDataXException(OdpsWriterErrorCode.ACCOUNT_TYPE_ERROR,
- String.format("不支持的账号类型:[%s]. 账号类型目前仅支持aliyun, taobao.", accountType));
+ MESSAGE_SOURCE.message("odpsutil.4", accountType));
 }
 Odps odps = new Odps(account);
@@ -96,6 +105,7 @@ public static Odps initOdpsProject(Configuration originalConfig) {
 }
 odps.setDefaultProject(project);
 odps.setEndpoint(odpsServer);
+ odps.setUserAgent("DATAX");
 return odps;
 }
@@ -125,8 +135,7 @@ public static List listOdpsPartitions(Table table) {
 parts.add(partition.getPartitionSpec().toString());
 }
 } catch (Exception e) {
- throw DataXException.asDataXException(OdpsWriterErrorCode.GET_PARTITION_FAIL, String.format("获取 ODPS 目的表:%s 的所有分区失败. 请联系 ODPS 管理员处理.",
- table.getName()), e);
+ throw DataXException.asDataXException(OdpsWriterErrorCode.GET_PARTITION_FAIL, MESSAGE_SOURCE.message("odpsutil.5", table.getName()), e);
 }
 return parts;
 }
@@ -141,37 +150,45 @@ public static boolean isPartitionedTable(Table table) {
 }
 } catch (Exception e) {
 throw DataXException.asDataXException(OdpsWriterErrorCode.CHECK_IF_PARTITIONED_TABLE_FAILED,
- String.format("检查 ODPS 目的表:%s 是否为分区表失败, 请联系 ODPS 管理员处理.", table.getName()), e);
+ MESSAGE_SOURCE.message("odpsutil.6", table.getName()), e);
 }
 return false;
 }
 public static void truncateNonPartitionedTable(Odps odps, Table tab) {
- String truncateNonPartitionedTableSql = "truncate table " + tab.getName() + ";";
+ truncateNonPartitionedTable(odps, tab.getName());
+ }
+
+ public static void truncateNonPartitionedTable(Odps odps, String tableName) {
+ String truncateNonPartitionedTableSql = "truncate table " + tableName + ";";
 try {
- runSqlTaskWithRetry(odps, truncateNonPartitionedTableSql, MAX_RETRY_TIME, 1000, true);
+ LOG.info("truncate non partitioned table with sql: {}", truncateNonPartitionedTableSql);
+ runSqlTaskWithRetry(odps, truncateNonPartitionedTableSql, MAX_RETRY_TIME, 1000, true, "truncate", null);
 } catch (Exception e) {
 throw DataXException.asDataXException(OdpsWriterErrorCode.TABLE_TRUNCATE_ERROR,
- String.format(" 清空 ODPS 目的表:%s 失败, 请联系 ODPS 管理员处理.", tab.getName()), e);
+ MESSAGE_SOURCE.message("odpsutil.7", tableName), e);
 }
 }
 public static void truncatePartition(Odps odps, Table table, String partition) {
 if (isPartitionExist(table, partition)) {
+ LOG.info("partition {} already exists, truncate it to clean old data", partition);
 dropPart(odps, table, partition);
 }
+ LOG.info("begin to add partition {}", partition);
 addPart(odps, table, partition);
 }
 private static boolean isPartitionExist(Table table, String partition) {
 // check if exist partition 返回值不为 null
 List odpsParts = OdpsUtil.listOdpsPartitions(table);
-
 int j = 0;
 for (; j < odpsParts.size(); j++) {
 if (odpsParts.get(j).replaceAll("'", "").equals(partition)) {
+ LOG.info("found a partition {} equal to (ignoring ' if present) the configured partition {}",
+ odpsParts.get(j), partition);
 break;
 }
 }
@@ -186,11 +203,14 @@ public static void addPart(Odps odps, Table table, String partition) {
 addPart.append("alter table ").append(table.getName()).append(" add IF NOT EXISTS partition(")
 .append(partSpec).append(");");
 try {
- runSqlTaskWithRetry(odps, addPart.toString(), MAX_RETRY_TIME, 1000, true);
+ Map hints = new HashMap();
+ //开启ODPS SQL TYPE2.0类型
+ hints.put("odps.sql.type.system.odps2", "true");
+ LOG.info("add partition with sql: {}", addPart.toString());
+ runSqlTaskWithRetry(odps, addPart.toString(), MAX_RETRY_TIME, 1000, true, "addPart", hints);
 } catch (Exception e) {
 throw DataXException.asDataXException(OdpsWriterErrorCode.ADD_PARTITION_FAILED,
- String.format("添加 ODPS 目的表的分区失败. 错误发生在添加 ODPS 的项目:%s 的表:%s 的分区:%s. 请联系 ODPS 管理员处理.",
- table.getProject(), table.getName(), partition), e);
+ MESSAGE_SOURCE.message("odpsutil.8", table.getProject(), table.getName(), partition), e);
 }
 }
@@ -207,7 +227,7 @@ public TableTunnel.UploadSession call() throws Exception {
 }, MAX_RETRY_TIME, 1000L, true);
 } catch (Exception e) {
 throw DataXException.asDataXException(OdpsWriterErrorCode.CREATE_MASTER_UPLOAD_FAIL,
- "创建TunnelUpload失败. 
请联系 ODPS 管理员处理.", e); + MESSAGE_SOURCE.message("odpsutil.9"), e); } } else { final PartitionSpec partitionSpec = new PartitionSpec(partition); @@ -220,7 +240,7 @@ public TableTunnel.UploadSession call() throws Exception { }, MAX_RETRY_TIME, 1000L, true); } catch (Exception e) { throw DataXException.asDataXException(OdpsWriterErrorCode.CREATE_MASTER_UPLOAD_FAIL, - "创建TunnelUpload失败. 请联系 ODPS 管理员处理.", e); + MESSAGE_SOURCE.message("odpsutil.10"), e); } } } @@ -239,7 +259,7 @@ public TableTunnel.UploadSession call() throws Exception { } catch (Exception e) { throw DataXException.asDataXException(OdpsWriterErrorCode.GET_SLAVE_UPLOAD_FAIL, - "获取TunnelUpload失败. 请联系 ODPS 管理员处理.", e); + MESSAGE_SOURCE.message("odpsutil.11"), e); } } else { final PartitionSpec partitionSpec = new PartitionSpec(partition); @@ -253,7 +273,7 @@ public TableTunnel.UploadSession call() throws Exception { } catch (Exception e) { throw DataXException.asDataXException(OdpsWriterErrorCode.GET_SLAVE_UPLOAD_FAIL, - "获取TunnelUpload失败. 请联系 ODPS 管理员处理.", e); + MESSAGE_SOURCE.message("odpsutil.12"), e); } } } @@ -266,11 +286,14 @@ private static void dropPart(Odps odps, Table table, String partition) { .append(" drop IF EXISTS partition(").append(partSpec) .append(");"); try { - runSqlTaskWithRetry(odps, dropPart.toString(), MAX_RETRY_TIME, 1000, true); + Map hints = new HashMap(); + //开启ODPS SQL TYPE2.0类型 + hints.put("odps.sql.type.system.odps2", "true"); + LOG.info("drop partition with sql: {}", dropPart.toString()); + runSqlTaskWithRetry(odps, dropPart.toString(), MAX_RETRY_TIME, 1000, true, "truncate", hints); } catch (Exception e) { throw DataXException.asDataXException(OdpsWriterErrorCode.ADD_PARTITION_FAILED, - String.format("Drop ODPS 目的表分区失败. 错误发生在项目:%s 的表:%s 的分区:%s .请联系 ODPS 管理员处理.", - table.getProject(), table.getName(), partition), e); + MESSAGE_SOURCE.message("odpsutil.13", table.getProject(), table.getName(), partition), e); } } @@ -282,7 +305,7 @@ private static String getPartSpec(String partition) { String[] kv = part.split("="); if (kv.length != 2) { throw DataXException.asDataXException(OdpsWriterErrorCode.ILLEGAL_VALUE, - String.format("ODPS 目的表自身的 partition:%s 格式不对. 
正确的格式形如: pt=1,ds=hangzhou", partition));
+ MESSAGE_SOURCE.message("odpsutil.14", partition));
 }
 partSpec.append(kv[0]).append("=");
 partSpec.append("'").append(kv[1].replace("'", "")).append("'");
@@ -293,6 +316,38 @@ private static String getPartSpec(String partition) {
 return partSpec.toString();
 }
+ public static Instance runSqlTaskWithRetry(final Odps odps, final String sql, String tag) {
+ try {
+ long beginTime = System.currentTimeMillis();
+
+ Instance instance = runSqlTaskWithRetry(odps, sql, MAX_RETRY_TIME, 1000, true, tag, null);
+
+ long endTime = System.currentTimeMillis();
+ LOG.info(String.format("execute odps sql: %s finished, cost time: %s ms",
+ sql, (endTime - beginTime)));
+ return instance;
+ } catch (Exception e) {
+ throw DataXException.asDataXException(OdpsWriterErrorCode.RUN_SQL_ODPS_EXCEPTION,
+ MESSAGE_SOURCE.message("odpsutil.16", sql), e);
+ }
+ }
+
+ public static ResultSet getSqlTaskRecordsWithRetry(final Odps odps, final String sql, String tag) {
+ Instance instance = runSqlTaskWithRetry(odps, sql, tag);
+ if (instance == null) {
+ LOG.error("can not get odps instance from sql {}", sql);
+ throw DataXException.asDataXException(OdpsWriterErrorCode.RUN_SQL_ODPS_EXCEPTION,
+ MESSAGE_SOURCE.message("odpsutil.16", sql));
+ }
+ try {
+ return SQLTask.getResultSet(instance, instance.getTaskNames().iterator().next());
+ } catch (Exception e) {
+ throw DataXException.asDataXException(OdpsWriterErrorCode.RUN_SQL_ODPS_EXCEPTION,
+ MESSAGE_SOURCE.message("odpsutil.16", sql), e);
+ }
+ }
+
+
 /**
 * 该方法只有在 sql 为幂等的才可以使用,且odps抛出异常时候才会进行重试
 *
@@ -300,12 +355,12 @@ private static String getPartSpec(String partition) {
 * @param query 执行sql
 * @throws Exception
 */
- public static void runSqlTaskWithRetry(final Odps odps, final String query, int retryTimes,
- long sleepTimeInMilliSecond, boolean exponential) throws Exception {
+ public static Instance runSqlTaskWithRetry(final Odps odps, final String query, int retryTimes,
+ long sleepTimeInMilliSecond, boolean exponential, String tag,
+ Map hints) throws Exception {
 for(int i = 0; i < retryTimes; i++) {
 try {
- runSqlTask(odps, query);
- return;
+ return runSqlTask(odps, query, tag, hints);
 } catch (DataXException e) {
 if (OdpsWriterErrorCode.RUN_SQL_ODPS_EXCEPTION.equals(e.getErrorCode())) {
 LOG.debug("Exception when calling callable", e);
@@ -338,34 +393,83 @@ public static void runSqlTaskWithRetry(final Odps odps, final String query, int
 throw e;
 }
 }
+ return null;
 }
- public static void runSqlTask(Odps odps, String query) {
+ public static Instance runSqlTask(Odps odps, String query, String tag, Map hints) {
 if (StringUtils.isBlank(query)) {
- return;
+ return null;
 }
- String taskName = "datax_odpswriter_trunacte_" + UUID.randomUUID().toString().replace('-', '_');
-
+ String taskName = String.format("datax_odpswriter_%s_%s", tag, UUID.randomUUID().toString().replace('-', '_'));
 LOG.info("Try to start sqlTask:[{}] to run odps sql:[\n{}\n] .", taskName, query);
 //todo:biz_id set (目前ddl先不做)
 Instance instance;
 Instance.TaskStatus status;
 try {
- instance = SQLTask.run(odps, odps.getDefaultProject(), query, taskName, null, null);
+ instance = SQLTask.run(odps, odps.getDefaultProject(), query, taskName, hints, null);
 instance.waitForSuccess();
 status = instance.getTaskStatus().get(taskName);
 if (!Instance.TaskStatus.Status.SUCCESS.equals(status.getStatus())) {
 throw DataXException.asDataXException(OdpsWriterErrorCode.RUN_SQL_FAILED,
- String.format("ODPS 目的表在运行 ODPS SQL失败, 返回值为:%s. 请联系 ODPS 管理员处理. 
SQL 内容为:[\n%s\n].", instance.getTaskResults().get(taskName), - query)); + MESSAGE_SOURCE.message("odpsutil.15", query)); } + return instance; } catch (DataXException e) { throw e; } catch (Exception e) { throw DataXException.asDataXException(OdpsWriterErrorCode.RUN_SQL_ODPS_EXCEPTION, - String.format("ODPS 目的表在运行 ODPS SQL 时抛出异常, 请联系 ODPS 管理员处理. SQL 内容为:[\n%s\n].", query), e); + MESSAGE_SOURCE.message("odpsutil.16", query), e); + } + } + + + public static String generateTaskName(String tag) { + return String.format("datax_odpswriter_%s_%s", tag, UUID.randomUUID().toString().replace('-', '_')); + } + + public static void checkBlockComplete(final TableTunnel.UploadSession masterUpload, final Long[] blocks) { + Long[] serverBlocks; + try { + serverBlocks = + RetryUtil.executeWithRetry(new Callable() { + @Override + public Long[] call() throws Exception { + return masterUpload.getBlockList(); + } + }, MAX_RETRY_TIME, 1000L, true); + } catch (Exception e) { + throw DataXException.asDataXException(OdpsWriterErrorCode.COMMIT_BLOCK_FAIL, + MESSAGE_SOURCE.message("odpsutil.17", masterUpload.getId()), e); + } + + HashMap serverBlockMap = new HashMap(); + for (Long blockId : serverBlocks) { + serverBlockMap.put(blockId, true); + } + + for (Long blockId : blocks) { + if (!serverBlockMap.containsKey(blockId)) { + throw DataXException.asDataXException(OdpsWriterErrorCode.COMMIT_BLOCK_FAIL, + "BlockId[" + blockId + "] upload failed!"); + } + } + + } + + public static void masterComplete(final TableTunnel.UploadSession masterUpload) { + try { + RetryUtil.executeWithRetry(new Callable() { + @Override + public Void call() throws Exception { + masterUpload.commit(); + return null; + } + }, MAX_RETRY_TIME, 1000L, true); + } catch (Exception e) { + throw DataXException.asDataXException(OdpsWriterErrorCode.COMMIT_BLOCK_FAIL, + MESSAGE_SOURCE.message("odpsutil.17", masterUpload.getId()), e); } } @@ -380,31 +484,29 @@ public Void call() throws Exception { }, MAX_RETRY_TIME, 1000L, true); } catch (Exception e) { throw DataXException.asDataXException(OdpsWriterErrorCode.COMMIT_BLOCK_FAIL, - String.format("ODPS 目的表在提交 block:[\n%s\n] 时失败, uploadId=[%s]. 请联系 ODPS 管理员处理.", StringUtils.join(blocks, ","), masterUpload.getId()), e); + MESSAGE_SOURCE.message("odpsutil.17", StringUtils.join(blocks, ","), masterUpload.getId()), e); } } public static void slaveWriteOneBlock(final TableTunnel.UploadSession slaveUpload, final ProtobufRecordPack protobufRecordPack, - final long blockId, final boolean isCompress) { + final long blockId, final Long timeoutInMs) { try { RetryUtil.executeWithRetry(new Callable() { @Override public Void call() throws Exception { - TunnelRecordWriter tunnelRecordWriter = (TunnelRecordWriter)slaveUpload.openRecordWriter(blockId, isCompress); - tunnelRecordWriter.write(protobufRecordPack); - tunnelRecordWriter.close(); + slaveUpload.writeBlock(blockId, protobufRecordPack, timeoutInMs); return null; } }, MAX_RETRY_TIME, 1000L, true); } catch (Exception e) { throw DataXException.asDataXException(OdpsWriterErrorCode.WRITER_RECORD_FAIL, - String.format("ODPS 目的表写 block:%s 失败, uploadId=[%s]. 
请联系 ODPS 管理员处理.", blockId, slaveUpload.getId()), e);
+ MESSAGE_SOURCE.message("odpsutil.18", blockId, slaveUpload.getId()), e);
 }
 }
- public static List parsePosition(List allColumnList,
- SelectCols userConfiguredColumns) {
+ public static List parsePosition(List allColumnList, List allPartColumnList,
+ List userConfiguredColumns) {
 List retList = new ArrayList();
 boolean hasColumn;
@@ -417,9 +519,20 @@ public static List parsePosition(List allColumnList,
 break;
 }
 }
+
+ if (null != allPartColumnList) {
+ for (int i = 0, len = allPartColumnList.size(); i < len; i++) {
+ if (allPartColumnList.get(i).equalsIgnoreCase(col)) {
+ retList.add(-1);
+ hasColumn = true;
+ break;
+ }
+ }
+ }
+
 if (!hasColumn) {
 throw DataXException.asDataXException(OdpsWriterErrorCode.COLUMN_NOT_EXIST,
- String.format("ODPS 目的表的列配置错误. 由于您所配置的列:%s 不存在,会导致datax无法正常插入数据,请检查该列是否存在,如果存在请检查大小写等配置.", col));
+ MESSAGE_SOURCE.message("odpsutil.19", col));
 }
 }
 return retList;
@@ -437,22 +550,81 @@ public static List getAllColumns(TableSchema schema) {
 for(Column column: columns) {
 allColumns.add(column.getName());
 type = column.getType();
- if (type == OdpsType.ARRAY || type == OdpsType.MAP) {
- throw DataXException.asDataXException(OdpsWriterErrorCode.UNSUPPORTED_COLUMN_TYPE,
- String.format("DataX 写入 ODPS 表不支持该字段类型:[%s]. 目前支持抽取的字段类型有:bigint, boolean, datetime, double, string. " +
- "您可以选择不抽取 DataX 不支持的字段或者联系 ODPS 管理员寻求帮助.",
- type));
- }
 }
 return allColumns;
 }
- public static List getTableOriginalColumnTypeList(TableSchema schema) {
- List tableOriginalColumnTypeList = new ArrayList();
+ public static List getAllPartColumns(TableSchema schema) {
+ if (null == schema) {
+ throw new IllegalArgumentException("parameter schema can not be null.");
+ }
+
+ List allPartColumns = new ArrayList<>();
+
+ List partCols = schema.getPartitionColumns();
+
+ for (Column column : partCols) {
+ allPartColumns.add(column.getName());
+ }
+
+ return allPartColumns;
+ }
+
+ public static String getPartColValFromDataXRecord(com.alibaba.datax.common.element.Record dataxRecord,
+ List positions, List userConfiguredColumns,
+ Map dateTransFormMap) {
+ StringBuilder partition = new StringBuilder();
+ for (int i = 0, len = dataxRecord.getColumnNumber(); i < len; i++) {
+ if (positions.get(i) == -1) {
+ if (partition.length() > 0) {
+ partition.append(",");
+ }
+ String partName = userConfiguredColumns.get(i);
+ //todo: 这里应该根据分区列的类型做转换,这里先直接toString转换了
+ com.alibaba.datax.common.element.Column partitionCol = dataxRecord.getColumn(i);
+ String partVal = partitionCol.getRawData().toString();
+ if (StringUtils.isBlank(partVal)) {
+ throw new DataXException(OdpsWriterErrorCode.ILLEGAL_VALUE, String.format(
+ "value of column %s is null or blank, it can not be used as a partition column", partName));
+ }
+
+ // 如果分区列的值的格式是一个日期,并且用户设置列的转换规则
+ DateTransForm dateTransForm = null;
+ if (null != dateTransFormMap) {
+ dateTransForm = dateTransFormMap.get(partName);
+ }
+ if (null != dateTransForm) {
+ try {
+ // 日期列
+ if (partitionCol.getType().equals(com.alibaba.datax.common.element.Column.Type.DATE)) {
+ partVal = OdpsUtil.date2StringWithFormat(partitionCol.asDate(), dateTransForm.getToFormat());
+ }
+ // String 列,需要先按照 fromFormat 转换为日期
+ if (partitionCol.getType().equals(com.alibaba.datax.common.element.Column.Type.STRING)) {
+ partVal = OdpsUtil.date2StringWithFormat(partitionCol.asDate(dateTransForm.getFromFormat()), dateTransForm.getToFormat());
+ }
+ } catch (DataXException e) {
+ LOG.warn("Parse {} with format {} error! Please check the column config and {} config. So use original value '{}'. Detail info: {}",
+ partVal, dateTransForm.toString(), Key.PARTITION_COL_MAPPING, partVal, e);
+ }
+ }
+
+ partition.append(partName).append("=").append(partVal);
+ }
+ }
+ return partition.toString();
+ }
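A small sketch (illustrative, not part of the patch) of the position convention shared by parsePosition and getPartColValFromDataXRecord: a data column maps to its index in the table schema, a partition column maps to -1:

import java.util.Arrays;
import java.util.List;
import com.alibaba.datax.plugin.writer.odpswriter.util.OdpsUtil;

public class ParsePositionDemo {
    public static void main(String[] args) {
        // assumed table layout: data columns (id, name), partition column (pt)
        List<String> dataColumns = Arrays.asList("id", "name");
        List<String> partColumns = Arrays.asList("pt");
        List<String> userColumns = Arrays.asList("id", "name", "pt");
        // expected output: [0, 1, -1]; getPartColValFromDataXRecord later
        // renders the -1 slot as "pt=<record value>"
        System.out.println(OdpsUtil.parsePosition(dataColumns, partColumns, userColumns));
    }
}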
+
+ public static String date2StringWithFormat(Date date, String dateFormat) {
+ return DateFormatUtils.format(date, dateFormat, TimeZone.getTimeZone("GMT+8"));
+ }
+
+ public static List getTableOriginalColumnTypeList(TableSchema schema) {
+ List tableOriginalColumnTypeList = new ArrayList();
 List columns = schema.getColumns();
 for (Column column : columns) {
- tableOriginalColumnTypeList.add(column.getType());
+ tableOriginalColumnTypeList.add(column.getTypeInfo());
 }
 return tableOriginalColumnTypeList;
@@ -466,8 +638,7 @@ public static void dealTruncate(Odps odps, Table table, String partition, boolea
 if (isPartitionedTable) {
 //分区表
 if (StringUtils.isBlank(partition)) {
- throw DataXException.asDataXException(OdpsWriterErrorCode.PARTITION_ERROR, String.format("您没有配置分区信息,因为你配置的表是分区表:%s 如果需要进行 truncate 操作,必须指定需要清空的具体分区. 请修改分区配置,格式形如 pt=${bizdate} .",
- table.getName()));
+ throw DataXException.asDataXException(OdpsWriterErrorCode.PARTITION_ERROR, MESSAGE_SOURCE.message("odpsutil.21", table.getName()));
 } else {
 LOG.info("Try to truncate partition=[{}] in table=[{}].", partition, table.getName());
 OdpsUtil.truncatePartition(odps, table, partition);
 }
 } else {
 //非分区表
 if (StringUtils.isNotBlank(partition)) {
- throw DataXException.asDataXException(OdpsWriterErrorCode.PARTITION_ERROR, String.format("分区信息配置错误,你的ODPS表是非分区表:%s 进行 truncate 操作时不需要指定具体分区值. 请检查您的分区配置,删除该配置项的值.",
- table.getName()));
+ throw DataXException.asDataXException(OdpsWriterErrorCode.PARTITION_ERROR, MESSAGE_SOURCE.message("odpsutil.22", table.getName()));
 } else {
 LOG.info("Try to truncate table:[{}].", table.getName());
 OdpsUtil.truncateNonPartitionedTable(odps, table);
 }
@@ -488,7 +658,7 @@ public static void dealTruncate(Odps odps, Table table, String partition, boolea
 //分区表
 if (StringUtils.isBlank(partition)) {
 throw DataXException.asDataXException(OdpsWriterErrorCode.PARTITION_ERROR,
- String.format("您的目的表是分区表,写入分区表:%s 时必须指定具体分区值. 请修改您的分区配置信息,格式形如 格式形如 pt=${bizdate}.", table.getName()));
+ MESSAGE_SOURCE.message("odpsutil.23", table.getName()));
 } else {
 boolean isPartitionExists = OdpsUtil.isPartitionExist(table, partition);
 if (!isPartitionExists) {
@@ -501,7 +671,7 @@ public static void dealTruncate(Odps odps, Table table, String partition, boolea
 //非分区表
 if (StringUtils.isNotBlank(partition)) {
 throw DataXException.asDataXException(OdpsWriterErrorCode.PARTITION_ERROR,
- String.format("您的目的表是非分区表,写入非分区表:%s 时不需要指定具体分区值. 请删除分区配置信息", table.getName()));
+ MESSAGE_SOURCE.message("odpsutil.24", table.getName()));
 }
 }
 }
@@ -524,14 +694,12 @@ public static void preCheckPartition(Odps odps, Table table, String partition, b
 if (isPartitionedTable) {
 //分区表
 if (StringUtils.isBlank(partition)) {
- throw DataXException.asDataXException(OdpsWriterErrorCode.PARTITION_ERROR, String.format("您没有配置分区信息,因为你配置的表是分区表:%s 如果需要进行 truncate 操作,必须指定需要清空的具体分区. 
请修改分区配置,格式形如 pt=${bizdate} .", - table.getName())); + throw DataXException.asDataXException(OdpsWriterErrorCode.PARTITION_ERROR, MESSAGE_SOURCE.message("odpsutil.25", table.getName())); } } else { //非分区表 if (StringUtils.isNotBlank(partition)) { - throw DataXException.asDataXException(OdpsWriterErrorCode.PARTITION_ERROR, String.format("分区信息配置错误,你的ODPS表是非分区表:%s 进行 truncate 操作时不需要指定具体分区值. 请检查您的分区配置,删除该配置项的值.", - table.getName())); + throw DataXException.asDataXException(OdpsWriterErrorCode.PARTITION_ERROR, MESSAGE_SOURCE.message("odpsutil.26", table.getName())); } } } else { @@ -540,13 +708,13 @@ public static void preCheckPartition(Odps odps, Table table, String partition, b //分区表 if (StringUtils.isBlank(partition)) { throw DataXException.asDataXException(OdpsWriterErrorCode.PARTITION_ERROR, - String.format("您的目的表是分区表,写入分区表:%s 时必须指定具体分区值. 请修改您的分区配置信息,格式形如 格式形如 pt=${bizdate}.", table.getName())); + MESSAGE_SOURCE.message("odpsutil.27", table.getName())); } } else { //非分区表 if (StringUtils.isNotBlank(partition)) { throw DataXException.asDataXException(OdpsWriterErrorCode.PARTITION_ERROR, - String.format("您的目的表是非分区表,写入非分区表:%s 时不需要指定具体分区值. 请删除分区配置信息", table.getName())); + MESSAGE_SOURCE.message("odpsutil.28", table.getName())); } } } @@ -559,29 +727,286 @@ public static void throwDataXExceptionWhenReloadTable(Exception e, String tableN if(e.getMessage() != null) { if(e.getMessage().contains(OdpsExceptionMsg.ODPS_PROJECT_NOT_FOUNT)) { throw DataXException.asDataXException(OdpsWriterErrorCode.ODPS_PROJECT_NOT_FOUNT, - String.format("加载 ODPS 目的表:%s 失败. " + - "请检查您配置的 ODPS 目的表的 [project] 是否正确.", tableName), e); + MESSAGE_SOURCE.message("odpsutil.29", tableName), e); } else if(e.getMessage().contains(OdpsExceptionMsg.ODPS_TABLE_NOT_FOUNT)) { throw DataXException.asDataXException(OdpsWriterErrorCode.ODPS_TABLE_NOT_FOUNT, - String.format("加载 ODPS 目的表:%s 失败. " + - "请检查您配置的 ODPS 目的表的 [table] 是否正确.", tableName), e); + MESSAGE_SOURCE.message("odpsutil.30", tableName), e); } else if(e.getMessage().contains(OdpsExceptionMsg.ODPS_ACCESS_KEY_ID_NOT_FOUND)) { throw DataXException.asDataXException(OdpsWriterErrorCode.ODPS_ACCESS_KEY_ID_NOT_FOUND, - String.format("加载 ODPS 目的表:%s 失败. " + - "请检查您配置的 ODPS 目的表的 [accessId] [accessKey]是否正确.", tableName), e); + MESSAGE_SOURCE.message("odpsutil.31", tableName), e); } else if(e.getMessage().contains(OdpsExceptionMsg.ODPS_ACCESS_KEY_INVALID)) { throw DataXException.asDataXException(OdpsWriterErrorCode.ODPS_ACCESS_KEY_INVALID, - String.format("加载 ODPS 目的表:%s 失败. " + - "请检查您配置的 ODPS 目的表的 [accessKey] 是否正确.", tableName), e); + MESSAGE_SOURCE.message("odpsutil.32", tableName), e); } else if(e.getMessage().contains(OdpsExceptionMsg.ODPS_ACCESS_DENY)) { throw DataXException.asDataXException(OdpsWriterErrorCode.ODPS_ACCESS_DENY, - String.format("加载 ODPS 目的表:%s 失败. " + - "请检查您配置的 ODPS 目的表的 [accessId] [accessKey] [project]是否匹配.", tableName), e); + MESSAGE_SOURCE.message("odpsutil.33", tableName), e); } } throw DataXException.asDataXException(OdpsWriterErrorCode.ILLEGAL_VALUE, - String.format("加载 ODPS 目的表:%s 失败. 
" + - "请检查您配置的 ODPS 目的表的 project,table,accessId,accessKey,odpsServer等值.", tableName), e); + MESSAGE_SOURCE.message("odpsutil.34", tableName), e); + } + + /** + * count统计数据,自动创建统计表 + * @param tableName 统计表名字 + * @return + */ + public static String getCreateSummaryTableDDL(String tableName) { + return String.format("CREATE TABLE IF NOT EXISTS %s " + + "(src_table_name STRING, " + + "dest_table_name STRING, " + + "src_row_num BIGINT, " + + "src_query_time DATETIME, " + + "read_succeed_records BIGINT," + + "write_succeed_records BIGINT," + + "dest_row_num BIGINT, " + + "write_time DATETIME);", + tableName); + } + + /** + * count统计数据,获取count dml + * @param tableName + * @return + */ + public static String countTableSql(final String tableName, final String partition) { + if (StringUtils.isNotBlank(partition)) { + String[] partitions = partition.split("\\,"); + String p = String.join(" and ", partitions); + return String.format("SELECT COUNT(1) AS odps_num FROM %s WHERE %s;", tableName, p); + } else { + return String.format("SELECT COUNT(1) AS odps_num FROM %s;", tableName); + } + } + + /** + * count统计数据 dml 对应字段,用于查询 + * @return + */ + public static String countName() { + return "odps_num"; + } + + /** + * count统计数据dml + * @param summaryTableName 统计数据写入表 + * @param sourceTableName datax reader 表 + * @param destTableName datax writer 表 + * @param srcCount reader表行数 + * @param queryTime reader表查询时间 + * @param destCount writer 表行书 + * @return insert dml sql + */ + public static String getInsertSummaryTableSql(String summaryTableName, String sourceTableName, String destTableName, + Long srcCount, String queryTime, Number readSucceedRecords, + Number writeSucceedRecords, Long destCount) { + final String sql = "INSERT INTO %s (src_table_name,dest_table_name," + + " src_row_num, src_query_time, read_succeed_records, write_succeed_records, dest_row_num, write_time) VALUES ( %s );"; + + String insertData = String.format("'%s', '%s', %s, %s, %s, %s, %s, getdate()", + sourceTableName, destTableName, srcCount, queryTime, readSucceedRecords, writeSucceedRecords, destCount ); + return String.format(sql, summaryTableName, insertData); + } + + public static void createTable(Odps odps, String tableName, final String sql) { + try { + LOG.info("create table with sql: {}", sql); + runSqlTaskWithRetry(odps, sql, MAX_RETRY_TIME, 1000, true, "create", null); + } catch (Exception e) { + throw DataXException.asDataXException(OdpsWriterErrorCode.RUN_SQL_FAILED, + MESSAGE_SOURCE.message("odpsutil.7", tableName), e); + } + } + + public static void createTableFromTable(Odps odps, String resourceTable, String targetTable) { + TableSchema schema = odps.tables().get(resourceTable).getSchema(); + StringBuilder builder = new StringBuilder(); + Iterator iterator = schema.getColumns().iterator(); + while (iterator.hasNext()) { + Column c = iterator.next(); + builder.append(String.format(" %s %s ", c.getName(), c.getTypeInfo().getTypeName())); + if (iterator.hasNext()) { + builder.append(","); + } + } + String createTableSql = String.format("CREATE TABLE IF NOT EXISTS %s (%s);", targetTable, builder.toString()); + + try { + LOG.info("create table with sql: {}", createTableSql); + runSqlTaskWithRetry(odps, createTableSql, MAX_RETRY_TIME, 1000, true, "create", null); + } catch (Exception e) { + throw DataXException.asDataXException(OdpsWriterErrorCode.RUN_SQL_FAILED, + MESSAGE_SOURCE.message("odpsutil.7", targetTable), e); + } + } + + public static Object truncateSingleFieldData(OdpsType type, Object data, int limit, Boolean 
+
+ public static Object truncateSingleFieldData(OdpsType type, Object data, int limit, Boolean enableOverLengthOutput) {
+ if (data == null) {
+ return data;
+ }
+ if (OdpsType.STRING.equals(type)) {
+ if (enableOverLengthOutput) {
+ LOG.warn("InvalidData: The string's length is more than " + limit + " bytes. content:" + data);
+ }
+ LOG.info("before truncate string length:" + ((String) data).length());
+ // 确保特殊字符场景下的截断
+ limit -= Constant.UTF8_ENCODED_CHAR_MAX_SIZE;
+ data = cutString((String) data, limit);
+ LOG.info("after truncate string length:" + ((String) data).length());
+ } else if (OdpsType.BINARY.equals(type)) {
+ byte[] oriDataBytes = ((Binary) data).data();
+ if (oriDataBytes == null) {
+ return data;
+ }
+ int originLength = oriDataBytes.length;
+ if (originLength <= limit) {
+ return data;
+ }
+ if (enableOverLengthOutput) {
+ LOG.warn("InvalidData: The binary's length is more than " + limit + " bytes. content:" + byteArrToHex(oriDataBytes));
+ }
+ LOG.info("before truncate binary length:" + oriDataBytes.length);
+ byte[] newData = new byte[limit];
+ System.arraycopy(oriDataBytes, 0, newData, 0, limit);
+ LOG.info("after truncate binary length:" + newData.length);
+ return new Binary(newData);
+ }
+ return data;
+ }
+ public static Object setNull(OdpsType type, Object data, int limit, Boolean enableOverLengthOutput) {
+ if (data == null) {
+ return null;
+ }
+ if (OdpsType.STRING.equals(type)) {
+ if (enableOverLengthOutput) {
+ LOG.warn("InvalidData: The string's length is more than " + limit + " bytes. content:" + data);
+ }
+ return null;
+ } else if (OdpsType.BINARY.equals(type)) {
+ byte[] oriDataBytes = ((Binary) data).data();
+ int originLength = oriDataBytes.length;
+ if (originLength > limit) {
+ if (enableOverLengthOutput) {
+ LOG.warn("InvalidData: The binary's length is more than " + limit + " bytes. content:" + new String(oriDataBytes));
+ }
+ return null;
+ }
+ }
+ return data;
+ }
+ public static boolean validateStringLength(String value, long limit) {
+ try {
+ if (value.length() * Constant.UTF8_ENCODED_CHAR_MAX_SIZE > limit
+ && value.getBytes("utf-8").length > limit) {
+ return false;
+ }
+ } catch (Exception e) {
+ // if the encoding check fails, skip validation and treat the value as valid
+ LOG.warn("check string length failed, skip the check", e);
+ return true;
+ }
+ return true;
+ }
+ public static String cutString(String sourceString, int cutBytes) {
+ if (sourceString == null || "".equals(sourceString.trim()) || cutBytes < 1) {
+ return "";
+ }
+ int lastIndex = 0;
+ boolean stopFlag = false;
+ int totalBytes = 0;
+ for (int i = 0; i < sourceString.length(); i++) {
+ String s = Integer.toBinaryString(sourceString.charAt(i));
+ if (s.length() > 8) {
+ totalBytes += 3;
+ } else {
+ totalBytes += 1;
+ }
+ if (!stopFlag) {
+ if (totalBytes == cutBytes) {
+ lastIndex = i;
+ stopFlag = true;
+ } else if (totalBytes > cutBytes) {
+ lastIndex = i - 1;
+ stopFlag = true;
+ }
+ }
+ }
+ if (!stopFlag) {
+ return sourceString;
+ } else {
+ return sourceString.substring(0, lastIndex + 1);
+ }
+ }
+ public static boolean dataOverLength(OdpsType type, Object data, int limit) {
+ if (data == null) {
+ return false;
+ }
+ if (OdpsType.STRING.equals(type)) {
+ if (!OdpsUtil.validateStringLength((String) data, limit)) {
+ return true;
+ }
+ } else if (OdpsType.BINARY.equals(type)) {
+ byte[] oriDataBytes = ((Binary) data).data();
+ if (oriDataBytes == null) {
+ return false;
+ }
+ int originLength = oriDataBytes.length;
+ if (originLength > limit) {
+ return true;
+ }
+ }
+ return false;
+ }
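A worked example (illustrative) of the byte-budget truncation above; cutString counts a character as 3 bytes when its code point does not fit in 8 bits, so a 5-byte budget keeps "ab" plus one CJK character:

public class CutStringDemo {
    public static void main(String[] args) {
        // bytes counted: 'a'=1, 'b'=1, '中'=3 -> the 5-byte budget is reached at '中'
        // prints: ab中
        System.out.println(com.alibaba.datax.plugin.writer.odpswriter.util.OdpsUtil.cutString("ab中文", 5));
    }
}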
+ public static Object processOverLengthData(Object data, OdpsType type, String overLengthRule, int maxFieldLength, Boolean enableOverLengthOutput) {
+ try {
+ // 超长数据检查
+ if (OdpsWriter.maxOutputOverLengthRecord != null && OdpsWriter.globalTotalTruncatedRecordNumber.get() >= OdpsWriter.maxOutputOverLengthRecord) {
+ enableOverLengthOutput = false;
+ }
+ if ("truncate".equalsIgnoreCase(overLengthRule)) {
+ if (OdpsUtil.dataOverLength(type, data, OdpsWriter.maxOdpsFieldLength)) {
+ Object newData = OdpsUtil.truncateSingleFieldData(type, data, maxFieldLength, enableOverLengthOutput);
+ OdpsWriter.globalTotalTruncatedRecordNumber.incrementAndGet();
+ return newData;
+ }
+ } else if ("setNull".equalsIgnoreCase(overLengthRule)) {
+ if (OdpsUtil.dataOverLength(type, data, OdpsWriter.maxOdpsFieldLength)) {
+ OdpsWriter.globalTotalTruncatedRecordNumber.incrementAndGet();
+ return OdpsUtil.setNull(type, data, maxFieldLength, enableOverLengthOutput);
+ }
+ }
+ } catch (Throwable e) {
+ LOG.warn("truncate overLength data failed!", e);
+ }
+ return data;
+ }
+ private static final char[] HEX_CHAR_ARR = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
+ /**
+ * 字节数组转十六进制字符串
+ * @param btArr
+ * @return
+ */
+ public static String byteArrToHex(byte[] btArr) {
+ char[] strArr = new char[btArr.length * 2];
+ int i = 0;
+ for (byte bt : btArr) {
+ strArr[i++] = HEX_CHAR_ARR[bt >>> 4 & 0xf];
+ strArr[i++] = HEX_CHAR_ARR[bt & 0xf];
+ }
+ return new String(strArr);
+ }
+ public static byte[] hexToByteArr(String hexStr) {
+ char[] charArr = hexStr.toCharArray();
+ byte[] btArr = new byte[charArr.length / 2];
+ int index = 0;
+ for (int i = 0; i < charArr.length; i++) {
+ // map each hex digit to its numeric value (0-15) before packing two digits per byte
+ int highBit = Character.digit(charArr[i], 16);
+ int lowBit = Character.digit(charArr[++i], 16);
+ btArr[index] = (byte) (highBit << 4 | lowBit);
+ index++;
+ }
+ return btArr;
+ }
 }
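The reader and writer diffs below all repeat one wiring change; a condensed sketch of the pattern (plugin skeleton illustrative, constructor signature from this patch):

public static class Job extends Reader.Job {
    private CommonRdbmsReader.Job commonRdbmsReaderJob;

    @Override
    public void init() {
        // containerContext comes from the plugin base class; forwarding it lets the
        // common RDBMS code resolve job-container resources such as the DataX name
        this.commonRdbmsReaderJob = new CommonRdbmsReader.Job(DataBaseType.Oracle, this.containerContext);
        this.commonRdbmsReaderJob.init(super.getPluginJobConf());
    }
}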
diff --git a/oraclereader/src/main/java/com/alibaba/datax/plugin/reader/oraclereader/OracleReader.java b/oraclereader/src/main/java/com/alibaba/datax/plugin/reader/oraclereader/OracleReader.java
index 749d889fab..ea7ba23edc 100755
--- a/oraclereader/src/main/java/com/alibaba/datax/plugin/reader/oraclereader/OracleReader.java
+++ b/oraclereader/src/main/java/com/alibaba/datax/plugin/reader/oraclereader/OracleReader.java
@@ -7,7 +7,6 @@
 import com.alibaba.datax.plugin.rdbms.reader.CommonRdbmsReader;
 import com.alibaba.datax.plugin.rdbms.reader.Key;
 import com.alibaba.datax.plugin.rdbms.reader.util.HintUtil;
-import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode;
 import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
 import org.apache.commons.lang3.StringUtils;
 import org.slf4j.Logger;
@@ -17,50 +16,49 @@
 public class OracleReader extends Reader {
- private static final DataBaseType DATABASE_TYPE = DataBaseType.Oracle;
+ private static final DataBaseType DATABASE_TYPE = DataBaseType.Oracle;
- public static class Job extends Reader.Job {
- private static final Logger LOG = LoggerFactory
- .getLogger(OracleReader.Job.class);
+ public static class Job extends Reader.Job {
+ private static final Logger LOG = LoggerFactory
+ .getLogger(OracleReader.Job.class);
- private Configuration originalConfig = null;
- private CommonRdbmsReader.Job commonRdbmsReaderJob;
+ private Configuration originalConfig = null;
+ private CommonRdbmsReader.Job commonRdbmsReaderJob;
- @Override
- public void init() {
- this.originalConfig = super.getPluginJobConf();
+ @Override
+ public void init() {
+ this.originalConfig = super.getPluginJobConf();
- //dealFetchSize(this.originalConfig);
+ //dealFetchSize(this.originalConfig);
- this.commonRdbmsReaderJob = new CommonRdbmsReader.Job(
- DATABASE_TYPE);
- this.commonRdbmsReaderJob.init(this.originalConfig);
+ this.commonRdbmsReaderJob = new CommonRdbmsReader.Job(DATABASE_TYPE, this.containerContext);
+ this.commonRdbmsReaderJob.init(this.originalConfig);
- // 注意:要在 this.commonRdbmsReaderJob.init(this.originalConfig); 之后执行,这样可以直接快速判断是否是querySql 模式
- dealHint(this.originalConfig);
- }
+ // 注意:要在 this.commonRdbmsReaderJob.init(this.originalConfig); 之后执行,这样可以直接快速判断是否是querySql 模式
+ dealHint(this.originalConfig);
+ }
 @Override
- public void preCheck(){
+ public void preCheck() {
 init();
- this.commonRdbmsReaderJob.preCheck(this.originalConfig,DATABASE_TYPE);
+ this.commonRdbmsReaderJob.preCheck(this.originalConfig, DATABASE_TYPE);
 }
- @Override
- public List split(int adviceNumber) {
- return this.commonRdbmsReaderJob.split(this.originalConfig,
- adviceNumber);
- }
+ @Override
+ public List split(int adviceNumber) {
+ return this.commonRdbmsReaderJob.split(this.originalConfig,
+ adviceNumber);
+ }
- @Override
- public void post() {
- this.commonRdbmsReaderJob.post(this.originalConfig);
- }
+ @Override
+ public void post() {
+ this.commonRdbmsReaderJob.post(this.originalConfig);
+ }
- @Override
- public void destroy() {
- this.commonRdbmsReaderJob.destroy(this.originalConfig);
- }
+ @Override
+ public void destroy() {
+ this.commonRdbmsReaderJob.destroy(this.originalConfig);
+ }
// private void dealFetchSize(Configuration originalConfig) {
// int fetchSize = originalConfig.getInt(
@@ -77,50 +75,50 @@ public void destroy() {
// fetchSize);
// }
- private void dealHint(Configuration originalConfig) {
- String hint = originalConfig.getString(Key.HINT);
- if (StringUtils.isNotBlank(hint)) {
- boolean isTableMode = originalConfig.getBool(com.alibaba.datax.plugin.rdbms.reader.Constant.IS_TABLE_MODE).booleanValue();
- if(!isTableMode){
- throw DataXException.asDataXException(OracleReaderErrorCode.HINT_ERROR, "当且仅当非 querySql 模式读取 oracle 时才能配置 HINT.");
- }
- HintUtil.initHintConf(DATABASE_TYPE, originalConfig);
- }
- }
- }
-
- public static class Task extends Reader.Task {
-
- private Configuration readerSliceConfig;
- private CommonRdbmsReader.Task commonRdbmsReaderTask;
-
- @Override
- public void init() {
- this.readerSliceConfig = super.getPluginJobConf();
- this.commonRdbmsReaderTask = new CommonRdbmsReader.Task(
- DATABASE_TYPE ,super.getTaskGroupId(), super.getTaskId());
- this.commonRdbmsReaderTask.init(this.readerSliceConfig);
- }
-
- @Override
- public void startRead(RecordSender recordSender) {
// int fetchSize = this.readerSliceConfig
// .getInt(com.alibaba.datax.plugin.rdbms.reader.Constant.FETCH_SIZE);
- 
this.commonRdbmsReaderTask.startRead(this.readerSliceConfig, - recordSender, super.getTaskPluginCollector()); - } + this.commonRdbmsReaderTask.startRead(this.readerSliceConfig, + recordSender, super.getTaskPluginCollector()); + } - @Override - public void post() { - this.commonRdbmsReaderTask.post(this.readerSliceConfig); - } + @Override + public void post() { + this.commonRdbmsReaderTask.post(this.readerSliceConfig); + } - @Override - public void destroy() { - this.commonRdbmsReaderTask.destroy(this.readerSliceConfig); - } + @Override + public void destroy() { + this.commonRdbmsReaderTask.destroy(this.readerSliceConfig); + } - } + } } diff --git a/oraclewriter/src/main/java/com/alibaba/datax/plugin/writer/oraclewriter/OracleWriter.java b/oraclewriter/src/main/java/com/alibaba/datax/plugin/writer/oraclewriter/OracleWriter.java index 73a9ad6a37..7f5889f87f 100755 --- a/oraclewriter/src/main/java/com/alibaba/datax/plugin/writer/oraclewriter/OracleWriter.java +++ b/oraclewriter/src/main/java/com/alibaba/datax/plugin/writer/oraclewriter/OracleWriter.java @@ -12,11 +12,11 @@ import java.util.List; public class OracleWriter extends Writer { - private static final DataBaseType DATABASE_TYPE = DataBaseType.Oracle; + private static final DataBaseType DATABASE_TYPE = DataBaseType.Oracle; - public static class Job extends Writer.Job { - private Configuration originalConfig = null; - private CommonRdbmsWriter.Job commonRdbmsWriterJob; + public static class Job extends Writer.Job { + private Configuration originalConfig = null; + private CommonRdbmsWriter.Job commonRdbmsWriterJob; public void preCheck() { this.init(); @@ -24,81 +24,80 @@ public void preCheck() { } @Override - public void init() { - this.originalConfig = super.getPluginJobConf(); - - // warn:not like mysql, oracle only support insert mode, don't use - String writeMode = this.originalConfig.getString(Key.WRITE_MODE); - if (null != writeMode) { - throw DataXException - .asDataXException( - DBUtilErrorCode.CONF_ERROR, - String.format( - "写入模式(writeMode)配置错误. 因为Oracle不支持配置项 writeMode: %s, Oracle只能使用insert sql 插入数据. 请检查您的配置并作出修改", - writeMode)); - } - - this.commonRdbmsWriterJob = new CommonRdbmsWriter.Job( - DATABASE_TYPE); - this.commonRdbmsWriterJob.init(this.originalConfig); - } - - @Override - public void prepare() { + public void init() { + this.originalConfig = super.getPluginJobConf(); + + // warn:not like mysql, oracle only support insert mode, don't use + String writeMode = this.originalConfig.getString(Key.WRITE_MODE); + if (null != writeMode) { + throw DataXException + .asDataXException( + DBUtilErrorCode.CONF_ERROR, + String.format( + "写入模式(writeMode)配置错误. 因为Oracle不支持配置项 writeMode: %s, Oracle只能使用insert sql 插入数据. 
请检查您的配置并作出修改", + writeMode)); + } + + this.commonRdbmsWriterJob = new CommonRdbmsWriter.Job(DATABASE_TYPE, containerContext); + this.commonRdbmsWriterJob.init(this.originalConfig); + } + + @Override + public void prepare() { //oracle实跑先不做权限检查 //this.commonRdbmsWriterJob.privilegeValid(this.originalConfig, DATABASE_TYPE); - this.commonRdbmsWriterJob.prepare(this.originalConfig); - } - - @Override - public List split(int mandatoryNumber) { - return this.commonRdbmsWriterJob.split(this.originalConfig, - mandatoryNumber); - } - - @Override - public void post() { - this.commonRdbmsWriterJob.post(this.originalConfig); - } - - @Override - public void destroy() { - this.commonRdbmsWriterJob.destroy(this.originalConfig); - } - - } - - public static class Task extends Writer.Task { - private Configuration writerSliceConfig; - private CommonRdbmsWriter.Task commonRdbmsWriterTask; - - @Override - public void init() { - this.writerSliceConfig = super.getPluginJobConf(); - this.commonRdbmsWriterTask = new CommonRdbmsWriter.Task(DATABASE_TYPE); - this.commonRdbmsWriterTask.init(this.writerSliceConfig); - } - - @Override - public void prepare() { - this.commonRdbmsWriterTask.prepare(this.writerSliceConfig); - } - - public void startWrite(RecordReceiver recordReceiver) { - this.commonRdbmsWriterTask.startWrite(recordReceiver, - this.writerSliceConfig, super.getTaskPluginCollector()); - } - - @Override - public void post() { - this.commonRdbmsWriterTask.post(this.writerSliceConfig); - } - - @Override - public void destroy() { - this.commonRdbmsWriterTask.destroy(this.writerSliceConfig); - } - - } + this.commonRdbmsWriterJob.prepare(this.originalConfig); + } + + @Override + public List split(int mandatoryNumber) { + return this.commonRdbmsWriterJob.split(this.originalConfig, + mandatoryNumber); + } + + @Override + public void post() { + this.commonRdbmsWriterJob.post(this.originalConfig); + } + + @Override + public void destroy() { + this.commonRdbmsWriterJob.destroy(this.originalConfig); + } + + } + + public static class Task extends Writer.Task { + private Configuration writerSliceConfig; + private CommonRdbmsWriter.Task commonRdbmsWriterTask; + + @Override + public void init() { + this.writerSliceConfig = super.getPluginJobConf(); + this.commonRdbmsWriterTask = new CommonRdbmsWriter.Task(DATABASE_TYPE, containerContext); + this.commonRdbmsWriterTask.init(this.writerSliceConfig); + } + + @Override + public void prepare() { + this.commonRdbmsWriterTask.prepare(this.writerSliceConfig); + } + + public void startWrite(RecordReceiver recordReceiver) { + this.commonRdbmsWriterTask.startWrite(recordReceiver, + this.writerSliceConfig, super.getTaskPluginCollector()); + } + + @Override + public void post() { + this.commonRdbmsWriterTask.post(this.writerSliceConfig); + } + + @Override + public void destroy() { + this.commonRdbmsWriterTask.destroy(this.writerSliceConfig); + } + + } } diff --git a/oscarwriter/src/main/java/com/alibaba/datax/plugin/writer/oscarwriter/OscarWriter.java b/oscarwriter/src/main/java/com/alibaba/datax/plugin/writer/oscarwriter/OscarWriter.java index 0602bb4473..4d39bc38a4 100644 --- a/oscarwriter/src/main/java/com/alibaba/datax/plugin/writer/oscarwriter/OscarWriter.java +++ b/oscarwriter/src/main/java/com/alibaba/datax/plugin/writer/oscarwriter/OscarWriter.java @@ -9,82 +9,81 @@ import java.util.List; public class OscarWriter extends Writer { - private static final DataBaseType DATABASE_TYPE = DataBaseType.Oscar; + private static final DataBaseType DATABASE_TYPE = DataBaseType.Oscar; - 
public static class Job extends Writer.Job { - private Configuration originalConfig = null; - private CommonRdbmsWriter.Job commonRdbmsWriterJob; + public static class Job extends Writer.Job { + private Configuration originalConfig = null; + private CommonRdbmsWriter.Job commonRdbmsWriterJob; - @Override + @Override public void preCheck() { this.init(); this.commonRdbmsWriterJob.writerPreCheck(this.originalConfig, DATABASE_TYPE); } @Override - public void init() { - this.originalConfig = super.getPluginJobConf(); - - this.commonRdbmsWriterJob = new CommonRdbmsWriter.Job( - DATABASE_TYPE); - this.commonRdbmsWriterJob.init(this.originalConfig); - } - - @Override - public void prepare() { - this.commonRdbmsWriterJob.prepare(this.originalConfig); - } - - @Override - public List split(int mandatoryNumber) { - return this.commonRdbmsWriterJob.split(this.originalConfig, - mandatoryNumber); - } - - @Override - public void post() { - this.commonRdbmsWriterJob.post(this.originalConfig); - } - - @Override - public void destroy() { - this.commonRdbmsWriterJob.destroy(this.originalConfig); - } - - } - - public static class Task extends Writer.Task { - private Configuration writerSliceConfig; - private CommonRdbmsWriter.Task commonRdbmsWriterTask; - - @Override - public void init() { - this.writerSliceConfig = super.getPluginJobConf(); - this.commonRdbmsWriterTask = new CommonRdbmsWriter.Task(DATABASE_TYPE); - this.commonRdbmsWriterTask.init(this.writerSliceConfig); - } - - @Override - public void prepare() { - this.commonRdbmsWriterTask.prepare(this.writerSliceConfig); - } - - @Override - public void startWrite(RecordReceiver recordReceiver) { - this.commonRdbmsWriterTask.startWrite(recordReceiver, - this.writerSliceConfig, super.getTaskPluginCollector()); - } - - @Override - public void post() { - this.commonRdbmsWriterTask.post(this.writerSliceConfig); - } - - @Override - public void destroy() { - this.commonRdbmsWriterTask.destroy(this.writerSliceConfig); - } - - } + public void init() { + this.originalConfig = super.getPluginJobConf(); + + this.commonRdbmsWriterJob = new CommonRdbmsWriter.Job(DATABASE_TYPE, containerContext); + this.commonRdbmsWriterJob.init(this.originalConfig); + } + + @Override + public void prepare() { + this.commonRdbmsWriterJob.prepare(this.originalConfig); + } + + @Override + public List split(int mandatoryNumber) { + return this.commonRdbmsWriterJob.split(this.originalConfig, + mandatoryNumber); + } + + @Override + public void post() { + this.commonRdbmsWriterJob.post(this.originalConfig); + } + + @Override + public void destroy() { + this.commonRdbmsWriterJob.destroy(this.originalConfig); + } + + } + + public static class Task extends Writer.Task { + private Configuration writerSliceConfig; + private CommonRdbmsWriter.Task commonRdbmsWriterTask; + + @Override + public void init() { + this.writerSliceConfig = super.getPluginJobConf(); + this.commonRdbmsWriterTask = new CommonRdbmsWriter.Task(DATABASE_TYPE, containerContext); + this.commonRdbmsWriterTask.init(this.writerSliceConfig); + } + + @Override + public void prepare() { + this.commonRdbmsWriterTask.prepare(this.writerSliceConfig); + } + + @Override + public void startWrite(RecordReceiver recordReceiver) { + this.commonRdbmsWriterTask.startWrite(recordReceiver, + this.writerSliceConfig, super.getTaskPluginCollector()); + } + + @Override + public void post() { + this.commonRdbmsWriterTask.post(this.writerSliceConfig); + } + + @Override + public void destroy() { + 
+            this.commonRdbmsWriterTask.destroy(this.writerSliceConfig);
+        }
+
+    }
 }
diff --git a/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/reader/CommonRdbmsReader.java b/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/reader/CommonRdbmsReader.java
index a43abf1a1d..ac07275d6f 100755
--- a/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/reader/CommonRdbmsReader.java
+++ b/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/reader/CommonRdbmsReader.java
@@ -7,6 +7,7 @@
 import com.alibaba.datax.common.statistics.PerfRecord;
 import com.alibaba.datax.common.statistics.PerfTrace;
 import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.core.job.IJobContainerContext;
 import com.alibaba.datax.plugin.rdbms.reader.util.OriginalConfPretreatmentUtil;
 import com.alibaba.datax.plugin.rdbms.reader.util.PreCheckTask;
 import com.alibaba.datax.plugin.rdbms.reader.util.ReaderSplitUtil;
@@ -52,14 +53,16 @@ public static void main(String[] args) {
     public static class Job {
         private static final Logger LOG = LoggerFactory.getLogger(Job.class);
         public IDataSourceFactoryGetter dataSourceFactoryGetter;
+        private final IJobContainerContext containerContext;
 
-        public Job(DataBaseType dataBaseType) {
+        public Job(DataBaseType dataBaseType, IJobContainerContext containerContext) {
             OriginalConfPretreatmentUtil.DATABASE_TYPE = dataBaseType;
             SingleTableSplitUtil.DATABASE_TYPE = dataBaseType;
+            this.containerContext = containerContext;
         }
 
         public void init(Configuration originalConfig) {
-            this.dataSourceFactoryGetter = DBUtil.getReaderDataSourceFactoryGetter(originalConfig);
+            this.dataSourceFactoryGetter = DBUtil.getReaderDataSourceFactoryGetter(originalConfig, this.containerContext);
             OriginalConfPretreatmentUtil.doPretreatment(this.dataSourceFactoryGetter, originalConfig);
 
             LOG.debug("After job init(), job config now is:[\n{}\n]",
@@ -138,24 +141,26 @@ public static class Task {
         private String jdbcUrl;
         private String mandatoryEncoding;
         private static final Pattern PATTERN_FROM_TABLE = Pattern.compile("[fF][rR][oO][mM]\\s+(\\S+)");
+        private final IJobContainerContext containerContext;
 
         // 作为日志显示信息时,需要附带的通用信息。比如信息所对应的数据库连接等信息,针对哪个表做的操作
         private String basicMsg;
 
-        public Task(DataBaseType dataBaseType) {
-            this(dataBaseType, -1, -1);
+        public Task(DataBaseType dataBaseType, IJobContainerContext containerContext) {
+            this(dataBaseType, containerContext, -1, -1);
         }
 
-        public Task(DataBaseType dataBaseType, int taskGropuId, int taskId) {
+        public Task(DataBaseType dataBaseType, IJobContainerContext containerContext, int taskGropuId, int taskId) {
             this.dataBaseType = dataBaseType;
             this.taskGroupId = taskGropuId;
             this.taskId = taskId;
+            this.containerContext = containerContext;
         }
 
         public void init(Configuration readerSliceConfig) {
 
             /* for database connection */
-            this.readerDataSourceFactoryGetter = DBUtil.getReaderDataSourceFactoryGetter(readerSliceConfig);
+            this.readerDataSourceFactoryGetter = DBUtil.getReaderDataSourceFactoryGetter(readerSliceConfig, this.containerContext);
 
             this.username = readerSliceConfig.getString(Key.USERNAME);
             this.password = readerSliceConfig.getString(Key.PASSWORD);
             this.jdbcUrl = readerSliceConfig.getString(Key.JDBC_URL);
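CommonRdbmsReader's Job and Task now retain the injected context and hand it to DBUtil. The new `com.alibaba.datax.core.job.IJobContainerContext` type itself is not reproduced in this excerpt; judging purely from its call sites (DBUtil below invokes `containerContext.getTISDataXName()`, the same accessor that `IDataXNameAware` supplies to `DataXResourceName`), a plausible minimal shape would be:

```java
package com.alibaba.datax.core.job;

import com.qlangtech.tis.datax.IDataXNameAware;

// Sketch only: the real interface may declare additional members for
// reaching other CoreContainer resources.
public interface IJobContainerContext extends IDataXNameAware {
    // String getTISDataXName() is inherited from IDataXNameAware
}
```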
diff --git a/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/util/DBUtil.java b/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/util/DBUtil.java
index 1950b000df..7109fd0131 100755
--- a/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/util/DBUtil.java
+++ b/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/util/DBUtil.java
@@ -3,6 +3,7 @@
 import com.alibaba.datax.common.exception.DataXException;
 import com.alibaba.datax.common.util.Configuration;
 import com.alibaba.datax.common.util.RetryUtil;
+import com.alibaba.datax.core.job.IJobContainerContext;
 import com.alibaba.datax.plugin.rdbms.reader.Key;
 import com.alibaba.datax.plugin.rdbms.writer.util.SelectCols;
 import com.alibaba.datax.plugin.rdbms.writer.util.SelectTable;
@@ -13,7 +14,6 @@
 import com.qlangtech.tis.TIS;
 import com.qlangtech.tis.datax.impl.DataxWriter;
 import com.qlangtech.tis.offline.DataxUtils;
-import com.qlangtech.tis.plugin.KeyedPluginStore;
 import com.qlangtech.tis.plugin.StoreResourceType;
 import com.qlangtech.tis.plugin.ds.*;
 import com.qlangtech.tis.sql.parser.tuple.creator.EntityName;
@@ -864,19 +864,20 @@ public static void loadDriverClass(String pluginType, String pluginName) {
         }
     }
 
-    public static IDataSourceFactoryGetter getWriterDataSourceFactoryGetter(Configuration config) {
-        return getDataSourceFactoryGetter(config, (res) -> {
-            return DataxWriter.load(null, res.resType, res.resourceName, true);
+    public static IDataSourceFactoryGetter getWriterDataSourceFactoryGetter(Configuration originalConfig, IJobContainerContext containerContext) {
+        return getDataSourceFactoryGetter(originalConfig, containerContext, (res) -> {
+            return DataxWriter.load(null, res.resType, res.getDataXName(), true);
         });
     }
 
-    public static IDataSourceFactoryGetter getReaderDataSourceFactoryGetter(Configuration config) {
-        return getDataSourceFactoryGetter(config, (res) -> {
+    public static IDataSourceFactoryGetter getReaderDataSourceFactoryGetter(Configuration config, IJobContainerContext containerContext) {
+        return getDataSourceFactoryGetter(config, containerContext, (res) -> {
             return new IDataSourceFactoryGetter() {
                 @Override
                 public DataSourceFactory getDataSourceFactory() {
                     return TIS.getDataBasePlugin(new PostedDSProp(res.dbFactoryId));
                 }
+
                 @Override
                 public Integer getRowFetchSize() {
                     return 2000;
@@ -886,8 +887,10 @@ public Integer getRowFetchSize() {
     }
 
     private static IDataSourceFactoryGetter getDataSourceFactoryGetter(
-            Configuration originalConfig, Function<ResourceName, Object> callable) {
-        String dataXName = originalConfig.getString(DataxUtils.DATAX_NAME);
+            Configuration originalConfig, IJobContainerContext containerContext, Function<DataXResourceName, Object> callable) {
+
+
+        String dataXName = containerContext.getTISDataXName(); // originalConfig.getString(DataxUtils.DATAX_NAME);
         StoreResourceType resType = StoreResourceType.parse(
                 originalConfig.getString(StoreResourceType.KEY_STORE_RESOURCE_TYPE));
@@ -897,7 +900,7 @@ private static IDataSourceFactoryGetter getDataSourceFactoryGetter(
             throw new IllegalArgumentException("param dataXName:" + dataXName + "can not be null");
         }
         try {
-            Object dataxPlugin = callable.apply(new ResourceName(dataXName, resType, dbFactoryId));
+            Object dataxPlugin = callable.apply(new DataXResourceName(() -> dataXName, resType, dbFactoryId));
             Objects.requireNonNull(dataxPlugin, "dataXName:" + dataXName + " relevant instance can not be null");
             if (!(dataxPlugin instanceof IDataSourceFactoryGetter)) {
                 throw new IllegalStateException("dataxWriter:" + dataxPlugin.getClass() + " mus be type of " + IDataSourceFactoryGetter.class);
@@ -908,15 +911,4 @@ private static IDataSourceFactoryGetter getDataSourceFactoryGetter(
         }
     }
 
-    private static final class ResourceName {
-        private final String resourceName;
-        private final StoreResourceType resType;
-        private final DBIdentity dbFactoryId;
-
-        public ResourceName(String resourceName, StoreResourceType resType, DBIdentity dbFactoryId) {
-            this.resourceName = resourceName;
-            this.resType = resType;
-            this.dbFactoryId = dbFactoryId;
-        }
-    }
 }
diff --git a/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/util/DataXResourceName.java b/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/util/DataXResourceName.java
new file mode 100644
index 0000000000..622044c574
--- /dev/null
+++ b/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/util/DataXResourceName.java
@@ -0,0 +1,26 @@
+package com.alibaba.datax.plugin.rdbms.util;
+
+import com.qlangtech.tis.datax.IDataXNameAware;
+import com.qlangtech.tis.plugin.StoreResourceType;
+import com.qlangtech.tis.plugin.ds.DBIdentity;
+
+/**
+ * @author: 百岁(baisui@qlangtech.com)
+ * @create: 2023-02-23 11:26
+ **/
+public class DataXResourceName {
+    private final IDataXNameAware name;
+    public final StoreResourceType resType;
+    public final DBIdentity dbFactoryId;
+
+    public DataXResourceName(IDataXNameAware name, StoreResourceType resType, DBIdentity dbFactoryId) {
+        this.name = name;
+        this.resType = resType;
+        this.dbFactoryId = dbFactoryId;
+    }
+
+    public String getDataXName() {
+        return this.name.getTISDataXName();
+    }
+
+}
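Note that `DataXResourceName` no longer captures a plain `String` the way the removed `ResourceName` did: the name travels through the `IDataXNameAware` callback, so the call site can defer resolution with a lambda. A sketch of how the class is meant to be fed, mirroring the call site in `DBUtil.getDataSourceFactoryGetter` (the helper method here is illustrative):

```java
import com.alibaba.datax.core.job.IJobContainerContext;
import com.alibaba.datax.plugin.rdbms.util.DataXResourceName;
import com.qlangtech.tis.plugin.StoreResourceType;
import com.qlangtech.tis.plugin.ds.DBIdentity;

class DataXResourceNameUsage {
    // Resolve the pipeline name from the container context once, then hand it
    // over lazily through the IDataXNameAware lambda.
    static DataXResourceName describe(IJobContainerContext ctx,
                                      StoreResourceType resType,
                                      DBIdentity dbFactoryId) {
        String dataXName = ctx.getTISDataXName();
        return new DataXResourceName(() -> dataXName, resType, dbFactoryId);
    }
}
```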
diff --git a/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/writer/CommonRdbmsWriter.java b/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/writer/CommonRdbmsWriter.java
index a9315689c0..685928e379 100755
--- a/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/writer/CommonRdbmsWriter.java
+++ b/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/writer/CommonRdbmsWriter.java
@@ -6,6 +6,7 @@
 import com.alibaba.datax.common.plugin.RecordReceiver;
 import com.alibaba.datax.common.plugin.TaskPluginCollector;
 import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.datax.core.job.IJobContainerContext;
 import com.alibaba.datax.plugin.rdbms.util.DBUtil;
 import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode;
 import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
@@ -34,17 +35,19 @@ public class CommonRdbmsWriter {
     public static class Job {
         private DataBaseType dataBaseType;
         private IDataSourceFactoryGetter dataSourceFactoryGetter;
+        private final IJobContainerContext containerContext;
         private static final Logger LOG = LoggerFactory
                 .getLogger(Job.class);
 
-        public Job(DataBaseType dataBaseType) {
+        public Job(DataBaseType dataBaseType, IJobContainerContext containerContext) {
             this.dataBaseType = dataBaseType;
+            this.containerContext = containerContext;
             OriginalConfPretreatmentUtil.DATABASE_TYPE = this.dataBaseType;
         }
 
         public void init(Configuration originalConfig) {
-            this.dataSourceFactoryGetter = DBUtil.getWriterDataSourceFactoryGetter(originalConfig);
+            this.dataSourceFactoryGetter = DBUtil.getWriterDataSourceFactoryGetter(originalConfig, this.containerContext);
             OriginalConfPretreatmentUtil.doPretreatment(originalConfig, this.dataSourceFactoryGetter, this.dataBaseType);
 
             LOG.debug("After job init(), originalConfig now is:[\n{}\n]",
@@ -211,9 +214,11 @@ public static class Task {
         protected String writeMode;
         protected boolean emptyAsNull;
         protected List> resultSetMetaData;
+        private final IJobContainerContext containerContext;
 
-        public Task(DataBaseType dataBaseType) {
+        public Task(DataBaseType dataBaseType, IJobContainerContext containerContext) {
             this.dataBaseType = dataBaseType;
+            this.containerContext = containerContext;
         }
 
         public void init(Configuration writerSliceConfig) {
@@ -249,7 +254,7 @@ public void init(Configuration writerSliceConfig) {
             this.writeRecordSql = String.format(INSERT_OR_REPLACE_TEMPLATE, this.table);
             BASIC_MESSAGE = String.format("jdbcUrl:[%s], table:[%s]", this.jdbcUrl, this.table);
-            this.dataSourceFactoryGetter = DBUtil.getWriterDataSourceFactoryGetter(writerSliceConfig);
+            this.dataSourceFactoryGetter = DBUtil.getWriterDataSourceFactoryGetter(writerSliceConfig, this.containerContext);
             this.columns = SelectCols.createSelectCols(writerSliceConfig, this.dataSourceFactoryGetter.getDataSourceFactory().getEscapeChar());
             this.columnNumber = this.columns.size();
         }
@@ -500,7 +505,7 @@ public void startWrite(RecordReceiver recordReceiver,
                     this.jdbcUrl, username, password);
             DBUtil.dealWithSessionConfig(connection, writerSliceConfig,
                     this.dataBaseType, BASIC_MESSAGE);
-            startWriteWithConnection(recordReceiver, taskPluginCollector, new DataSourceMeta.JDBCConnection( connection,this.jdbcUrl));
+            startWriteWithConnection(recordReceiver, taskPluginCollector, new DataSourceMeta.JDBCConnection(connection, this.jdbcUrl));
         }
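With the extra constructor argument, building the common writer outside a running JobContainer (for instance in a unit test) needs a context stand-in. The sketch below assumes `getTISDataXName()` is the only abstract member, which is all the visible DBUtil code path exercises; if the real interface declares more, the stub must implement those too:

```java
import com.alibaba.datax.core.job.IJobContainerContext;
import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
import com.alibaba.datax.plugin.rdbms.writer.CommonRdbmsWriter;

class WriterTaskStubExample {
    static CommonRdbmsWriter.Task newTaskForTest() {
        // Supplies only the pipeline name; "unit_test_pipeline" is hypothetical.
        IJobContainerContext stubCtx = new IJobContainerContext() {
            @Override
            public String getTISDataXName() {
                return "unit_test_pipeline";
            }
        };
        return new CommonRdbmsWriter.Task(DataBaseType.MySql, stubCtx);
    }
}
```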
diff --git a/postgresqlreader/src/main/java/com/alibaba/datax/plugin/reader/postgresqlreader/PostgresqlReader.java b/postgresqlreader/src/main/java/com/alibaba/datax/plugin/reader/postgresqlreader/PostgresqlReader.java
index 58b01f9e35..d2c53585dd 100755
--- a/postgresqlreader/src/main/java/com/alibaba/datax/plugin/reader/postgresqlreader/PostgresqlReader.java
+++ b/postgresqlreader/src/main/java/com/alibaba/datax/plugin/reader/postgresqlreader/PostgresqlReader.java
@@ -28,7 +28,7 @@ public void init() {
         // }
         // this.originalConfig.set(com.alibaba.datax.plugin.rdbms.reader.Constant.FETCH_SIZE, fetchSize);
 
-        this.commonRdbmsReaderMaster = new CommonRdbmsReader.Job(DATABASE_TYPE);
+        this.commonRdbmsReaderMaster = new CommonRdbmsReader.Job(DATABASE_TYPE, this.containerContext);
         this.commonRdbmsReaderMaster.init(this.originalConfig);
     }
 
@@ -57,7 +57,8 @@ public static class Task extends Reader.Task {
         @Override
         public void init() {
             this.readerSliceConfig = super.getPluginJobConf();
-            this.commonRdbmsReaderSlave = new CommonRdbmsReader.Task(DATABASE_TYPE, super.getTaskGroupId(), super.getTaskId());
+            this.commonRdbmsReaderSlave
+                    = new CommonRdbmsReader.Task(DATABASE_TYPE, containerContext, super.getTaskGroupId(), super.getTaskId());
             this.commonRdbmsReaderSlave.init(this.readerSliceConfig);
         }
 
diff --git a/postgresqlwriter/src/main/java/com/alibaba/datax/plugin/writer/postgresqlwriter/PostgresqlWriter.java b/postgresqlwriter/src/main/java/com/alibaba/datax/plugin/writer/postgresqlwriter/PostgresqlWriter.java
index 22dc0c1e6d..172017075e 100755
--- a/postgresqlwriter/src/main/java/com/alibaba/datax/plugin/writer/postgresqlwriter/PostgresqlWriter.java
+++ b/postgresqlwriter/src/main/java/com/alibaba/datax/plugin/writer/postgresqlwriter/PostgresqlWriter.java
@@ -12,89 +12,89 @@ import java.util.List;
 
 public class PostgresqlWriter extends Writer {
-    private static final DataBaseType DATABASE_TYPE = DataBaseType.PostgreSQL;
-
-    public static class Job extends Writer.Job {
-        private Configuration originalConfig = null;
-        private CommonRdbmsWriter.Job commonRdbmsWriterMaster;
-
-        @Override
-        public void init() {
-            this.originalConfig = super.getPluginJobConf();
-
-            // warn:not like mysql, PostgreSQL only support insert mode, don't use
-            String writeMode = this.originalConfig.getString(Key.WRITE_MODE);
-            if (null != writeMode) {
-                throw DataXException.asDataXException(DBUtilErrorCode.CONF_ERROR,
-                        String.format("写入模式(writeMode)配置有误. 因为PostgreSQL不支持配置参数项 writeMode: %s, PostgreSQL仅使用insert sql 插入数据. 请检查您的配置并作出修改.", writeMode));
-            }
-
-            this.commonRdbmsWriterMaster = new CommonRdbmsWriter.Job(DATABASE_TYPE);
-            this.commonRdbmsWriterMaster.init(this.originalConfig);
-        }
-
-        @Override
-        public void prepare() {
-            this.commonRdbmsWriterMaster.prepare(this.originalConfig);
-        }
-
-        @Override
-        public List<Configuration> split(int mandatoryNumber) {
-            return this.commonRdbmsWriterMaster.split(this.originalConfig, mandatoryNumber);
-        }
-
-        @Override
-        public void post() {
-            this.commonRdbmsWriterMaster.post(this.originalConfig);
-        }
-
-        @Override
-        public void destroy() {
-            this.commonRdbmsWriterMaster.destroy(this.originalConfig);
-        }
-
-    }
-
-    public static class Task extends Writer.Task {
-        private Configuration writerSliceConfig;
-        private CommonRdbmsWriter.Task commonRdbmsWriterSlave;
-
-        @Override
-        public void init() {
-            this.writerSliceConfig = super.getPluginJobConf();
-            this.commonRdbmsWriterSlave = new CommonRdbmsWriter.Task(DATABASE_TYPE){
-                @Override
-                public String calcValueHolder(String columnType){
-                    if("serial".equalsIgnoreCase(columnType)){
-                        return "?::int";
-                    }else if("bit".equalsIgnoreCase(columnType)){
-                        return "?::bit varying";
-                    }
-                    return "?::" + columnType;
-                }
-            };
-            this.commonRdbmsWriterSlave.init(this.writerSliceConfig);
-        }
-
-        @Override
-        public void prepare() {
-            this.commonRdbmsWriterSlave.prepare(this.writerSliceConfig);
-        }
-
-        public void startWrite(RecordReceiver recordReceiver) {
-            this.commonRdbmsWriterSlave.startWrite(recordReceiver, this.writerSliceConfig, super.getTaskPluginCollector());
-        }
-
-        @Override
-        public void post() {
-            this.commonRdbmsWriterSlave.post(this.writerSliceConfig);
-        }
-
-        @Override
-        public void destroy() {
-            this.commonRdbmsWriterSlave.destroy(this.writerSliceConfig);
-        }
-
-    }
+    private static final DataBaseType DATABASE_TYPE = DataBaseType.PostgreSQL;
+
+    public static class Job extends Writer.Job {
+        private Configuration originalConfig = null;
+        private CommonRdbmsWriter.Job commonRdbmsWriterMaster;
+
+        @Override
+        public void init() {
+            this.originalConfig = super.getPluginJobConf();
+
+            // warn:not like mysql, PostgreSQL only support insert mode, don't use
+            String writeMode = this.originalConfig.getString(Key.WRITE_MODE);
+            if (null != writeMode) {
+                throw DataXException.asDataXException(DBUtilErrorCode.CONF_ERROR,
+                        String.format("写入模式(writeMode)配置有误. 因为PostgreSQL不支持配置参数项 writeMode: %s, PostgreSQL仅使用insert sql 插入数据. 请检查您的配置并作出修改.", writeMode));
+            }
+
+            this.commonRdbmsWriterMaster = new CommonRdbmsWriter.Job(DATABASE_TYPE, this.containerContext);
+            this.commonRdbmsWriterMaster.init(this.originalConfig);
+        }
+
+        @Override
+        public void prepare() {
+            this.commonRdbmsWriterMaster.prepare(this.originalConfig);
+        }
+
+        @Override
+        public List<Configuration> split(int mandatoryNumber) {
+            return this.commonRdbmsWriterMaster.split(this.originalConfig, mandatoryNumber);
+        }
+
+        @Override
+        public void post() {
+            this.commonRdbmsWriterMaster.post(this.originalConfig);
+        }
+
+        @Override
+        public void destroy() {
+            this.commonRdbmsWriterMaster.destroy(this.originalConfig);
+        }
+
+    }
+
+    public static class Task extends Writer.Task {
+        private Configuration writerSliceConfig;
+        private CommonRdbmsWriter.Task commonRdbmsWriterSlave;
+
+        @Override
+        public void init() {
+            this.writerSliceConfig = super.getPluginJobConf();
+            this.commonRdbmsWriterSlave = new CommonRdbmsWriter.Task(DATABASE_TYPE, containerContext) {
+                @Override
+                public String calcValueHolder(String columnType) {
+                    if ("serial".equalsIgnoreCase(columnType)) {
+                        return "?::int";
+                    } else if ("bit".equalsIgnoreCase(columnType)) {
+                        return "?::bit varying";
+                    }
+                    return "?::" + columnType;
+                }
+            };
+            this.commonRdbmsWriterSlave.init(this.writerSliceConfig);
+        }
+
+        @Override
+        public void prepare() {
+            this.commonRdbmsWriterSlave.prepare(this.writerSliceConfig);
+        }
+
+        public void startWrite(RecordReceiver recordReceiver) {
+            this.commonRdbmsWriterSlave.startWrite(recordReceiver, this.writerSliceConfig, super.getTaskPluginCollector());
+        }
+
+        @Override
+        public void post() {
+            this.commonRdbmsWriterSlave.post(this.writerSliceConfig);
+        }
+
+        @Override
+        public void destroy() {
+            this.commonRdbmsWriterSlave.destroy(this.writerSliceConfig);
+        }
+
+    }
 }
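Apart from the context threading, the PostgreSQL task keeps its `calcValueHolder` override, which casts each JDBC placeholder to the column's declared type so the driver can bind untyped values safely. The same logic as the anonymous subclass above, extracted in isolation:

```java
// calcValueHolder("serial") -> "?::int"
// calcValueHolder("bit")    -> "?::bit varying"
// calcValueHolder("text")   -> "?::text"
static String calcValueHolder(String columnType) {
    if ("serial".equalsIgnoreCase(columnType)) {
        return "?::int";
    } else if ("bit".equalsIgnoreCase(columnType)) {
        return "?::bit varying";
    }
    return "?::" + columnType;
}
```

An INSERT built from these holders thus ends up as, for example, `INSERT INTO t VALUES (?::int, ?::bit varying, ?::text)` rather than bare `?` markers.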
diff --git a/rdbmsreader/src/main/java/com/alibaba/datax/plugin/reader/rdbmsreader/RdbmsReader.java b/rdbmsreader/src/main/java/com/alibaba/datax/plugin/reader/rdbmsreader/RdbmsReader.java
index d7da48a47e..975c885bba 100755
--- a/rdbmsreader/src/main/java/com/alibaba/datax/plugin/reader/rdbmsreader/RdbmsReader.java
+++ b/rdbmsreader/src/main/java/com/alibaba/datax/plugin/reader/rdbmsreader/RdbmsReader.java
@@ -1,22 +1,22 @@
 package com.alibaba.datax.plugin.reader.rdbmsreader;
 
-import com.alibaba.datax.common.exception.DataXException;
 import com.alibaba.datax.common.plugin.RecordSender;
 import com.alibaba.datax.common.spi.Reader;
 import com.alibaba.datax.common.util.Configuration;
 import com.alibaba.datax.plugin.rdbms.reader.CommonRdbmsReader;
 import com.alibaba.datax.plugin.rdbms.util.DBUtil;
-import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode;
 import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
 
 import java.util.List;
 
 public class RdbmsReader extends Reader {
     private static final DataBaseType DATABASE_TYPE = DataBaseType.RDBMS;
+
     static {
-        //加载插件下面配置的驱动类
+        // load the driver classes configured under this plugin
         DBUtil.loadDriverClass("reader", "rdbms");
     }
+
     public static class Job extends Reader.Job {
         private Configuration originalConfig;
@@ -40,8 +40,7 @@ public void init() {
         // com.alibaba.datax.plugin.rdbms.reader.Constant.FETCH_SIZE,
         // fetchSize);
 
-        this.commonRdbmsReaderMaster = new SubCommonRdbmsReader.Job(
-                DATABASE_TYPE);
+        this.commonRdbmsReaderMaster = new SubCommonRdbmsReader.Job(DATABASE_TYPE, this.containerContext);
         this.commonRdbmsReaderMaster.init(this.originalConfig);
     }
 
@@ -71,8 +70,7 @@ public static class Task extends Reader.Task {
         @Override
         public void init() {
             this.readerSliceConfig = super.getPluginJobConf();
-            this.commonRdbmsReaderSlave = new SubCommonRdbmsReader.Task(
-                    DATABASE_TYPE);
+            this.commonRdbmsReaderSlave = new SubCommonRdbmsReader.Task(DATABASE_TYPE, this.containerContext);
             this.commonRdbmsReaderSlave.init(this.readerSliceConfig);
         }
 
diff --git a/rdbmsreader/src/main/java/com/alibaba/datax/plugin/reader/rdbmsreader/SubCommonRdbmsReader.java b/rdbmsreader/src/main/java/com/alibaba/datax/plugin/reader/rdbmsreader/SubCommonRdbmsReader.java
index 11ba866cf7..263933d461 100755
--- a/rdbmsreader/src/main/java/com/alibaba/datax/plugin/reader/rdbmsreader/SubCommonRdbmsReader.java
+++ b/rdbmsreader/src/main/java/com/alibaba/datax/plugin/reader/rdbmsreader/SubCommonRdbmsReader.java
@@ -1,5 +1,6 @@
 package com.alibaba.datax.plugin.reader.rdbmsreader;
 
+import com.alibaba.datax.core.job.IJobContainerContext;
 import com.alibaba.datax.plugin.rdbms.reader.CommonRdbmsReader;
 import com.alibaba.datax.plugin.rdbms.util.DBUtil;
 import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
@@ -12,8 +13,8 @@ public class SubCommonRdbmsReader extends CommonRdbmsReader {
     }
 
     public static class Job extends CommonRdbmsReader.Job {
-        public Job(DataBaseType dataBaseType) {
-            super(dataBaseType);
+        public Job(DataBaseType dataBaseType, IJobContainerContext containerContext) {
+            super(dataBaseType, containerContext);
         }
     }
 
@@ -22,8 +23,8 @@ public static class Task extends CommonRdbmsReader.Task {
         private static final Logger LOG = LoggerFactory.getLogger(Task.class);
         private static final boolean IS_DEBUG = LOG.isDebugEnabled();
 
-        public Task(DataBaseType dataBaseType) {
-            super(dataBaseType);
+        public Task(DataBaseType dataBaseType, IJobContainerContext containerContext) {
+            super(dataBaseType, containerContext);
         }
 
diff --git a/rdbmswriter/src/main/java/com/alibaba/datax/plugin/reader/rdbmswriter/RdbmsWriter.java b/rdbmswriter/src/main/java/com/alibaba/datax/plugin/reader/rdbmswriter/RdbmsWriter.java
index 71fe795628..8af1d977b8 100755
--- a/rdbmswriter/src/main/java/com/alibaba/datax/plugin/reader/rdbmswriter/RdbmsWriter.java
+++ b/rdbmswriter/src/main/java/com/alibaba/datax/plugin/reader/rdbmswriter/RdbmsWriter.java
@@ -14,10 +14,12 @@
 public class RdbmsWriter extends Writer {
     private static final DataBaseType DATABASE_TYPE = DataBaseType.RDBMS;
+
     static {
-        //加载插件下面配置的驱动类
+        // load the driver classes configured under this plugin
         DBUtil.loadDriverClass("writer", "rdbms");
     }
+
     public static class Job extends Writer.Job {
         private Configuration originalConfig = null;
         private CommonRdbmsWriter.Job commonRdbmsWriterMaster;
@@ -37,8 +39,7 @@ public void init() {
                             writeMode));
             }
 
-            this.commonRdbmsWriterMaster = new SubCommonRdbmsWriter.Job(
-                    DATABASE_TYPE);
+            this.commonRdbmsWriterMaster = new SubCommonRdbmsWriter.Job(DATABASE_TYPE, this.containerContext);
             this.commonRdbmsWriterMaster.init(this.originalConfig);
         }
 
@@ -72,8 +73,7 @@ public static class Task extends Writer.Task {
         @Override
         public void init() {
             this.writerSliceConfig = super.getPluginJobConf();
-            this.commonRdbmsWriterSlave = new SubCommonRdbmsWriter.Task(
-                    DATABASE_TYPE);
+            this.commonRdbmsWriterSlave = new SubCommonRdbmsWriter.Task(DATABASE_TYPE, this.containerContext);
             this.commonRdbmsWriterSlave.init(this.writerSliceConfig);
         }
 
@@ -99,4 +99,4 @@ public void destroy() {
     }
 
-}
\ No newline at end of file
+}
diff --git a/rdbmswriter/src/main/java/com/alibaba/datax/plugin/reader/rdbmswriter/SubCommonRdbmsWriter.java b/rdbmswriter/src/main/java/com/alibaba/datax/plugin/reader/rdbmswriter/SubCommonRdbmsWriter.java
index 417c184e25..67797c16c3 100755
--- a/rdbmswriter/src/main/java/com/alibaba/datax/plugin/reader/rdbmswriter/SubCommonRdbmsWriter.java
+++ b/rdbmswriter/src/main/java/com/alibaba/datax/plugin/reader/rdbmswriter/SubCommonRdbmsWriter.java
@@ -1,5 +1,6 @@
 package com.alibaba.datax.plugin.reader.rdbmswriter;
 
+import com.alibaba.datax.core.job.IJobContainerContext;
 import com.alibaba.datax.plugin.rdbms.util.DBUtil;
 import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
 import com.alibaba.datax.plugin.rdbms.writer.CommonRdbmsWriter;
@@ -10,14 +11,14 @@ public class SubCommonRdbmsWriter extends CommonRdbmsWriter {
     }
 
     public static class Job extends CommonRdbmsWriter.Job {
-        public Job(DataBaseType dataBaseType) {
-            super(dataBaseType);
+        public Job(DataBaseType dataBaseType, IJobContainerContext containerContext) {
+            super(dataBaseType, containerContext);
         }
     }
 
     public static class Task extends CommonRdbmsWriter.Task {
-        public Task(DataBaseType dataBaseType) {
-            super(dataBaseType);
+        public Task(DataBaseType dataBaseType, IJobContainerContext containerContext) {
+            super(dataBaseType, containerContext);
         }
 
diff --git a/sqlserverreader/src/main/java/com/alibaba/datax/plugin/reader/sqlserverreader/SqlServerReader.java b/sqlserverreader/src/main/java/com/alibaba/datax/plugin/reader/sqlserverreader/SqlServerReader.java
index 2da95d7435..d07245f764 100755
--- a/sqlserverreader/src/main/java/com/alibaba/datax/plugin/reader/sqlserverreader/SqlServerReader.java
+++ b/sqlserverreader/src/main/java/com/alibaba/datax/plugin/reader/sqlserverreader/SqlServerReader.java
@@ -12,83 +12,85 @@
 
 public class SqlServerReader extends Reader {
 
-    private static final DataBaseType DATABASE_TYPE = DataBaseType.SQLServer;
-
-    public static class Job extends Reader.Job {
-
-        private Configuration originalConfig = null;
-        private CommonRdbmsReader.Job commonRdbmsReaderJob;
-
-        @Override
-        public void init() {
-            this.originalConfig = super.getPluginJobConf();
-            int fetchSize = this.originalConfig.getInt(
-                    com.alibaba.datax.plugin.rdbms.reader.Constant.FETCH_SIZE,
-                    Constant.DEFAULT_FETCH_SIZE);
-            if (fetchSize < 1) {
-                throw DataXException
-                        .asDataXException(DBUtilErrorCode.REQUIRED_VALUE,
-                                String.format("您配置的fetchSize有误,根据DataX的设计,fetchSize : [%d] 设置值不能小于 1.",
-                                        fetchSize));
-            }
-            this.originalConfig.set(
-                    com.alibaba.datax.plugin.rdbms.reader.Constant.FETCH_SIZE,
-                    fetchSize);
-
-            this.commonRdbmsReaderJob = new CommonRdbmsReader.Job(
-                    DATABASE_TYPE);
-            this.commonRdbmsReaderJob.init(this.originalConfig);
-        }
-
-        @Override
-        public List<Configuration> split(int adviceNumber) {
-            return this.commonRdbmsReaderJob.split(this.originalConfig,
-                    adviceNumber);
-        }
-
-        @Override
-        public void post() {
-            this.commonRdbmsReaderJob.post(this.originalConfig);
-        }
-
-        @Override
-        public void destroy() {
-            this.commonRdbmsReaderJob.destroy(this.originalConfig);
-        }
-
-    }
-
-    public static class Task extends Reader.Task {
-
-        private Configuration readerSliceConfig;
-        private CommonRdbmsReader.Task commonRdbmsReaderTask;
-
-        @Override
-        public void init() {
-            this.readerSliceConfig = super.getPluginJobConf();
-            this.commonRdbmsReaderTask = new CommonRdbmsReader.Task(
-                    DATABASE_TYPE ,super.getTaskGroupId(), super.getTaskId());
-            this.commonRdbmsReaderTask.init(this.readerSliceConfig);
-        }
-
-        @Override
-        public void startRead(RecordSender recordSender) {
+    private static final DataBaseType DATABASE_TYPE = DataBaseType.SQLServer;
+
+    public static class Job extends Reader.Job {
+
+        private Configuration originalConfig = null;
+        private CommonRdbmsReader.Job commonRdbmsReaderJob;
+
+        @Override
+        public void init() {
+            this.originalConfig = super.getPluginJobConf();
+            int fetchSize = this.originalConfig.getInt(
+                    com.alibaba.datax.plugin.rdbms.reader.Constant.FETCH_SIZE,
+                    Constant.DEFAULT_FETCH_SIZE);
+            if (fetchSize < 1) {
+                throw DataXException
+                        .asDataXException(DBUtilErrorCode.REQUIRED_VALUE,
+                                String.format("您配置的fetchSize有误,根据DataX的设计,fetchSize : [%d] 设置值不能小于 1.",
+                                        fetchSize));
+            }
+            this.originalConfig.set(
+                    com.alibaba.datax.plugin.rdbms.reader.Constant.FETCH_SIZE,
+                    fetchSize);
+
+            this.commonRdbmsReaderJob = new CommonRdbmsReader.Job(
+                    DATABASE_TYPE, this.containerContext);
+            this.commonRdbmsReaderJob.init(this.originalConfig);
+        }
+
+        @Override
+        public List<Configuration> split(int adviceNumber) {
+            return this.commonRdbmsReaderJob.split(this.originalConfig,
+                    adviceNumber);
+        }
+
+        @Override
+        public void post() {
+            this.commonRdbmsReaderJob.post(this.originalConfig);
+        }
+
+        @Override
+        public void destroy() {
+            this.commonRdbmsReaderJob.destroy(this.originalConfig);
+        }
+
+    }
+
+    public static class Task extends Reader.Task {
+
+        private Configuration readerSliceConfig;
+        private CommonRdbmsReader.Task commonRdbmsReaderTask;
+
+
+        @Override
+        public void init() {
+            this.readerSliceConfig = super.getPluginJobConf();
+            this.commonRdbmsReaderTask = new CommonRdbmsReader.Task(
+                    DATABASE_TYPE, containerContext, super.getTaskGroupId(), super.getTaskId());
+            this.commonRdbmsReaderTask.init(this.readerSliceConfig);
+        }
+
+
+        @Override
+        public void startRead(RecordSender recordSender) {
 //            int fetchSize = this.readerSliceConfig
 //                    .getInt(com.alibaba.datax.plugin.rdbms.reader.Constant.FETCH_SIZE);
-            this.commonRdbmsReaderTask.startRead(this.readerSliceConfig, recordSender, super.getTaskPluginCollector());
-        }
+            this.commonRdbmsReaderTask.startRead(this.readerSliceConfig, recordSender, super.getTaskPluginCollector());
+        }
 
-        @Override
-        public void post() {
-            this.commonRdbmsReaderTask.post(this.readerSliceConfig);
-        }
+        @Override
+        public void post() {
+            this.commonRdbmsReaderTask.post(this.readerSliceConfig);
+        }
 
-        @Override
-        public void destroy() {
-            this.commonRdbmsReaderTask.destroy(this.readerSliceConfig);
-        }
+        @Override
+        public void destroy() {
+            this.commonRdbmsReaderTask.destroy(this.readerSliceConfig);
+        }
 
-    }
+    }
 }
diff --git a/sqlserverwriter/src/main/java/com/alibaba/datax/plugin/writer/sqlserverwriter/SqlServerWriter.java b/sqlserverwriter/src/main/java/com/alibaba/datax/plugin/writer/sqlserverwriter/SqlServerWriter.java
index 6c81971915..2c9c2a35d4 100644
--- a/sqlserverwriter/src/main/java/com/alibaba/datax/plugin/writer/sqlserverwriter/SqlServerWriter.java
+++ b/sqlserverwriter/src/main/java/com/alibaba/datax/plugin/writer/sqlserverwriter/SqlServerWriter.java
@@ -33,7 +33,7 @@ public void init() {
                             writeMode));
         }
 
-        this.commonRdbmsWriterJob = new CommonRdbmsWriter.Job(DATABASE_TYPE);
+        this.commonRdbmsWriterJob = new CommonRdbmsWriter.Job(DATABASE_TYPE, this.containerContext);
         this.commonRdbmsWriterJob.init(this.originalConfig);
     }
 
@@ -68,7 +68,7 @@ public static class Task extends Writer.Task {
         public void init() {
             this.writerSliceConfig = super.getPluginJobConf();
             this.commonRdbmsWriterTask = new CommonRdbmsWriter.Task(
-                    DATABASE_TYPE);
+                    DATABASE_TYPE, containerContext);
             this.commonRdbmsWriterTask.init(this.writerSliceConfig);
         }
 
diff --git a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/StarRocksWriter.java b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/StarRocksWriter.java
index 6d2a497118..24212ec6b3 100755
--- a/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/StarRocksWriter.java
+++ b/starrockswriter/src/main/java/com/starrocks/connector/datax/plugin/writer/starrockswriter/StarRocksWriter.java
@@ -33,7 +33,7 @@ public void init() {
         this.originalConfig = super.getPluginJobConf();
         options = new StarRocksWriterOptions(super.getPluginJobConf());
         options.doPretreatment();
-        this.dsGetter = DBUtil.getWriterDataSourceFactoryGetter(originalConfig);
+        this.dsGetter = DBUtil.getWriterDataSourceFactoryGetter(originalConfig, this.containerContext);
     }
 
@@ -95,7 +95,7 @@ public static class Task extends Writer.Task {
     @Override
     public void init() {
         options = new StarRocksWriterOptions(super.getPluginJobConf());
-        IDataSourceFactoryGetter dsGetter = DBUtil.getWriterDataSourceFactoryGetter(super.getPluginJobConf());
+        IDataSourceFactoryGetter dsGetter = DBUtil.getWriterDataSourceFactoryGetter(super.getPluginJobConf(), this.containerContext);
         if (options.isWildcardColumn()) {
             Connection conn = DBUtil.getConnection(dsGetter, options.getJdbcUrl(), options.getUsername(), options.getPassword());
             List<String> columns = StarRocksWriterUtil.getStarRocksColumns(conn, options.getDatabase(), options.getTable());
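Throughout these hunks the plugin Job/Task classes reference `containerContext` without declaring it, so the field must be inherited from the DataX plugin base classes, which are modified outside this excerpt. A rough sketch of the assumed wiring (field and setter names hypothetical; the JobContainer would inject the context right after instantiating each Reader/Writer plugin):

```java
// Sketch only -- not the actual AbstractJobPlugin source from this patch.
public abstract class AbstractJobPlugin extends AbstractPlugin {
    protected IJobContainerContext containerContext;

    public void setContainerContext(IJobContainerContext containerContext) {
        this.containerContext = containerContext;
    }
}
```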