Skip to content

Latest commit

 

History

History
431 lines (370 loc) · 13 KB

README.md

File metadata and controls

431 lines (370 loc) · 13 KB

oracledb-datapump

oracledb-datapump is a Python package for running Oracle Datapump remotely and without the need to install an Oracle Database client.

Quick Start

There are three primary modes of usage:

  • Python package
  • CLI
  • AWS Lambda

Python package

Export (synchronous)

import logging

from oracledb_datapump import Job, Directive, Operation, JobMode

logging.basicConfig(level="INFO")


job = Job(
    operation=Operation.EXPORT,
    mode=JobMode.SCHEMA,
    directives=[Directive.INCLUDE_SCHEMA("HR"), Directive.PARALLEL(2)],
)
response = job.run(wait=True, connection="system/manager@localhost/orclpdb1")
print(response)
print(job.get_logfile())

Export (asynchronous)

import logging

from oracledb_datapump import Job, Directive, Operation, JobMode

logging.basicConfig(level="INFO")


job = Job(
    operation=Operation.EXPORT,
    mode=JobMode.SCHEMA,
    directives=[Directive.INCLUDE_SCHEMA("HR"), Directive.PARALLEL(2)],
)
response = job.run(
    wait=False, connection="system/manager@localhost/orclpdb1"
)
print(response)

result = job.poll_for_completion(30)
print(result)
print(job.get_logfile())

Import (synchronous)

import logging

from oracledb_datapump import Job, Directive, Operation, JobMode

logging.basicConfig(level="INFO")


job = Job(
    operation=Operation.IMPORT,
    mode=JobMode.SCHEMA,
    dumpfiles=["EXP-HR-20230316210554292374_%U.dmp"],
    directives=[
        Directive.INCLUDE_SCHEMA("HR"),
        Directive.PARALLEL(2),
        Directive.REMAP_SCHEMA(old_value="HR", value="HR2")
    ],
)
response = job.run(
    wait=True, connection="system/manager@localhost/orclpdb1"
)
print(response)
print(job.get_logfile())

Status on existing job

import logging

from oracledb_datapump import Job

logging.basicConfig(level="INFO")


job = Job.attach(
    connection="system/manager@localhost/orclpdb1",
    job_name="EXP-HR-20230320145316600771",
    job_owner="SYSTEM"
)
print(job.get_status())
print(job.get_logfile())

Export using JSON request (synchronous)

import logging
import json

from oracledb_datapump.client import DataPump

logging.basicConfig(level="INFO")


job_request = {
    "connection": {
        "username": "system",
        "password": "manager",
        "hostname": "localhost",
        "database": "orclpdb1"
    },
    "request": "SUBMIT",
    "payload": {
        "operation": "EXPORT",
        "mode": "SCHEMA",
        "wait": True,
        "directives": [
            {"name": "INCLUDE_SCHEMA", "value": "HR"}
        ]
    }
}

response = DataPump.submit(json.dumps(job_request))
print(response)

logfile = DataPump.get_logfile(
    str(response.logfile),
    connection={
        "username": "system",
        "password": "manager",
        "hostname": "localhost",
        "database": "orclpdb1"
    }
)
print(logfile)

Import using JSON request (synchronous)

import logging
import json

from oracledb_datapump.client import DataPump

logging.basicConfig(level="INFO")


job_request = {
    "connection": {
        "username": "system",
        "password": "manager",
        "hostname": "localhost",
        "database": "orclpdb1",
    },
    "request": "SUBMIT",
    "payload": {
        "operation": "IMPORT",
        "mode": "SCHEMA",
        "wait": True,
        "dumpfiles": ["HR.dmp"],
        "directives": [
            {"name": "PARALLEL", "value": 1},
            {"name": "INCLUDE_SCHEMA", "value": "HR"},
            {"name": "OID", "value": False},
            {"name": "REMAP_SCHEMA", "old_value": "HR", "value": "HR2"},
            {"name": "TABLE_EXISTS_ACTION", "value": "REPLACE"},
        ],
    },
}

response = DataPump.submit(json.dumps(job_request))
print(response)

logfile = DataPump.get_logfile(
    str(response.logfile),
    connection={
        "username": "system",
        "password": "manager",
        "hostname": "localhost",
        "database": "orclpdb1"
    }
)
print(logfile)

Import using JSON request (asynchronous w/ polling)

import logging
import json
from time import sleep

from oracledb_datapump.client import DataPump

logging.basicConfig(level="INFO")


job_request = {
    "connection": {
        "username": "system",
        "password": "manager",
        "hostname": "localhost",
        "database": "orclpdb1",
    },
    "request": "SUBMIT",
    "payload": {
        "operation": "IMPORT",
        "mode": "SCHEMA",
        "wait": False,
        "dumpfiles": ["HR.dmp"],
        "directives": [
            {"name": "PARALLEL", "value": 1},
            {"name": "INCLUDE_SCHEMA", "value": "HR"},
            {"name": "OID", "value": False},
            {"name": "REMAP_SCHEMA", "old_value": "HR", "value": "HR2"},
            {"name": "TABLE_EXISTS_ACTION", "value": "REPLACE"},
        ],
    },
}

response = DataPump.submit(json.dumps(job_request))
print(response)

status_request = {
    "connection": {
        "username": "system",
        "password": "manager",
        "hostname": "localhost",
        "database": "orclpdb1",
    },
    "request": "STATUS",
    "payload": {
        "job_name": response.job_name,
        "job_owner": response.job_owner
    },
}

status = DataPump.submit(json.dumps(status_request))
while status.state not in ("COMPLETED", "COMPLETED_WITH_ERRORS", "STOPPED"):
    print(status)
    sleep(15)
    status = DataPump.submit(json.dumps(status_request))

print(f"final status: {status}")

CLI

$ oracledb-datapump --help
usage: oracledb-datapump [-h] (--schema SCHEMA | --full | --table TABLE) --username USERNAME --password PASSWORD --hostname HOSTNAME --database DATABASE [--parallel PARALLEL] [--dumpfile DUMPFILE]
                         [--compression {DATA_ONLY,METADATA_ONLY,ALL,NONE}] [--exclude EXCLUDE] [--remap_schema REMAP_SCHEMA] [--remap_tablespace REMAP_TABLESPACE]
                         [--flashback_utc FLASHBACK_UTC] [--directive DIRECTIVE]
                         {import,export,impdp,expdp}

Remote Oracle Datapump (limited feature set)

positional arguments:
  {import,export,impdp,expdp}

options:
  -h, --help            show this help message and exit
  --schema SCHEMA
  --full
  --table TABLE
  --username USERNAME   Oracle admin username
  --password PASSWORD   Oracle admin password
  --hostname HOSTNAME   Database service host
  --database DATABASE   Database service name
  --parallel PARALLEL   Number of datapump workers
  --dumpfile DUMPFILE   Oracle dumpfile - Required for import
  --compression {DATA_ONLY,METADATA_ONLY,ALL,NONE}
  --exclude EXCLUDE     Exclude object type
  --remap_schema REMAP_SCHEMA
                        Remap schema FROM_SCHEMA:TO_SCHEMA
  --remap_tablespace REMAP_TABLESPACE
                        Remap tablespace FROM_TBLSPC:TO_TBLSPC
  --flashback_time FLASHBACK_TIME
                        ISO format timestamp
  --directive DIRECTIVE
                        Datapump directive NAME:VALUE

Export

oracledb-datapump --username system --password manager --hostname localhost --database orclpdb1 --parallel 2 --schema hr export

Import

oracledb-datapump --username system --password manager --hostname localhost --database orclpdb1 --schema HR --dumpfile HR.dmp --remap_schema "HR:HR2" import

HTTP Server as AWS Lambda

This example assumes the use of a custom domain name mapped to an API Gateway or ALB where the datapump lambda is mapped to a datapump endpoint.

Export

curl -XPOST "https://oracledb-util-api.somedomain.com/datapump" -d \
 '{
    "connection": {
        "username": "system",
        "password": "manager",
        "hostname": "host.docker.internal",
        "database": "orclpdb1"
    },
    "request": "SUBMIT",
    "payload": {
        "operation": "EXPORT",
        "mode": "SCHEMA",
        "wait": false,
        "directives": [
            {"name": "PARALLEL", "value": 2},
            {"name": "COMPRESSION", "value": "ALL"},
            {"name": "INCLUDE_SCHEMA", "value": "HR"}
        ]
    }
}'

Import

response=$(curl -XPOST "https://oracledb-util-api.somedomain.com/datapump" -d \
 '{
    "connection": {
        "username": "system",
        "password": "manager",
        "hostname": "host.docker.internal",
        "database": "orclpdb1"
    },
    "request": "SUBMIT",
    "payload": {
        "operation": "IMPORT",
        "mode": "SCHEMA",
        "wait": false,
        "dumpfiles": ["HR.dmp"],
        "directives": [
            {"name": "PARALLEL", "value": 2},
            {"name": "INCLUDE_SCHEMA", "value": "HR"},
            {"name": "OID", "value": false},
            {"name": "REMAP_SCHEMA", "old_value": "HR", "value": "HR2"},
            {"name": "TABLE_EXISTS_ACTION", "value": "REPLACE"}
        ]
    }
}'| jq '.body | fromjson')
echo $response
JOB_NAME=$(jq -r '.job_name' <<< $response)
JOB_OWNER=$(jq -r '.job_owner' <<< $response)

Status

curl -XPOST "https://oracledb-util-api.somedomain.com/datapump" -d \
 '{
    "connection": {
        "username": "system",
        "password": "manager",
        "hostname": "host.docker.internal",
        "database": "orclpdb1"
    },
    "request": "STATUS",
    "payload": {
        "job_name": "'"$JOB_NAME"'",
        "job_owner": "'"$JOB_OWNER"'",
    }
}' | jq '.body | fromjson'

Directives

Directives are used to set parameters, remaps and transforms on a Datapump job. Most of these map back to:

Further information on each of these can be obtained from the Oracle documentation for your database version. Be aware that the usage of some of these parameters require feature based Oracle licenses.

The following is a list of valid directives:

EXCLUDE_OBJECT_TYPE - args: value: str, object_path: str | None
INCLUDE_SCHEMA - args: value: str, object_path: str | None
INCLUDE_TABLE - args: value: str, object_path: str | None
CLIENT_COMMAND - args: value: str
COMPRESSION - args: value: {DATA_ONLY, METADATA_ONLY, ALL, NONE}
COMPRESSION_ALGORITHM - args: value: {BASIC, LOW, MEDIUM, HIGH}
DATA_ACCESS_METHOD - args: value: {AUTOMATIC, DIRECT_PATH, EXTERNAL_TABLE}
DATA_OPTIONS - args: value: [SKIP_CONST_ERR, XMLTYPE_CLOB, NO_TYPE_EVOL, DISABL_APPEND_HINT, REJECT_ROWS_REPCHR, ENABLE_NET_COMP, GRP_PART_TAB, TRUST_EXIST_TB_PAR, VALIDATE_TBL_DATA, VERIFY_STREAM_FORM, CONT_LD_ON_FMT_ERR]
ENCRYPTION - args: value: {ALL, DATA_ONLY, ENCRYPTED_COLUMNS_ONLY, METADATA_ONLY}
ENCRYPTION_ALGORITHM - args: value: {AES128, AES192, AES256}
ENCRYPTION_MODE - args: value: {PASSWORD, TRANSPARENT, DUAL}
ENCRYPTION_PASSWORD - args: value: {PASSWORD, DUAL}
ESTIMATE - args: value: {BLOCKS, STATISTICS}
ESTIMATE_ONLY - args: value: int
FLASHBACK_SCN - args: value: int
FLASHBACK_TIME - args: value: str | datetime # Must be an ISO format timestamp.
INCLUDE_METADATA - args: value: bool
KEEP_MASTER - args: value: bool
LOGTIME - args: value: {NONE, STATUS, LOGFILE, ALL}
MASTER_ONLY - args: value: bool
METRICS - args: value: bool
PARTITION_OPTIONS - args: value: {NONE, DEPARTITION, MERGE}
REUSE_DATAFILES - args: value: bool
SKIP_UNUSABLE_INDEXES - args: value: bool
SOURCE_EDITION - args: value: bool
STREAMS_CONFIGURATION - args: value: bool
TABLE_EXISTS_ACTION - args: value: {TRUNCATE, REPLACE, APPEND, SKIP}
TABLESPACE_DATAFILE - args: value: {TABLESPACE_DATAFILE}
TARGET_EDITION - args: value: str
TRANSPORTABLE - args: value: {ALWAYS, NEVER}
TTS_FULL_CHECK - args: value: bool
USER_METADATA - args: value: bool
PARALLEL - args: value: int
REMAP_SCHEMA - args: old_value: str, value: str, object_path: str | None
REMAP_TABLESPACE - args: old_value: str, value: str, object_path: str | None
REMAP_DATAFILE - args: old_value: str, value: str, object_path: str | None
DISABLE_ARCHIVE_LOGGING - args: value: bool, object_path: str | None
INMEMORY - args: value: bool, object_path: str | None
INMEMORY_CLAUSE - args: value: str, object_path: str | None
LOB_STORAGE - args: value: {SECUREFILE, BASICFILE, DEFAULT, NO_CHANGE}, object_path: str | None
OID - args: value: bool, object_path: str | None
PCTSPACE - args: value: int, object_path: str | None
SEGMENT_ATTRIBUTES - args: value: bool, object_path: str | None
SEGMENT_CREATION - args: value: bool, object_path: str | None
STORAGE - args: value: bool, object_path: str | None
TABLE_COMPRESSION_CLAUSE - args: value: str | object_path: None
DELETE_FILES: Custom directive that deletes the dumpfiles once an import is complete. Valid only for synchronous executions.