From d2c5d39b0748e68ca44603df25c309427cd5c7e8 Mon Sep 17 00:00:00 2001
From: Mayara Moromisato <44944954+moromimay@users.noreply.github.com>
Date: Thu, 8 Apr 2021 14:07:12 -0300
Subject: [PATCH] Change solution for tracking logs (#308)

* Change tracking logs method.

* Change version to generate dev package.

* Change path name in S3
---
 butterfree/_cli/migrate.py | 45 +++++++++++++++++++++-----------------
 requirements.txt           |  3 ++-
 setup.py                   |  2 +-
 3 files changed, 28 insertions(+), 22 deletions(-)

diff --git a/butterfree/_cli/migrate.py b/butterfree/_cli/migrate.py
index f3c533d7..42b3fb4a 100644
--- a/butterfree/_cli/migrate.py
+++ b/butterfree/_cli/migrate.py
@@ -1,3 +1,4 @@
+import datetime
 import importlib
 import inspect
 import os
@@ -5,13 +6,13 @@ import sys
 from typing import Set
 
+import boto3
 import setuptools
 import typer
+from botocore.exceptions import ClientError
 
-from butterfree.clients import SparkClient
 from butterfree.configs import environment
 from butterfree.configs.logger import __logger
-from butterfree.extract.readers import FileReader
 from butterfree.migrations.database_migration import ALLOWED_DATABASE
 from butterfree.pipelines import FeatureSetPipeline
 
@@ -106,30 +107,34 @@ class Migrate:
         pipelines: list of Feature Set Pipelines to use to migration.
 
     """
 
-    def __init__(
-        self, pipelines: Set[FeatureSetPipeline], spark_client: SparkClient = None
-    ) -> None:
+    def __init__(self, pipelines: Set[FeatureSetPipeline],) -> None:
         self.pipelines = pipelines
-        self.spark_client = spark_client or SparkClient()
 
     def _send_logs_to_s3(self, file_local: bool) -> None:
         """Send all migration logs to S3."""
-        log_path = "../logging.json"
-
-        file_reader = FileReader(id="name", path=log_path, format="json")
-        df = file_reader.consume(self.spark_client)
-
-        path = environment.get_variable("FEATURE_STORE_S3_BUCKET")
-
-        self.spark_client.write_dataframe(
-            dataframe=df,
-            format_="json",
-            mode="append",
-            **{"path": f"s3a://{path}/logging"},
+        s3_client = boto3.client("s3")
+
+        file_name = "../logging.json"
+        timestamp = datetime.datetime.now()
+        object_name = (
+            f"logs/migrate/"
+            f"{timestamp.strftime('%Y-%m-%d')}"
+            f"/logging-{timestamp.strftime('%H:%M:%S')}.json"
         )
+        bucket = environment.get_variable("FEATURE_STORE_S3_BUCKET")
+
+        try:
+            s3_client.upload_file(
+                file_name,
+                bucket,
+                object_name,
+                ExtraArgs={"ACL": "bucket-owner-full-control"},
+            )
+        except ClientError:
+            raise
 
-        if not file_local and os.path.exists(log_path):
-            os.remove(log_path)
+        if not file_local and os.path.exists(file_name):
+            os.remove(file_name)
 
     def run(self, generate_logs: bool = False) -> None:
         """Construct and apply the migrations."""
diff --git a/requirements.txt b/requirements.txt
index 82a99d7f..9548edb3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,4 +5,5 @@ parameters-validation>=1.1.5,<2.0
 pyspark==3.*
 typer>=0.3,<0.4
 setuptools>=41,<42
-typing-extensions==3.7.4.3
\ No newline at end of file
+typing-extensions==3.7.4.3
+boto3==1.17.*
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 5122a831..348e5f98 100644
--- a/setup.py
+++ b/setup.py
@@ -1,7 +1,7 @@
 from setuptools import find_packages, setup
 
 __package_name__ = "butterfree"
-__version__ = "1.2.0.dev7"
+__version__ = "1.2.0.dev8"
 __repository_url__ = "https://github.com/quintoandar/butterfree"
 
 with open("requirements.txt") as f:
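Note (not part of the patch): the new `_send_logs_to_s3` drops the Spark-based `FileReader`/`write_dataframe` round trip and uploads the local `logging.json` straight to S3 with boto3, under a date-partitioned key such as `logs/migrate/2021-04-08/logging-14:07:12.json`. A minimal standalone sketch of that flow follows; the function name `upload_migration_log` and the fallback bucket value are illustrative only, while `FEATURE_STORE_S3_BUCKET` is the environment variable the patch actually reads.

# Illustrative sketch only -- not part of the patch. Mirrors the new boto3-based
# upload in Migrate._send_logs_to_s3 with placeholder names.
import datetime
import os

import boto3
from botocore.exceptions import ClientError


def upload_migration_log(file_name: str, bucket: str, keep_local: bool = True) -> str:
    """Upload a local log file to S3 under a date-partitioned key and return the key."""
    timestamp = datetime.datetime.now()
    object_name = (
        f"logs/migrate/"
        f"{timestamp.strftime('%Y-%m-%d')}"
        f"/logging-{timestamp.strftime('%H:%M:%S')}.json"
    )

    s3_client = boto3.client("s3")
    try:
        # ExtraArgs mirrors the patch: the bucket owner gets full control of the object.
        s3_client.upload_file(
            file_name,
            bucket,
            object_name,
            ExtraArgs={"ACL": "bucket-owner-full-control"},
        )
    except ClientError:
        # Re-raise the botocore error to the caller, as the patch does.
        raise

    if not keep_local and os.path.exists(file_name):
        os.remove(file_name)

    return object_name


if __name__ == "__main__":
    # FEATURE_STORE_S3_BUCKET is the environment variable the patch reads;
    # the fallback value here is a placeholder.
    bucket = os.environ.get("FEATURE_STORE_S3_BUCKET", "my-feature-store-bucket")
    print(upload_migration_log("logging.json", bucket))

Colons are valid characters in S3 object keys, so the `%H:%M:%S` suffix works as written, although some clients URL-encode them when building object URLs.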