diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..d07c003 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,12 @@ +.git/ +.gitignore +node_modules/ +npm-debug.log +Dockerfile +.dockerignore +temp/ +*.md +*.egg-info/ +venv/ +env/ +.idea/ diff --git a/.github/workflows/build-docs-dev.yml b/.github/workflows/build-docs-dev.yml new file mode 100644 index 0000000..43938dc --- /dev/null +++ b/.github/workflows/build-docs-dev.yml @@ -0,0 +1,44 @@ +name: Deploy Docs +on: + push: + branches: [master] + +permissions: + contents: write + +jobs: + docs: + name: Generate Website + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: '3.10' + + - name: Install docs dependencies + run: pip install -r docs/requirements.txt + + - name: Install + run: pip install -e . + + - name: Run some auxiliary scripts, e.g. build environments docs + run: python docs/_scripts/gen_envs_mds.py + + - name: Build + run: sphinx-build -b dirhtml -v docs _build + + - name: Move 404 + run: mv _build/404/index.html _build/404.html + + - name: Update 404 links + run: python docs/_scripts/move_404.py _build/404.html + + - name: Remove .doctrees + run: rm -r _build/.doctrees + + - name: Upload to GitHub Pages + uses: JamesIves/github-pages-deploy-action@v4 + with: + folder: _build diff --git a/.gitignore b/.gitignore index b6e4761..49ecbc7 100644 --- a/.gitignore +++ b/.gitignore @@ -5,7 +5,7 @@ __pycache__/ # C extensions *.so - +.ssh/ # Distribution / packaging .Python build/ @@ -127,3 +127,13 @@ dmypy.json # Pyre type checker .pyre/ + +# Ignore singularity images +*.simg +*.sif + +# Ignore Intellij IDE files +.idea/ + +# Ignore Logs directory since it's used for running experiments +logs/ \ No newline at end of file diff --git a/.gitmodules b/.gitmodules index 0d86b4f..840e400 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,4 @@ -[submodule "web_navigation"] +[submodule "a2perf/domains/web_navigation"] path = a2perf/domains/web_navigation url = https://github.com/Farama-Foundation/a2perf-web-nav.git branch = dev diff --git a/a2perf/a2perf_benchmark_submission b/a2perf/a2perf_benchmark_submission index 42978aa..e1e0d0c 160000 --- a/a2perf/a2perf_benchmark_submission +++ b/a2perf/a2perf_benchmark_submission @@ -1 +1 @@ -Subproject commit 42978aa363c0620e52693cf1170c701ec5890c9a +Subproject commit e1e0d0c4c25a376dc55713c4e9ba28956495be0c diff --git a/a2perf/analysis/__init__.py b/a2perf/analysis/__init__.py new file mode 100644 index 0000000..5ab06df --- /dev/null +++ b/a2perf/analysis/__init__.py @@ -0,0 +1,3 @@ +from . import reliability +from . import results +from . 
import system diff --git a/a2perf/analysis/evaluation.py b/a2perf/analysis/evaluation.py new file mode 100644 index 0000000..b38cf6b --- /dev/null +++ b/a2perf/analysis/evaluation.py @@ -0,0 +1,186 @@ +import functools +import json +import multiprocessing +import os +from typing import Any +from typing import Dict +from typing import Tuple + +from a2perf.analysis.metrics_lib import load_training_system_data +from a2perf.domains import circuit_training +from a2perf.domains import quadruped_locomotion +from a2perf.domains import web_navigation +from a2perf.domains.tfa.suite_gym import create_domain +from a2perf.domains.tfa.utils import load_policy +from a2perf.domains.tfa.utils import perform_rollouts +from absl import app +from absl import flags +from absl import logging +import numpy as np + +_NUM_EVAL_EPISODES = flags.DEFINE_integer( + 'num_eval_episodes', 100, 'Number of episodes to evaluate the policy.' +) + +_MAX_PARALLEL_ENVS = flags.DEFINE_integer( + 'max_parallel_envs', 1, 'Maximum number of parallel environments to use.' +) +_ROOT_DIR = flags.DEFINE_string( + 'root_dir', + None, + 'Root directory of the environment. If not set, the ROOT_DIR environment ' + 'variable is used.', +) + +_ENV_NAME = flags.DEFINE_string( + 'env_name', 'CartPole-v0', 'The name of the environment to evaluate.' +) +_POLICY_NAME = flags.DEFINE_string( + 'policy_name', 'policy', 'The name of the policy to evaluate.' +) + + +def load_policy_and_perform_rollouts( + checkpoint_path: str, + env_name: str, + policy_path: str, + num_episodes: int, + root_dir: str = None, +) -> Dict[str, Any]: + try: + policy = load_policy(policy_path, checkpoint_path) + if env_name == 'CircuitTraining-v0' or env_name == 'WebNavigation-v0': + env = create_domain(env_name, root_dir=root_dir) + else: + env = create_domain(env_name) + episode_returns = perform_rollouts(policy, env, num_episodes) + + eval_dict = { + checkpoint_path: { + 'mean': np.mean(episode_returns).astype(float), + 'std': np.std(episode_returns).astype(float), + 'min': np.min(episode_returns).astype(float), + 'max': np.max(episode_returns).astype(float), + 'median': np.median(episode_returns).astype(float), + 'count': int(episode_returns.size), + 'rollout_returns': [float(v) for v in episode_returns], + } + } + + logging.info('Evaluation results for %s:', checkpoint_path) + logging.info('\t%s', eval_dict[checkpoint_path]) + return eval_dict + except Exception as e: + import traceback + + logging.error('Error evaluating checkpoint %s: %s', checkpoint_path, e) + traceback.print_exc() + return {} + + +def add_training_energy_cost( + checkpoint_item: Tuple[str, Dict[str, Any]], total_energy_kwh +) -> Tuple[str, Dict[str, Any]]: + checkpoint_path = checkpoint_item[0] + checkpoint_dict = checkpoint_item[1] + + policy_checkpoint_name = os.path.basename(checkpoint_path) + policy_checkpoint_number = int(policy_checkpoint_name.split('_')[-1]) + + checkpoint_dict.update({ + 'total_training_energy_kwh': total_energy_kwh, + 'training_energy_kwh': total_energy_kwh * policy_checkpoint_number, + 'checkpoint_number': policy_checkpoint_number, + }) + return checkpoint_path, checkpoint_dict + + +def main(_): + multiprocessing.set_start_method('spawn', force=False) + saved_model_path = os.path.join( + _ROOT_DIR.value, 'policies', _POLICY_NAME.value + ) + checkpoints_path = os.path.join(_ROOT_DIR.value, 'policies', 'checkpoints') + + # Get absolute paths of all checkpoints + all_checkpoints_paths = [ + os.path.join(checkpoints_path, checkpoint) + for checkpoint in 
os.listdir(checkpoints_path) + ] + + # Create a partial function that has all the fixed parameters set + partial_func = functools.partial( + load_policy_and_perform_rollouts, + root_dir=_ROOT_DIR.value, + env_name=_ENV_NAME.value, + policy_path=saved_model_path, + num_episodes=_NUM_EVAL_EPISODES.value, + ) + + with multiprocessing.Pool(_MAX_PARALLEL_ENVS.value) as pool: + episode_returns = pool.map(partial_func, all_checkpoints_paths) + pool.close() + pool.join() + + all_episode_returns = {k: v for d in episode_returns for k, v in d.items()} + + # Use the experiment path to get the system metrics for this training run + experiment_path = os.path.abspath( + os.path.join(_ROOT_DIR.value, os.pardir, os.pardir, os.pardir, os.pardir) + ) + logging.debug('Experiment path: %s', experiment_path) + training_system_df = load_training_system_data( + base_dir=os.path.abspath(os.path.join(experiment_path, os.pardir)), + experiment_ids=[os.path.basename(experiment_path)], + ) + + # For each run-id, take the last `energy_consumed` entry and sum them together + total_training_energy_kwh = ( + training_system_df.groupby( + ['domain', 'algo', 'task', 'experiment', 'seed'] + )['energy_consumed'] + .last() + .sum() + ) + + # Add the training sample cost to the evaluation results + with multiprocessing.Pool(_MAX_PARALLEL_ENVS.value) as pool: + all_episode_returns = pool.map( + functools.partial( + add_training_energy_cost, total_energy_kwh=total_training_energy_kwh + ), + all_episode_returns.items(), + ) + pool.close() + pool.join() + + # Turn all_episode_returns back into a dictionary + all_episode_returns = {k: v for (k, v) in all_episode_returns} + + maximum_checkpoint_number = max( + [int(v['checkpoint_number']) for v in all_episode_returns.values()] + ) + logging.info('Maximum checkpoint number: %d', maximum_checkpoint_number) + + for checkpoint_path, checkpoint_dict in all_episode_returns.items(): + # Adjusting the training energy cost such that earlier checkpoints are + # associated with less energy usage + checkpoint_dict['training_energy_kwh'] = ( + checkpoint_dict['training_energy_kwh'] / maximum_checkpoint_number + ) + + # Make sure that the energy usage for the final checkpoint is the same as + # the total energy usage + if checkpoint_dict['checkpoint_number'] == maximum_checkpoint_number: + assert checkpoint_dict['training_energy_kwh'] == total_training_energy_kwh + + # Save as JSON + evaluation_save_path = os.path.join( + _ROOT_DIR.value, 'policies', 'evaluation.json' + ) + with open(evaluation_save_path, 'w') as f: + json.dump(all_episode_returns, f, indent=2) + + +if __name__ == '__main__': + app.run(main) diff --git a/a2perf/analysis/expertise.py b/a2perf/analysis/expertise.py new file mode 100644 index 0000000..545aaf0 --- /dev/null +++ b/a2perf/analysis/expertise.py @@ -0,0 +1,180 @@ +import glob +import json +import multiprocessing +import os + +from absl import app +from absl import flags +from absl import logging +from matplotlib import pyplot as plt +import numpy as np +import pandas as pd +import seaborn as sns + +_EXPERIMENT_IDS = flags.DEFINE_list( + 'experiment_ids', [], 'List of experiment IDs to load the evaluation data.' +) +_ROOT_DIR = flags.DEFINE_string( + 'root_dir', None, 'Root directory to load the evaluation data.' 
+) + +_AVERAGE_MEASURE = flags.DEFINE_enum( + 'average_measure', + None, + ['mean', 'median'], + 'Measure to use for averaging the episode rewards.', +) + +_SKILL_LEVEL = flags.DEFINE_enum( + 'skill_level', + None, + ['novice', 'intermediate', 'expert'], + 'Skill level of the expert.', +) + +_TASK_NAME = flags.DEFINE_string( + 'task_name', + None, + 'Name of the task to perform. This is used to name the dataset.', +) + + +def assign_skill_level(row, col_name, bounds): + """Assign skill level to a row based on episode_reward and predefined bounds.""" + reward = row[col_name] + + # Check against the bounds and assign the level + if reward <= bounds['novice'][1]: + return 'novice' + elif bounds['intermediate'][0] <= reward <= bounds['intermediate'][1]: + return 'intermediate' + elif reward >= bounds['expert'][0]: + return 'expert' + else: + # If it is not in any bounds, we place a pd.NA and drop it later + return pd.NA + + +def plot_skill_levels(data_df, col_name, save_path=None): + fig, ax = plt.subplots(figsize=(10, 6)) + + data_df['skill_level'] = data_df['skill_level'].astype('category') + + sns.histplot( + data=data_df, + x=col_name, + hue='skill_level', + kde=False, + stat='count', + legend=True, + linewidth=0, + ax=ax, + ) + + ax.set_title('Episode Reward Distribution by Skill Level') + ax.set_xlabel('Episode Reward') + ax.set_ylabel('Count') + fig.legend() + fig.show() + if save_path: + os.makedirs(os.path.dirname(save_path), exist_ok=True) + fig.savefig(save_path) + + +def glob_path(path): + return glob.glob(path, recursive=True) + + +def load_evaluation_json_data(base_dir, experiment_ids): + with multiprocessing.Pool() as pool: + json_files = pool.map( + glob_path, + [ + os.path.join(base_dir, f'{exp_id}/**/evaluation.json') + for exp_id in experiment_ids + ], + ) + pool.close() + pool.join() + json_files_paths = [item for sublist in json_files for item in sublist] + json_files_paths = set(json_files_paths) + + all_data = [] + for file_path in json_files_paths: + with open(file_path, 'r') as f: + data = json.load(f) + data_df = pd.DataFrame.from_dict(data, orient='index').reset_index() + data_df = data_df.rename(columns={'index': 'checkpoint_path'}) + all_data.append(data_df) + all_data = pd.concat(all_data, ignore_index=True) + return all_data + + +def main(_): + root_dir = os.path.expanduser(_ROOT_DIR.value) + evaluation_data_df = load_evaluation_json_data( + base_dir=root_dir, experiment_ids=_EXPERIMENT_IDS.value + ) + + logging.info('Loaded evaluation data') + + logging.info('Using average measure: %s', _AVERAGE_MEASURE.value) + average_average_return = evaluation_data_df[_AVERAGE_MEASURE.value].mean() + logging.info('Average average return: %s', average_average_return) + + std_average_return = evaluation_data_df[_AVERAGE_MEASURE.value].std() + logging.info('Standard deviation of average return: %s', std_average_return) + + novice_cutoff = (-np.inf, average_average_return - (2 * std_average_return)) + intermediate_cutoff = ( + average_average_return - std_average_return, + average_average_return + std_average_return, + ) + expert_cutoff = (average_average_return + (2 * std_average_return), np.inf) + + logging.info('Novice cutoff: %s', novice_cutoff) + logging.info('Intermediate cutoff: %s', intermediate_cutoff) + logging.info('Expert cutoff: %s', expert_cutoff) + + # Add a column to the evaluation_data_df for the skill level + evaluation_data_df['skill_level'] = evaluation_data_df.apply( + assign_skill_level, + args=( + _AVERAGE_MEASURE.value, + { + 'novice': novice_cutoff, + 
'intermediate': intermediate_cutoff, + 'expert': expert_cutoff, + }, + ), + axis=1, + ) + + # Drop rows with pd.NA since they do not fall into any skill level + evaluation_data_df = evaluation_data_df.dropna(subset=['skill_level']) + + plot_skill_levels( + evaluation_data_df, + _AVERAGE_MEASURE.value, + save_path=os.path.join( + root_dir, + _TASK_NAME.value, + _SKILL_LEVEL.value, + 'skill_level_distribution.png', + ), + ) + + # Save the data with skill levels so we can load to generate datasets + evaluation_data_df.to_csv( + os.path.join( + root_dir, + _TASK_NAME.value, + _SKILL_LEVEL.value, + 'evaluation_data_with_skill_levels.csv', + ), + index=False, + ) + + +if __name__ == '__main__': + app.run(main) diff --git a/a2perf/analysis/metrics.py b/a2perf/analysis/metrics.py new file mode 100644 index 0000000..c2e596b --- /dev/null +++ b/a2perf/analysis/metrics.py @@ -0,0 +1,169 @@ +import os + +import matplotlib.pyplot as plt +import numpy as np +import seaborn as sns +import tensorflow as tf +from absl import app +from absl import flags +from absl import logging + +from a2perf import analysis +from a2perf.analysis.metrics_lib import correct_energy_measurements +from a2perf.analysis.metrics_lib import load_generalization_metric_data +from a2perf.analysis.metrics_lib import load_inference_metric_data +from a2perf.analysis.metrics_lib import load_inference_system_data +from a2perf.analysis.metrics_lib import load_training_reward_data +from a2perf.analysis.metrics_lib import load_training_system_data + +_SEED = flags.DEFINE_integer('seed', 0, 'Random seed.') +_BASE_DIR = flags.DEFINE_string( + 'base-dir', + '/home/ikechukwuu/workspace/rl-perf/logs', + 'Base directory for logs.', +) +_EXPERIMENT_IDS = flags.DEFINE_list( + 'experiment-ids', [94408569], 'Experiment IDs to process.' +) +NUM_COLLECT_JOB_TO_CPU_RATIO = dict( + quadruped_locomotion=44 / 96, + web_navigation=36 / 96, + circuit_training=25 / 96, +) + +DOMAIN_COLLECT_CPU_USAGE_FRACTION = { + 'circuit_training': 0.85, + 'quadruped_locomotion': 0.46, + 'web_navigation': 0.57, +} +TRAIN_SERVER_CPU_USAGE_FRACTION = 0.05 + + +def _initialize_plotting(): + base_font_size = 25 + sns.set_style('whitegrid') + plt.rcParams.update({ + 'figure.figsize': (12, 6), + 'font.size': base_font_size - 2, + 'axes.labelsize': base_font_size - 2, + 'axes.titlesize': base_font_size, + 'axes.labelweight': 'bold', # Bold font for the axes labels + 'legend.fontsize': base_font_size - 4, + 'xtick.labelsize': base_font_size - 4, + 'ytick.labelsize': base_font_size - 4, + 'figure.titlesize': base_font_size, + 'figure.dpi': 100, + 'savefig.dpi': 100, + 'savefig.format': 'png', + 'savefig.bbox': 'tight', + 'grid.linewidth': 0.5, + 'grid.alpha': 0.5 # Lighter grid lines + }) + + +def main(_): + tf.compat.v1.enable_eager_execution() + np.random.seed(_SEED.value) + tf.random.set_seed(_SEED.value) + _initialize_plotting() + + base_dir = os.path.expanduser(_BASE_DIR.value) + training_reward_data_df = load_training_reward_data( + base_dir=base_dir, experiment_ids=_EXPERIMENT_IDS.value + ) + + # plot_training_reward_data(training_reward_data_df, + # event_file_tags=['Metrics/AverageReturn']) + + training_reward_metrics = analysis.reliability.get_training_metrics( + data_df=training_reward_data_df, tag='Metrics/AverageReturn', index='Step' + ) + training_system_metrics_df = load_training_system_data( + base_dir=base_dir, experiment_ids=_EXPERIMENT_IDS.value + ) + + # DEBUG: See how many collect jobs there are. 
To do this, group by domain/task/algo/seed, and count the number of unique + # experiment fields. This should be 1 + the number of collect jobs. + total_jobs = training_system_metrics_df.groupby( + ['domain', 'task', 'algo', 'seed', 'experiment']).run_id.nunique() + num_collect_jobs = total_jobs - 1 + # Only keep groups with more than one run_id + num_collect_jobs = num_collect_jobs[num_collect_jobs > 0] + env_batch_size = 512 + domain = training_system_metrics_df['domain'].iloc[0] + collect_cpu_ratio = NUM_COLLECT_JOB_TO_CPU_RATIO[domain] + + # Make sure the number of collect jobs matches the expected number + expected_num_collect_jobs = np.ceil(env_batch_size / collect_cpu_ratio / 96) + if all(num_collect_jobs == expected_num_collect_jobs): + # 96 vCPU case + cpus_per_collect_job = 96 + logging.info('Experiments were run on 96 vCPUs') + else: + # 32 vCPU case + cpus_per_collect_job = 32 + cpus_per_train_Job = 48 + total_cpus_on_collect_machine = 128 # https://cloud.google.com/compute/docs/general-purpose-machines#n2_series + total_cpus_on_train_machine = 48 # https://cloud.google.com/compute/docs/gpus#a100-gpus + true_collect_cpu_tdp = 300 # https://www.cpu-world.com/CPUs/Xeon/Intel-Xeon%208373C.html + true_train_cpu_tdp = 165 # https://www.cpu-world.com/CPUs/Xeon/Intel-Xeon%208273CL.html + training_system_metrics_df = correct_energy_measurements(training_system_metrics_df, + cpus_per_train_job=cpus_per_train_Job, + total_cpus_on_train_machine=total_cpus_on_train_machine, + percent_train_cpu_usage=TRAIN_SERVER_CPU_USAGE_FRACTION, + cpus_per_collect_job=cpus_per_collect_job, + total_cpus_on_collect_machine=total_cpus_on_collect_machine, + percent_collect_cpu_usage= + DOMAIN_COLLECT_CPU_USAGE_FRACTION[ + domain], + true_train_cpu_tdp=true_train_cpu_tdp, + true_collect_cpu_tdp=true_collect_cpu_tdp) + + training_system_metrics = analysis.system.get_training_metrics( + data_df=training_system_metrics_df + ) + print(training_system_metrics) + training_metrics = dict(**training_reward_metrics, **training_system_metrics) + + inference_reward_metrics, inference_reward_metrics_df = load_inference_metric_data( + base_dir=base_dir, experiment_ids=_EXPERIMENT_IDS.value + ) + inference_reward_metrics.update(analysis.reliability.get_inference_metrics( + data_df=inference_reward_metrics_df)) + inference_system_metrics_df = load_inference_system_data( + base_dir=base_dir, + experiment_ids=_EXPERIMENT_IDS.value) + inference_system_metrics = analysis.system.get_inference_metrics( + data_df=inference_system_metrics_df) + inference_metrics = dict(**inference_reward_metrics, + **inference_system_metrics) + print(inference_metrics) + + generalization_reward_metrics = load_generalization_metric_data( + base_dir=base_dir, experiment_ids=_EXPERIMENT_IDS.value + ) + print(generalization_reward_metrics) + + # Take the rollout_returns from generalization_metrics and add it to training_metrics + training_metrics['generalization_rollout_returns'] = \ + generalization_reward_metrics[ + 'generalization_rollout_returns'] + del generalization_reward_metrics['generalization_rollout_returns'] + + # Take rollout_returns from inference_metrics and add it to training_metrics + training_metrics['rollout_returns'] = inference_metrics['rollout_returns'] + del inference_metrics['rollout_returns'] + + training_metrics_df = analysis.results.metrics_dict_to_pandas_df( + training_metrics + ) + inference_metrics_df = analysis.results.metrics_dict_to_pandas_df( + inference_metrics + ) + + 
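+  # Render the aggregated training and inference metrics as LaTeX tables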
print(analysis.results.df_as_latex(training_metrics_df, mode='train')) + print(analysis.results.df_as_latex(inference_metrics_df, mode='inference')) + + +if __name__ == '__main__': + app.run(main) diff --git a/a2perf/analysis/metrics_lib.py b/a2perf/analysis/metrics_lib.py new file mode 100644 index 0000000..54ecba7 --- /dev/null +++ b/a2perf/analysis/metrics_lib.py @@ -0,0 +1,659 @@ +import collections +import concurrent.futures +import functools +import glob +import json +import multiprocessing +import os +import re + +import matplotlib.pyplot as plt +import matplotlib.ticker as ticker +import numpy as np +import pandas as pd +import seaborn as sns +import tensorflow as tf +from absl import logging + +DOMAIN_DISPLAY_NAME = { + 'quadruped_locomotion': 'Quadruped Locomotion', + 'circuit_training': 'Circuit Training', + 'web_navigation': 'Web Navigation', +} + +TASK_DISPLAY_NAME = { + 'dog_pace': 'Dog Pace', + 'dog_trot': 'Dog Trot', + 'dog_spin': 'Dog Spin', +} + +ALGO_DISPLAY_NAME = { + 'sac': 'SAC', + 'ppo': 'PPO', + 'dqn': 'DQN', + 'ddqn': 'DDQN', +} + +METRIC_DISPLAY_NAME = { + 'Metrics/AverageReturn': 'Episodic Returns', +} + + +def format_func(value, tick_number): + # Convert to integer if the value is effectively a whole number + if value.is_integer(): + return f'{int(value)}' + else: + return f'{value}' + + +def load_tb_data(log_file, tags=None): + tf.compat.v1.enable_eager_execution() + + if tags is None: + tags = [] + + # Initialize separate lists for steps, values, and timestamps for each tag + data = {} + for tag in tags: + data[f'{tag}_Step'] = [] + data[f'{tag}_Value'] = [] + data[f'{tag}_Timestamp'] = [] + + for event in tf.compat.v1.train.summary_iterator(log_file): + if event.HasField('summary'): + for value in event.summary.value: + if value.tag in tags: + if value.HasField('simple_value'): + data_value = value.simple_value + elif value.HasField('tensor'): + # Parse tensor_content as a tensor and then extract its value + tensor = tf.make_ndarray(value.tensor) + data_value = tensor.item() + else: + raise ValueError( + f'Value type not recognized for tag {value.tag}. 
Expected' + f' simple_value or tensor, got {value.WhichOneof("value")}' + ) + + data[f'{value.tag}_Step'].append(event.step) + data[f'{value.tag}_Value'].append(data_value) + data[f'{value.tag}_Timestamp'].append( + pd.to_datetime(event.wall_time, unit='s') + ) + + if all(len(data[f'{tag}_Step']) == 0 for tag in tags): + return pd.DataFrame() # Return an empty DataFrame if no data + + # Construct and return the DataFrame + return pd.DataFrame(data) + + +def process_tb_event_dir(event_file_path, tags=None): + tf.compat.v1.enable_eager_execution() + + log_base_dir = os.path.dirname(event_file_path) + exp_split = event_file_path.split('/') + + if 'collect' in event_file_path: + # Single collect job output + if exp_split[-3] == 'summaries': + indices = [-7, -8, -9, -10, -11] + + # Some jobs have multiple collect job outputs, so increase the indices + if exp_split[-4] == 'summaries': + indices = [-8, -9, -10, -11, -12] + else: + indices = [-4, -5, -6, -7, -8] + + details_segment, algo, task, experiment_number, domain = [ + exp_split[i] for i in indices + ] + seed = int(re.search(r'seed_(\d+)', details_segment).group(1)) + skill_level = re.search(r'skill_level_(\w+)', details_segment).group(1) + + logging.info(f'Processing log dir: {event_file_path}') + logging.info( + f'\tDomain: {domain}, Task: {task}, Algo: {algo}, Experiment Number:' + f' {experiment_number}, Seed: {seed}, Skill Level: {skill_level}' + ) + data_csv_path = os.path.join(log_base_dir, 'data.csv') + + if 1 == 0 and os.path.exists(data_csv_path): + data = pd.read_csv(data_csv_path, on_bad_lines='skip') + + # We can't load timestamp from csv, so we need to convert that column from + # string to datetime + for tag in tags: + data[f'{tag}_Timestamp'] = pd.to_datetime(data[f'{tag}_Timestamp']) + logging.info(f'Loaded data from {data_csv_path}') + else: + data = load_tb_data(event_file_path, tags) + if data.empty: + logging.warning(f'No data found in {event_file_path}') + return None + + # Add the experiment details to the DataFrame + data['domain'] = domain + data['task'] = task + data['algo'] = algo + data['experiment'] = experiment_number + data['seed'] = seed + data['skill_level'] = skill_level + data.to_csv(data_csv_path) + logging.info(f'Saved data to {data_csv_path}') + return data + + +def process_codecarbon_csv(csv_file_path): + df = pd.read_csv(csv_file_path, on_bad_lines='skip') + exp_split = csv_file_path.split('/') + + # Process path to extract experiment details + if 'collect' in csv_file_path: + indices = [-6, -7, -8, -9, -10] + else: + indices = [-4, -5, -6, -7, -8] + + exp_name, algo, task, experiment, domain = [exp_split[i] for i in indices] + seed = re.search(r'seed_(\d+)', exp_name).group(1) + skill_level = re.search(r'skill_level_(\w+)', exp_name).group(1) + + logging.info('Processing Experiment: %s, Seed: %s, Algo: %s', experiment, + seed, algo) + df['seed'] = int(seed) + df['experiment'] = experiment + df['algo'] = algo + df['task'] = task + df['domain'] = domain + df['skill_level'] = skill_level + + # Convert timestamps and identify corrupt data + df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce') + corrupt_rows = df[df['timestamp'].isna()] + + if not corrupt_rows.empty: + logging.warning('Corrupt rows due to invalid timestamps:') + logging.warning(corrupt_rows) + + # Remove rows with corrupt timestamps + df = df.dropna(subset=['timestamp']) + df['timestamp'] = df['timestamp'].apply( + lambda x: x.replace(tzinfo=None) if x.tzinfo else x + ) + + # Identify and handle corrupt rows for specific 
columns + for tag in ['gpu_power', 'cpu_power', 'duration', 'ram_power']: + # Convert to numeric, coercing errors to NaN + df[tag] = pd.to_numeric(df[tag], errors='coerce') + + # Log and drop rows where conversion failed (NaN values present) + corrupt_rows = df[df[tag].isna()] + if not corrupt_rows.empty: + logging.warning(f'Corrupt rows due to invalid {tag}:') + logging.warning(corrupt_rows) + + # Drop rows with NaN values in these columns + df = df.dropna(subset=[tag]) + + # Sort by timestamp + df = df.sort_values(by='timestamp') + + return df + + +def correct_energy_measurements(df, cpus_per_collect_job=128, + cpus_per_train_job=128, + total_cpus_on_collect_machine=128, + total_cpus_on_train_machine=128, + true_collect_cpu_tdp=120, + true_train_cpu_tdp=120, + percent_collect_cpu_usage=1.0, + percent_train_cpu_usage=1.0, +): + # Create a job type column with either `collect` or `train` depending on whether gpu_energy is 0 or not + df['job_type'] = 'collect' + df.loc[df['gpu_power'] > 0, 'job_type'] = 'train' + + # Group by the specified columns + grouped = df.groupby( + ['domain', 'task', 'algo', 'experiment', 'seed', 'run_id']) + + # Adjust the CPU power based on the job type if all existing power metrics are the same + def adjust_cpu_power(group): + if group['cpu_power'].nunique() == 1: + job_type = group['job_type'].iloc[0] + if job_type == 'collect': + group.loc[:, + 'actual_cpu_power'] = true_collect_cpu_tdp * percent_collect_cpu_usage * ( + cpus_per_collect_job / total_cpus_on_collect_machine) + elif job_type == 'train': + group.loc[:, + 'actual_cpu_power'] = true_train_cpu_tdp * percent_train_cpu_usage * ( + cpus_per_train_job / total_cpus_on_train_machine) + else: + group.loc[:, 'actual_cpu_power'] = group['cpu_power'] + return group + + # Apply the CPU power adjustment function to each group + df = grouped.apply(adjust_cpu_power).reset_index(drop=True) + + # Re-group by the specified columns after adjustment + grouped = df.groupby( + ['domain', 'task', 'algo', 'experiment', 'seed', 'run_id']) + + def compute_energy(group): + group['duration_interval'] = group['duration'].diff().fillna(0) + + # Calculate energy consumption directly in place + mask = group['duration_interval'] >= 0 + group.loc[mask, 'cpu_energy'] = (group.loc[mask, 'actual_cpu_power'] * + group.loc[ + mask, 'duration_interval'] / 3600) / 1000 + assert group.loc[mask, 'cpu_energy'].min() >= 0, 'Negative CPU energy' + group.loc[mask, 'gpu_energy'] = (group.loc[mask, 'gpu_power'] * group.loc[ + mask, 'duration_interval'] / 3600) / 1000 + assert group.loc[mask, 'gpu_energy'].min() >= 0, 'Negative GPU energy' + group.loc[mask, 'ram_energy'] = (group.loc[mask, 'ram_power'] * group.loc[ + mask, 'duration_interval'] / 3600) / 1000 + assert group.loc[mask, 'ram_energy'].min() >= 0, 'Negative RAM energy' + group.loc[mask, 'energy_consumed'] = group.loc[mask, 'cpu_energy'] + \ + group.loc[mask, 'gpu_energy'] + \ + group.loc[mask, 'ram_energy'] + + # Filter out rows with negative duration intervals + return group[mask] + + # Apply the function to each group and reset the index + df = grouped.apply(compute_energy).reset_index(drop=True) + + return df + + +def format_func(value, tick_number): + # Function to format tick labels + if value.is_integer(): + return f'{int(value)}' + else: + return f'{value}' + + +def downsample_steps(group, tag, n_steps=1000): + """ Select a subset of steps at regular intervals that have sufficient data points across seeds """ + # Count the number of values at each step + step_counts = 
group.groupby(f'{tag}_Step').size() + + # Filter steps with more than 3 values (for mean and std calculation) + valid_steps = step_counts[step_counts > 2].index + + # Calculate the interval at which to select steps + interval = max(1, len(valid_steps) // n_steps) + + # Select steps at regular intervals + selected_steps = valid_steps[::interval] + + # Return the filtered group + return group[group[f'{tag}_Step'].isin(selected_steps)] + + +def plot_training_reward_data(metrics_df, + event_file_tags=('Metrics/AverageReturn',)): + for tag in event_file_tags: + metrics_df[f'{tag}_Duration_minutes'] = metrics_df[f'{tag}_Duration'] // 60 + tag_display_val = METRIC_DISPLAY_NAME.get(tag, tag) + plot_df = metrics_df.groupby(['domain', 'task']) + + for (domain, task), group_df in plot_df: + fig, ax = plt.subplots(1, 1, figsize=(16, 10)) + for algo in group_df['algo'].unique(): + group = group_df[group_df['algo'] == algo] + group = downsample_steps(group=group, n_steps=750, tag=tag) + + # Plot for 'Step' + sns.lineplot( + x=f'{tag}_Step', + y=f'{tag}_Value', + data=group, + label=f'{ALGO_DISPLAY_NAME.get(algo, algo)}' + ) + + min_max_steps = group_df.groupby('algo')[f'{tag}_Step'].agg( + ['min', 'max']) + common_max_step = min_max_steps[ + 'max'].min() # Use the minimum of the maximum steps across algos + + ax.set_xlim(0, common_max_step) + ax.set_xlabel('Train Step') + ax.set_ylabel(tag_display_val) + ax.yaxis.set_major_locator(ticker.MaxNLocator(integer=True)) + ax.yaxis.set_major_formatter(ticker.FuncFormatter(format_func)) + title = f'{DOMAIN_DISPLAY_NAME.get(domain, domain)} - {TASK_DISPLAY_NAME.get(task, task)} (Train Steps)' + ax.set_title(title) + ax.legend() + plt.tight_layout() + plt.show() + + fig, ax = plt.subplots(1, 1, figsize=(16, 10)) + for algo in group_df['algo'].unique(): + group = group_df[group_df['algo'] == algo] + group = downsample_steps(group=group, n_steps=750, tag=tag) + + sns.lineplot( + x=f'{tag}_Duration_minutes', + y=f'{tag}_Value', + data=group, + label=f'{ALGO_DISPLAY_NAME.get(algo, algo)}' + ) + + min_max_durations = group_df.groupby('algo')[ + f'{tag}_Duration_minutes'].agg( + ['min', 'max']) + common_max_duration = min_max_durations[ + 'max'].min() # Use the minimum of the maximum durations across algos + + ax.set_xlim(0, common_max_duration) + ax.set_xlabel('Duration (minutes)') + ax.set_ylabel(tag_display_val) + ax.yaxis.set_major_locator(ticker.MaxNLocator(integer=True)) + ax.yaxis.set_major_formatter(ticker.FuncFormatter(format_func)) + title = f'{DOMAIN_DISPLAY_NAME.get(domain, domain)} - {TASK_DISPLAY_NAME.get(task, task)} (Duration)' + ax.set_title(title) + ax.legend() + plt.tight_layout() + plt.show() + + +def glob_path(path): + return glob.glob(path, recursive=True) + + +def load_data(patterns): + with multiprocessing.Pool() as pool: + files = pool.map( + glob_path, + [ + pattern + for pattern in patterns + ], + ) + pool.close() + pool.join() + files = [item for sublist in files for item in sublist] + files = set(files) + return files + + +def load_training_reward_data( + base_dir, experiment_ids, event_file_tags=('Metrics/AverageReturn',) +): + patterns = [ + os.path.join(base_dir, f'{exp_id}/**/collect/**/*events.out.tfevents*') + for exp_id in experiment_ids + ] + event_log_dirs = load_data(patterns) + logging.info(f'Found {len(event_log_dirs)} event log dirs') + + process_log_dir_fn = functools.partial( + process_tb_event_dir, tags=event_file_tags + ) + + all_dfs = [] + with concurrent.futures.ProcessPoolExecutor() as executor: + for data in 
executor.map(process_log_dir_fn, event_log_dirs): + if data is not None: + logging.info('Processing log dir: %s', + f' {data.iloc[0]["domain"]}/{data.iloc[0]["task"]}/{data.iloc[0]["algo"]}/{data.iloc[0]["experiment"]}/{data.iloc[0]["seed"]}') + + all_dfs.append(data) + metrics_df = pd.concat(all_dfs) + logging.info('Loaded %s rows of data', len(metrics_df)) + + # Get the number of seeds for each algo, domain, task combo to make sure + # we have the right number of seeds + seed_counts = metrics_df.groupby(['domain', 'task', 'algo']).seed.nunique() + logging.info('Seed counts: %s', seed_counts) + + # Get the number of rows for each algo, domain, task combo to make sure + # each experiment has the same number of steps + row_counts = metrics_df.groupby(['domain', 'task', 'algo'])['seed'].count() + logging.info('Row counts: %s', row_counts) + + # Since we have parallelized, distributed experiments, we'll see + # the same 'Step' multiple times. We simply need to combine the values + for tag in event_file_tags: + value_col = f'{tag}_Value' + step_col = f'{tag}_Step' + + # Define aggregation methods: mean for the value column, first for others + aggregation = {value_col: 'mean'} + for col in metrics_df.columns: + if col not in [ + value_col, + step_col, + 'domain', + 'task', + 'algo', + 'experiment', + 'seed', + ]: + aggregation[col] = 'first' + + # Group by and apply the specified aggregation + df = ( + metrics_df.groupby( + ['domain', 'task', 'algo', 'experiment', 'seed', step_col] + ) + .agg(aggregation) + .reset_index() + ) + metrics_df = df + + row_counts = metrics_df.groupby(['domain', 'task', 'algo'])['seed'].count() + logging.info('Row counts after removing duplicate steps: %s', row_counts) + + # Add a "duration" column to the DataFrame + for tag in event_file_tags: + # Flat column names for timestamp and duration + timestamp_col = f'{tag}_Timestamp' + duration_col = f'{tag}_Duration' + + # Calculate the Duration and assign it to the DataFrame + metrics_df[duration_col] = metrics_df.groupby( + ['domain', 'task', 'algo', 'experiment', 'seed'] + )[timestamp_col].transform(lambda x: x - x.min()) + + # Convert duration to seconds and round to the nearest second + metrics_df[duration_col] = ( + metrics_df[duration_col].dt.total_seconds().round().astype(int) + ) + + return metrics_df + + +def load_training_system_data_sequential(base_dir, experiment_ids): + patterns = [ + os.path.join(base_dir, f'{exp_id}/**/train_emissions.csv') + for exp_id in experiment_ids + ] + csv_files = load_data(patterns) + logging.info('Found %s csv files', len(csv_files)) + + all_dfs = [] + for csv_file in csv_files: + logging.info('Processing csv file: %s', csv_file) + data = process_codecarbon_csv(csv_file) + if data is not None: + all_dfs.append(data) + metrics_df = pd.concat(all_dfs) + logging.info('Loaded %s rows of data', len(metrics_df)) + + return metrics_df + + +def load_training_system_data(base_dir, experiment_ids): + patterns = [ + os.path.join(base_dir, f'{exp_id}/**/train_emissions.csv') + for exp_id in experiment_ids + ] + csv_files = load_data(patterns) + logging.info('Found %s csv files', len(csv_files)) + + all_dfs = [] + with concurrent.futures.ProcessPoolExecutor() as executor: + for data in executor.map(process_codecarbon_csv, csv_files): + if data is not None: + all_dfs.append(data) + metrics_df = pd.concat(all_dfs) + logging.info('Loaded %s rows of data', len(metrics_df)) + + return metrics_df + + +def load_generalization_metric_data(base_dir, experiment_ids): + patterns = [ + 
os.path.join(base_dir, f'{exp_id}/**/generalization_rollouts.json') + for exp_id in experiment_ids + ] + json_files = load_data(patterns) + logging.info('Found %s json files', len(json_files)) + + # Load all of the json files + all_dfs = [] + for json_file in json_files: + logging.info('Processing json file: %s', json_file) + indices = [-4, -5, -6, -7, -8] + exp_split = json_file.split('/') + details_segment, algo, task, experiment_number, domain = [ + exp_split[i] for i in indices + ] + seed = int(re.search(r'seed_(\d+)', details_segment).group(1) + ) + skill_level = re.search(r'skill_level_(\w+)', details_segment).group(1) + + logging.info('Processing log dir: %s', json_file) + logging.info( + '\tDomain: %s, Task: %s, Algo: %s, Experiment Number: %s, Seed: %s, Skill Level: %s', + domain, task, algo, experiment_number, seed, skill_level) + + with open(json_file, 'r') as f: + data = json.load(f) + data_df = pd.DataFrame.from_dict(data, orient='index') + + # For generalization, we care about the total reward, so sum up the + # rewards along the first axis. The columns are numbered for each rollout + rollout_return = data_df.sum(axis=1) + + # Make the rollout return series into a DataFrame + rollout_return_df = rollout_return.to_frame().reset_index() + + # Rename the column named '0' to 'total_reward' + rollout_return_df = rollout_return_df.rename(columns={0: 'total_reward'}) + + # Add another column for the number of rollouts + rollout_return_df['num_rollouts'] = len(data_df.columns) + + # Add columns for domain/algo/task/expeirment/seed so we can group by them + # later + data_df = rollout_return_df + data_df['domain'] = domain + data_df['task'] = task + data_df['algo'] = algo + data_df['experiment'] = experiment_number + data_df['seed'] = seed + data_df['skill_level'] = skill_level + all_dfs.append(data_df) + + metrics_df = pd.concat(all_dfs) + metrics = collections.defaultdict(dict) + + # For generalization, we add up all rewards and divide by the number of rollouts + for (domain, task, algo,), group in metrics_df.groupby( + ['domain', 'task', 'algo', ] + ): + # For each task (index in the df) get the cumulative total reward + group['avg_rollout_returns'] = group['total_reward'] / group['num_rollouts'] + mean_val = group.groupby('index')['avg_rollout_returns'].mean().sum() + metrics['generalization_rollout_returns'][(domain, algo, task)] = mean_val + return metrics + + +def load_inference_metric_data(base_dir, experiment_ids): + patterns = [ + os.path.join(base_dir, f'{exp_id}/**/inference_metrics_results.json') + for exp_id in experiment_ids] + json_files = load_data(patterns) + logging.info('Found %s json files', len(json_files)) + + # Load all of the json files + all_dfs = [] + for json_file in json_files: + logging.info('Processing json file: %s', json_file) + indices = [-4, -5, -6, -7, -8] + exp_split = json_file.split('/') + details_segment, algo, task, experiment_number, domain = [ + exp_split[i] for i in indices + ] + seed = int(re.search(r'seed_(\d+)', details_segment).group(1)) + skill_level = re.search(r'skill_level_(\w+)', details_segment).group(1) + + logging.info('Processing log dir: %s', json_file) + logging.info( + '\tDomain: %s, Task: %s, Algo: %s, Experiment Number: %s, Seed: %s, Skill Level: %s', + domain, task, algo, experiment_number, seed, skill_level) + + with open(json_file, 'r') as f: + data = json.load(f) + data_df = pd.DataFrame.from_dict(data, orient='index').reset_index() + data_df = data_df.rename(columns={'index': 'metric'}) + # Add columns for 
domain/algo/task/expeirment/seed so we can group by them + # later + + data_df['domain'] = domain + data_df['task'] = task + data_df['algo'] = algo + data_df['experiment'] = experiment_number + data_df['seed'] = seed + data_df['skill_level'] = skill_level + all_dfs.append(data_df) + + metrics_df = pd.concat(all_dfs) + metrics = collections.defaultdict(dict) + + for metric, df in metrics_df.groupby('metric'): + for (domain, task, algo,), group in df.groupby( + ['domain', 'task', 'algo', ] + ): + # Each row has a list object in the 'values' column. We need to aggregate + # these lists to get the mean and standard deviation + all_values = [] + for values in group['values']: + all_values.extend(values) + + mean_val = np.mean(all_values) + std_val = np.std(all_values) + metrics[metric][(domain, algo, task)] = { + 'mean': mean_val, + 'std': std_val + } + return metrics, metrics_df + + +def load_inference_system_data(base_dir, experiment_ids): + patterns = [ + os.path.join(base_dir, f'{exp_id}/**/inference_emissions.csv') + for exp_id in experiment_ids + ] + csv_files = load_data(patterns) + logging.info('Found %s csv files', len(csv_files)) + + process_codecarbon_csv_fn = functools.partial(process_codecarbon_csv) + + all_dfs = [] + with concurrent.futures.ProcessPoolExecutor() as executor: + for data in executor.map(process_codecarbon_csv_fn, csv_files): + if data is not None: + all_dfs.append(data) + metrics_df = pd.concat(all_dfs) + logging.info('Loaded %s rows of data', len(metrics_df)) + + return metrics_df diff --git a/a2perf/analysis/reliability.py b/a2perf/analysis/reliability.py new file mode 100644 index 0000000..dc27dba --- /dev/null +++ b/a2perf/analysis/reliability.py @@ -0,0 +1,468 @@ +import logging +import multiprocessing +import os + +import numpy as np +import pandas as pd +import scipy + +LEFT_TAIL_ALPHA = 0.05 +RIGHT_TAIL_ALPHA = 0.95 + +MIN_NUM_DISPERSION_DATA_POINTS = 2 + + +def compute_dispersion(curve): + # Compute the dispersion as the IQR of the curve + return scipy.stats.iqr(curve) + + +def apply_dispersion_fn_to_groups(groups, data_column, tag): + results = [] + for g in groups: + result = compute_dispersion_if_enough_data(g[data_column], tag) + g = g.iloc[[0]] + g['iqr_dispersion'] = pd.NA + + if result: + g.iloc[0, g.columns.get_loc('iqr_dispersion')] = result + results.append(g) + return pd.concat(results) + + +def compute_drawdown(sequence): + """Computes the drawdown for a sequence of numbers. + + The drawdown at time T is the decline from the highest peak occurring at or + before time T. https://en.wikipedia.org/wiki/Drawdown_(economics). + + The drawdown is always non-negative. A larger (more positive) drawdown + indicates a larger drop. + + Args: + sequence: A numpy array. + + Returns: + A numpy array of same length as the original sequence, containing the + drawdown at each timestep. 
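+
+  Example (illustrative values): for the sequence [1., 3., 2., 5., 4.] the
+  running peak is [1., 3., 3., 5., 5.], so the drawdown is [0., 0., 1., 0., 1.].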
+ """ + peak_so_far = np.maximum.accumulate(sequence) + return peak_so_far - sequence + + +def compute_dispersion_if_enough_data(group, tag): + if len(group) > MIN_NUM_DISPERSION_DATA_POINTS: + return compute_dispersion(group) + else: + logging.warning('Insufficient data at step %s for tag %s.', group.index[0], + tag) + return None # or return some default value + + +def lowpass_filter(curve, lowpass_thresh): + filt_b, filt_a = scipy.signal.butter(8, lowpass_thresh) + + def butter_filter_fn(c): + padlen = min(len(c) - 1, 3 * max(len(filt_a), len(filt_b))) + return scipy.signal.filtfilt(filt_b, filt_a, curve, padlen=padlen) + + processed_curve = butter_filter_fn(curve) + return processed_curve + + +def apply_lowpass(curve): + # Apply the lowpass filter directly on the curve + low_pass_curve = lowpass_filter(curve, lowpass_thresh=0.01) + return low_pass_curve + + +def dispersion_across_runs(data_df, tag, index): + step_col = f'{tag}_{index}' + value_col = f'{tag}_Value' + lowpass_value_col = f'{tag}_lowpass_Value' + + # Lowpass filter each curve + data_df[lowpass_value_col] = data_df.groupby( + ['domain', 'task', 'algo', 'experiment', 'seed'] + )[value_col].transform(apply_lowpass) + + # Group the curves by 'domain', 'algo', 'task', and 'step_col' to compute dispersion + dispersion_groups = data_df.groupby(['domain', 'algo', 'task', step_col]) + dispersion_groups = [group for _, group in dispersion_groups] + indices = range(len(dispersion_groups)) + + num_processes = os.cpu_count() // 2 + split_indices = np.array_split(indices, num_processes) + dispersion_group_batches = [[dispersion_groups[index] for index in batch] for + batch + in split_indices] + + with multiprocessing.Pool(num_processes) as pool: + results = pool.starmap(apply_dispersion_fn_to_groups, + [(batch, lowpass_value_col, tag) for batch in + dispersion_group_batches]) + pool.close() + pool.join() + + dispersion_df = pd.concat(results) + metrics = {} + for (domain, algo, task), group in dispersion_df.groupby( + ['domain', 'algo', 'task'] + ): + mean_iqr = group['iqr_dispersion'].mean() + std_iqr = group['iqr_dispersion'].std() + metrics[( + domain, + algo, + task, + )] = dict(mean=mean_iqr, std=std_iqr) + logging.info('Domain: %s, Task: %s, Algo: %s, Mean IQR: %s, Std IQR: %s', + domain, task, algo, mean_iqr, std_iqr) + return metrics + + +def compute_eval_points_within_runs( + data_df, tag, index, eval_points_per_window=5 +): + experiment_meta_data = {} + for (domain, task, algo, experiment, seed), group in data_df.groupby( + ['domain', 'task', 'algo', 'experiment', 'seed'] + ): + df = group.sort_values(by=f'{tag}_{index}').copy() + median_step_diff = df[f'{tag}_{index}'].diff().median() + window_size = int(eval_points_per_window * median_step_diff) + eval_points = list( + range( + np.ceil(window_size / 2).astype(int), + max(df[f'{tag}_{index}']), + int(median_step_diff), + ) + ) + + logging.info( + 'Domain: %s, Algo: %s, Experiment: %s, Task: %s, Seed: %s', + domain, algo, experiment, task, seed + ) + logging.info('\tMedian step difference: %s', median_step_diff) + logging.info('\tWindow size: %s', window_size) + logging.info('\tNum eval points: %s', len(eval_points)) + + experiment_meta_data[(domain, task, algo, experiment, seed)] = { + 'eval_points': eval_points, + 'window_size': window_size, + 'median_step_diff': median_step_diff, + } + + return experiment_meta_data + + +def dispersion_within_seed( + domain, task, algo, exp_id, seed, data_df, + tag, + index, + experiment_meta_data, + dispersion_window_fn=scipy.stats.iqr, ): 
+ seed_group = data_df.groupby( + ['domain', 'task', 'algo', 'experiment', 'seed']).get_group( + (domain, task, algo, exp_id, seed)) + seed_group[f'{tag}_Value_diff'] = seed_group[f'{tag}_Value'].diff() + seed_group = seed_group[[f'{tag}_{index}', f'{tag}_Value_diff']].copy() + seed_group = seed_group.dropna() + steps, episode_reward_diff = seed_group.to_numpy().T + + window_size = experiment_meta_data[(domain, task, algo, exp_id, seed)][ + 'window_size' + ] + + all_iqr_values = [] + for eval_point in experiment_meta_data[ + (domain, task, algo, exp_id, seed) + ]['eval_points']: + low_end = np.ceil(eval_point - (window_size / 2)) + high_end = np.floor(eval_point + (window_size / 2)) + + eval_points_above = steps >= low_end + eval_points_below = steps <= high_end + eval_points_in_window = np.logical_and( + eval_points_above, eval_points_below + ) + valid_eval_points = np.nonzero(eval_points_in_window)[0] + + if len(valid_eval_points) == 0: + logging.warning( + 'No valid eval points for domain: %s, task: %s, algo: %s,' + ' exp_id: %s, seed: %s, eval_point: %s', + domain, task, algo, exp_id, seed, eval_point) + + continue + elif len(valid_eval_points) < 2: + # IQR needs at least 2 data points for meaningful calculation + logging.warning( + 'Insufficient data points for IQR calculation for domain: %s,' + ' task: %s, algo: %s, exp_id: %s, seed: %s, eval_point: %s', + domain, task, algo, exp_id, seed, eval_point + ) + continue + + # Apply window_fn to get the IQR for the current window + window_dispersion = dispersion_window_fn( + episode_reward_diff[valid_eval_points] + ) + all_iqr_values.append(window_dispersion) + + return all_iqr_values + + +def dispersion_within_runs( + data_df, + tag, + index, + experiment_meta_data, + dispersion_window_fn=scipy.stats.iqr, +): + metrics = {} + for (domain, task, algo), group in data_df.groupby( + ['domain', 'task', 'algo'] + ): + logging.info('Processing Domain: %s, Task: %s, Algo: %s', domain, task + , algo) + all_iqr_values = [] + experiment_ids_and_seeds = group.groupby(['experiment', 'seed']).groups + with multiprocessing.Pool() as pool: + results = pool.starmap( + dispersion_within_seed, + [ + ( + domain, task, algo, exp_id, seed, data_df, tag, index, + experiment_meta_data, dispersion_window_fn + ) for (exp_id, seed) in experiment_ids_and_seeds + ] + ) + + pool.close() + pool.join() + + results = [item for sublist in results for item in sublist] + all_iqr_values.extend(results) + mean_iqr = np.mean(all_iqr_values) + std_iqr = np.std(all_iqr_values) + metrics[( + domain, + algo, + task, + )] = dict(mean=mean_iqr, std=std_iqr) + logging.info('Domain: %s, Task: %s, Algo: %s, Mean IQR: %s, Std IQR: %s', + domain, task, algo, mean_iqr, std_iqr) + return metrics + + +def short_term_risk(data_df, tag, index): + metrics = {} + for (domain, task, algo), group in data_df.groupby( + ['domain', 'task', 'algo'] + ): + logging.info('Processing Domain: %s, Task: %s, Algo: %s', domain, task, + algo) + + all_diffs = [] + for exp_id, exp_group in group.groupby('experiment'): + for seed, seed_group in exp_group.groupby('seed'): + seed_df = seed_group.sort_values(by=f'{tag}_{index}').copy() + seed_df[f'{tag}_Value_diff'] = seed_df[f'{tag}_Value'].diff() + seed_df = seed_df.dropna() + + episode_reward_diffs = seed_df[f'{tag}_Value_diff'].values + all_diffs.extend(episode_reward_diffs) + + risk = np.percentile(all_diffs, LEFT_TAIL_ALPHA * 100, method='linear') + + # CVaR is the average of the bottom "alpha" percent of diffs + all_diffs = np.array(all_diffs) + cvar = 
np.mean(all_diffs[all_diffs <= risk]) + cvar = -cvar # make it positive for easier interpretation + logging.info('\t\t\tCVaR: %s', cvar) + metrics[( + domain, + algo, + task, + )] = cvar + return metrics + + +def long_term_risk(data_df, tag, index): + """Calculate the Conditional Value at Risk (CVaR) for different experimental groups. + + Args: + + data_df (DataFrame): The dataset containing the experimental results. + tag (str): The tag used to identify the relevant columns in data_df. + index (str): The index used to sort the data within each group. + + Returns: + dict: A dictionary containing the CVaR for each (domain, algorithm, task) + combination. + """ + + metrics = {} + for (domain, task, algo), group in data_df.groupby( + ['domain', 'task', 'algo'] + ): + logging.info('Processing Domain: %s, Task: %s, Algo: %s', domain, task, + algo) + all_drawdowns = [] + + for exp_id, exp_group in group.groupby('experiment'): + for seed, seed_group in exp_group.groupby('seed'): + # Ensure the correct column exists + if f'{tag}_{index}' not in seed_group.columns: + logging.warning('Column %s not found in data.', f'{tag}_{index}') + continue + + # Sort the seed group by the index + seed_group = seed_group.sort_values(by=f'{tag}_{index}') + + # Compute the drawdowns + values = seed_group[f'{tag}_Value'].values + drawdowns = compute_drawdown(values) + all_drawdowns.extend(drawdowns) + + if all_drawdowns: + # Get the worst alpha percentile of drawdowns + top_alpha_percent = np.percentile(all_drawdowns, RIGHT_TAIL_ALPHA * 100) + all_drawdowns = np.array(all_drawdowns) + + # CVaR is the average of the worst "alpha" percent of drawdowns + cvar = np.mean(all_drawdowns[all_drawdowns >= top_alpha_percent]) + logging.info('\t\t\tCVaR: %s', cvar) + metrics[(domain, algo, task)] = cvar + else: + logging.warning('No drawdown data available for %s, %s, %s', domain, task, + algo) + return metrics + + +def risk_across_runs(data_df, tag, alpha=0.05): + """Calculate the Conditional Value at Risk (CVaR) for the final values across different runs. + + Args: + + data_df (DataFrame): The dataset containing the experimental results. + tag (str): The tag used to identify the relevant value columns in data_df. + index (str): The index used to sort the data within each group. + alpha (float): The percentile for CVaR calculation (default is 0.05). + + Returns: + DataFrame: A DataFrame with CVaR for each (domain, task, algo) combination. 
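+
+  Note: with the default alpha of 0.05 this is the mean of the final per-run
+  returns at or below the 5th percentile, i.e. the expected final return of
+  the worst-performing runs.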
+ """ + + metrics = {} + + # Extract the final values for each group + final_values_col = f'{tag}_Value' + final_tag_values = ( + data_df.groupby(['domain', 'task', 'algo', 'experiment', 'seed'])[ + final_values_col + ] + .last() + .reset_index() + ) + + # Grouping all experiments and seeds within a specific domain/task/algo + for (domain, task, algo), group in final_tag_values.groupby( + ['domain', 'task', 'algo'] + ): + # Get the bottom "alpha" percentile of final values + values = group[final_values_col].values + bottom_alpha_percent = np.percentile(values, alpha * 100, method='linear') + cvar = np.mean(values[values <= bottom_alpha_percent]) + + # Logging the process and results + logging.info('Processing Domain: %s, Task: %s, Algo: %s', domain, task + , algo) + logging.info('\t\t\tCVaR: %s', cvar) + # Storing the CVaR values in the metrics dictionary + metrics[(domain, algo, task)] = cvar + + return metrics + + +def get_training_metrics(data_df, tag, index): + dispersion_across_runs_result = dispersion_across_runs(data_df, tag, index) + experiment_meta_data = compute_eval_points_within_runs( + data_df=data_df, tag=tag, index=index + ) + dispersion_within_runs_result = dispersion_within_runs( + data_df=data_df, + tag=tag, + index=index, + experiment_meta_data=experiment_meta_data, + ) + short_term_risk_result = short_term_risk( + data_df=data_df, tag=tag, index=index + ) + long_term_risk_result = long_term_risk(data_df=data_df, tag=tag, index=index) + risk_across_runs_result = risk_across_runs( + data_df=data_df, tag=tag, alpha=LEFT_TAIL_ALPHA + ) + + return { + 'dispersion_across_runs': dispersion_across_runs_result, + 'dispersion_within_runs': dispersion_within_runs_result, + 'short_term_risk': short_term_risk_result, + 'long_term_risk': long_term_risk_result, + 'risk_across_runs': risk_across_runs_result, + } + + +def dispersion_across_rollouts(data_df): + metrics = {} + for (domain, algo, task), group in data_df.groupby( + ['domain', 'algo', 'task']): + + # We are only interested in the `rollout_returns` metric + group = group[group['metric'] == 'rollout_returns'] + all_values = [] + for values in group['values']: + all_values.extend(values) + + all_values = np.array(all_values) + + iqr = scipy.stats.iqr(all_values) + logging.info('Processing Domain: %s, Task: %s, Algo: %s', domain, task, + algo) + logging.info('\t\t\tIQR: %s', iqr) + + metrics[(domain, algo, task)] = iqr + return metrics + + +def risk_across_rollouts(data_df): + metrics = {} + for (domain, algo, task), group in data_df.groupby( + ['domain', 'algo', 'task']): + + # We are only interested in the `rollout_returns` metric + group = group[group['metric'] == 'rollout_returns'] + all_values = [] + for values in group['values']: + all_values.extend(values) + + all_values = np.array(all_values) + bottom_alpha_percent = np.percentile(all_values, LEFT_TAIL_ALPHA * 100, + method='linear') + cvar = np.mean(all_values[all_values <= bottom_alpha_percent]) + logging.info('Processing Domain: %s, Task: %s, Algo: %s', domain, task, + algo) + logging.info('\t\t\tCVaR: %s', cvar) + + metrics[(domain, algo, task)] = cvar + return metrics + + +def get_inference_metrics(data_df): + risk_across_rollouts_result = risk_across_rollouts(data_df) + dispersion_across_rollouts_result = dispersion_across_rollouts(data_df) + return { + 'risk_across_rollouts': risk_across_rollouts_result, + 'dispersion_across_rollouts': dispersion_across_rollouts_result, + } diff --git a/a2perf/analysis/results.py b/a2perf/analysis/results.py new file mode 100644 
index 0000000..403dcb5 --- /dev/null +++ b/a2perf/analysis/results.py @@ -0,0 +1,221 @@ +import pandas as pd + +OPTIMAL_METRIC_CRITERIA = dict( + energy_consumed='min', + rollout_returns='max', + generalization_rollout_returns='max', + dispersion_across_runs='min', + dispersion_within_runs='min', + dispersion_across_rollouts='min', + short_term_risk='min', + long_term_risk='min', + risk_across_runs='max', + peak_ram_usage='min', + mean_ram_usage='min', + wall_clock_time='min', + inference_time='min', + gpu_power_usage='min', + cpu_power_usage='min', + risk_across_rollouts='max', + disperion_across_rollouts='min', + +) + +METRIC_TO_DISPLAY_NAME = dict( + rollout_returns='Returns', + generalization_rollout_returns='Generalization', + dispersion_across_runs='Dispersion Across Runs', + dispersion_within_runs='Dispersion Within Runs', + dispersion_across_rollouts='Dispersion Across Rollouts', + short_term_risk='Short Term Risk', + long_term_risk='Long Term Risk', + risk_across_runs='Risk Across Runs', + peak_ram_usage='Peak RAM Usage', + mean_ram_usage='Mean RAM Usage', + wall_clock_time='Wall Clock Time', + inference_time='Inference Time', + gpu_power_usage='GPU Power Usage', + cpu_power_usage='CPU Power Usage', + risk_across_rollouts='Risk Across Rollouts', + disperion_across_rollouts='Dispersion Across Rollouts', + energy_consumed='Energy Consumed', +) + +METRIC_TO_CATEGORY = dict( + rollout_returns='Application', + generalization_rollout_returns='Application', + dispersion_across_runs='Reliability', + dispersion_within_runs='Reliability', + dispersion_across_rollouts='Reliability', + short_term_risk='Reliability', + long_term_risk='Reliability', + risk_across_runs='Reliability', + peak_ram_usage='System', + mean_ram_usage='System', + wall_clock_time='System', + inference_time='System', + gpu_power_usage='System', + cpu_power_usage='System', + risk_across_rollouts='Reliability', + disperion_across_rollouts='Reliability', + energy_consumed='System', +) + +METRIC_TO_UNIT = dict( + rollout_returns='100 eps.', + generalization_rollout_returns='100 eps. 
[all tasks]', + dispersion_across_runs='IQR', + dispersion_within_runs='IQR', + dispersion_across_rollouts='IQR', + short_term_risk='CVaR', + long_term_risk='CVaR', + risk_across_runs='CVaR', + peak_ram_usage='GB', + mean_ram_usage='GB', + wall_clock_time='Hours', + inference_time='ms', + gpu_power_usage='W', + cpu_power_usage='W', + risk_across_rollouts='CVaR', + disperion_across_rollouts='IQR', + energy_consumed='kWh', +) + + +def format_value(val): + """Format the value in scientific notation if its absolute value is below a certain threshold.""" + threshold = 1e-2 # You can adjust this threshold as needed + if abs(val) < threshold: + return f'{val:.2e}' # scientific notation + else: + return f'{val:.2f}' # standard decimal format + + +def metrics_dict_to_pandas_df(metrics_dict): + # Transform the metrics dictionary into a DataFrame + data_for_df = [] + for metric_name, metric_data in metrics_dict.items(): + for parameters, values in metric_data.items(): + domain, algo, task = parameters + category = METRIC_TO_CATEGORY[metric_name] + display_name = METRIC_TO_DISPLAY_NAME[metric_name] + unit = METRIC_TO_UNIT[metric_name] + + # Within your existing code block + if isinstance(values, dict): + mean = values['mean'] + std = values['std'] + display_mean = format_value(mean) + display_std = format_value(std) + display_val = f'{display_mean} \\pm {display_std}' + else: + value_to_compare = values + display_val = format_value(value_to_compare) + + data_for_df.append( + (domain, task, algo, category, display_name, unit, display_val) + ) + + # For every metric, we must decide which algorithm is the best + for metric_name, metric_data in metrics_dict.items(): + optimal_criterion = OPTIMAL_METRIC_CRITERIA[metric_name] + + best_exps = [] + best_value = None + for (domain, algo, task), values in metric_data.items(): + category = METRIC_TO_CATEGORY[metric_name] + display_name = METRIC_TO_DISPLAY_NAME[metric_name] + unit = METRIC_TO_UNIT[metric_name] + + value_to_compare = values + comparison_function = None + + if isinstance(value_to_compare, dict): + value_to_compare = value_to_compare['mean'] + + if best_value is None: + comparison_function = lambda new, old: True + elif optimal_criterion == 'min': + comparison_function = lambda new, old: new < old + elif optimal_criterion == 'max': + comparison_function = lambda new, old: new > old + + if comparison_function is not None and comparison_function( + value_to_compare, best_value): + best_exps.clear() + best_value = value_to_compare + + # Check for equality for the case where it's as good as the best_value (and best_value is not None) + parameters_to_add = ( + domain, task, algo, category, display_name, unit, display_val) + if best_value is not None and value_to_compare == best_value: + best_exps.append( + parameters_to_add) + elif comparison_function(value_to_compare, best_value): + best_exps = [ + parameters_to_add] + + df = pd.DataFrame( + data_for_df, + columns=[ + 'domain', + 'task', + 'algo', + 'category', + 'metric', + 'unit', + 'display_val', + ], + ) + return df + + +def df_as_latex(df, mode): + # Merge 'metric' and 'unit' into one column, properly formatted + df['metric'] = df.apply(lambda x: f"{x['metric']} ({x['unit']})", axis=1) + + # Drop the 'unit' column as it's no longer needed + df = df.drop(columns='unit') + + # Reindex and sort the DataFrame + df = df.set_index( + ['domain', 'task', 'category', 'metric', 'algo'] + ).sort_index() + + # Create a pivot table with 'category' and 'metric' as the row index, and 'algo' as the column index + 
df_pivot = df.pivot_table( + index=['category', 'metric'], + columns='algo', + values='display_val', + aggfunc='first', + ) + + df_pivot = df_pivot.rename_axis( + index={ + 'category': '\\textbf{Category}', + 'metric': '\\textbf{Metric Name}', + } + ) + + df_pivot.columns = [ + f'\\textbf{{{algo.upper()}}}' for algo in df_pivot.columns + ] + + # Determine the number of algorithms dynamically for column formatting + column_format = '|l|l|' + 'c|' * len(df_pivot.columns) + + # Generate the LaTeX table + latex_table = df_pivot.to_latex( + index=True, + multirow=True, + multicolumn=True, + position='!htbp', + bold_rows=False, # Set to False as we've already applied bold to headers + column_format=column_format, + caption='Metrics for the application domain', + na_rep='N/A', + label='tab:metrics', + escape=False, # Allow LaTeX commands within cells + ) + + return latex_table diff --git a/a2perf/analysis/system.py b/a2perf/analysis/system.py new file mode 100644 index 0000000..3b9aea2 --- /dev/null +++ b/a2perf/analysis/system.py @@ -0,0 +1,231 @@ +from functools import reduce + +import pandas as pd + + +def get_distributed_experiment_metric(data_df, metric, + tolerance=pd.Timedelta('10sec'), dtype=float): + data_df['timestamp'] = pd.to_datetime(data_df['timestamp']) + final_dfs_to_concat = [] + + for _, group in data_df.groupby( + ['domain', 'algo', 'task', 'experiment', 'seed']): + group = group.sort_values('timestamp') + + # Initialize list to hold data frames for each run_id + run_groups = [] + run_ids = group['run_id'].unique() + + # Create a separate DataFrame for each run_id with the specific metric column renamed + for run_id in run_ids: + run_group = group[group['run_id'] == run_id][ + ['timestamp', 'domain', 'algo', 'task', 'experiment', 'seed', + metric]].rename( + columns={metric: f'{metric}_{run_id}'} + ).astype({f'{metric}_{run_id}': dtype}) + run_groups.append(run_group) + + # Use functools.reduce to merge all DataFrame objects at once + merged_group = reduce( + lambda left, right: pd.merge_asof( + left, right, on='timestamp', tolerance=tolerance, + suffixes=('', f'_{right.columns[-1]}') + ), run_groups + ) + + metric_columns = [col for col in merged_group.columns if + col.startswith(metric)] + merged_group[f'experiment_{metric}'] = merged_group[metric_columns].sum( + axis=1) + final_dfs_to_concat.append(merged_group[ + ['domain', 'algo', 'task', 'experiment', + 'seed', 'timestamp', f'experiment_{metric}']]) + + # Concatenate all final DataFrames + aggregated_df = pd.concat(final_dfs_to_concat, ignore_index=True) + return aggregated_df + + +def get_metric(data_df, metric, tolerance=pd.Timedelta('10sec')): + metric_df = get_distributed_experiment_metric(data_df, metric, tolerance) + experiment_metric_col_name = f'experiment_{metric}' + + metrics = {} + for (domain, algo, task), group in metric_df.groupby( + ['domain', 'algo', 'task'] + ): + mean_metric = group[experiment_metric_col_name].mean() + std_metric = group[experiment_metric_col_name].std() + metrics[(domain, algo, task)] = {'mean': mean_metric, 'std': std_metric} + return metrics + + +def get_mean_ram_usage(data_df, tolerance=pd.Timedelta('10sec')): + ram_usage_df = get_distributed_experiment_metric( + data_df, 'ram_process', tolerance=tolerance + ) + metrics = {} + + for (domain, algo, task), group in ram_usage_df.groupby( + ['domain', 'algo', 'task'] + ): + mean_ram_usage = group['experiment_ram_process'].mean() + std_ram_usage = group['experiment_ram_process'].std() + + metrics[(domain, algo, task)] = { + 'mean': 
mean_ram_usage, + 'std': std_ram_usage, + } + + return metrics + + +def get_gpu_power_usage(data_df): + gpu_power_usage_df = get_distributed_experiment_metric(data_df, 'gpu_power') + metrics = {} + for (domain, algo, task), group in gpu_power_usage_df.groupby( + ['domain', 'algo', 'task'] + ): + mean_gpu_power_usage = group['experiment_gpu_power'].mean() + std_gpu_power_usage = group['experiment_gpu_power'].std() + + metrics[(domain, algo, task)] = { + 'mean': mean_gpu_power_usage, + 'std': std_gpu_power_usage, + } + + return metrics + + +def get_peak_ram_usage(data_df, tolerance=pd.Timedelta('10sec')): + ram_usage_df = get_distributed_experiment_metric( + data_df, 'ram_process', tolerance=tolerance + ) + # Find the max ram usage for each experiment + peak_ram_usage = ram_usage_df.groupby( + ['domain', 'algo', 'task', 'experiment', 'seed'] + )['experiment_ram_process'].max() + metrics = {} + for (domain, algo, task), group in peak_ram_usage.groupby( + ['domain', 'algo', 'task'] + ): + metrics[(domain, algo, task)] = {'mean': group.mean(), 'std': group.std()} + + return metrics + + +def get_wall_clock_time(data_df): + # Ensure timestamps are in datetime format + if not pd.api.types.is_datetime64_any_dtype(data_df['timestamp']): + data_df['timestamp'] = pd.to_datetime(data_df['timestamp']) + + # Find the earliest and latest timestamps for each run + group_cols = ['domain', 'algo', 'task', 'experiment', 'seed', 'run_id'] + earliest_timestamp = data_df.groupby(group_cols)['timestamp'].min() + latest_timestamp = data_df.groupby(group_cols)['timestamp'].max() + + # Find the earliest shared and latest shared timestamps for each experiment + earliest_shared_timestamp = earliest_timestamp.groupby( + ['domain', 'algo', 'task', 'experiment', 'seed'] + ).max() + latest_shared_timestamp = latest_timestamp.groupby( + ['domain', 'algo', 'task', 'experiment', 'seed'] + ).min() + + # Compute wall clock time in hours + wall_clock_time = ( + latest_shared_timestamp - earliest_shared_timestamp + ).dt.total_seconds() / 3600 + + # Group by 'domain', 'algo', 'task' and calculate mean and std of wall clock time + metrics = {} + grouped_wall_clock = wall_clock_time.groupby(['domain', 'algo', 'task']) + for (domain, algo, task), group_data in grouped_wall_clock: + metrics[(domain, algo, task)] = { + 'mean': group_data.mean(), + 'std': group_data.std(), + } + + return metrics + + +def get_ram_power_usage(data_df): + # Not implemented since this is an estimate in codecarbon + return {} + + +def get_cpu_power_usage(data_df, tolerance=pd.Timedelta('10sec')): + # This is an estimate without access to Intel RAPL + cpu_power_df = get_distributed_experiment_metric( + data_df, 'cpu_power', tolerance=tolerance + ) + metrics = {} + + for (domain, algo, task), group in cpu_power_df.groupby( + ['domain', 'algo', 'task'] + ): + mean_ram_usage = group['experiment_cpu_power'].mean() + std_ram_usage = group['experiment_cpu_power'].std() + + metrics[(domain, algo, task)] = { + 'mean': mean_ram_usage, + 'std': std_ram_usage, + } + + return metrics + + +def get_total_energy(data_df): + total_energy_consumed = data_df.groupby( + ['domain', 'algo', 'task', 'experiment', 'seed', ] + )['energy_consumed'].sum().astype(float) + + # Group by 'domain', 'algo', 'task' and calculate mean and std of total energy + metrics = {} + grouped_total_energy = total_energy_consumed.groupby( + ['domain', 'algo', 'task']) + for (domain, algo, task), group_data in grouped_total_energy: + metrics[(domain, algo, task)] = { + 'mean': group_data.mean(), + 
'std': group_data.std(), + } + + return metrics + + +def get_power_usage(data_df): + ram_power_usage = get_ram_power_usage(data_df) + gpu_power_usage = get_gpu_power_usage(data_df) + cpu_power_usage = get_cpu_power_usage(data_df) + + return { + 'gpu_power_usage': gpu_power_usage, + # 'cpu_power_usage': cpu_power_usage, + } + + +def get_training_metrics(data_df): + wall_clock_time = get_wall_clock_time(data_df=data_df) + mean_ram_usage = get_mean_ram_usage(data_df=data_df) + peak_ram_usage = get_peak_ram_usage(data_df=data_df) + power_usage = get_power_usage(data_df=data_df) + energy_consumed = get_total_energy(data_df=data_df) + return { + 'mean_ram_usage': mean_ram_usage, + 'peak_ram_usage': peak_ram_usage, + 'wall_clock_time': wall_clock_time, + 'energy_consumed': energy_consumed, + **power_usage, + } + + +def get_inference_metrics(data_df): + mean_ram_usage = get_mean_ram_usage(data_df=data_df) + peak_ram_usage = get_peak_ram_usage(data_df=data_df) + power_usage = get_power_usage(data_df=data_df) + + return { + 'mean_ram_usage': mean_ram_usage, + 'peak_ram_usage': peak_ram_usage, + **power_usage, + } diff --git a/a2perf/constants.py b/a2perf/constants.py new file mode 100644 index 0000000..b7be25b --- /dev/null +++ b/a2perf/constants.py @@ -0,0 +1,46 @@ +import enum + +import gin + + +@gin.constants_from_enum +class BenchmarkDomain(enum.Enum): + QUADRUPED_LOCOMOTION = "QuadrupedLocomotion-v0" + WEB_NAVIGATION = "WebNavigation-v0" + CIRCUIT_TRAINING = "CircuitTraining-v0" + + +@gin.constants_from_enum +class BenchmarkMode(enum.Enum): + TRAIN = "train" + INFERENCE = "inference" + GENERALIZATION = "generalization" + + +@gin.constants_from_enum +class SystemMetrics(enum.Enum): + INFERENCE_TIME = "InferenceTime" + TRAINING_TIME = "TrainingTime" + MEMORY_USAGE = "MemoryUsage" + + +@gin.constants_from_enum +class ReliabilityMetrics(enum.Enum): + IqrWithinRuns = "IqrWithinRuns" + IqrAcrossRuns = "IqrAcrossRuns" + LowerCVaROnDiffs = "LowerCVaROnDiffs" + LowerCVaROnDrawdown = "LowerCVaROnDrawdown" + LowerCVarOnAcross = "LowerCVarOnAcross" + MedianPerfDuringTraining = "MedianPerfDuringTraining" + MadAcrossRollouts = "MadAcrossRollouts" + IqrAcrossRollouts = "IqrAcrossRollouts" + StddevAcrossRollouts = "StddevAcrossRollouts" + UpperCVaRAcrossRollouts = "UpperCVaRAcrossRollouts" + LowerCVaRAcrossRollouts = "LowerCVaRAcrossRollouts" + + +ENV_NAMES = { + BenchmarkDomain.QUADRUPED_LOCOMOTION: BenchmarkDomain.QUADRUPED_LOCOMOTION.value, + BenchmarkDomain.WEB_NAVIGATION: BenchmarkDomain.WEB_NAVIGATION.value, + BenchmarkDomain.CIRCUIT_TRAINING: BenchmarkDomain.CIRCUIT_TRAINING.value, +} diff --git a/a2perf/domains/tfa/suite_gym.py b/a2perf/domains/tfa/suite_gym.py index f047912..4a5cb52 100644 --- a/a2perf/domains/tfa/suite_gym.py +++ b/a2perf/domains/tfa/suite_gym.py @@ -46,12 +46,12 @@ from a2perf.domains.tfa import gym_wrapper TimeLimitWrapperType = Callable[ - [py_environment.PyEnvironment, int], py_environment.PyEnvironment + [py_environment.PyEnvironment, int], py_environment.PyEnvironment ] -WEB_NAVIGATION = 'WebNavigation-v0' -CIRCUIT_TRAINING = 'CircuitTraining-v0' -QUADRUPED_LOCOMOTION = 'QuadrupedLocomotion-v0' +WEB_NAVIGATION = "WebNavigation-v0" +CIRCUIT_TRAINING = "CircuitTraining-v0" +QUADRUPED_LOCOMOTION = "QuadrupedLocomotion-v0" @gin.configurable @@ -65,49 +65,49 @@ def load( gym_kwargs: Optional[Dict[str, Any]] = None, render_kwargs: Optional[Dict[str, Any]] = None, ) -> py_environment.PyEnvironment: - """Loads the selected environment and wraps it with the specified wrappers. 
- - Note that by default a TimeLimit wrapper is used to limit episode lengths - to the default benchmarks defined by the registered environments. - - Args: - environment_name: Name for the environment to load. - discount: Discount to use for the environment. - max_episode_steps: If None the max_episode_steps will be set to the default - step limit defined in the environment's spec. No limit is applied if set - to 0 or if there is no max_episode_steps set in the environment's spec. - gym_env_wrappers: Iterable with references to wrapper classes to use - directly on the gym environment. - env_wrappers: Iterable with references to wrapper classes to use on the - gym_wrapped environment. - spec_dtype_map: A dict that maps gym spaces to np dtypes to use as the - default dtype for the arrays. An easy way how to configure a custom - mapping through Gin is to define a gin-configurable function that returns - desired mapping and call it in your Gin congif file, for example: - `suite_gym.load.spec_dtype_map = @get_custom_mapping()`. - gym_kwargs: Optional kwargs to pass to the Gym environment class. - render_kwargs: Optional kwargs for rendering to pass to `render()` of the - gym_wrapped environment. - - Returns: - A PyEnvironment instance. - """ - gym_kwargs = gym_kwargs if gym_kwargs else {} - gym_spec = gym.spec(environment_name) - gym_env = gym_spec.make(**gym_kwargs) - - if max_episode_steps is None and gym_spec.max_episode_steps is not None: - max_episode_steps = gym_spec.max_episode_steps - - return wrap_env( - gym_env, - discount=discount, - max_episode_steps=max_episode_steps, - gym_env_wrappers=gym_env_wrappers, - env_wrappers=env_wrappers, - spec_dtype_map=spec_dtype_map, - render_kwargs=render_kwargs, - ) + """Loads the selected environment and wraps it with the specified wrappers. + + Note that by default a TimeLimit wrapper is used to limit episode lengths + to the default benchmarks defined by the registered environments. + + Args: + environment_name: Name for the environment to load. + discount: Discount to use for the environment. + max_episode_steps: If None the max_episode_steps will be set to the default + step limit defined in the environment's spec. No limit is applied if set + to 0 or if there is no max_episode_steps set in the environment's spec. + gym_env_wrappers: Iterable with references to wrapper classes to use + directly on the gym environment. + env_wrappers: Iterable with references to wrapper classes to use on the + gym_wrapped environment. + spec_dtype_map: A dict that maps gym spaces to np dtypes to use as the + default dtype for the arrays. An easy way how to configure a custom + mapping through Gin is to define a gin-configurable function that returns + desired mapping and call it in your Gin congif file, for example: + `suite_gym.load.spec_dtype_map = @get_custom_mapping()`. + gym_kwargs: Optional kwargs to pass to the Gym environment class. + render_kwargs: Optional kwargs for rendering to pass to `render()` of the + gym_wrapped environment. + + Returns: + A PyEnvironment instance. 
+ """ + gym_kwargs = gym_kwargs if gym_kwargs else {} + gym_spec = gym.spec(environment_name) + gym_env = gym_spec.make(**gym_kwargs) + + if max_episode_steps is None and gym_spec.max_episode_steps is not None: + max_episode_steps = gym_spec.max_episode_steps + + return wrap_env( + gym_env, + discount=discount, + max_episode_steps=max_episode_steps, + gym_env_wrappers=gym_env_wrappers, + env_wrappers=env_wrappers, + spec_dtype_map=spec_dtype_map, + render_kwargs=render_kwargs, + ) @gin.configurable @@ -122,131 +122,139 @@ def wrap_env( auto_reset: bool = True, render_kwargs: Optional[Dict[str, Any]] = None, ) -> py_environment.PyEnvironment: - """Wraps given gym environment with TF Agent's GymWrapper. - - Note that by default a TimeLimit wrapper is used to limit episode lengths - to the default benchmarks defined by the registered environments. - - Args: - gym_env: An instance of OpenAI gym environment. - discount: Discount to use for the environment. - max_episode_steps: Used to create a TimeLimitWrapper. No limit is applied if - set to None or 0. Usually set to `gym_spec.max_episode_steps` in `load`. - gym_env_wrappers: Iterable with references to wrapper classes to use - directly on the gym environment. - time_limit_wrapper: Wrapper that accepts (env, max_episode_steps) params to - enforce a TimeLimit. Usuaully this should be left as the default, - wrappers.TimeLimit. - env_wrappers: Iterable with references to wrapper classes to use on the - gym_wrapped environment. - spec_dtype_map: A dict that maps gym specs to tf dtypes to use as the - default dtype for the tensors. An easy way how to configure a custom - mapping through Gin is to define a gin-configurable function that returns - desired mapping and call it in your Gin config file, for example: - `suite_gym.load.spec_dtype_map = @get_custom_mapping()`. - auto_reset: If True (default), reset the environment automatically after a - terminal state is reached. - render_kwargs: Optional `dict` of keywoard arguments for rendering. - - Returns: - A PyEnvironment instance. - """ - - for wrapper in gym_env_wrappers: - gym_env = wrapper(gym_env) - env = gym_wrapper.GymWrapper( - gym_env, - discount=discount, - spec_dtype_map=spec_dtype_map, - auto_reset=auto_reset, - render_kwargs=render_kwargs, - ) - - if max_episode_steps is not None and max_episode_steps > 0: - env = time_limit_wrapper(env, max_episode_steps) - - for wrapper in env_wrappers: - env = wrapper(env) - - return env + """Wraps given gym environment with TF Agent's GymWrapper. + + Note that by default a TimeLimit wrapper is used to limit episode lengths + to the default benchmarks defined by the registered environments. + + Args: + gym_env: An instance of OpenAI gym environment. + discount: Discount to use for the environment. + max_episode_steps: Used to create a TimeLimitWrapper. No limit is applied if + set to None or 0. Usually set to `gym_spec.max_episode_steps` in `load`. + gym_env_wrappers: Iterable with references to wrapper classes to use + directly on the gym environment. + time_limit_wrapper: Wrapper that accepts (env, max_episode_steps) params to + enforce a TimeLimit. Usuaully this should be left as the default, + wrappers.TimeLimit. + env_wrappers: Iterable with references to wrapper classes to use on the + gym_wrapped environment. + spec_dtype_map: A dict that maps gym specs to tf dtypes to use as the + default dtype for the tensors. 
An easy way how to configure a custom + mapping through Gin is to define a gin-configurable function that returns + desired mapping and call it in your Gin config file, for example: + `suite_gym.load.spec_dtype_map = @get_custom_mapping()`. + auto_reset: If True (default), reset the environment automatically after a + terminal state is reached. + render_kwargs: Optional `dict` of keywoard arguments for rendering. + + Returns: + A PyEnvironment instance. + """ + + for wrapper in gym_env_wrappers: + gym_env = wrapper(gym_env) + env = gym_wrapper.GymWrapper( + gym_env, + discount=discount, + spec_dtype_map=spec_dtype_map, + auto_reset=auto_reset, + render_kwargs=render_kwargs, + ) + + if max_episode_steps is not None and max_episode_steps > 0: + env = time_limit_wrapper(env, max_episode_steps) + + for wrapper in env_wrappers: + env = wrapper(env) + + return env @gin.configurable -def create_domain(env_name, root_dir=None, env_wrappers=(), - gym_env_wrappers=(), **env_kwargs): - if env_name == WEB_NAVIGATION: - # noinspection PyUnresolvedReferences - from a2perf.domains import web_navigation - from a2perf.domains.web_navigation.gwob.CoDE import vocabulary_node - save_vocab_dir = os.path.join(root_dir, 'vocabulary') - reload_vocab = env_kwargs.pop('reload_vocab', True) - vocab_type = env_kwargs.pop('vocab_type', 'threaded') - if vocab_type == 'threaded': - global_vocab = vocabulary_node.LockedThreadedVocabulary() - elif vocab_type == 'unlocked': - global_vocab = vocabulary_node.UnlockedVocabulary() - elif vocab_type == 'multiprocessing': - global_vocab = vocabulary_node.LockedMultiprocessingVocabulary() +def create_domain( + env_name, root_dir=None, env_wrappers=(), gym_env_wrappers=(), **env_kwargs +): + if env_name == WEB_NAVIGATION: + # noinspection PyUnresolvedReferences + from a2perf.domains import web_navigation + from a2perf.domains.web_navigation.gwob.CoDE import vocabulary_node + + save_vocab_dir = os.path.join(root_dir, "vocabulary") + reload_vocab = env_kwargs.pop("reload_vocab", True) + vocab_type = env_kwargs.pop("vocab_type", "threaded") + if vocab_type == "threaded": + global_vocab = vocabulary_node.LockedThreadedVocabulary() + elif vocab_type == "unlocked": + global_vocab = vocabulary_node.UnlockedVocabulary() + elif vocab_type == "multiprocessing": + global_vocab = vocabulary_node.LockedMultiprocessingVocabulary() + else: + raise ValueError(f"Unknown vocabulary type: {vocab_type}") + + if os.path.exists(save_vocab_dir) and reload_vocab: + vocab_files = os.listdir(save_vocab_dir) + if vocab_files: + vocab_files.sort() + latest_vocab_file = vocab_files[-1] + with open(os.path.join(save_vocab_dir, latest_vocab_file), "r") as f: + global_vocab_dict = json.load(f) + global_vocab.restore(state=global_vocab_dict) + seed = int(os.environ.get("SEED", None)) + num_websites = int(os.environ.get("NUM_WEBSITES", None)) + difficulty = int(os.environ.get("DIFFICULTY_LEVEL", None)) + + env_kwargs.update( + { + "global_vocabulary": global_vocab, + "seed": seed, + "num_websites": num_websites, + "difficulty": difficulty, + "browser_args": dict( + threading=False, + chrome_options={ + "--headless", + "--no-sandbox", + "--disable-gpu", + # '--disable-dev-shm-usage', + }, + ), + } + ) + env_wrappers = [wrappers.ActionClipWrapper] + list(env_wrappers) + elif env_name == CIRCUIT_TRAINING: + # noinspection PyUnresolvedReferences + from a2perf.domains import circuit_training + + env_kwargs.pop("netlist", None) + netlist_file_path = os.environ.get("NETLIST_PATH", None) + seed = 
int(os.environ.get("SEED", None)) + init_placement_file_path = os.environ.get("INIT_PLACEMENT_PATH", None) + std_cell_placer_mode = os.environ.get("STD_CELL_PLACER_MODE", None) + env_kwargs.update( + { + "global_seed": seed, + "netlist_file": netlist_file_path, + "init_placement": init_placement_file_path, + "output_plc_file": os.path.join(root_dir, "output.plc"), + "std_cell_placer_mode": std_cell_placer_mode, + } + ) + env_wrappers = [wrappers.ActionClipWrapper] + list(env_wrappers) + elif env_name == QUADRUPED_LOCOMOTION: + # noinspection PyUnresolvedReferences + from a2perf.domains import quadruped_locomotion + + motion_file_path = os.environ.get("MOTION_FILE_PATH", None) + env_kwargs["motion_files"] = [motion_file_path] + env_wrappers = [wrappers.ActionClipWrapper] + list(env_wrappers) else: - raise ValueError(f'Unknown vocabulary type: {vocab_type}') - - if os.path.exists(save_vocab_dir) and reload_vocab: - vocab_files = os.listdir(save_vocab_dir) - if vocab_files: - vocab_files.sort() - latest_vocab_file = vocab_files[-1] - with open(os.path.join(save_vocab_dir, latest_vocab_file), 'r') as f: - global_vocab_dict = json.load(f) - global_vocab.restore(state=global_vocab_dict) - seed = int(os.environ.get('SEED', None)) - num_websites = int(os.environ.get('NUM_WEBSITES', None)) - difficulty = int(os.environ.get('DIFFICULTY_LEVEL', None)) - - env_kwargs.update({ - 'global_vocabulary': global_vocab, - 'seed': seed, - 'num_websites': num_websites, - 'difficulty': difficulty, - 'browser_args': dict( - threading=False, - chrome_options={ - '--headless', - '--no-sandbox', - '--disable-gpu', - '--disable-dev-shm-usage', - }, - ), - }) - env_wrappers = [wrappers.ActionClipWrapper] + list(env_wrappers) - elif env_name == CIRCUIT_TRAINING: - # noinspection PyUnresolvedReferences - from a2perf.domains import circuit_training - - env_kwargs.pop('netlist', None) - netlist_file_path = os.environ.get('NETLIST_PATH', None) - seed = int(os.environ.get('SEED', None)) - init_placement_file_path = os.environ.get('INIT_PLACEMENT_PATH', None) - std_cell_placer_mode = os.environ.get('STD_CELL_PLACER_MODE', None) - env_kwargs.update({ - 'global_seed': seed, - 'netlist_file': netlist_file_path, - 'init_placement': init_placement_file_path, - 'output_plc_file': os.path.join(root_dir, - 'output.plc'), - 'std_cell_placer_mode': std_cell_placer_mode - }) - env_wrappers = [wrappers.ActionClipWrapper] + list(env_wrappers) - elif env_name == QUADRUPED_LOCOMOTION: - # noinspection PyUnresolvedReferences - from a2perf.domains import quadruped_locomotion - motion_file_path = os.environ.get('MOTION_FILE_PATH', None) - env_kwargs['motion_files'] = [motion_file_path] - env_wrappers = [wrappers.ActionClipWrapper] + list(env_wrappers) - else: - raise NotImplementedError(f'Unknown environment: {env_name}') - - logging.info('Creating domain %s with kwargs %s', env_name, env_kwargs) - return load(environment_name=env_name, - env_wrappers=env_wrappers, - gym_env_wrappers=gym_env_wrappers, - gym_kwargs=env_kwargs) + raise NotImplementedError(f"Unknown environment: {env_name}") + + logging.info("Creating domain %s with kwargs %s", env_name, env_kwargs) + return load( + environment_name=env_name, + env_wrappers=env_wrappers, + gym_env_wrappers=gym_env_wrappers, + gym_kwargs=env_kwargs, + ) diff --git a/a2perf/launch/__init__.py b/a2perf/launch/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/a2perf/launch/docker_utils.py b/a2perf/launch/docker_utils.py new file mode 100644 index 0000000..96ee3a7 --- /dev/null +++ 
b/a2perf/launch/docker_utils.py @@ -0,0 +1,198 @@ +import os +from typing import Optional + +from absl import logging +from xmanager import xm + +from a2perf.constants import BenchmarkDomain + + +GENERIC_GIN_CONFIG_NAME = "submission_config.gin" +DOCKER_EXPERIMENT_DIR = "/experiment_dir" +DOCKER_PARTICIPANT_DIR = "/participant_code" +DOCKER_DATASETS_PATH = "/workdir/datasets" + + +def _get_common_setup(uid: str, user: str): + return [ + """ + ARG APT_COMMAND="apt-get -o Acquire::Retries=3 \ + --no-install-recommends -y" + """, + "ENV DEBIAN_FRONTEND=noninteractive", + "ENV TZ=America/New_York", + """ + RUN ${APT_COMMAND} update --allow-releaseinfo-change && \ + ${APT_COMMAND} install sudo \ + wget \ + software-properties-common \ + curl \ + tmux \ + telnet \ + net-tools \ + vim \ + less \ + unzip && \ + rm -rf /var/lib/apt/lists/* + """, + """ + RUN add-apt-repository -y ppa:ubuntu-toolchain-r/test && \ + ${APT_COMMAND} install -y g++-11 + """, + f""" + RUN if ! getent passwd {uid}; then \ + useradd -m -u {uid} {user}; \ + else \ + existing_user=$(getent passwd {uid} | cut -d: -f1); \ + if [ "{user}" != "$existing_user" ]; then \ + usermod -l {user} $existing_user; \ + usermod -d /home/{user} -m {user}; \ + fi; \ + fi + """, + f""" + RUN echo "{user} ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers + """, + ] + + +def get_entrypoint(domain: str, user: str) -> xm.CommandList: + entrypoints = { + BenchmarkDomain.QUADRUPED_LOCOMOTION.value: xm.CommandList( + [ + "echo $@", + f""" +su {user} -c /bin/bash < \ - typing.List[multiprocessing.Process]: - processes = [] - for profiler_class, profiler_event in zip(profilers, profiler_events): - logging.info(f'Starting profiler: {profiler_class}') - profiler = profiler_class(participant_process_event=participant_event, - profiler_event=profiler_event, - participant_process=participant_process, - base_log_dir=log_dir) - profiler_process = mp_context.Process(target=profiler.start) - profiler_process.start() - processes.append(profiler_process) - return processes - - -def _start_inference_profilers(participant_event, profilers, pipes, - profiler_started_events, base_log_dir, mp_context): - processes = [] - profiler_objects = [] - for profiler_class, pipe, profiler_event in zip(profilers, pipes, - profiler_started_events): - logging.info(f'Starting profiler: {profiler_class}') - profiler = profiler_class(pipe_for_participant_process=pipe, - profiler_event=profiler_event, - participant_process_event=participant_event, - base_log_dir=base_log_dir) - profiler_objects.append(profiler) - profiler_process = mp_context.Process(target=profiler.start, ) - profiler_process.start() - processes.append(profiler_process) - return processes, profiler_objects + """Context manager for temporarily changing the working directory.""" + prev_cwd = os.getcwd() + os.chdir(path) + sys.path.insert(0, path) + try: + yield + finally: + os.chdir(prev_cwd) + sys.path.remove(path) @gin.configurable -class Submission: - def __init__(self, - root_dir: str, - participant_module_path: str = None, - profilers: typing.List[typing.Type[BaseProfiler]] = None, - mode: BenchmarkMode = BenchmarkMode.TRAIN, - domain: BenchmarkDomain = BenchmarkDomain.WEB_NAVIGATION, - metric_values_dir: str = None, - train_logs_dirs: typing.List[str] = None, - num_inference_steps: int = 1000, - num_inference_episodes: int = 1, - time_participant_code: bool = True, - measure_emissions: bool = False, - baseline_measure_sec: float = 0, - plot_metrics: bool = True, - run_offline_metrics_only: bool = False, - 
reliability_metrics: typing.List[ReliabilityMetrics] = None, - tracking_mode: str = None): - """Object that represents a submission to the benchmark. +def track_emissions_decorator( + project_name: typing.Optional[str] = None, + measure_power_secs: typing.Optional[int] = None, + api_call_interval: typing.Optional[int] = None, + api_endpoint: typing.Optional[str] = None, + api_key: typing.Optional[str] = None, + output_dir: typing.Optional[str] = None, + output_file: typing.Optional[str] = None, + save_to_file: typing.Optional[bool] = None, + save_to_api: typing.Optional[bool] = None, + save_to_logger: typing.Optional[bool] = None, + save_to_prometheus: typing.Optional[bool] = None, + prometheus_url: typing.Optional[str] = None, + logging_logger: typing.Optional[ + codecarbon.output_methods.logger.LoggerOutput + ] = None, + offline: typing.Optional[bool] = None, + emissions_endpoint: typing.Optional[str] = None, + experiment_id: typing.Optional[str] = None, + country_iso_code: typing.Optional[str] = None, + region: typing.Optional[str] = None, + cloud_provider: typing.Optional[str] = None, + cloud_region: typing.Optional[str] = None, + gpu_ids: typing.Optional[typing.List] = None, + co2_signal_api_token: typing.Optional[str] = None, + log_level: typing.Optional[typing.Union[int, str]] = None, + default_cpu_power: typing.Optional[int] = None, + pue: typing.Optional[float] = 1.0, +): + def decorator(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + decorator_instance = codecarbon.track_emissions( + project_name=project_name, + measure_power_secs=measure_power_secs, + api_call_interval=api_call_interval, + api_endpoint=api_endpoint, + api_key=api_key, + output_dir=output_dir, + output_file=output_file, + save_to_file=save_to_file, + save_to_api=save_to_api, + save_to_logger=save_to_logger, + save_to_prometheus=save_to_prometheus, + prometheus_url=prometheus_url, + logging_logger=logging_logger, + offline=offline, + emissions_endpoint=emissions_endpoint, + experiment_id=experiment_id, + country_iso_code=country_iso_code, + region=region, + cloud_provider=cloud_provider, + cloud_region=cloud_region, + gpu_ids=gpu_ids, + co2_signal_api_token=co2_signal_api_token, + log_level=log_level, + default_cpu_power=default_cpu_power, + pue=pue, + ) + return decorator_instance(func)(*args, **kwargs) + + return wrapper + + return decorator + + +def setup_subprocess_env(gin_config_str, absl_flags): + # Parse the gin config + gin.parse_config(gin_config_str) + logging.info("Gin config parsed") + + # Register absl flags from the dictionary + for flag_name, flag_value in absl_flags.items(): + if flag_name in flags.FLAGS: + flags.FLAGS[flag_name].value = flag_value + logging.info("Flag %s set to %s", flag_name, flag_value) + else: + logging.warning("Flag %s not found", flag_name) - Args: - participant_module_path: Path to the module that contains the participant's code. - profilers: List of profilers to use. - mode: Benchmark mode (train or inference). - domain: Benchmark domain (web navigation, circuit training or quadruped locomotion). - root_dir: Root directory for the submission. - metric_values_dir: Directory where the metric values will be saved. - train_logs_dirs: List of directories where the training logs are saved. Relative to the root directory. - num_inference_steps: Number of steps to run inference for. - num_inference_episodes: Number of episodes to run inference for. - time_participant_code: Whether to time the participant's code. - measure_emissions: Whether to measure emissions. 
- baseline_measure_sec: Baseline time to measure emissions for. - plot_metrics: Whether to plot the metrics. - run_offline_metrics_only: Whether to run only the offline metrics. - reliability_metrics: List of reliability metrics to compute. - tracking_mode: Tracking mode for the participant's code. - """ - self.root_dir = root_dir - self.metric_values_dir = metric_values_dir - self.train_logs_dirs = train_logs_dirs - os.makedirs(self.root_dir, exist_ok=True) - if self.metric_values_dir is None: - self.metric_values_dir = os.path.join(self.root_dir, 'metrics') - os.makedirs(self.metric_values_dir, exist_ok=True) - if self.train_logs_dirs is not None: - self.train_logs_dirs = [os.path.join(self.root_dir, train_logs_dir) for - train_logs_dir in - self.train_logs_dirs] - self.run_offline_metrics_only = run_offline_metrics_only - self.baseline_measure_sec = baseline_measure_sec - self.tracking_mode = tracking_mode - self.mp_context = multiprocessing.get_context('spawn') - self.gin_config_str = None - - self.measure_emissions = measure_emissions - self.plot_metrics = plot_metrics - self.num_inference_steps = num_inference_steps - self.num_inference_episodes = num_inference_episodes - self.time_inference_steps = time_participant_code - self.profilers = profilers if profilers is not None else [] - for profiler in self.profilers: - profiler.base_log_dir = self.root_dir - self.participant_module_path = os.path.abspath(participant_module_path) - self.domain = domain - self.mode = mode - self.reliability_metrics = reliability_metrics - - self.metrics_results = {} - metrics_path = os.path.join(self.metric_values_dir, - 'inference_metrics.json' if self.mode == BenchmarkMode.INFERENCE else 'train_metrics.json') - if os.path.exists(metrics_path): - logging.info( - f'Loading pre-existing metric results from {metrics_path}') - with open(metrics_path, 'r') as f: - self.metrics_results = json.load(f) - - def _load_participant_spec(self, filename): - """Loads the participant spec from the participant module path.""" - participant_file_path = os.path.join(self.participant_module_path, filename) - spec = importlib.util.spec_from_file_location(f"{filename}", - participant_file_path) + +def _load_spec(module_path, filename): + """Loads the spec from the given module path.""" + participant_file_path = os.path.join(module_path, filename) + spec = importlib.util.spec_from_file_location(f"{filename}", participant_file_path) return spec - def _load_participant_module(self, filename): - """Load the participant module and return the module object.""" - spec = self._load_participant_spec(filename) - participant_module = importlib.util.module_from_spec(spec) - return participant_module, spec - - @gin.configurable("Submission.create_domain") - def create_domain(self, **kwargs): - if self.domain == BenchmarkDomain.WEB_NAVIGATION: - from rl_perf.domains.web_nav.gwob.CoDE import vocabulary_node - - if kwargs.get('reload_vocab', False): - global_vocab_dict = np.load( - os.path.join(self.root_dir, 'train', 'global_vocab.npy'), - allow_pickle=True).item() - global_vocab = vocabulary_node.LockedVocabulary() - global_vocab.restore(dict(global_vocab=global_vocab_dict)) - kwargs['global_vocabulary'] = global_vocab - kwargs.pop('reload_vocab') - elif self.domain == BenchmarkDomain.CIRCUIT_TRAINING: - pass - elif self.domain == BenchmarkDomain.QUADRUPED_LOCOMOTION: - from rl_perf.domains import quadruped_locomotion - else: - raise NotImplementedError(f'Domain {self.domain} not implemented') - return gym.make(id=self.domain.value, 
**kwargs) - - def _get_observation_data(self, env): - data = [] - for _ in range(self.num_inference_steps): - observation = env.observation_space.sample() - data.append(observation) - return data - - def _train(self, participant_event: multiprocessing.Event, - profiler_events: typing.List[multiprocessing.Event]): - gin.parse_config(self.gin_config_str) - participant_event.set() - - # Wait for all profilers to start up before continuing - for profiler_event in profiler_events: - profiler_event.wait() - - with working_directory(self.participant_module_path): - participant_module, participant_module_spec = self._load_participant_module( - 'train.py') - print(self.participant_module_path) - print(participant_module_spec) - participant_module_spec.loader.exec_module(participant_module) - - if self.measure_emissions: - @codecarbon.track_emissions(output_dir=self.metric_values_dir, - output_file='train_emissions.csv', ) - def train(): - return participant_module.train() - - train() - else: - participant_module.train() - - participant_event.clear() - for profiler_event in profiler_events: - profiler_event.clear() - - def _inference(self, participant_event: multiprocessing.Event, - profiler_events: typing.List[multiprocessing.Event], - inference_data: typing.List[typing.Any], - rollout_data_queue: multiprocessing.Queue, ): - gin.parse_config(self.gin_config_str) - participant_event.set() - - env = self.create_domain() - metric_results = {} - with working_directory(self.participant_module_path): - participant_module, participant_module_spec = self._load_participant_module( - 'inference.py') - print(self.participant_module_path) - print(participant_module_spec) - participant_module_spec.loader.exec_module(participant_module) - participant_model = participant_module.load_model(env=env) - - preprocessed_data = [participant_module.preprocess_observation(x) for x in - inference_data] - - def inference_step(): - return participant_module.infer_once(model=participant_model, - observation=preprocessed_data[i]) - - if self.time_inference_steps: - inference_times = [] - for i in range(self.num_inference_steps): - inference_times.append(timeit.timeit(inference_step, number=1)) - - metric_results['inference_time'] = dict(values=inference_times, - mean=np.mean( - inference_times), - std=np.std( - inference_times)) - - def perform_rollouts(): - all_rewards = [] - for _ in range(self.num_inference_episodes): - observation, info = env.reset() - terminated = False - truncated = False - rewards = 0 - while not terminated and not truncated: - preprocessed_obs = participant_module.preprocess_observation( - observation) - action = participant_module.infer_once(model=participant_model, - observation=preprocessed_obs) - observation, reward, terminated, truncated, _ = env.step(action) - rewards += reward - all_rewards.append(rewards) - print(f'Episode reward: {rewards}') - return all_rewards - - if self.measure_emissions: - @codecarbon.track_emissions(output_dir=self.metric_values_dir, - output_file='inference_emissions.csv', ) - def perform_rollouts_and_track_emissions(): - return perform_rollouts() - - all_rewards = perform_rollouts_and_track_emissions() - else: - all_rewards = perform_rollouts() - - metric_results['rollout_returns'] = dict(values=all_rewards, - mean=np.mean(all_rewards), - std=np.std(all_rewards)) - rollout_data_queue.put(metric_results) - participant_event.clear() - for profiler_event in profiler_events: - profiler_event.clear() - - print('Finished inference. 
Now saving') - with open(os.path.join(self.metric_values_dir, - 'inference_metrics_results.json'), - 'w') as f: - json.dump(metric_results, f) - - def _run_inference_reliability_metrics(self, values=None): - metrics = [] - for metric in self.reliability_metrics: - if metric == ReliabilityMetrics.MadAcrossRollouts: - metrics.append(MadAcrossRollouts()) - elif metric == ReliabilityMetrics.IqrAcrossRollouts: - metrics.append(IqrAcrossRollouts()) - elif metric == ReliabilityMetrics.UpperCVaRAcrossRollouts: - metrics.append(UpperCVaRAcrossRollouts()) - elif metric == ReliabilityMetrics.LowerCVaRAcrossRollouts: - metrics.append(LowerCVaRAcrossRollouts()) - elif metric == ReliabilityMetrics.StddevAcrossRollouts: - metrics.append(StddevAcrossRollouts()) - elif metric == ReliabilityMetrics.UpperCVaRAcrossRollouts: - metrics.append(UpperCVaRAcrossRollouts()) - elif metric == ReliabilityMetrics.LowerCVaRAcrossRollouts: - metrics.append(LowerCVaRAcrossRollouts()) - else: - raise ValueError(f'Invalid metric: {metric}') - with open(os.path.join(self.metric_values_dir, 'rollouts.csv'), 'w') as f: - writer = csv.writer(f) - writer.writerow(['episode_num', 'reward']) - for i, value in enumerate(values): - writer.writerow([i, value]) - - evaluator = Evaluator(metrics=metrics, ) - reliability_metrics = evaluator.evaluate( - run_paths=[os.path.join(self.metric_values_dir, 'rollouts.csv')], ) - self.metrics_results.update(reliability_metrics) - - def _run_train_reliability_metrics(self): - # TODO make sure to write gin config for metric parameters - metrics = [] - for metric in self.reliability_metrics: - if metric == ReliabilityMetrics.IqrWithinRuns: - metrics.append(IqrWithinRuns()) - elif metric == ReliabilityMetrics.IqrAcrossRuns: - metrics.append(IqrAcrossRuns()) - elif metric == ReliabilityMetrics.LowerCVaROnDiffs: - metrics.append(LowerCVaROnDiffs()) - elif metric == ReliabilityMetrics.LowerCVaROnDrawdown: - metrics.append(LowerCVaROnDiffs()) - elif metric == ReliabilityMetrics.LowerCVarOnAcross: - metrics.append(LowerCVaROnAcross()) - elif metric == ReliabilityMetrics.MedianPerfDuringTraining: - metrics.append(MedianPerfDuringTraining()) - else: - raise ValueError(f'Invalid metric: {metric}') - - logging.info(f'Running reliability metrics: {metrics}') - logging.info(f'Logging to {self.metric_values_dir}') - - if self.train_logs_dirs: - run_paths = self.train_logs_dirs - logging.info(f'Found {len(run_paths)} runs in {self.train_logs_dirs}') - logging.info(f'Run paths: {run_paths}') - - evaluator = Evaluator(metrics=metrics, ) - reliability_metrics = evaluator.evaluate(run_paths=run_paths, ) - else: - logging.warning(f'No runs found in {self.train_logs_dirs}') - reliability_metrics = {} - self.metrics_results.update(reliability_metrics) - - def _run_training_benchmark(self): - if not self.run_offline_metrics_only: - # Need a participant event to signal to profilers - participant_started_event = multiprocessing.Event() - profilers_started_events = [multiprocessing.Event() for _ in - self.profilers] - - participant_process = self.mp_context.Process(target=self._train, - args=( - participant_started_event, - profilers_started_events)) - participant_process.start() - profilers = _start_profilers(profilers=self.profilers, - participant_event=participant_started_event, - profiler_events=profilers_started_events, - participant_process=participant_process, - log_dir=self.root_dir, - mp_context=self.mp_context) - logging.info(f'Participant module process ID: {participant_process.pid}') - 
participant_process.join() - logging.info( - f'Participant module process {participant_process.pid} finished') - if participant_process.is_alive(): - logging.error('Participant process is still running') - elif participant_process.exitcode != 0: - logging.error( - f'Participant process exited with code {participant_process.exitcode}') - else: - logging.info(f'Participant process {participant_process.pid} finished') - - for profiler in profilers: - profiler.join() - if profiler.is_alive(): - logging.error(f'Profiler process {profiler.pid} is still running') - elif profiler.exitcode != 0: - logging.error( - f'Profiler process {profiler.pid} exited with code {profiler.exitcode}') + +def _load_module(module_path, filename): + """Loads the module from the given module path.""" + spec = _load_spec(module_path, filename) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module, spec + + +def _load_policy(module_path, env): + """Loads the policy from the participant's module.""" + with working_directory(module_path): + participant_module, participant_module_spec = _load_module( + module_path, "inference.py" + ) + policy = participant_module.load_policy(env) + return policy, participant_module + + +def perform_rollouts( + module_path, + create_domain_fn, + num_episodes=1, + gin_config_str=None, + absl_flags=None, + rollout_rewards_queue=None, +): + """Performs rollouts using the given policy. + + Args: + create_domain_fn: Function that creates the domain. + preprocess_obs_fn: Function that preprocesses the observation. + infer_once_fn: Function that performs inference. + num_episodes: Number of episodes to perform rollouts. + policy: Policy to use for performing rollouts. + gin_config_str: Gin config string to use for creating the domain. + + Returns: + List of rewards from each episode. 
+ """ + setup_subprocess_env(gin_config_str, absl_flags) + env = create_domain_fn() + policy, participant_module = _load_policy(module_path, env) + episode_reward_metric = py_metrics.AverageReturnMetric() + rollout_actor = actor.Actor( + env=env, + train_step=policy._train_step_from_last_restored_checkpoint_path, + policy=policy, + observers=[episode_reward_metric], + episodes_per_run=1, + ) + + all_rewards = [] + for _ in range(num_episodes): + rollout_actor.run() + all_rewards.append(float(episode_reward_metric.result())) + episode_reward_metric.reset() + + if rollout_rewards_queue: + for reward in all_rewards: + rollout_rewards_queue.put(reward) + + return all_rewards + + +def _perform_rollout_task( + generalization_task, + domain, + root_dir, + participant_module_path, + num_generalization_episodes, + gin_config_str, + absl_flags, +): + generalization_env_vars = {} + + if domain == BenchmarkDomain.WEB_NAVIGATION: + if generalization_task == "difficulty_level_1_num_websites_1": + generalization_env_vars["DIFFICULTY_LEVEL"] = "1" + generalization_env_vars["NUM_WEBSITES"] = "1" + elif generalization_task == "difficulty_level_1_num_websites_5": + generalization_env_vars["DIFFICULTY_LEVEL"] = "1" + generalization_env_vars["NUM_WEBSITES"] = "5" + elif generalization_task == "difficulty_level_1_num_websites_10": + generalization_env_vars["DIFFICULTY_LEVEL"] = "1" + generalization_env_vars["NUM_WEBSITES"] = "10" else: - logging.info(f'Profiler process {profiler.pid} finished') - - if self.reliability_metrics: - self._run_train_reliability_metrics() - - ################################################## - # Save raw metrics to disk, and plot results - ################################################## - - with open(os.path.join(self.metric_values_dir, 'metric_results.json'), - 'w') as f: - json.dump(self.metrics_results, f) - - # Plot metrics and save to file - if self.plot_metrics: - for profiler_object in self.profilers: - title, fig = profiler_object.plot_results() - plt.savefig(os.path.join(self.metric_values_dir, f'{title}.png')) - self._plot_metrics() - - def _run_inference_benchmark(self): - if not self.run_offline_metrics_only: - env = self.create_domain() - inference_data = self._get_observation_data(env) - - # Need a participant event to signal to profilers - participant_started_event = multiprocessing.Event() - profilers_started_events = [multiprocessing.Event() for _ in - self.profilers] - - rollout_data_queue = self.mp_context.Queue() - participant_process = self.mp_context.Process(target=self._inference, - args=( - participant_started_event, - profilers_started_events, - inference_data, - rollout_data_queue, - )) - - participant_process.start() - profilers = _start_profilers(profilers=self.profilers, - participant_event=participant_started_event, - profiler_events=profilers_started_events, - participant_process=participant_process, - log_dir=self.root_dir, - mp_context=self.mp_context) - - logging.info(f'Participant module process ID: {participant_process.pid}') - participant_process.join() - logging.info( - f'Participant module process {participant_process.pid} finished') - if participant_process.is_alive(): - logging.error('Participant process is still running') - elif participant_process.exitcode != 0: - logging.error( - f'Participant process exited with code {participant_process.exitcode}') - else: - logging.info(f'Participant process {participant_process.pid} finished') - - for profiler in profilers: - profiler.join() - if profiler.is_alive(): - logging.error(f'Profiler 
process {profiler.pid} is still running') - elif profiler.exitcode != 0: - logging.error( - f'Profiler process {profiler.pid} exited with code {profiler.exitcode}') + raise ValueError( + "Generalization tasks for WebNavigation domain must be either" + " difficulty_level_1_num_websites_1, " + "difficulty_level_1_num_websites_5, " + "or difficulty_level_1_num_websites_10" + ) + elif domain == BenchmarkDomain.CIRCUIT_TRAINING: + if generalization_task == "toy_macro_stdcell": + generalization_env_vars["NETLIST_PATH"] = pkg_resources.resource_filename( + "a2perf", + "domains/circuit_training/circuit_training/environment/test_data/toy_macro_stdcell/netlist.pb.txt", + ) + generalization_env_vars["INIT_PLACEMENT_PATH"] = ( + pkg_resources.resource_filename( + "a2perf", + "domains/circuit_training/circuit_training/environment/test_data/toy_macro_stdcell/initial.plc", + ) + ) + elif generalization_task == "ariane": + generalization_env_vars["NETLIST_PATH"] = pkg_resources.resource_filename( + "a2perf", + "domains/circuit_training/circuit_training/environment/test_data/ariane/netlist.pb.txt", + ) + generalization_env_vars["INIT_PLACEMENT_PATH"] = ( + pkg_resources.resource_filename( + "a2perf", + "domains/circuit_training/circuit_training/environment/test_data/ariane/initial.plc", + ) + ) else: - logging.info(f'Profiler process {profiler.pid} finished') - - def run_benchmark(self): - self.gin_config_str = gin.config_str() # save gin configs for multiprocessing - if self.mode == BenchmarkMode.TRAIN: - self._run_training_benchmark() - elif self.mode == BenchmarkMode.INFERENCE: - self._run_inference_benchmark() + raise ValueError( + "Generalization tasks for CircuitTraining domain must be either toy_macro_stdcell or ariane" + ) + elif domain == BenchmarkDomain.QUADRUPED_LOCOMOTION: + if generalization_task == "dog_pace": + generalization_env_vars["MOTION_FILE_PATH"] = ( + pkg_resources.resource_filename( + "a2perf", + "domains/quadruped_locomotion/motion_imitation/data/motions/dog_pace.txt", + ) + ) + elif generalization_task == "dog_trot": + generalization_env_vars["MOTION_FILE_PATH"] = ( + pkg_resources.resource_filename( + "a2perf", + "domains/quadruped_locomotion/motion_imitation/data/motions/dog_trot.txt", + ) + ) + elif generalization_task == "dog_spin": + generalization_env_vars["MOTION_FILE_PATH"] = ( + pkg_resources.resource_filename( + "a2perf", + "domains/quadruped_locomotion/motion_imitation/data/motions/dog_spin.txt", + ) + ) else: - raise ValueError('Benchmark mode must be either train or inference') + raise ValueError( + "Generalization tasks are only supported for WebNavigation, CircuitTraining, and QuadrupedLocomotion domains." 
+ ) + + for key, value in generalization_env_vars.items(): + os.environ[key] = value + + create_domain_fn = functools.partial( + suite_gym.create_domain, env_name=domain.value, root_dir=root_dir + ) + all_rewards = perform_rollouts( + module_path=participant_module_path, + create_domain_fn=create_domain_fn, + num_episodes=num_generalization_episodes, + gin_config_str=gin_config_str, + absl_flags=absl_flags, + rollout_rewards_queue=None, + ) + + # Reset the environment variables after rolling out each task + for key in generalization_env_vars.keys(): + os.environ.pop(key) + + return generalization_task, all_rewards + + +def train( + module_path, gin_config_str=None, absl_flags=None, participant_args: dict = None +): + """Trains the participant's policy.""" + setup_subprocess_env(gin_config_str, absl_flags) + with working_directory(module_path): + participant_module, participant_module_spec = _load_module( + module_path, "train.py" + ) + if participant_args is None: + participant_args = {} + print(participant_args) + participant_module.train(**participant_args) + + +@gin.configurable +class Submission: + + def __init__( + self, + root_dir: str, + metric_values_dir: str, + participant_module_path: str, + mode: BenchmarkMode, + domain: BenchmarkDomain, + participant_args: str, + num_inference_steps: int = 1000, + num_inference_episodes: int = 1, + num_generalization_episodes: int = 1, + time_participant_code: bool = True, + measure_emissions: bool = False, + plot_metrics: bool = True, + run_offline_metrics_only: bool = False, + reliability_metrics: typing.List[ReliabilityMetrics] = None, + generalization_tasks: typing.List = None, + ): + """Object that represents a submission to the benchmark. + + Args: + participant_module_path: Path to the module that contains the + participant's code. + mode: Benchmark mode (train or inference). + domain: Benchmark domain (web navigation, circuit training or quadruped + locomotion). + root_dir: Root directory for the submission. + metric_values_dir: Directory where the metric values will be saved. + num_inference_steps: Number of steps to run inference for. + num_inference_episodes: Number of episodes to run inference for. + time_participant_code: Whether to time the participant's code. + measure_emissions: Whether to measure emissions. + plot_metrics: Whether to plot the metrics. + run_offline_metrics_only: Whether to run only the offline metrics. + reliability_metrics: List of reliability metrics to compute. 
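+ participant_args: String of additional arguments, parsed with parse_participant_args and forwarded as keyword arguments to the participant's train() function. + num_generalization_episodes: Number of episodes to roll out for each generalization task. + generalization_tasks: List of generalization tasks to evaluate when running in generalization mode.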
+ """ + self.root_dir = root_dir + self.generalization_tasks = generalization_tasks + self.metric_values_dir = metric_values_dir + os.makedirs(self.root_dir, exist_ok=True) + os.makedirs(self.metric_values_dir, exist_ok=True) + self.run_offline_metrics_only = run_offline_metrics_only + self.mp_context = multiprocessing.get_context("spawn") + self.gin_config_str = None + self.absl_flags = None + + self.participant_args = parse_participant_args(participant_args) + self.measure_emissions = measure_emissions + self.plot_metrics = plot_metrics + self.num_inference_steps = num_inference_steps + self.num_inference_episodes = num_inference_episodes + self.num_generalization_episodes = num_generalization_episodes + self.time_inference_steps = time_participant_code + self.participant_module_path = os.path.abspath(participant_module_path) + self.domain = domain + self.mode = mode + self.reliability_metrics = reliability_metrics + + self.metrics_results = {} + + if self.mode == BenchmarkMode.TRAIN: + metrics_path = os.path.join(self.metric_values_dir, "train_metrics.json") + elif self.mode == BenchmarkMode.INFERENCE: + metrics_path = os.path.join( + self.metric_values_dir, "inference_metrics.json" + ) + elif self.mode == BenchmarkMode.GENERALIZATION: + metrics_path = os.path.join( + self.metric_values_dir, "generalization_metrics.json" + ) + else: + raise ValueError( + "Benchmark mode must be either train, inference or generalization" + ) + + if os.path.exists(metrics_path): + logging.info(f"Loading pre-existing metric results from {metrics_path}") + with open(metrics_path, "r") as f: + self.metrics_results = json.load(f) + + def _get_observation_data(self, env): + data = [] + for _ in range(self.num_inference_steps): + observation = env.observation_space.sample() + data.append(observation) + return data + + def _train( + self, + ): + setup_subprocess_env(self.gin_config_str, self.absl_flags) + + @track_emissions_decorator( + output_dir=self.metric_values_dir, output_file="train_emissions.csv" + ) + def train_and_track_emissions(): + train_process = self.mp_context.Process( + target=train, + args=( + self.participant_module_path, + self.gin_config_str, + self.absl_flags, + self.participant_args, + ), + ) + train_process.start() + train_process.join() + + if self.measure_emissions: + return train_and_track_emissions() + else: + return train( + self.participant_module_path, + self.gin_config_str, + self.absl_flags, + self.participant_args, + ) + + def _perform_rollouts( + self, num_episodes, measure_emissions, output_dir, rollout_rewards_queue + ): + """ + Perform rollouts and optionally track emissions. + + Args: + num_episodes: Number of episodes to perform rollouts. + measure_emissions: Flag to indicate if emissions should be measured. + output_dir: Directory to save the emissions data. + + Returns: + List of rewards from each episode. 
+ """ + setup_subprocess_env(self.gin_config_str, self.absl_flags) + + create_domain_fn = functools.partial( + suite_gym.create_domain, env_name=self.domain.value, root_dir=self.root_dir + ) + if measure_emissions: + + @track_emissions_decorator( + output_dir=output_dir, output_file="inference_emissions.csv" + ) + def perform_rollouts_and_track_emissions(): + rollout_process = multiprocessing.Process( + target=perform_rollouts, + args=( + self.participant_module_path, + create_domain_fn, + num_episodes, + self.gin_config_str, + self.absl_flags, + rollout_rewards_queue, + ), + ) + rollout_process.start() + rollout_process.join() + + return perform_rollouts_and_track_emissions() + else: + return perform_rollouts( + create_domain_fn=create_domain_fn, + num_episodes=num_episodes, + module_path=self.participant_module_path, + gin_config_str=self.gin_config_str, + absl_flags=self.absl_flags, + ) + + def _run_training_benchmark(self): + if not self.run_offline_metrics_only: + participant_training_process = self.mp_context.Process( + target=self._train, + ) + + participant_training_process.start() + logging.info( + f"Participant training process ID: {participant_training_process.pid}" + ) + + participant_training_process.join() + logging.info( + f"Participant module process {participant_training_process.pid} finished" + ) + + if participant_training_process.is_alive(): + logging.error("Participant process is still running") + elif participant_training_process.exitcode != 0: + logging.error( + "Participant process exited with code" + f" {participant_training_process.exitcode}" + ) + else: + logging.info( + f"Participant process {participant_training_process.pid} finished" + ) + + def _run_generalization_benchmark(self): + all_returns = collections.defaultdict(list) + tasks = [] + + for generalization_task in self.generalization_tasks: + logging.info("Running generalization task: %s", generalization_task) + tasks.append( + ( + generalization_task, + self.domain, + self.root_dir, + self.participant_module_path, + self.num_generalization_episodes, + self.gin_config_str, + self.absl_flags, + ) + ) + + with multiprocessing.Pool() as pool: + results = pool.starmap(_perform_rollout_task, tasks) + pool.close() + pool.join() + + for generalization_task, all_rewards in results: + all_returns[generalization_task] = all_rewards + + # Save the rollouts for each generalization task to a file + logging.info("Saving generalization rollouts to file") + logging.info("Generalization rollouts: %s", all_returns) + with open( + os.path.join(self.metric_values_dir, "generalization_rollouts.json"), "w" + ) as f: + json.dump(all_returns, f) + + def _run_inference_benchmark(self): + if not self.run_offline_metrics_only: + logging.info("Creating Gymnasium domain...") + env = suite_gym.create_domain( + env_name=self.domain.value, root_dir=self.root_dir + ) + logging.info("Successfully created domain") + + logging.info("Generating inference data...") + inference_data = self._get_observation_data(env) + logging.info("Successfully generated inference data") + + metric_results = {} + + logging.info("Loading the policy for inference...") + participant_policy, participant_module = _load_policy( + module_path=self.participant_module_path, env=env + ) + + # Only include time_step_spec if the participant policy has it as an + # attribute. This will be useful for participants using TF agents. 
+ time_step_spec = getattr(participant_policy, "time_step_spec", None) + preprocessed_data = [ + participant_module.preprocess_observation( + x, time_step_spec=time_step_spec + ) + for x in inference_data + ] + logging.info("Finished preprocessing the observation data") + + if self.time_inference_steps: + logging.info("Timing inference steps...") + inference_times = [] + for i in range(self.num_inference_steps): + inference_step = lambda: participant_module.infer_once( + policy=participant_policy, + preprocessed_observation=preprocessed_data[i], + ) + inference_times.append(timeit.timeit(inference_step, number=1)) + logging.info("Finished timing inference steps") + + metric_results["inference_time"] = { + "values": inference_times, + "mean": np.mean(inference_times), + "std": np.std(inference_times), + "max": np.max(inference_times), + "median": np.median(inference_times), + "min": np.min(inference_times), + } + + # Running rollouts in a subprocess + rollout_returns_queue = multiprocessing.Queue() + rollout_process = multiprocessing.Process( + target=self._perform_rollouts, + args=( + self.num_inference_episodes, + self.measure_emissions, + self.metric_values_dir, + rollout_returns_queue, + ), + ) + + rollout_process.start() + rollout_process.join() + + all_rewards = [] + while not rollout_returns_queue.empty(): + all_rewards.append(rollout_returns_queue.get()) + + print(f"All rewards: {all_rewards}") + metric_results["rollout_returns"] = { + "values": [float(reward) for reward in all_rewards], + "mean": np.mean(all_rewards).astype(float), + "std": np.std(all_rewards).astype(float), + "max": np.max(all_rewards).astype(float), + "median": np.median(all_rewards).astype(float), + "min": np.min(all_rewards).astype(float), + } + + logging.info("Metrics Results: %s", metric_results) + with open( + os.path.join(self.metric_values_dir, "inference_metrics_results.json"), + "w", + ) as f: + json.dump(metric_results, f) + + def run_benchmark(self): + # Gin configs and absl flags must be saved to pass to subprocesses + self.gin_config_str = gin.config_str() + self.absl_flags = {name: flags.FLAGS[name].value for name in flags.FLAGS} + + if not os.path.exists(self.participant_module_path): + raise FileNotFoundError( + f"Participant module path {self.participant_module_path} not found. This is necessary for running training and inference code." + ) + + if self.mode == BenchmarkMode.TRAIN: + self._run_training_benchmark() + elif self.mode == BenchmarkMode.INFERENCE: + if not os.path.exists(self.root_dir): + raise FileNotFoundError( + f"Root directory {self.root_dir} not found. This is necessary for loading the trained model" + ) + self._run_inference_benchmark() + elif self.mode == BenchmarkMode.GENERALIZATION: + if not os.path.exists(self.root_dir): + raise FileNotFoundError( + f"Root directory {self.root_dir} not found. 
This is necessary for loading the trained model" + ) + self._run_generalization_benchmark() + + else: + raise ValueError("Benchmark mode must be either train or inference") diff --git a/a2perf/tutorials/example_submission/__init__.py b/a2perf/tutorials/example_submission/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/a2perf/tutorials/example_submission/inference.py b/a2perf/tutorials/example_submission/inference.py new file mode 100644 index 0000000..149d264 --- /dev/null +++ b/a2perf/tutorials/example_submission/inference.py @@ -0,0 +1,32 @@ +from absl import app + +import stable_baselines3 as sb3 + +def load_model(env): + # This function is intended to load and return the model. + # The `env` parameter can be used to specify the environment or any other context needed for loading the model. + model = sb3.PPO.load("ppo_cartpole") + return model + + + +def infer_once(model, observation): + # Use the model (assumed to be from Stable Baselines) to run inference on a single observation. + # The function receives a `model` and an `observation`, then returns the predicted action. + action, _states = model.predict(observation) + return action + + +def preprocess_observation(observation): + # Preprocess the observation data before feeding it to the model. + # Modify this function to suit the preprocessing needs of your specific model. + return observation + + +def main(_): + # Unused + pass + + +if __name__ == '__main__': + app.run(main) \ No newline at end of file diff --git a/a2perf/tutorials/example_submission/main.py b/a2perf/tutorials/example_submission/main.py new file mode 100644 index 0000000..cfba740 --- /dev/null +++ b/a2perf/tutorials/example_submission/main.py @@ -0,0 +1,26 @@ +import rl_perf +import rl_perf.domains.quadruped_locomotion + +import gymnasium as gym + +from stable_baselines3 import PPO +from stable_baselines3.common.env_util import make_vec_env + +# env = gym.make('QuadrupedLocomotion-v0') +# Parallel environments +vec_env = make_vec_env("QuadrupedLocomotion-v0", n_envs=4) + +model = PPO("MlpPolicy", vec_env, verbose=1) +model.learn(total_timesteps=250) +model.save("ppo_cartpole") + +del model # remove to demonstrate saving and loading + +model = PPO.load("ppo_cartpole") + +obs = vec_env.reset() +env = gym.make('QuadrupedLocomotion-v0',enable_rendering=True) +env.reset() +while True: + action, _states = model.predict(obs) + obs, rewards, dones, info = env.step(action) diff --git a/a2perf/tutorials/example_submission/requirements.txt b/a2perf/tutorials/example_submission/requirements.txt new file mode 100644 index 0000000..1081318 --- /dev/null +++ b/a2perf/tutorials/example_submission/requirements.txt @@ -0,0 +1 @@ +stable_baselines3 diff --git a/a2perf/tutorials/example_submission/train.py b/a2perf/tutorials/example_submission/train.py new file mode 100644 index 0000000..69e656e --- /dev/null +++ b/a2perf/tutorials/example_submission/train.py @@ -0,0 +1,130 @@ +# 1) Import the necessary packages. For this tutorial, we will use the `quadruped_locomotion` environment. 
+from a2perf.domains.web_navigation.gwob.CoDE import environment +# import the relevant A2Perf domain +import a2perf +# import a2perf.domains.quadruped_locomotion +import a2perf.domains.web_navigation.gwob.CoDE +# import gymnasium to create the environment +import gymnasium as gym +from gymnasium import spaces +from typing import Callable +import gin + +import snntorch as snn +from snntorch import surrogate +# import the abseil app to run the experiment +from absl import app + +# import packages needed for your training +from stable_baselines3 import PPO +from stable_baselines3.common.env_util import make_vec_env +from stable_baselines3.common.policies import ActorCriticPolicy +from sb3_contrib import RecurrentPPO + +import torch +import torch.nn as nn + +import time +# print all registered gym environments +print('Creating environment') +env = gym.make('WebNavigation-v0',num_websites=2, difficulty=2, raw_state=True,step_limit=7) +# env = gym.make('QuadrupedLocomotion-v0') + + +env_low = env.env.unwrapped +miniwobstate = env.reset()[0] +print(miniwobstate) +print('Utterance') +print(miniwobstate.utterance) +print('Phrase') +print(miniwobstate.phrase) +print('Tokens') +print(miniwobstate.tokens) +print('Fields') +print(miniwobstate.fields) +print('Dom') +print(miniwobstate.dom) +print('Dom elements') +print(miniwobstate.dom_elements) + + +class CustomNet(nn.Module): + def __init__(self, state_shape, action_shape): + super(CustomNet, self).__init__() + + self.latent_dim_pi = action_shape.n + self.latent_dim_vf = 1 + self.actor = nn.Sequential( + nn.Linear(state_shape.shape[0], 64), + nn.ReLU(), + nn.Linear(64, 64), + nn.ReLU(), + nn.Linear(64, action_shape.n), + nn.Softmax(dim=-1) + ) + + self.critic = nn.Sequential( + nn.Linear(state_shape.shape[0], 64), + nn.ReLU(), + nn.Linear(64, 64), + nn.ReLU(), + nn.Linear(64, 1) + ) + def forward_actor(self, features: torch.Tensor) -> torch.Tensor: + return self.actor(features) + + def forward_critic(self, features: torch.Tensor) -> torch.Tensor: + return self.critic(features) + + def forward(self, x): + return self.forward_actor(x), self.forward_critic(x) + +class CustomActorCriticPolicy(ActorCriticPolicy): + def __init__( + self, + observation_space: spaces.Space, + action_space: spaces.Space, + lr_schedule: Callable[[float], float], + *args, + **kwargs, + ): + # Disable orthogonal initialization + kwargs["ortho_init"] = False + super().__init__( + observation_space, + action_space, + lr_schedule, + # Pass remaining arguments to base class + *args, + **kwargs, + ) + def _build_mlp_extractor(self) -> None: + self.mlp_extractor = CustomNet(self.observation_space, self.action_space) + + +# 2) Next, we define our training function. This function will be called by the abseil app. +def train(): + # ac = CustomActorCriticPolicy(env.observation_space.shape, env.action_space) + '''Include your training algorithm here.''' + # Create the environment + vec_env = make_vec_env("CartPole-v1", n_envs=8) + + # Create the agent + model = PPO(CustomActorCriticPolicy, vec_env, verbose=1) + # Train the agent + model.learn(total_timesteps=25e3) + # Save the agent + model.save("ppo_cartpole") + + del model # remove to demonstrate saving and loading + +# 3) Optionally, we define the main function. This function will be called when the script is run directly. +def main(_): + # The main function where the training process is initiated. + train() + + +if __name__ == '__main__': + # Run the main function using the abseil app. 
This allows us to pass command line arguments to the script. +# app.run(main) + pass \ No newline at end of file diff --git a/circuit_training_environment.yml b/circuit_training_environment.yml new file mode 100644 index 0000000..21fe4e8 --- /dev/null +++ b/circuit_training_environment.yml @@ -0,0 +1,234 @@ +channels: +- pytorch +- conda-forge +- defaults +dependencies: +- gcc +- gxx +- gxx_linux-64 +- gcc_linux-64 +- _openmp_mutex=4.5=2_gnu +- abseil-cpp=20230802.0=h6a678d5_2 +- aiohttp=3.9.1=py310h2372a71_0 +- aiosignal=1.3.1=pyhd8ed1ab_0 +- async-timeout=4.0.3=pyhd8ed1ab_0 +- attrs=23.2.0=pyh71513ae_0 +- blas=1.0=mkl +- blinker=1.7.0=pyhd8ed1ab_0 +- brotli-python=1.0.9=py310hd8f1fbe_7 +- bzip2=1.0.8=h7b6447c_0 +- c-ares=1.26.0=hd590300_0 +- ca-certificates=2024.2.2=hbcca054_0 +- cachetools=5.3.2=pyhd8ed1ab_0 +- certifi=2023.11.17=py310h06a4308_0 +- cffi=1.16.0=py310h2fee648_0 +- charset-normalizer=3.3.2=pyhd8ed1ab_0 +- click=8.1.7=unix_pyh707e725_0 +- cryptography=42.0.2=py310hb8475ec_0 +- decorator=5.1.1=pyhd8ed1ab_0 +- frozenlist=1.4.1=py310h2372a71_0 +- fsspec=2023.12.2=pyhca7485f_0 +- gcsfs=2023.12.2.post1=pyhd8ed1ab_0 +- google-api-core=2.16.1=pyhd8ed1ab_0 +- google-auth=2.27.0=pyhca7485f_0 +- google-auth-oauthlib=1.2.0=pyhd8ed1ab_0 +- google-cloud-core=2.4.1=pyhd8ed1ab_0 +- google-cloud-storage=2.14.0=pyhca7485f_0 +- google-crc32c=1.1.2=py310hc5c09a0_5 +- google-resumable-media=2.7.0=pyhd8ed1ab_0 +- googleapis-common-protos=1.62.0=pyhd8ed1ab_0 +- grpc-cpp=1.48.2=he1ff14a_4 +- grpcio=1.48.2=py310he1ff14a_4 +- gtest=1.14.0=hdb19cb5_0 +- idna=3.6=pyhd8ed1ab_0 +- intel-openmp=2023.1.0=hdb19cb5_46306 +- ld_impl_linux-64=2.38=h1181459_1 +- libcrc32c=1.1.2=h9c3ff4c_0 +- libffi=3.4.4=h6a678d5_0 +- libgcc-ng=13.2.0=h807b86a_5 +- libgomp=13.2.0=h807b86a_5 +- libprotobuf=3.20.3=he621ea3_0 +- libstdcxx-ng=11.2.0=h1234567_1 +- libuuid=1.41.5=h5eee18b_0 +- mkl=2023.1.0=h213fc3f_46344 +- multidict=6.0.4=py310h2372a71_1 +- ncurses=6.4=h6a678d5_0 +- oauthlib=3.2.2=pyhd8ed1ab_0 +- openssl=3.2.1=hd590300_0 +- pip=23.3.1=py310h06a4308_0 +- protobuf=3.20.3=py310h6a678d5_0 +- pyasn1=0.5.1=pyhd8ed1ab_0 +- pyasn1-modules=0.3.0=pyhd8ed1ab_0 +- pycparser=2.21=pyhd8ed1ab_0 +- pyjwt=2.8.0=pyhd8ed1ab_1 +- pyopenssl=24.0.0=pyhd8ed1ab_0 +- pysocks=1.7.1=pyha2e5f31_6 +- python=3.10.13=h955ad1f_0 +- python_abi=3.10=2_cp310 +- pytorch=1.13.1=py3.10_cpu_0 +- pytorch-mutex=1.0=cpu +- pyu2f=0.1.5=pyhd8ed1ab_0 +- re2=2022.04.01=h27087fc_0 +- readline=8.2=h5eee18b_0 +- requests=2.31.0=pyhd8ed1ab_0 +- requests-oauthlib=1.3.1=pyhd8ed1ab_0 +- rsa=4.9=pyhd8ed1ab_0 +- setuptools=68.2.2=py310h06a4308_0 +- six=1.16.0=pyh6c4a22f_0 +- sqlite=3.41.2=h5eee18b_0 +- tbb=2021.8.0=hdb19cb5_0 +- tk=8.6.12=h1ccaba5_0 +- urllib3=2.2.0=pyhd8ed1ab_0 +- wheel=0.41.2=py310h06a4308_0 +- xz=5.4.5=h5eee18b_0 +- yarl=1.9.4=py310h2372a71_0 +- zlib=1.2.13=h5eee18b_0 +- pip: + - absl-py==2.1.0 + - alabaster==0.7.16 + - arrow==1.3.0 + - astunparse==1.6.3 + - attr==0.3.2 + - babel==2.14.0 + - black==24.1.1 + - cairocffi==1.6.1 + - chardet==5.2.0 + - cloudpickle==3.0.0 + - colorama==0.4.6 + - dash==2.15.0 + - dash-bootstrap-components==1.5.0 + - dash-core-components==2.0.0 + - dash-html-components==2.0.0 + - dash-table==5.0.0 + - dataclasses==0.6 + - distlib==0.3.8 + - dm-reverb==0.14.0 + - dm-sonnet==2.0.2 + - dm-tree==0.1.8 + - docutils==0.20.1 + - easyprocess==1.1 + - entrypoint2==1.1 + - exceptiongroup==1.2.0 + - farama-notifications==0.0.4 + - filelock==3.13.1 + - fire==0.5.0 + - flake8==7.0.0 + - flask==3.0.1 + - flatbuffers==23.5.26 + - 
fuzzywuzzy==0.18.0 + - gast==0.5.4 + - gin-config==0.5.0 + - gnureadline==8.1.2 + - google-pasta==0.2.0 + - gym==0.23.0 + - gym-notices==0.0.8 + - gymnasium==0.29.1 + - h11==0.14.0 + - h5py==3.10.0 + - imagesize==1.4.1 + - importlib-metadata==7.0.1 + - iniconfig==2.0.0 + - isort==5.13.2 + - itsdangerous==2.1.2 + - jinja2==3.1.3 + - keras==2.15.0 + - libclang==16.0.6 + - markdown==3.5.2 + - markdown-it-py==3.0.0 + - markupsafe==2.1.5 + - matplotlib==3.8.2 + - mccabe==0.7.0 + - mdurl==0.1.2 + - minari==0.4.3 + - ml-dtypes==0.2.0 + - mypy==1.8.0 + - mypy-extensions==1.0.0 + - nest-asyncio==1.6.0 + - numpy==1.23.5 + - nvidia-cublas-cu12==12.2.5.6 + - nvidia-cuda-cupti-cu12==12.2.142 + - nvidia-cuda-nvcc-cu12==12.2.140 + - nvidia-cuda-nvrtc-cu12==12.2.140 + - nvidia-cuda-runtime-cu12==12.2.140 + - nvidia-cudnn-cu12==8.9.4.25 + - nvidia-cufft-cu12==11.0.8.103 + - nvidia-curand-cu12==10.3.3.141 + - nvidia-cusolver-cu12==11.5.2.141 + - nvidia-cusparse-cu12==12.1.2.141 + - nvidia-nccl-cu12==2.16.5 + - nvidia-nvjitlink-cu12==12.2.140 + - opt-einsum==3.3.0 + - outcome==1.3.0.post0 + - packaging==23.2 + - pandas==2.2.0 + - pathspec==0.12.1 + - patool==2.1.1 + - pkgconfig==1.5.5 + - platformdirs==4.2.0 + - plotly==5.18.0 + - pluggy==1.4.0 + - portion==2.4.2 + - portpicker==1.6.0 + - psutil==5.9.8 + - py-cpuinfo==9.0.0 + - pycodestyle==2.11.1 + - pyfiglet==1.0.2 + - pyflakes==3.2.0 + - pygame==2.1.3 + - pygments==2.17.2 + - pynvml==11.5.0 + - pyparsing==3.1.1 + - pyproject-api==1.6.1 + - pytest==8.0.0 + - python-dateutil==2.8.2 + - python-dotenv==1.0.1 + - pytz==2024.1 + - pyunpack==0.3 + - pyyaml==6.0.1 + - regex==2023.12.25 + - responses==0.24.1 + - retrying==1.3.4 + - rich==13.7.0 + - rlds==0.1.8 + - scipy==1.12.0 + - selenium<4.17.0 + - shapely==2.0.2 + - shellingham==1.5.4 + - sniffio==1.3.0 + - snowballstemmer==2.2.0 + - sortedcontainers==2.4.0 + - sphinx==7.2.6 + - sphinx-rtd-theme==2.0.0 + - sphinxcontrib-applehelp==1.0.8 + - sphinxcontrib-devhelp==1.0.6 + - sphinxcontrib-htmlhelp==2.0.5 + - sphinxcontrib-jquery==4.1 + - sphinxcontrib-jsmath==1.0.1 + - sphinxcontrib-qthelp==1.0.7 + - sphinxcontrib-serializinghtml==1.1.10 + - tabulate==0.9.0 + - tenacity==8.2.3 + - tensorboard==2.15.1 + - tensorboard-data-server==0.7.2 + - tensorflow==2.15.0.post1 + - tensorflow-estimator==2.15.0 + - tensorflow-io-gcs-filesystem==0.35.0 + - tensorflow-probability==0.23.0 + - termcolor==2.4.0 + - tf-agents==0.19.0 + - timeout-decorator==0.5.0 + - tomli==2.0.1 + - tox==4.12.1 + - tqdm==4.66.1 + - trio==0.24.0 + - trio-websocket==0.11.1 + - typer==0.9.0 + - types-python-dateutil==2.8.19.20240106 + - typing-extensions==4.5.0 + - tzdata==2023.4 + - virtualenv==20.25.0 + - webdriver-manager==4.0.1 + - werkzeug==3.0.1 + - wrapt==1.14.1 + - wsproto==1.2.0 + - zipp==3.17.0 diff --git a/docs/404.md b/docs/404.md new file mode 100644 index 0000000..8780572 --- /dev/null +++ b/docs/404.md @@ -0,0 +1,7 @@ +--- +hide-toc: true +--- + +# 404 - Page Not Found + +## The requested page could not be found. diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d4bb2cb --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". 
+help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..1903506 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,5 @@ +# A2Perf documentation + +This directory contains the documentation for A2Perf. + +For more information about how to contribute to the documentation go to our [CONTRIBUTING.md](https://github.com/Farama-Foundation/Celshast/blob/main/CONTRIBUTING.md) diff --git a/docs/_scripts/ipynb_to_gallery.py b/docs/_scripts/ipynb_to_gallery.py new file mode 100644 index 0000000..5580fef --- /dev/null +++ b/docs/_scripts/ipynb_to_gallery.py @@ -0,0 +1,62 @@ +"""Convert jupyter notebook to sphinx gallery notebook styled examples. + +Usage: python ipynb_to_gallery.py + +Dependencies: pypandoc, beautifulsoup4, numpy +install using `pip install pypandoc, beautifulsoup4, numpy` +""" +import json +import warnings + +import numpy as np +import pypandoc as pdoc +from bs4 import BeautifulSoup + +warnings.filterwarnings( + "ignore", + message="The input looks more like a filename than markup. You may want to open this file and pass the filehandle into Beautiful Soup", +) + + +def convert_ipynb_to_gallery(file_name): + python_file = "" + + nb_dict = json.load(open(file_name)) + cells = nb_dict["cells"] + + for i, cell in enumerate(cells): + if i == 0: + assert cell["cell_type"] == "markdown", "First cell has to be markdown" + + md_source = "".join(cell["source"]) + rst_source = pdoc.convert_text(md_source, "rst", "md") + python_file = '"""\n' + rst_source + '\n"""' + else: + if cell["cell_type"] == "markdown": + md_source = "".join(cell["source"]) + is_all_lines_html = np.all( + [ + bool(BeautifulSoup(line, "html.parser").find()) + for line in cell["source"] + ] + ) + if is_all_lines_html: + rst_source = pdoc.convert_text( + source=md_source, to="rst", format="html" + ) + else: + rst_source = pdoc.convert_text(md_source, "rst", "md") + commented_source = "\n".join(["# " + x for x in rst_source.split("\n")]) + python_file = python_file + "\n\n\n" + "# %%" + "\n" + commented_source + elif cell["cell_type"] == "code": + source = "".join(cell["source"]) + python_file = python_file + "\n" * 2 + source + + python_file = python_file.replace("\n%", "\n# %") + open(file_name.replace(".ipynb", ".py"), "w").write(python_file) + + +if __name__ == "__main__": + import sys + + convert_ipynb_to_gallery(sys.argv[-1]) diff --git a/docs/_scripts/move_404.py b/docs/_scripts/move_404.py new file mode 100644 index 0000000..b53bae5 --- /dev/null +++ b/docs/_scripts/move_404.py @@ -0,0 +1,14 @@ +import sys + +if __name__ == "__main__": + if len(sys.argv) < 2: + print("Provide a path") + filePath = sys.argv[1] + + with open(filePath, "r+") as fp: + content = fp.read() + content = content.replace('href="../', 'href="/').replace('src="../', 'src="/') + fp.seek(0) + fp.truncate() + + fp.write(content) diff --git a/docs/_static/img/CircuitTraining-Ariane-v0.gif b/docs/_static/img/CircuitTraining-Ariane-v0.gif new file mode 100644 index 0000000..9c1c9a4 Binary files /dev/null and b/docs/_static/img/CircuitTraining-Ariane-v0.gif differ diff --git a/docs/_static/img/PROJECT-github.png b/docs/_static/img/PROJECT-github.png new file mode 100644 index 0000000..562eb42 Binary files 
/dev/null and b/docs/_static/img/PROJECT-github.png differ diff --git a/docs/_static/img/PROJECT_black.svg b/docs/_static/img/PROJECT_black.svg new file mode 100644 index 0000000..33c6aad --- /dev/null +++ b/docs/_static/img/PROJECT_black.svg @@ -0,0 +1,80 @@ + + + + + + + + + + + + + + + diff --git a/docs/_static/img/PROJECT_white.svg b/docs/_static/img/PROJECT_white.svg new file mode 100644 index 0000000..244e946 --- /dev/null +++ b/docs/_static/img/PROJECT_white.svg @@ -0,0 +1,81 @@ + + + + + + + + + + + + + + + diff --git a/docs/_static/img/QuadrupedLocomotion-DogPace-v0.gif b/docs/_static/img/QuadrupedLocomotion-DogPace-v0.gif new file mode 100644 index 0000000..af37f01 Binary files /dev/null and b/docs/_static/img/QuadrupedLocomotion-DogPace-v0.gif differ diff --git a/docs/_static/img/QuadrupedLocomotion-DogSpin-v0.gif b/docs/_static/img/QuadrupedLocomotion-DogSpin-v0.gif new file mode 100644 index 0000000..89d8d78 Binary files /dev/null and b/docs/_static/img/QuadrupedLocomotion-DogSpin-v0.gif differ diff --git a/docs/_static/img/QuadrupedLocomotion-DogTrot-v0.gif b/docs/_static/img/QuadrupedLocomotion-DogTrot-v0.gif new file mode 100644 index 0000000..d397526 Binary files /dev/null and b/docs/_static/img/QuadrupedLocomotion-DogTrot-v0.gif differ diff --git a/docs/_static/img/WebNavigation-DifficultyLevel-01-v0.gif b/docs/_static/img/WebNavigation-DifficultyLevel-01-v0.gif new file mode 100644 index 0000000..5d0de94 Binary files /dev/null and b/docs/_static/img/WebNavigation-DifficultyLevel-01-v0.gif differ diff --git a/docs/_static/img/favicon.png b/docs/_static/img/favicon.png new file mode 100644 index 0000000..b1ec927 Binary files /dev/null and b/docs/_static/img/favicon.png differ diff --git a/docs/_static/img/motion_imitation.gif b/docs/_static/img/motion_imitation.gif new file mode 100644 index 0000000..a5f4f5d Binary files /dev/null and b/docs/_static/img/motion_imitation.gif differ diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..b79dddd --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,107 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +import os +import sys + +# sys.path.insert(0, os.path.abspath("../a2perf")) +sys.path.insert(0, os.path.abspath("../a2perf/domains/circuit_training")) +sys.path.insert(0, os.path.abspath("../a2perf/domains/quadruped_locomotion")) +sys.path.insert(0, os.path.abspath("../a2perf/domains/web_navigation")) + + +# -- Project information ----------------------------------------------------- +from typing import Any, Dict + +# TODO: Replace A2Perf, remove comment and remove this line +# import A2Perf + + +project = "A2Perf" +copyright = "2022 Farama Foundation" +author = "Farama Foundation" + +# The full version, including alpha/beta/rc tags +# TODO: Replace A2Perf, remove comment and remove this line +# release = A2Perf.__version__ +release = "0.0.1" + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. 
They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + "sphinx.ext.napoleon", + "sphinx.ext.doctest", + "sphinx.ext.autodoc", + "sphinx.ext.githubpages", + "myst_parser", + "nbsphinx", +] + + +# Add any paths that contain templates here, relative to this directory. +templates_path = ["_templates"] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = [ + "a2perf/a2perf_benchmark_submission", + "a2perf/analysis", + "a2perf/launch", +] +autodoc_mock_imports = [ + "a2perf.analysis.evaluation", + "a2perf.submission.main_submission", +] + +# Napoleon settings +# napoleon_use_ivar = True +napoleon_use_admonition_for_references = True +# See https://github.com/sphinx-doc/sphinx/issues/9119 +napoleon_custom_sections = [("Returns", "params_style")] + +# Autodoc +autoclass_content = "both" +autodoc_preserve_defaults = True + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = "furo" +html_title = "A2Perf Documentation" +html_baseurl = "https://a2perf.farama.org" +html_copy_source = False +html_favicon = "_static/img/favicon.png" +html_theme_options = { + "light_logo": "img/a2perf_black.svg", + "dark_logo": "img/a2perf_white.svg", + "gtag": "G-6H9C8TWXZ8", + "description": "A2Perf is a benchmarking suite for evaluating autonomous agents on real-world problems.", + "image": "img/a2perf-github.png", + "versioning": True, + "source_repository": "https://github.com/Farama-Foundation/A2Perf/", + "source_branch": "main", + "source_directory": "docs/", +} + +html_static_path = ["_static"] +html_css_files = [] + +# -- Generate Changelog ------------------------------------------------- + +sphinx_github_changelog_token = os.environ.get("SPHINX_GITHUB_CHANGELOG_TOKEN") diff --git a/docs/content/basic_usage.md b/docs/content/basic_usage.md new file mode 100644 index 0000000..bce7553 --- /dev/null +++ b/docs/content/basic_usage.md @@ -0,0 +1,75 @@ +--- +layout: "contents" +title: Basic Usage +firstpage: +--- + +# Basic Usage + +After installing A2Perf, you can easily instantiate environments from different +domains. Here are some examples: + +## Circuit Training + +```python +import gymnasium as gym + +env = gym.make('CircuitTraining-Ariane-v0', + netlist_file='path/to/netlist.pb.txt') +``` + +## Web Navigation + +```python +import gymnasium as gym + +env = gym.make('WebNavigation-Difficulty-01-v0', difficulty=1, num_websites=1) +``` + +## Quadruped Locomotion + +```python +import gymnasium as gym + +env = gym.make('QuadrupedLocomotion-DogPace-v0') +# Other available environments: +# env = gym.make('QuadrupedLocomotion-DogTrot-v0') +# env = gym.make('QuadrupedLocomotion-DogSpin-v0') +``` + + +For more detailed usage + +# Installation + +To install A2Perf, the easiest way is to use `pip`. You can install specific +domains or all domains depending on your needs: + +```bash +# Install all domains +pip install a2perf[all] + +# Install specific domains +pip install a2perf[circuit-training] +pip install a2perf[web-navigation] +pip install a2perf[quadruped-locomotion] +``` + +## Installing from source + +If you would like to install A2Perf from source, follow these steps: + +1. 
Clone the repository: + +```bash +git clone https://github.com/Farama-Foundation/A2Perf.git +cd A2Perf +git submodule update --init --recursive +pip install . +``` + +If you want to install the package in development mode, use the `-e` flag: + +```bash +pip install -e . +``` diff --git a/docs/content/circuit_training/CircuitTraining-Ariane-v0.ipynb b/docs/content/circuit_training/CircuitTraining-Ariane-v0.ipynb new file mode 100644 index 0000000..7a4d9ed --- /dev/null +++ b/docs/content/circuit_training/CircuitTraining-Ariane-v0.ipynb @@ -0,0 +1,230 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": "# Ariane" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-07-21 09:06:26.759553: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", + "2024-07-21 09:06:26.786028: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "2024-07-21 09:06:26.786050: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "2024-07-21 09:06:26.786964: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "2024-07-21 09:06:26.791350: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2024-07-21 09:06:27.568549: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", + "WARNING:absl:JSONDecode Error for GetMacroAndClusteredPortAdjacency \n", + " Expecting value: line 1 column 219265 (char 219264)\n", + "WARNING:absl:JSONDecode Error for GetMacroAndClusteredPortAdjacency \n", + " Expecting ',' delimiter: line 1 column 438529 (char 438528)\n", + "WARNING:absl:JSONDecode Error for GetMacroAndClusteredPortAdjacency \n", + " Expecting value: line 1 column 657793 (char 657792)\n", + "WARNING:absl:JSONDecode Error for GetMacroAndClusteredPortAdjacency \n", + " Expecting ',' delimiter: line 1 column 877057 (char 877056)\n", + "WARNING:absl:JSONDecode Error for GetMacroAndClusteredPortAdjacency \n", + " Expecting value: line 1 column 1096321 (char 1096320)\n", + "WARNING:absl:JSONDecode Error for GetMacroAndClusteredPortAdjacency \n", + " Expecting value: line 1 column 1315585 (char 1315584)\n", + "WARNING:absl:JSONDecode Error for GetMacroAndClusteredPortAdjacency \n", + " Expecting value: line 1 column 1534849 (char 1534848)\n", + "WARNING:absl:JSONDecode Error for GetMacroAndClusteredPortAdjacency \n", + " Expecting value: line 1 column 1754113 (char 1754112)\n", + "/home/ike2030/miniconda3/envs/a2perf_circuit_training/lib/python3.10/site-packages/gymnasium/utils/passive_env_checker.py:32: UserWarning: \u001B[33mWARN: A Box observation space maximum and minimum values are equal. 
Actual equal coordinates: [(0,)]\u001B[0m\n", + " logger.warn(\n" + ] + } + ], + "execution_count": 1, + "source": [ + "from a2perf.domains import circuit_training\n", + "import gymnasium as gym\n", + "\n", + "env = gym.make('CircuitTraining-Ariane-v0')" + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
| Action Space | Discrete(16384) |
| Observation Space | Dict('current_node': Box(0, 3499, (1,), int32), 'fake_net_heatmap': Box(0.0, 1.0, (16384,), float32), 'is_node_placed': Box(0, 1, (3500,), int32), 'locations_x': Box(0.0, 1.0, (3500,), float32), 'locations_y': Box(0.0, 1.0, (3500,), float32), 'mask': Box(0, 1, (16384,), int32), 'netlist_index': Box(0, 0, (1,), int32)) |
| Reward Range | (0, 1) |
| Creation | gym.make(\"CircuitTraining-Ariane-v0\") |
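A minimal random-rollout sketch (illustrative only, assuming the bundled Ariane netlist): it uses the observation's mask entry to sample only legal placements.

```python
from a2perf.domains import circuit_training  # registers the CircuitTraining environments
import gymnasium as gym
import numpy as np

env = gym.make("CircuitTraining-Ariane-v0")
obs, info = env.reset()
terminated = truncated = False
while not (terminated or truncated):
    valid_actions = np.flatnonzero(obs["mask"])   # grid cells where the current macro may be placed
    action = np.random.choice(valid_actions)      # pick one legal placement at random
    obs, reward, terminated, truncated, info = env.step(action)
env.close()
```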
" + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "## Description\n", + "\n", + "Circuit Training is an open-source framework for generating chip floor plans with distributed deep reinforcement learning. This framework reproduces the methodology published in the Nature 2021 paper:\n", + "\n", + "A graph placement methodology for fast chip design. Azalia Mirhoseini, Anna Goldie, Mustafa Yazgan, Joe Wenjie Jiang, Ebrahim Songhori, Shen Wang, Young-Joon Lee, Eric Johnson, Omkar Pathak, Azade Nazi, Jiwoo Pak, Andy Tong, Kavya Srinivasa, William Hang, Emre Tuncer, Quoc V. Le, James Laudon, Richard Ho, Roger Carpenter & Jeff Dean, 2021. Nature, 594(7862), pp.207-212. [PDF]\n", + "\n", + "At each timestep, the agent must place a single macro onto the chip canvas. \n" + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "## Action Space\n" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2024-07-21T13:06:40.008121Z", + "start_time": "2024-07-21T13:06:40.004369Z" + } + }, + "cell_type": "code", + "source": "env.action_space", + "outputs": [ + { + "data": { + "text/plain": [ + "Discrete(16384)" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 2 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "Circuit Training represents the chip canvas as a grid. The action space corresponds to the different locations that the next macro can be placed onto the canvas. In the Ariane netlist case, the canvas is of size $128 \\times 128$, resulting in $16384$ possible actions." + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": "## Observation Encoding\n" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2024-07-21T13:13:09.395228Z", + "start_time": "2024-07-21T13:13:09.391323Z" + } + }, + "cell_type": "code", + "source": "env.observation_space", + "outputs": [ + { + "data": { + "text/plain": [ + "Dict('current_node': Box(0, 3499, (1,), int32), 'fake_net_heatmap': Box(0.0, 1.0, (16384,), float32), 'is_node_placed': Box(0, 1, (3500,), int32), 'locations_x': Box(0.0, 1.0, (3500,), float32), 'locations_y': Box(0.0, 1.0, (3500,), float32), 'mask': Box(0, 1, (16384,), int32), 'netlist_index': Box(0, 0, (1,), int32))" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 3 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "| Key | Description |\n", + "|-----|-------------|\n", + "| current_node | The node currently being considered for placement |\n", + "| fake_net_heatmap | A representation of estimated connections between nodes |\n", + "| is_node_placed | Indicates which nodes have already been placed on the chip |\n", + "| locations_x | The x-coordinates of placed nodes |\n", + "| locations_y | The y-coordinates of placed nodes |\n", + "| mask | Indicates which actions are valid in the current state |\n", + "| netlist_index | Identifier for the current netlist being processed |" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Rewards\n", + "\n", + "The reward is evaluated at the end of each episode. The placement cost binary is used to calculate the reward based on proxy wirelength, congestion, and density. 
An infeasible placement results in a reward of -1.0.\n", + "\n", + "The reward function is defined as:\n", + "\n", + "$$R(p, g) = -\\text{Wirelength}(p, g) - \\lambda \\cdot \\text{Congestion}(p, g) - \\gamma \\cdot \\text{Density}(p, g)$$\n", + "\n", + "Where:\n", + "- $p$ represents the placement\n", + "- $g$ represents the netlist graph\n", + "- $\\lambda$ is the congestion weight\n", + "- $\\gamma$ is the density weight\n", + "\n", + "Default values in A2Perf:\n", + "- The congestion weight $\\lambda$ is set to 0.01\n", + "- The density weight $\\gamma$ is set to 0.01 \n", + "- The maximum density threshold is set to 0.6\n", + "\n", + "These default values are based on the methodology described in [Mirhoseini et al. (2021)][1].\n", + "\n", + "[1]: https://www.nature.com/articles/s41586-021-03544-w \"A graph placement methodology for fast chip design\"" + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "## Termination\n", + "\n", + "The episode is terminated once all macros have been placed on the canvas, then the final reward is calculated." + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "## Registered Configurations\n", + "* `CircuitTraining-Ariane-v0`" + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "" + } + ], + "metadata": { + "language_info": { + "name": "python" + }, + "kernelspec": { + "name": "python3", + "language": "python", + "display_name": "Python 3 (ipykernel)" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/content/circuit_training/CircuitTraining-ToyMacroStdcell-v0.ipynb b/docs/content/circuit_training/CircuitTraining-ToyMacroStdcell-v0.ipynb new file mode 100644 index 0000000..f0111e1 --- /dev/null +++ b/docs/content/circuit_training/CircuitTraining-ToyMacroStdcell-v0.ipynb @@ -0,0 +1,267 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": "# Toy Macro Standard Cell" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2024-07-21T17:47:38.578728Z", + "start_time": "2024-07-21T17:47:35.999779Z" + } + }, + "cell_type": "code", + "source": [ + "from a2perf.domains import circuit_training\n", + "import gymnasium as gym\n", + "\n", + "env = gym.make('CircuitTraining-ToyMacro-v0')" + ], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-07-21 13:47:36.273879: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. 
To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", + "2024-07-21 13:47:36.299009: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "2024-07-21 13:47:36.299034: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "2024-07-21 13:47:36.300083: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "2024-07-21 13:47:36.304647: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2024-07-21 13:47:36.808584: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", + "WARNING:absl:block_name is not set. Please add the block_name in:\n", + "/home/ike2030/workspace/a2perf/repo_new/a2perf/domains/circuit_training/circuit_training/environment/test_data/toy_macro_stdcell/netlist.pb.txt\n", + "or in:\n", + "/home/ike2030/workspace/a2perf/repo_new/a2perf/domains/circuit_training/circuit_training/environment/test_data/toy_macro_stdcell/initial.plc\n", + "/home/ike2030/miniconda3/envs/a2perf_circuit_training/lib/python3.10/site-packages/numpy/core/fromnumeric.py:3432: RuntimeWarning: Mean of empty slice.\n", + " return _methods._mean(a, axis=axis, dtype=dtype,\n", + "/home/ike2030/miniconda3/envs/a2perf_circuit_training/lib/python3.10/site-packages/numpy/core/_methods.py:190: RuntimeWarning: invalid value encountered in divide\n", + " ret = ret.dtype.type(ret / rcount)\n", + "/home/ike2030/miniconda3/envs/a2perf_circuit_training/lib/python3.10/site-packages/gymnasium/utils/passive_env_checker.py:32: UserWarning: \u001B[33mWARN: A Box observation space maximum and minimum values are equal. 
Actual equal coordinates: [(0,)]\u001B[0m\n", + " logger.warn(\n" + ] + } + ], + "execution_count": 3 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2024-07-21T17:47:43.889997Z", + "start_time": "2024-07-21T17:47:43.885078Z" + } + }, + "cell_type": "code", + "source": "env.observation_space", + "outputs": [ + { + "data": { + "text/plain": [ + "Dict('current_node': Box(0, 3499, (1,), int32), 'fake_net_heatmap': Box(0.0, 1.0, (16384,), float32), 'is_node_placed': Box(0, 1, (3500,), int32), 'locations_x': Box(0.0, 1.0, (3500,), float32), 'locations_y': Box(0.0, 1.0, (3500,), float32), 'mask': Box(0, 1, (16384,), int32), 'netlist_index': Box(0, 0, (1,), int32))" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 4 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2024-07-21T17:47:58.159889Z", + "start_time": "2024-07-21T17:47:58.157250Z" + } + }, + "cell_type": "code", + "source": "env.action_space", + "outputs": [ + { + "data": { + "text/plain": [ + "Discrete(16384)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 5 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
| Action Space | Discrete(16384) |
| Observation Space | Dict('current_node': Box(0, 3499, (1,), int32), 'fake_net_heatmap': Box(0.0, 1.0, (16384,), float32), 'is_node_placed': Box(0, 1, (3500,), int32), 'locations_x': Box(0.0, 1.0, (3500,), float32), 'locations_y': Box(0.0, 1.0, (3500,), float32), 'mask': Box(0, 1, (16384,), int32), 'netlist_index': Box(0, 0, (1,), int32)) |
| Creation | gym.make(\"CircuitTraining-ToyMacro-v0\") |
" + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "## Description\n", + "\n", + "Circuit Training is an open-source framework for generating chip floor plans with distributed deep reinforcement learning. This framework reproduces the methodology published in the Nature 2021 paper:\n", + "\n", + "A graph placement methodology for fast chip design. Azalia Mirhoseini, Anna Goldie, Mustafa Yazgan, Joe Wenjie Jiang, Ebrahim Songhori, Shen Wang, Young-Joon Lee, Eric Johnson, Omkar Pathak, Azade Nazi, Jiwoo Pak, Andy Tong, Kavya Srinivasa, William Hang, Emre Tuncer, Quoc V. Le, James Laudon, Richard Ho, Roger Carpenter & Jeff Dean, 2021. Nature, 594(7862), pp.207-212. [PDF]\n", + "\n", + "At each timestep, the agent must place a single macro onto the chip canvas. \n" + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "## Action Space\n" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2024-07-21T17:51:19.077192Z", + "start_time": "2024-07-21T17:51:19.071196Z" + } + }, + "cell_type": "code", + "source": "env.action_space", + "outputs": [ + { + "data": { + "text/plain": [ + "Discrete(16384)" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 7 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "\n", + "Circuit Training represents the chip canvas as a grid. The action space corresponds to the different locations that the next macro can be placed onto the canvas. In the Toy Macro netlist case, the canvas is of size $128 \\times 128$, resulting in $16384$ possible actions." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": "## Observation Encoding\n" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2024-07-21T17:51:49.691864Z", + "start_time": "2024-07-21T17:51:49.683712Z" + } + }, + "cell_type": "code", + "source": "env.observation_space", + "outputs": [ + { + "data": { + "text/plain": [ + "Dict('current_node': Box(0, 3499, (1,), int32), 'fake_net_heatmap': Box(0.0, 1.0, (16384,), float32), 'is_node_placed': Box(0, 1, (3500,), int32), 'locations_x': Box(0.0, 1.0, (3500,), float32), 'locations_y': Box(0.0, 1.0, (3500,), float32), 'mask': Box(0, 1, (16384,), int32), 'netlist_index': Box(0, 0, (1,), int32))" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 8 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "| Key | Description |\n", + "|-----|-------------|\n", + "| current_node | The node currently being considered for placement |\n", + "| fake_net_heatmap | A representation of estimated connections between nodes |\n", + "| is_node_placed | Indicates which nodes have already been placed on the chip |\n", + "| locations_x | The x-coordinates of placed nodes |\n", + "| locations_y | The y-coordinates of placed nodes |\n", + "| mask | Indicates which actions are valid in the current state |\n", + "| netlist_index | Identifier for the current netlist being processed |" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Rewards\n", + "\n", + "The reward is evaluated at the end of each episode. The placement cost binary is used to calculate the reward based on proxy wirelength, congestion, and density. 
An infeasible placement results in a reward of -1.0.\n", + "\n", + "The reward function is defined as:\n", + "\n", + "$$R(p, g) = -\\text{Wirelength}(p, g) - \\lambda \\cdot \\text{Congestion}(p, g) - \\gamma \\cdot \\text{Density}(p, g)$$\n", + "\n", + "Where:\n", + "- $p$ represents the placement\n", + "- $g$ represents the netlist graph\n", + "- $\\lambda$ is the congestion weight\n", + "- $\\gamma$ is the density weight\n", + "\n", + "Default values in A2Perf:\n", + "- The congestion weight $\\lambda$ is set to 0.01\n", + "- The density weight $\\gamma$ is set to 0.01 \n", + "- The maximum density threshold is set to 0.6\n", + "\n", + "These default values are based on the methodology described in [Mirhoseini et al. (2021)][1].\n", + "\n", + "[1]: https://www.nature.com/articles/s41586-021-03544-w \"A graph placement methodology for fast chip design\"" + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "## Termination\n", + "\n", + "The episode is terminated once all macros have been placed on the canvas, then the final reward is calculated." + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "## Registered Configurations\n", + "- `CircuitTraining-ToyMacro-v0`" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + }, + "kernelspec": { + "name": "python3", + "language": "python", + "display_name": "Python 3 (ipykernel)" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/content/circuit_training/index.md b/docs/content/circuit_training/index.md new file mode 100644 index 0000000..7a386dc --- /dev/null +++ b/docs/content/circuit_training/index.md @@ -0,0 +1,49 @@ +# Circuit Training Environments + + +
+ Ariane
+ +```{toctree} +:hidden: +:caption: Circuit Training Environments + +CircuitTraining-Ariane-v0 +CircuitTraining-ToyMacroStdcell-v0 diff --git a/docs/content/publications.md b/docs/content/publications.md new file mode 100644 index 0000000..835c45e --- /dev/null +++ b/docs/content/publications.md @@ -0,0 +1,10 @@ +--- +layout: "contents" +title: List of Publications +firstpage: +--- + +# List of Publications + +List of publications and submissions using A2Perf (please open a pull request to +add missing entries): diff --git a/docs/content/quadruped_locomotion/QuadrupedLocomotion-DogPace-v0.ipynb b/docs/content/quadruped_locomotion/QuadrupedLocomotion-DogPace-v0.ipynb new file mode 100644 index 0000000..c079a68 --- /dev/null +++ b/docs/content/quadruped_locomotion/QuadrupedLocomotion-DogPace-v0.ipynb @@ -0,0 +1,160 @@ +{ + "cells": [ + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "# Dog Pace\n", + "\n", + "[![Learning Agile Robotic Locomotion Skills by Imitating Animals](../../_static/img/motion_imitation.gif)](https://www.youtube.com/watch?v=lKYh6uuCwRY&feature=youtu.be&hd=1 \"Learning Agile Robotic Locomotion Skills by Imitating Animals\")" + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "\n", + "| | |\n", + "|--------------------|----------------------------------------------------|\n", + "| Action Space | `Box(-2` $\\pi$`,2` $\\pi$`, (12,), float32)` |\n", + "| Observation Space| `Box(_, _, (160,), float32)` |\n", + "| import | `gymnasium.make('QuadrupedLocomotion-DogPace-v0')` |\n" + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "## Description\n", + "This quadruped locomotion environment was proposed by [![Xue Bin Peng et al.](https://xbpeng.github.io/projects/Robotic_Imitation/index.html)](https://xbpeng.github.io/projects/Robotic_Imitation/index.html \"Learning Agile Robotic Locomotion Skills by Imitating Animals\"). They showed that by imitating skills of real-world reference motion data, a diverse repertoire of behaviors can be learned using a single learning-based approach. The quadruped is a legged robot which has 18 degrees of freedom (DoF), 3 actuated DoF per leg and 6 under-actuated DoF for the torso.\n", + "The objective is to learn from imitating real animals. The observation is a reference motion of a desired skill, captured with motion capture. The policy enables a quadruped to reproduce the skill in the real world, using the 12 actuated degrees of freedom of the legs. The controller runs at a control frequency of 30Hz." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Action Space" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The actions specify target rotations for PD controllers at each joint. There are 12 controllable DoFs, three for each leg. The each action therefore corresponds to a target rotation for a single DoF. The action space is a `Box(-2` $\\pi$`,2` $\\pi$`, (12,), float32)`.\n", + "\n", + "For smooth motions, the PD targets are passed through a low-pass filter before being applied to the robot." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Observation Space\n", + "The observation space of this environment consists of the reference motion of the skills that need to be learned. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Rewards\n", + "The reward function is designed to encourage the policy to track a sequence of poses from the reference motion. 
Using reference motions, we avoid the need of designing skill-specific reward functions, enabling a common framework to learn a diverse array of behaviors. The reward at each step is:\n", + "$$\n", + "r_t = 0.5r^p_t + 0.05 r^v_t + 0.2 r^e_t + 0.15r^{rp}_t + 0.1r^{rv}_t\n", + "$$\n", + "Where the pose reward $r^p_t$ reflects the difference between the reference joint rotations, $\\hat q^j_t$, and the robot joint rotations, $q^j_t$:\n", + "$$\n", + "r^p_t = exp(-5 \\sum_{j}||\\hat q^j_t - q^j_t||^2)\n", + "$$\n", + "The velocity reward $r^v_t$ reflects the difference between the reference joint velocities, $\\hat{ \\dot{q}}^j_t$, and the robot joint velocities, $\\dot{q}^j_t$:\n", + "$$\n", + "r^p_t = exp(-0.1 \\sum_{j}||\\hat{ \\dot{q}}^j_t - \\dot{q}^j_t||^2)\n", + "$$\n", + "The end-effector reward $r^e_t$ encourages the robot to track the end-effector positions. $x^e_t$ is the relative 3D position of the end-effector e with respect to the root:\n", + "$$\n", + "r^e_t = exp(-40 \\sum_{e}||\\hat{x}^e_t - e^e_t||^2)\n", + "$$\n", + "Finally, the root pose reward $r^{rp}_t$, and the root velocity reward $r^{rv}_t$ encourage the robot to track the reference root motion. $x^{root}_t$ is the root's global position and $\\dot x^{root}_t$ is the root's linear velocity, $q^{root}_t$ is the root's global porotationsition and $\\dot q^{root}_t$ is the root's angular velocity :\n", + "$$\n", + "r^{rp}_t = exp(-20 ||\\hat{x}^{root}_t -x^{root}_t||^2 - 10 || \\hat{q}^{root}_t - q^{root}_t || ^2) \\\\\n", + "r^{rv}_t = exp(-20 ||\\hat{\\dot{x}}^{root}_t -\\dot{x}^{root}_t||^2 - 10 || \\hat{\\dot{q}}^{root}_t - \\dot{q}^{root}_t || ^2)\n", + "$$" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Starting State\n", + "When resetting the environment, the robot's position in the world is reset. The initial condition of the reference motion varies for very reset, to encourage a robust policy. The starting state therefore varies for every episode. \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Episode End\n", + "The episode ends under two conditions. Either the task is completed, or the robot is in an unsafe condition." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Arguments\n", + "When creating the quadruped locomotion environment, we can pass several kwargs such as `enable_rendering`, `mode`, `num_parallel_envs` etc.\n", + "```python\n", + "import gymnasium as gym\n", + "import rl_perf.domains.quadruped_locomotion\n", + "\n", + "env = gym.make('QuadrupedLocomotion-v0', enable_rendering=False, ...)\n", + "```\n", + "| Parameter | Type | Default | Description|\n", + "|--------------------|------|---|---|\n", + "| `enable_rendering` | bool | `False` | If `True`, the environment will be rendered|\n", + "| `mode` | str | `\"train\"` | Can be either `\"train\"` or `\"test\"`, in the training mode, the randomizer is automatically disabled.|\n", + "| `num_parallel_envs` | int | `None` | Number of parallel `MPI` workers. Most likely, you will not use this parameter. |\n", + "| `enable_randomizer` | bool | `None` | If `True`, the dynamics of the robot get randomized. If the mode is `train`, defaults to `True`, else `False`. Most likely, you will not use this parameter.|\n", + "| `robot_class` | Class | `laikago.Laikago` | Provide a `Class` rather than an instance. 
Most likely, you will not use this parameter.|\n", + "| `trajectory_generator` | | `LaikagoPoseOffsetGenerator()`| A trajectory_generator that can potentially modify the action and observation. Expected to have `get_action` and `get_observation` interfaces. Most likely, you will not use this parameter.|\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, the `reset` method allows several options. \n", + "```python\n", + "env.reset(seed, options)\n", + "``` \n", + "Where the options should be passes as a `Dict`. The possible options are:\n", + "\n", + "| Parameter | Type | Default | Description|\n", + "|--------------------|------|---|---|\n", + "| `initial_motor_angles` | list | `None` | A list of Floats. The desired joint angles after reset. If None, the robot will use its built-in value.|\n", + "| `reset_duration` | Float | `0.0` | The time (in seconds) needed to rotate all motors to the desired initial values.|\n", + "| `reset_visualization_camera` | bool | `True` |Whether to reset debug visualization camera on reset.|" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Version History\n", + "- v0: Initial versions release" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/content/quadruped_locomotion/QuadrupedLocomotion-DogSpin-v0.ipynb b/docs/content/quadruped_locomotion/QuadrupedLocomotion-DogSpin-v0.ipynb new file mode 100644 index 0000000..c40dbe8 --- /dev/null +++ b/docs/content/quadruped_locomotion/QuadrupedLocomotion-DogSpin-v0.ipynb @@ -0,0 +1,160 @@ +{ + "cells": [ + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "# Dog Spin\n", + "\n", + "[![Learning Agile Robotic Locomotion Skills by Imitating Animals](../../_static/img/motion_imitation.gif)](https://www.youtube.com/watch?v=lKYh6uuCwRY&feature=youtu.be&hd=1 \"Learning Agile Robotic Locomotion Skills by Imitating Animals\")" + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "\n", + "| | |\n", + "|--------------------|----------------------------------------------------|\n", + "| Action Space | `Box(-2` $\\pi$`,2` $\\pi$`, (12,), float32)` |\n", + "| Observation Space| `Box(_, _, (160,), float32)` |\n", + "| import | `gymnasium.make('QuadrupedLocomotion-DogSpin-v0')` |\n" + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "## Description\n", + "This quadruped locomotion environment was proposed by [![Xue Bin Peng et al.](https://xbpeng.github.io/projects/Robotic_Imitation/index.html)](https://xbpeng.github.io/projects/Robotic_Imitation/index.html \"Learning Agile Robotic Locomotion Skills by Imitating Animals\"). They showed that by imitating skills of real-world reference motion data, a diverse repertoire of behaviors can be learned using a single learning-based approach. The quadruped is a legged robot which has 18 degrees of freedom (DoF), 3 actuated DoF per leg and 6 under-actuated DoF for the torso.\n", + "The objective is to learn from imitating real animals. The observation is a reference motion of a desired skill, captured with motion capture. The policy enables a quadruped to reproduce the skill in the real world, using the 12 actuated degrees of freedom of the legs. The controller runs at a control frequency of 30Hz." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Action Space" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The actions specify target rotations for PD controllers at each joint. There are 12 controllable DoFs, three for each leg. Each action therefore corresponds to a target rotation for a single DoF. The action space is a `Box(-2` $\\pi$`,2` $\\pi$`, (12,), float32)`.\n", + "\n", + "For smooth motions, the PD targets are passed through a low-pass filter before being applied to the robot." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Observation Space\n", + "The observation space of this environment consists of the reference motion of the skills that need to be learned. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Rewards\n", + "The reward function is designed to encourage the policy to track a sequence of poses from the reference motion. Using reference motions, we avoid the need to design skill-specific reward functions, enabling a common framework to learn a diverse array of behaviors. The reward at each step is:\n", + "$$\n", + "r_t = 0.5r^p_t + 0.05 r^v_t + 0.2 r^e_t + 0.15r^{rp}_t + 0.1r^{rv}_t\n", + "$$\n", + "where the pose reward $r^p_t$ reflects the difference between the reference joint rotations, $\\hat q^j_t$, and the robot joint rotations, $q^j_t$:\n", + "$$\n", + "r^p_t = \\exp(-5 \\sum_{j}||\\hat q^j_t - q^j_t||^2)\n", + "$$\n", + "The velocity reward $r^v_t$ reflects the difference between the reference joint velocities, $\\hat{ \\dot{q}}^j_t$, and the robot joint velocities, $\\dot{q}^j_t$:\n", + "$$\n", + "r^v_t = \\exp(-0.1 \\sum_{j}||\\hat{ \\dot{q}}^j_t - \\dot{q}^j_t||^2)\n", + "$$\n", + "The end-effector reward $r^e_t$ encourages the robot to track the end-effector positions, where $x^e_t$ is the relative 3D position of end-effector $e$ with respect to the root:\n", + "$$\n", + "r^e_t = \\exp(-40 \\sum_{e}||\\hat{x}^e_t - x^e_t||^2)\n", + "$$\n", + "Finally, the root pose reward $r^{rp}_t$ and the root velocity reward $r^{rv}_t$ encourage the robot to track the reference root motion. $x^{root}_t$ is the root's global position and $\\dot x^{root}_t$ is the root's linear velocity; $q^{root}_t$ is the root's global rotation and $\\dot q^{root}_t$ is the root's angular velocity:\n", + "$$\n", + "r^{rp}_t = \\exp(-20 ||\\hat{x}^{root}_t -x^{root}_t||^2 - 10 || \\hat{q}^{root}_t - q^{root}_t || ^2) \\\\\n", + "r^{rv}_t = \\exp(-20 ||\\hat{\\dot{x}}^{root}_t -\\dot{x}^{root}_t||^2 - 10 || \\hat{\\dot{q}}^{root}_t - \\dot{q}^{root}_t || ^2)\n", + "$$" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Starting State\n", + "When resetting the environment, the robot's position in the world is reset. The initial condition of the reference motion varies for every reset, to encourage a robust policy. The starting state therefore varies for every episode. \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Episode End\n", + "The episode ends under two conditions: either the task is completed, or the robot is in an unsafe condition."
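
To make the weighting of the terms above concrete, the following is a minimal NumPy sketch of how such an imitation reward could be assembled. It is illustrative only: the dictionary keys and array shapes are assumptions for this example, not the environment's internal implementation.

```python
import numpy as np


def imitation_reward(ref, robot):
    """Weighted imitation reward from reference vs. robot quantities.

    `ref` and `robot` are dicts of NumPy arrays (hypothetical layout):
      'q' / 'dq'            joint rotations / velocities, shape (12,)
      'x_ee'                end-effector positions relative to the root, shape (4, 3)
      'x_root', 'q_root'    root position and root rotation
      'dx_root', 'dq_root'  root linear and angular velocities
    """
    r_pose = np.exp(-5.0 * np.sum((ref['q'] - robot['q']) ** 2))
    r_vel = np.exp(-0.1 * np.sum((ref['dq'] - robot['dq']) ** 2))
    r_ee = np.exp(-40.0 * np.sum((ref['x_ee'] - robot['x_ee']) ** 2))
    r_root_pose = np.exp(
        -20.0 * np.sum((ref['x_root'] - robot['x_root']) ** 2)
        - 10.0 * np.sum((ref['q_root'] - robot['q_root']) ** 2))
    r_root_vel = np.exp(
        -20.0 * np.sum((ref['dx_root'] - robot['dx_root']) ** 2)
        - 10.0 * np.sum((ref['dq_root'] - robot['dq_root']) ** 2))
    return (0.5 * r_pose + 0.05 * r_vel + 0.2 * r_ee
            + 0.15 * r_root_pose + 0.1 * r_root_vel)
```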
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Arguments\n", + "When creating the quadruped locomotion environment, we can pass several kwargs such as `enable_rendering`, `mode`, `num_parallel_envs`, etc.\n", + "```python\n", + "import gymnasium as gym\n", + "import a2perf.domains.quadruped_locomotion\n", + "\n", + "env = gym.make('QuadrupedLocomotion-v0', enable_rendering=False, ...)\n", + "```\n", + "| Parameter | Type | Default | Description|\n", + "|--------------------|------|---|---|\n", + "| `enable_rendering` | bool | `False` | If `True`, the environment will be rendered|\n", + "| `mode` | str | `\"train\"` | Can be either `\"train\"` or `\"test\"`; in `\"test\"` mode, the dynamics randomizer is automatically disabled.|\n", + "| `num_parallel_envs` | int | `None` | Number of parallel `MPI` workers. Most likely, you will not use this parameter. |\n", + "| `enable_randomizer` | bool | `None` | If `True`, the dynamics of the robot get randomized. If the mode is `train`, defaults to `True`, else `False`. Most likely, you will not use this parameter.|\n", + "| `robot_class` | Class | `laikago.Laikago` | Provide a `Class` rather than an instance. Most likely, you will not use this parameter.|\n", + "| `trajectory_generator` | | `LaikagoPoseOffsetGenerator()`| A trajectory generator that can potentially modify the action and observation. Expected to have `get_action` and `get_observation` interfaces. Most likely, you will not use this parameter.|\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, the `reset` method allows several options. \n", + "```python\n", + "env.reset(seed, options)\n", + "``` \n", + "The options should be passed as a `dict`. The possible options are:\n", + "\n", + "| Parameter | Type | Default | Description|\n", + "|--------------------|------|---|---|\n", + "| `initial_motor_angles` | list | `None` | A list of floats. The desired joint angles after reset. 
If `None`, the robot will use its built-in value.|\n", + "| `reset_duration` | float | `0.0` | The time (in seconds) needed to rotate all motors to the desired initial values.|\n", + "| `reset_visualization_camera` | bool | `True` | Whether to reset the debug visualization camera on reset.|" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Version History\n", + "- v0: Initial version release" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/content/quadruped_locomotion/QuadrupedLocomotion-DogTrot-v0.ipynb b/docs/content/quadruped_locomotion/QuadrupedLocomotion-DogTrot-v0.ipynb new file mode 100644 index 0000000..9f726d6 --- /dev/null +++ b/docs/content/quadruped_locomotion/QuadrupedLocomotion-DogTrot-v0.ipynb @@ -0,0 +1,160 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Dog Trot\n", + "\n", + "[![Learning Agile Robotic Locomotion Skills by Imitating Animals](../../_static/img/motion_imitation.gif)](https://www.youtube.com/watch?v=lKYh6uuCwRY&feature=youtu.be&hd=1 \"Learning Agile Robotic Locomotion Skills by Imitating Animals\")" + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "\n", + "| | |\n", + "|--------------------|----------------------------------------------------|\n", + "| Action Space | `Box(-2` $\\pi$`,2` $\\pi$`, (12,), float32)` |\n", + "| Observation Space| `Box(_, _, (160,), float32)` |\n", + "| import | `gymnasium.make('QuadrupedLocomotion-DogTrot-v0')` |\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Description\n", + "This quadruped locomotion environment was proposed by [Xue Bin Peng et al.](https://xbpeng.github.io/projects/Robotic_Imitation/index.html \"Learning Agile Robotic Locomotion Skills by Imitating Animals\"). They showed that by imitating skills from real-world reference motion data, a diverse repertoire of behaviors can be learned using a single learning-based approach. The quadruped is a legged robot which has 18 degrees of freedom (DoF): 3 actuated DoF per leg and 6 under-actuated DoF for the torso.\n", + "The objective is to learn by imitating real animals. The observation is a reference motion of a desired skill, captured with motion capture. The policy enables a quadruped to reproduce the skill in the real world, using the 12 actuated degrees of freedom of the legs. The controller runs at a control frequency of 30Hz." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Action Space" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The actions specify target rotations for PD controllers at each joint. There are 12 controllable DoFs, three for each leg. Each action therefore corresponds to a target rotation for a single DoF. The action space is a `Box(-2` $\\pi$`,2` $\\pi$`, (12,), float32)`.\n", + "\n", + "For smooth motions, the PD targets are passed through a low-pass filter before being applied to the robot." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Observation Space\n", + "The observation space of this environment consists of the reference motion of the skills that need to be learned. 
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Rewards\n", + "The reward function is designed to encourage the policy to track a sequence of poses from the reference motion. Using reference motions, we avoid the need of designing skill-specific reward functions, enabling a common framework to learn a diverse array of behaviors. The reward at each step is:\n", + "$$\n", + "r_t = 0.5r^p_t + 0.05 r^v_t + 0.2 r^e_t + 0.15r^{rp}_t + 0.1r^{rv}_t\n", + "$$\n", + "Where the pose reward $r^p_t$ reflects the difference between the reference joint rotations, $\\hat q^j_t$, and the robot joint rotations, $q^j_t$:\n", + "$$\n", + "r^p_t = exp(-5 \\sum_{j}||\\hat q^j_t - q^j_t||^2)\n", + "$$\n", + "The velocity reward $r^v_t$ reflects the difference between the reference joint velocities, $\\hat{ \\dot{q}}^j_t$, and the robot joint velocities, $\\dot{q}^j_t$:\n", + "$$\n", + "r^p_t = exp(-0.1 \\sum_{j}||\\hat{ \\dot{q}}^j_t - \\dot{q}^j_t||^2)\n", + "$$\n", + "The end-effector reward $r^e_t$ encourages the robot to track the end-effector positions. $x^e_t$ is the relative 3D position of the end-effector e with respect to the root:\n", + "$$\n", + "r^e_t = exp(-40 \\sum_{e}||\\hat{x}^e_t - e^e_t||^2)\n", + "$$\n", + "Finally, the root pose reward $r^{rp}_t$, and the root velocity reward $r^{rv}_t$ encourage the robot to track the reference root motion. $x^{root}_t$ is the root's global position and $\\dot x^{root}_t$ is the root's linear velocity, $q^{root}_t$ is the root's global porotationsition and $\\dot q^{root}_t$ is the root's angular velocity :\n", + "$$\n", + "r^{rp}_t = exp(-20 ||\\hat{x}^{root}_t -x^{root}_t||^2 - 10 || \\hat{q}^{root}_t - q^{root}_t || ^2) \\\\\n", + "r^{rv}_t = exp(-20 ||\\hat{\\dot{x}}^{root}_t -\\dot{x}^{root}_t||^2 - 10 || \\hat{\\dot{q}}^{root}_t - \\dot{q}^{root}_t || ^2)\n", + "$$" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Starting State\n", + "When resetting the environment, the robot's position in the world is reset. The initial condition of the reference motion varies for very reset, to encourage a robust policy. The starting state therefore varies for every episode. \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Episode End\n", + "The episode ends under two conditions. Either the task is completed, or the robot is in an unsafe condition." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Arguments\n", + "When creating the quadruped locomotion environment, we can pass several kwargs such as `enable_rendering`, `mode`, `num_parallel_envs` etc.\n", + "```python\n", + "import gymnasium as gym\n", + "import rl_perf.domains.quadruped_locomotion\n", + "\n", + "env = gym.make('QuadrupedLocomotion-v0', enable_rendering=False, ...)\n", + "```\n", + "| Parameter | Type | Default | Description|\n", + "|--------------------|------|---|---|\n", + "| `enable_rendering` | bool | `False` | If `True`, the environment will be rendered|\n", + "| `mode` | str | `\"train\"` | Can be either `\"train\"` or `\"test\"`, in the training mode, the randomizer is automatically disabled.|\n", + "| `num_parallel_envs` | int | `None` | Number of parallel `MPI` workers. Most likely, you will not use this parameter. |\n", + "| `enable_randomizer` | bool | `None` | If `True`, the dynamics of the robot get randomized. If the mode is `train`, defaults to `True`, else `False`. 
Most likely, you will not use this parameter.|\n", + "| `robot_class` | Class | `laikago.Laikago` | Provide a `Class` rather than an instance. Most likely, you will not use this parameter.|\n", + "| `trajectory_generator` | | `LaikagoPoseOffsetGenerator()`| A trajectory generator that can potentially modify the action and observation. Expected to have `get_action` and `get_observation` interfaces. Most likely, you will not use this parameter.|\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, the `reset` method allows several options. \n", + "```python\n", + "env.reset(seed, options)\n", + "``` \n", + "The options should be passed as a `dict`. The possible options are:\n", + "\n", + "| Parameter | Type | Default | Description|\n", + "|--------------------|------|---|---|\n", + "| `initial_motor_angles` | list | `None` | A list of floats. The desired joint angles after reset. If `None`, the robot will use its built-in value.|\n", + "| `reset_duration` | float | `0.0` | The time (in seconds) needed to rotate all motors to the desired initial values.|\n", + "| `reset_visualization_camera` | bool | `True` | Whether to reset the debug visualization camera on reset.|" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Version History\n", + "- v0: Initial version release" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/content/quadruped_locomotion/index.md b/docs/content/quadruped_locomotion/index.md new file mode 100644 index 0000000..9a5df41 --- /dev/null +++ b/docs/content/quadruped_locomotion/index.md @@ -0,0 +1,53 @@ +# Quadruped Locomotion Environments + 
+<!-- Environment grid (images linking to the environment pages): Dog Pace, Dog Trot, Dog Spin -->
+ +```{toctree} +:hidden: +:caption: Quadruped Locomotion Environments + +QuadrupedLocomotion-DogPace-v0 +QuadrupedLocomotion-DogTrot-v0 +QuadrupedLocomotion-DogSpin-v0 +``` diff --git a/docs/content/tutorials/inference.md b/docs/content/tutorials/inference.md new file mode 100644 index 0000000..6229f98 --- /dev/null +++ b/docs/content/tutorials/inference.md @@ -0,0 +1,9 @@ +--- +layout: "contents" +title: Tutorial on Benchmarking Inference Code +firstpage: +--- + +# Tutorial on Benchmarking Inference Code + +This tutorial is coming soon. Stay tuned! diff --git a/docs/content/tutorials/training.ipynb b/docs/content/tutorials/training.ipynb new file mode 100644 index 0000000..6f33d28 --- /dev/null +++ b/docs/content/tutorials/training.ipynb @@ -0,0 +1,146 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": "# Tutorial on Benchmarking Training Code" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "\n", + "This notebook walks through the steps and components necessary to launch your first A2Perf submission. \n", + "\n", + "## Python quick start\n", + "To install the latest release of A2Perf, run:\n", + "```bash\n", + "git clone https://github.com/Farama-Foundation/A2Perf.git\n", + "cd A2Perf\n", + "git submodule sync --recursive\n", + "git submodule update --init --recursive\n", + "pip install -e .\n", + "pip install -r requirements.txt\n", + "```\n", + "\n", + "\n", + "Both x86-64 and AArch64 (ARM64) architectures are supported.\n", + "\n", + "\n", + "\n", + "Please note that the Windows version is not as well-tested as the Linux and macOS versions.\n", + "It can be used for development and testing, but if you want to conduct serious (time- and resource-intensive) experiments on Windows,\n", + "please consider using [Docker](https://docs.docker.com/docker-for-windows/install/) or [WSL](https://docs.microsoft.com/en-us/windows/wsl/install-win10) with a Linux version.\n", + "\n", + "\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Submission folder\n", + "In order to benchmark your reinforcement learning algorithms, your submission should have the following structure: \n", + "```\n", + "├── my_submission\n", + "│ ├── __init__.py\n", + "│ ├── train.py\n", + "│ ├── inference.py\n", + "│ ├── requirements.txt\n", + "│ ├── your supporting files\n", + "```\n", + "### Explanation of files:\n", + "- `__init__.py`:\n", + "The `__init__.py` file can be an empty file. \n", + "\n", + "- `train.py`:\n", + "The `train.py` file includes the function `train()`, which A2Perf calls for the training of your algorithm. \n", + "\n", + "\n", + "- `inference.py`:\n", + "The `inference.py` file is used for benchmarking the trained model.\n", + "This file includes several key functions.\\\n", + "\\\n", + "__`load_model(env)`:__\n", + "This function loads and returns the trained model. A2Perf passes the environment that is being tested via the `env` parameter. This allows the model loading logic to use any context needed, such as the environment name.\n", + "\\\n", + "__`preprocess_observation(observation)`:__\n", + "Preprocesses the observation before feeding it to the model. If no preprocessing is required, simply return the initial observation.\n", + "\\\n", + "__`infer_once(model, observation)`:__\n", + "Passes a single observation to the loaded model and returns the predicted action. 
This function performs a single inference step.\n", + "\n", + "- `requirements.txt`:\n", + "Specifies any package dependencies required to run the submission code. This file may include version constraints for the dependencies. Having an explicit requirements file ensures a consistent environment for evaluation." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Running the Benchmark\n", + "To run the benchmark, we run the `A2Perf/a2perf/submission/main_submission.py` file from the command line and define several flags. Some of these flags are required, while others are optional.\\\n", + "Required:\n", + "- `gin_config`: the path to the gin config of the environment you want to run. For this tutorial, we use the quadruped_locomotion library. This file can be found under `A2Perf/a2perf/submission/configs/quadruped_locomotion/train.gin`.\n", + "- `participant_module_path`: the path to the module of your submission. This is the path to the `my_submission` folder from the folder structure section.\n", + "\n", + "Optional:\n", + "- `root_dir`: base directory for logs and results. Defaults to `/tmp/xm_local`.\n", + "- `metric_values_dir`: directory to save metric values.\n", + "- `train_logs_dirs`: directories for train logs from all experiments. Defaults to `train_logs`.\n", + "- `extra_gin_bindings`: extra GIN bindings to add configurations on the fly.\n", + "- `run_offline_metrics_only`: boolean defining whether to run offline metrics only. Defaults to `False`.\n", + "\n", + "To run the benchmark, we use the following command:\n", + "```bash\n", + "python3 A2Perf/a2perf/submission/main_submission.py --gin_config='YOUR_PATH' --participant_module_path='YOUR_MODULE' \n", + "```" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/content/web_navigation/WebNavigation-Difficulty-01-v0.ipynb b/docs/content/web_navigation/WebNavigation-Difficulty-01-v0.ipynb new file mode 100644 index 0000000..9fd4cf8 --- /dev/null +++ b/docs/content/web_navigation/WebNavigation-Difficulty-01-v0.ipynb @@ -0,0 +1,150 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Web Navigation\n", + "\n", + "This environment is included in A2Perf, described in ADD A2PERF PAPER." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Description\n", + "The web navigation environment aims at enabling the development of compositional tasks that can be represented by a dependency graph. Using Compositional Design of Environments (CoDE), proposed by Google Research in \n", + "['Environment Generation for Zero-Shot Compositional Reinforcement Learning'](https://openreview.net/pdf?id=CeByDMy0YTL \"Environment Generation for Zero-Shot Compositional Reinforcement Learning\"), websites are generated automatically, after which the policy has to complete the proposed webpages.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Action Space\n", + "The action should be passed as a scalar. Two types of actions are possible. Firstly, abstract navigation allows directly referring to an element, and the profile is irrelevant. In this case, the action is converted to a tuple. If abstract navigation is desired, we have to pass `use_conceptual=True` when initializing the environment. Secondly, the action can refer to a pair of elements and profile fields. 
The agent will then enter the value of the profile key corresponding to the selected DOM element.\n", + "\n", + "For example, with abstract navigation, `action=5` refers to the 5th\n", + "element in the DOM tree where the tree is linearized using the\n", + "`get_dom_elements` function. Without abstract navigation, '5' refers to\n", + "both profile and element indices, i.e., (element_index, profile_index)\n", + "where `action=profile_index*number_of_dom_elements+element_index`." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Observation Space\n", + "When performing one step in the environment, we are given the option to return the raw state or the wrapped state. To return the raw state, we pass `raw_state=True`; by default, the observation is wrapped. \\\n", + "The wrapped structure will return a dictionary with the following keys: `profile_key, profile_value, profile_key_mask, profile_value_mask, dom_elements, dom_profile_joint_mask, time_step, dom_attribute_mask, dom_profile_intersection, dom_profile_intersection_mask, dom_features`, where the values are arrays.\n", + "- `profile_key`, `profile_value`, `profile_key_mask`, `profile_value_mask`: \\\n", + "The profile arrays are 2D arrays with the shape (number of fields, sequence length); the DOM arrays are described below. The first keys of the wrapped observation relate to the profile, while the last few relate to the DOM tree. The profile of the webpage is the user profile, which contains keys and values which need to be filled in. When booking flights, this could be `{\"Departure Date\": \"Friday\", \"Destination Airport\": \"Los Angeles (LAX)\"}`. These keys and values are subsequently embedded into fixed-length vectors, which requires padding the shorter embeddings. This is where the masks come in: a mask contains ones where the embedding relates to the original key or value, and zeros where the embedding is padded. \n", + "- `dom_elements`, `dom_elements_mask`, `dom_attribute_mask`, `dom_features`: \\\n", + "Next, the webpage is returned as DOM elements, again embedded. Examples of DOM elements are, e.g., input fields, buttons, and links. \n",
 + "- `dom_profile_intersection`, `dom_profile_intersection_mask`: \\\n", + "Next, the intersection between the profile and DOM elements is embedded and returned. For each profile field's key and value tokens (such as `[\"first\", \"name\"]`) and for each element's attribute tokens (such as `[\"initial\", \"name\", \":\"]`), the overlapping tokens are embedded. The intersection is a 5D tensor of shape `(number of elements, max number of attributes, number of profile fields, number of action types (2), max sequence length)`.\n", + "- `time_step`: \\\n", + "The timestep is calculated as the number of steps taken, divided by the maximum number of steps allowed.\n", + "\n", + "\\\n", + "When `raw_state=True`, the raw state is returned as a `MiniWoBState` object. This object stores the raw website information, which can be accessed with the following attributes:\n", + "\n", + "- `obs.utterance`: returns the task utterance, a dictionary providing the profile key and value pairs.\n", + "- `obs.phrase`: returns the Phrase object of the utterance.\n", + "- `obs.tokens`: returns the tokens used for the encoding of the utterance.\n", + "- `obs.fields`: returns the key-value pairs extracted from the utterance.\n", + "- `obs.dom`: returns the root DOM structure.\n", + "- `obs.dom_elements`: returns a flattened list of all DOM elements " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Rewards\n", + "At every timestep, the agent receives a small penalty to encourage efficient navigation. Next, a potential-based positive reward is given to the agent after successfully linking a key to a field and entering the correct value. Lastly, the agent receives a task success reward (1.0 or -1.0) when the final state is reached or a timeout is encountered. \n", + "\n", + "To give an example, consider booking flights where the agent has the following profile:\n", + "`{\"Departure Date\": \"Friday\", \"Destination Airport\": \"Los Angeles (LAX)\"}`, which has two fields (Departure Date and Destination Airport). The agent starts by picking a field, and tries to find the corresponding text box in the page. The value corresponding to the field (e.g. Destination Airport) is subsequently typed into the text box. If this is correct, the agent will receive a positive reward of 1/2, where the denominator, 2, is the number of fields in the profile." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Starting State\n", + "Upon resetting the website, all episode fields are emptied and a clean webpage is returned." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Episode End\n", + "The episode can end under multiple conditions.\n", + "Firstly, if the number of steps is greater than the maximum allowed number of steps, the environment is terminated. The maximum number of steps can be defined when initializing the environment; by default, the limit is 6 steps." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Arguments\n", + "When creating the web navigation environment, there are required parameters and optional parameters. Firstly, we have to define the number of websites that need to be created, with the `num_websites` parameter. Next, we have to either specify a difficulty for the websites or provide a design. 
When creating the environment, we thus specify either `difficulty` or `designs`.\n", + "```python\n", + "import gymnasium as gym\n", + "import a2perf.domains.web_navigation\n", + "\n", + "env = gym.make('WebNavigation-v0', num_websites=1, difficulty=1, ...)\n", + "\n", + "```\n", + "#### Required parameters:\n", + "\n", + "| Parameter | Type | Default | Description|\n", + "|--------------------|------|---|---|\n", + "| `num_websites` | int | `None` | The number of websites to be created. |\n", + "| `difficulty` | Optional, int | `1` | Defines the difficulty of the webpage(s) that are being created. A random agent has a >=50% chance of completing a level 1 website, a >=25% chance of completing a level 2 website and a >=10% chance of completing a level 3 website. You either define `difficulty` or `designs`, not both.|\n", + "| `designs` | Optional, list[dict[str, Any]] | `None` | You can pass the design for a number of websites, where each website corresponds to one dictionary in the list. If you specify designs, note that you need to provide at least as many designs as the number of websites defined with `num_websites`. The designs used are then `num_websites` designs randomly sampled from `designs`. You either define `difficulty` or `designs`, not both.|\n", + "\n", + "#### Optional parameters:\n", + "\n", + "| Parameter | Type | Default | Description|\n", + "|--------------------|------|---|---|\n", + "| `seed` | int | `0` | Defines the seed for the random number generator and the `reset` method.|\n", + "| `data_dir` | str | `\"a2perf/domains/web_navigation/environment_generation/data\"` | Path to the directory which contains a zipfile with json files describing difficulty levels for webpages.|\n", + "| `global_vocabulary` | Vocabulary | `vocabulary_node.LockedThreadedVocabulary()`| The global_vocabulary gathers all characters and corresponding tokens. It is used to create embeddings for profile dictionaries and DOM dictionaries. |\n", + "| `use_legacy_reset` | bool | `False` | If `True`, the `reset` method returns only the observation. If `False`, both the observation and info are returned.|\n", + "| `use_legacy_step` | bool | `False` | If `True`, the `step` method returns the observation, reward, (terminated or truncated), info. If `False`, both terminated and truncated are returned.|\n", + "| `step_limit` | int | `25` | Defines the maximum number of steps that can be taken by the environment. |\n", + "| `render_mode` | str | `image` | Possible render modes are `test`, which saves screenshots of the website in the screenshots attribute, `rgb_array`, which returns a 3D array, and `image`, which returns a screenshot of the website.|\n", + "| `raw_state` | bool | `False` | If `True`, the raw observation is returned; else the observation is wrapped. For more info, see the Observation section of this notebook.|\n", + "| `use_conceptual` | bool | `False` | If `True`, the action space expects abstract navigation; otherwise an action refers to a pair of elements and profile fields. 
|\n", + "| `**kwargs` | dict | `None` | |" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "When calling `reset`, you can pass additional kwargs via the `options` argument; this is not possible in `step`." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Version History\n", + "- v0: Initial version release" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/content/web_navigation/index.md b/docs/content/web_navigation/index.md new file mode 100644 index 0000000..f3b22a2 --- /dev/null +++ b/docs/content/web_navigation/index.md @@ -0,0 +1,45 @@ +# Web Navigation Environments + +
+ +```{toctree} +:hidden: +:caption: Web Navigation Environments + +WebNavigation-Difficulty-01-v0 +``` diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..7e69e25 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,80 @@ +--- +hide-toc: false +firstpage: +lastpage: +--- + +```{project-logo} _static/A2Perf-text.png +:alt: A2Perf Logo +``` + +```{project-heading} +A2Perf is a benchmarking suite for evaluating agents on sequential decision-making problems that are relevant to the real world. +``` + +```{figure} _static/REPLACE_ME.gif + :alt: REPLACE ME + :width: 500 +``` + +This library contains a collection of environments from domains spanning +computer chip-floorplanning, web navigation, and quadruped locomotion. + + +The Gymnasium interface allows users to initialize and interact with the A2Perf +environments as follows: + +```{code-block} python +import gymnasium as gym +from a2perf.domains import circuit_training +# from a2perf.domains import web_navigation +# from a2perf.domains import quadruped_locomotion + +# Choose one of the A2Perf environments +env = gym.make("CircuitTraining-Ariane-v0") +# or env = gym.make("WebNavigation-Difficulty-01-v0") +# or env = gym.make("QuadrupedLocomotion-DogPace-v0") + +observation, info = env.reset(seed=42) +for _ in range(1000): + action = env.action_space.sample() # Replace with your policy + observation, reward, terminated, truncated, info = env.step(action) + if terminated or truncated: + observation, info = env.reset() +env.close() +``` + +```{toctree} +:hidden: +:caption: Introduction + +content/basic_usage +content/publications + +``` + +```{toctree} +:hidden: +:caption: Environments + +content/circuit_training/index +content/quadruped_locomotion/index +content/web_navigation/index + +``` + +```{toctree} +:hidden: +:caption: Tutorials + +content/tutorials/training +content/tutorials/inference +``` + +```{toctree} +:hidden: +:caption: Development + +release_notes +Github +``` diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..8084272 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/release_notes.md b/docs/release_notes.md new file mode 100644 index 0000000..d2f0948 --- /dev/null +++ b/docs/release_notes.md @@ -0,0 +1,12 @@ +--- +title: Release Notes +--- + +# Release Notes + +```{eval-rst} +.. 
changelog:: + :github: https://github.com/Farama-Foundation/A2Perf/releases + :pypi: https://pypi.org/project/a2perf/ + :changelog-url: +``` diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 0000000..2ae0c04 --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,5 @@ +sphinx +sphinx-autobuild +myst-parser +git+https://github.com/Farama-Foundation/Celshast#egg=furo +sphinx_github_changelog diff --git a/media/a3_robot_sideview.png b/media/a3_robot_sideview.png new file mode 100644 index 0000000..14e4cd0 Binary files /dev/null and b/media/a3_robot_sideview.png differ diff --git a/media/ariane_placements.png b/media/ariane_placements.png new file mode 100644 index 0000000..c8cc6bf Binary files /dev/null and b/media/ariane_placements.png differ diff --git a/pyproject.toml b/pyproject.toml index 6f6e45d..ab88b96 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,6 +26,7 @@ dependencies = [ "gymnasium", "minari", "absl-py", + "codecarbon" ] dynamic = ["version"] @@ -47,8 +48,10 @@ web-navigation = [ ] quadruped-locomotion = [ "pybullet", + "scipy", ] all = [ + # circuit-training "torch==1.13.1", "tensorflow<2.16.1", "tf-agents", @@ -56,9 +59,11 @@ all = [ "matplotlib", "cairocffi", "shapely", + # web-navigation "selenium", "regex", "chromedriver-py", + # quadruped-locomotion "pybullet", ] diff --git a/quadruped_locomotion_environment.yml b/quadruped_locomotion_environment.yml new file mode 100644 index 0000000..9f1c905 --- /dev/null +++ b/quadruped_locomotion_environment.yml @@ -0,0 +1,233 @@ +channels: +- conda-forge +- defaults +dependencies: +- gcc +- gxx +- gxx_linux-64 +- gcc_linux-64 +- _libgcc_mutex=0.1=conda_forge +- _openmp_mutex=4.5=2_gnu +- abseil-cpp=20230802.0=h6a678d5_2 +- aiohttp=3.9.1=py39hd1e30aa_0 +- aiosignal=1.3.1=pyhd8ed1ab_0 +- async-timeout=4.0.3=pyhd8ed1ab_0 +- attrs=23.2.0=pyh71513ae_0 +- blinker=1.7.0=pyhd8ed1ab_0 +- brotli-python=1.0.9=py39h5a03fae_7 +- c-ares=1.26.0=hd590300_0 +- ca-certificates=2024.2.2=hbcca054_0 +- cachetools=5.3.2=pyhd8ed1ab_0 +- certifi=2024.2.2=pyhd8ed1ab_0 +- cffi=1.16.0=py39h7a31438_0 +- charset-normalizer=3.3.2=pyhd8ed1ab_0 +- click=8.1.7=unix_pyh707e725_0 +- cryptography=42.0.2=py39he6105cc_0 +- decorator=5.1.1=pyhd8ed1ab_0 +- frozenlist=1.4.1=py39hd1e30aa_0 +- fsspec=2023.12.2=pyhca7485f_0 +- gcsfs=2023.12.2.post1=pyhd8ed1ab_0 +- google-api-core=2.16.1=pyhd8ed1ab_0 +- google-auth=2.27.0=pyhca7485f_0 +- google-auth-oauthlib=1.2.0=pyhd8ed1ab_0 +- google-cloud-core=2.4.1=pyhd8ed1ab_0 +- google-cloud-storage=2.14.0=pyhca7485f_0 +- google-crc32c=1.1.2=py39h328ec2c_5 +- google-resumable-media=2.7.0=pyhd8ed1ab_0 +- googleapis-common-protos=1.62.0=pyhd8ed1ab_0 +- grpc-cpp=1.48.2=he1ff14a_4 +- grpcio=1.48.2=py39he1ff14a_4 +- gtest=1.14.0=hdb19cb5_0 +- idna=3.6=pyhd8ed1ab_0 +- ld_impl_linux-64=2.38=h1181459_1 +- libcrc32c=1.1.2=h9c3ff4c_0 +- libffi=3.4.4=h6a678d5_0 +- libgcc-ng=13.2.0=h807b86a_5 +- libgomp=13.2.0=h807b86a_5 +- libprotobuf=3.20.3=he621ea3_0 +- libstdcxx-ng=11.2.0=h1234567_1 +- multidict=6.0.5=py39hd1e30aa_0 +- ncurses=6.4=h6a678d5_0 +- oauthlib=3.2.2=pyhd8ed1ab_0 +- openssl=3.2.1=hd590300_0 +- pip=23.3.1=py39h06a4308_0 +- protobuf=3.20.3=py39h6a678d5_0 +- pyasn1=0.5.1=pyhd8ed1ab_0 +- pyasn1-modules=0.3.0=pyhd8ed1ab_0 +- pycparser=2.21=pyhd8ed1ab_0 +- pyjwt=2.8.0=pyhd8ed1ab_1 +- pyopenssl=24.0.0=pyhd8ed1ab_0 +- pysocks=1.7.1=pyha2e5f31_6 +- python=3.9.18=h955ad1f_0 +- python_abi=3.9=2_cp39 +- pyu2f=0.1.5=pyhd8ed1ab_0 +- re2=2022.04.01=h27087fc_0 +- readline=8.2=h5eee18b_0 +- 
requests=2.31.0=pyhd8ed1ab_0 +- requests-oauthlib=1.3.1=pyhd8ed1ab_0 +- rsa=4.9=pyhd8ed1ab_0 +- setuptools=68.2.2=py39h06a4308_0 +- six=1.16.0=pyh6c4a22f_0 +- sqlite=3.41.2=h5eee18b_0 +- tk=8.6.12=h1ccaba5_0 +- urllib3=2.2.0=pyhd8ed1ab_0 +- wheel=0.41.2=py39h06a4308_0 +- xz=5.4.5=h5eee18b_0 +- yarl=1.9.4=py39hd1e30aa_0 +- zlib=1.2.13=h5eee18b_0 +- pip: + - absl-py==2.1.0 + - alabaster==0.7.16 + - arrow==1.3.0 + - astunparse==1.6.3 + - attr==0.3.2 + - babel==2.14.0 + - black==24.1.1 + - cairocffi==1.6.1 + - chardet==5.2.0 + - cloudpickle==3.0.0 + - colorama==0.4.6 + - contourpy==1.2.0 + - cycler==0.12.1 + - dash==2.15.0 + - dash-bootstrap-components==1.5.0 + - dash-core-components==2.0.0 + - dash-html-components==2.0.0 + - dash-table==5.0.0 + - dataclasses==0.6 + - distlib==0.3.8 + - dm-reverb==0.14.0 + - dm-sonnet==2.0.2 + - dm-tree==0.1.8 + - docutils==0.20.1 + - easyprocess==1.1 + - entrypoint2==1.1 + - exceptiongroup==1.2.0 + - farama-notifications==0.0.4 + - filelock==3.13.1 + - fire==0.5.0 + - flake8==7.0.0 + - flask==3.0.2 + - flatbuffers==23.5.26 + - fonttools==4.47.2 + - fuzzywuzzy==0.18.0 + - gast==0.5.4 + - gin-config==0.5.0 + - gnureadline==8.1.2 + - google-pasta==0.2.0 + - gym==0.23.0 + - gym-notices==0.0.8 + - gymnasium==0.29.1 + - h11==0.14.0 + - h5py==3.10.0 + - imagesize==1.4.1 + - importlib-metadata==7.0.1 + - importlib-resources==6.1.1 + - iniconfig==2.0.0 + - isort==5.13.2 + - itsdangerous==2.1.2 + - jinja2==3.1.3 + - keras==2.15.0 + - kiwisolver==1.4.5 + - libclang==16.0.6 + - markdown==3.5.2 + - markdown-it-py==3.0.0 + - markupsafe==2.1.5 + - matplotlib==3.8.2 + - mccabe==0.7.0 + - mdurl==0.1.2 + - minari==0.4.3 + - ml-dtypes==0.2.0 + - mypy==1.8.0 + - mypy-extensions==1.0.0 + - nest-asyncio==1.6.0 + - numpy==1.23.5 + - nvidia-cublas-cu12==12.2.5.6 + - nvidia-cuda-cupti-cu12==12.2.142 + - nvidia-cuda-nvcc-cu12==12.2.140 + - nvidia-cuda-nvrtc-cu12==12.2.140 + - nvidia-cuda-runtime-cu12==12.2.140 + - nvidia-cudnn-cu12==8.9.4.25 + - nvidia-cufft-cu12==11.0.8.103 + - nvidia-curand-cu12==10.3.3.141 + - nvidia-cusolver-cu12==11.5.2.141 + - nvidia-cusparse-cu12==12.1.2.141 + - nvidia-nccl-cu12==2.16.5 + - nvidia-nvjitlink-cu12==12.2.140 + - opt-einsum==3.3.0 + - outcome==1.3.0.post0 + - packaging==23.2 + - pandas==2.2.0 + - pathspec==0.12.1 + - patool==1.15.0 + - pillow==10.2.0 + - pkgconfig==1.5.5 + - platformdirs==4.2.0 + - plotly==5.18.0 + - pluggy==1.4.0 + - portion==2.4.2 + - portpicker==1.6.0 + - psutil==5.9.8 + - py-cpuinfo==9.0.0 + - pybullet==3.2.6 + - pybullet-envs-gymnasium==0.4.0 + - pycodestyle==2.11.1 + - pyfiglet==1.0.2 + - pyflakes==3.2.0 + - pygame==2.1.3 + - pygments==2.17.2 + - pynvml==11.5.0 + - pyparsing==3.1.1 + - pyproject-api==1.6.1 + - pytest==8.0.0 + - python-dateutil==2.8.2 + - python-dotenv==1.0.1 + - pytz==2024.1 + - pyunpack==0.3 + - pyyaml==6.0.1 + - regex==2023.12.25 + - responses==0.24.1 + - retrying==1.3.4 + - rich==13.7.0 + - rlds==0.1.8 + - scipy==1.12.0 + - selenium<4.17.0 + - shapely==2.0.2 + - shellingham==1.5.4 + - sniffio==1.3.0 + - snowballstemmer==2.2.0 + - sortedcontainers==2.4.0 + - sphinx==7.2.6 + - sphinx-rtd-theme==2.0.0 + - sphinxcontrib-applehelp==1.0.8 + - sphinxcontrib-devhelp==1.0.6 + - sphinxcontrib-htmlhelp==2.0.5 + - sphinxcontrib-jquery==4.1 + - sphinxcontrib-jsmath==1.0.1 + - sphinxcontrib-qthelp==1.0.7 + - sphinxcontrib-serializinghtml==1.1.10 + - tabulate==0.9.0 + - tenacity==8.2.3 + - tensorboard==2.15.1 + - tensorboard-data-server==0.7.2 + - tensorflow==2.15.0.post1 + - tensorflow-estimator==2.15.0 + - 
tensorflow-io-gcs-filesystem==0.35.0 + - tensorflow-probability==0.23.0 + - termcolor==2.4.0 + - tf-agents==0.19.0 + - timeout-decorator==0.5.0 + - tomli==2.0.1 + - tox==4.12.1 + - tqdm==4.66.1 + - trio==0.24.0 + - trio-websocket==0.11.1 + - typer==0.9.0 + - types-python-dateutil==2.8.19.20240106 + - tzdata==2023.4 + - virtualenv==20.25.0 + - webdriver-manager==4.0.1 + - werkzeug==3.0.1 + - wrapt==1.14.1 + - wsproto==1.2.0 + - zipp==3.17.0 diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..f4acf47 --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,4 @@ +matplotlib +pandas +seaborn +xmanager diff --git a/requirements.txt b/requirements.txt index d7f4eaf..b84ed87 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,24 +1,5 @@ -psutil -setuptools -matplotlib absl-py gin-config -black -scipy -regex -gym - -# reliability -# -r a2perf/metrics/reliability/requirements.txt - -# codecarbon --r a2perf/metrics/system/codecarbon/requirements-dev.txt - -# quadruped --r a2perf/domains/quadruped_locomotion/requirements.txt - -# web_nav --r a2perf/domains/web_navigation/requirements.txt - -# circuit_training --r a2perf/domains/circuit_training/requirements.txt +gymnasium +matplotlib +minari diff --git a/web_navigation_environment.yml b/web_navigation_environment.yml new file mode 100644 index 0000000..486d436 --- /dev/null +++ b/web_navigation_environment.yml @@ -0,0 +1,229 @@ +channels: + - conda-forge + - defaults +dependencies: + - gcc + - gxx + - gxx_linux-64 + - gcc_linux-64 + - _libgcc_mutex=0.1=conda_forge + - _openmp_mutex=4.5=2_gnu + - abseil-cpp=20230802.0=h6a678d5_2 + - aiohttp=3.9.1=py310h2372a71_0 + - aiosignal=1.3.1=pyhd8ed1ab_0 + - async-timeout=4.0.3=pyhd8ed1ab_0 + - attrs=23.2.0=pyh71513ae_0 + - blinker=1.7.0=pyhd8ed1ab_0 + - brotli-python=1.0.9=py310hd8f1fbe_7 + - bzip2=1.0.8=h7b6447c_0 + - c-ares=1.26.0=hd590300_0 + - ca-certificates=2024.2.2=hbcca054_0 + - cachetools=5.3.2=pyhd8ed1ab_0 + - certifi=2024.2.2=pyhd8ed1ab_0 + - cffi=1.16.0=py310h2fee648_0 + - charset-normalizer=3.3.2=pyhd8ed1ab_0 + - click=8.1.7=unix_pyh707e725_0 + - cryptography=42.0.2=py310hb8475ec_0 + - decorator=5.1.1=pyhd8ed1ab_0 + - frozenlist=1.4.1=py310h2372a71_0 + - fsspec=2024.2.0=pyhca7485f_0 + - gcsfs=2024.2.0=pyhd8ed1ab_0 + - google-api-core=2.16.1=pyhd8ed1ab_0 + - google-auth=2.27.0=pyhca7485f_0 + - google-auth-oauthlib=1.2.0=pyhd8ed1ab_0 + - google-cloud-core=2.4.1=pyhd8ed1ab_0 + - google-cloud-storage=2.14.0=pyhca7485f_0 + - google-crc32c=1.1.2=py310hc5c09a0_5 + - google-resumable-media=2.7.0=pyhd8ed1ab_0 + - googleapis-common-protos=1.62.0=pyhd8ed1ab_0 + - grpc-cpp=1.48.2=he1ff14a_4 + - grpcio=1.48.2=py310he1ff14a_4 + - gtest=1.14.0=hdb19cb5_0 + - idna=3.6=pyhd8ed1ab_0 + - ld_impl_linux-64=2.38=h1181459_1 + - libcrc32c=1.1.2=h9c3ff4c_0 + - libgcc-ng=13.2.0=h807b86a_5 + - libgomp=13.2.0=h807b86a_5 + - libprotobuf=3.20.3=he621ea3_0 + - libstdcxx-ng=11.2.0=h1234567_1 + - libuuid=1.41.5=h5eee18b_0 + - multidict=6.0.5=py310h2372a71_0 + - ncurses=6.4=h6a678d5_0 + - oauthlib=3.2.2=pyhd8ed1ab_0 + - openssl=3.2.1=hd590300_0 + - pip=23.3.1=py310h06a4308_0 + - protobuf=3.20.3=py310h6a678d5_0 + - pyasn1=0.5.1=pyhd8ed1ab_0 + - pyasn1-modules=0.3.0=pyhd8ed1ab_0 + - pycparser=2.21=pyhd8ed1ab_0 + - pyjwt=2.8.0=pyhd8ed1ab_1 + - pyopenssl=24.0.0=pyhd8ed1ab_0 + - pysocks=1.7.1=pyha2e5f31_6 + - python=3.10.13=h955ad1f_0 + - python_abi=3.10=2_cp310 + - pyu2f=0.1.5=pyhd8ed1ab_0 + - re2=2022.04.01=h27087fc_0 + - readline=8.2=h5eee18b_0 + - requests=2.31.0=pyhd8ed1ab_0 + - 
requests-oauthlib=1.3.1=pyhd8ed1ab_0 + - rsa=4.9=pyhd8ed1ab_0 + - setuptools=68.2.2=py310h06a4308_0 + - six=1.16.0=pyh6c4a22f_0 + - sqlite=3.41.2=h5eee18b_0 + - tk=8.6.12=h1ccaba5_0 + - urllib3=2.2.0=pyhd8ed1ab_0 + - wheel=0.41.2=py310h06a4308_0 + - xz=5.4.5=h5eee18b_0 + - yarl=1.9.4=py310h2372a71_0 + - zlib=1.2.13=h5eee18b_0 + - pip: + - absl-py==2.1.0 + - alabaster==0.7.16 + - arrow==1.3.0 + - astunparse==1.6.3 + - attr==0.3.2 + - babel==2.14.0 + - black==24.1.1 + - cairocffi==1.6.1 + - chardet==5.2.0 + - cloudpickle==3.0.0 + - colorama==0.4.6 + - dash==2.15.0 + - dash-bootstrap-components==1.5.0 + - dash-core-components==2.0.0 + - dash-html-components==2.0.0 + - dash-table==5.0.0 + - dataclasses==0.6 + - distlib==0.3.8 + - dm-reverb==0.14.0 + - dm-sonnet==2.0.2 + - dm-tree==0.1.8 + - docutils==0.20.1 + - easyprocess==1.1 + - entrypoint2==1.1 + - exceptiongroup==1.2.0 + - farama-notifications==0.0.4 + - filelock==3.13.1 + - fire==0.5.0 + - flake8==7.0.0 + - flask==3.0.2 + - flatbuffers==23.5.26 + - fuzzywuzzy==0.18.0 + - gast==0.5.4 + - gin-config==0.5.0 + - gnureadline==8.1.2 + - google-pasta==0.2.0 + - gym==0.23.0 + - gym-notices==0.0.8 + - gymnasium==0.29.1 + - h11==0.14.0 + - h5py==3.10.0 + - imagesize==1.4.1 + - importlib-metadata==7.0.1 + - iniconfig==2.0.0 + - isort==5.13.2 + - itsdangerous==2.1.2 + - jinja2==3.1.3 + - keras==2.15.0 + - libclang==16.0.6 + - markdown==3.5.2 + - markdown-it-py==3.0.0 + - markupsafe==2.1.5 + - matplotlib==3.8.2 + - mccabe==0.7.0 + - mdurl==0.1.2 + - minari==0.4.3 + - ml-dtypes==0.2.0 + - mypy==1.8.0 + - mypy-extensions==1.0.0 + - nest-asyncio==1.6.0 + - numpy==1.23.5 + - nvidia-cublas-cu12==12.2.5.6 + - nvidia-cuda-cupti-cu12==12.2.142 + - nvidia-cuda-nvcc-cu12==12.2.140 + - nvidia-cuda-nvrtc-cu12==12.2.140 + - nvidia-cuda-runtime-cu12==12.2.140 + - nvidia-cudnn-cu12==8.9.4.25 + - nvidia-cufft-cu12==11.0.8.103 + - nvidia-curand-cu12==10.3.3.141 + - nvidia-cusolver-cu12==11.5.2.141 + - nvidia-cusparse-cu12==12.1.2.141 + - nvidia-nccl-cu12==2.16.5 + - nvidia-nvjitlink-cu12==12.2.140 + - opt-einsum==3.3.0 + - outcome==1.3.0.post0 + - packaging==23.2 + - pandas==2.2.0 + - pathspec==0.12.1 + - patool==2.1.1 + - pkgconfig==1.5.5 + - platformdirs==4.2.0 + - plotly==5.18.0 + - pluggy==1.4.0 + - portion==2.4.2 + - portpicker==1.6.0 + - psutil==5.9.8 + - py-cpuinfo==9.0.0 + - pybullet==3.2.6 + - pybullet-envs-gymnasium==0.4.0 + - pycodestyle==2.11.1 + - pyfiglet==1.0.2 + - pyflakes==3.2.0 + - pygame==2.1.3 + - pygments==2.17.2 + - pynvml==11.5.0 + - pyparsing==3.1.1 + - pyproject-api==1.6.1 + - pytest==8.0.0 + - python-dateutil==2.8.2 + - python-dotenv==1.0.1 + - pytz==2024.1 + - pyunpack==0.3 + - pyyaml==6.0.1 + - regex==2023.12.25 + - responses==0.24.1 + - retrying==1.3.4 + - rich==13.7.0 + - rlds==0.1.8 + - scipy==1.12.0 + - selenium<4.17.0 + - shapely==2.0.2 + - shellingham==1.5.4 + - sniffio==1.3.0 + - snowballstemmer==2.2.0 + - sortedcontainers==2.4.0 + - sphinx==7.2.6 + - sphinx-rtd-theme==2.0.0 + - sphinxcontrib-applehelp==1.0.8 + - sphinxcontrib-devhelp==1.0.6 + - sphinxcontrib-htmlhelp==2.0.5 + - sphinxcontrib-jquery==4.1 + - sphinxcontrib-jsmath==1.0.1 + - sphinxcontrib-qthelp==1.0.7 + - sphinxcontrib-serializinghtml==1.1.10 + - tabulate==0.9.0 + - tenacity==8.2.3 + - tensorboard==2.15.1 + - tensorboard-data-server==0.7.2 + - tensorflow==2.15.0.post1 + - tensorflow-estimator==2.15.0 + - tensorflow-io-gcs-filesystem==0.36.0 + - tensorflow-probability==0.23.0 + - termcolor==2.4.0 + - tf-agents==0.19.0 + - timeout-decorator==0.5.0 + - tomli==2.0.1 + - 
tox==4.12.1 + - tqdm==4.66.1 + - trio==0.24.0 + - trio-websocket==0.11.1 + - typer==0.9.0 + - types-python-dateutil==2.8.19.20240106 + - typing-extensions==4.5.0 + - tzdata==2023.4 + - virtualenv==20.25.0 + - webdriver-manager==4.0.1 + - werkzeug==3.0.1 + - wrapt==1.14.1 + - wsproto==1.2.0 + - zipp==3.17.0 diff --git a/xm_launch.py b/xm_launch.py new file mode 100644 index 0000000..f96f14b --- /dev/null +++ b/xm_launch.py @@ -0,0 +1,155 @@ +import os +import shutil + +from absl import app +from absl import flags +from xmanager import xm +from xmanager import xm_local + +from a2perf.constants import BenchmarkDomain +from a2perf.launch.docker_utils import DOCKER_EXPERIMENT_DIR +from a2perf.launch.docker_utils import DOCKER_PARTICIPANT_DIR +from a2perf.launch.docker_utils import GENERIC_GIN_CONFIG_NAME +from a2perf.launch.docker_utils import get_docker_instructions +from a2perf.launch.docker_utils import get_entrypoint +from typing import Any, Dict + +_NUM_GPUS = flags.DEFINE_integer("num-gpus", 1, "Number of GPUs to use") +_CPU_BASE_IMAGE = flags.DEFINE_string( + "cpu-base-image", + "gcr.io/deeplearning-platform-release/base-cpu:latest", + "Base image for CPU jobs", +) +_GPU_BASE_IMAGE = flags.DEFINE_string( + "gpu-base-image", + "gcr.io/deeplearning-platform-release/base-gpu:latest", + "Base image for GPU jobs", +) +_DOMAIN = flags.DEFINE_enum( + "domain", + None, + [ + BenchmarkDomain.QUADRUPED_LOCOMOTION.value, + BenchmarkDomain.WEB_NAVIGATION.value, + BenchmarkDomain.CIRCUIT_TRAINING.value, + ], + "Domain to run", +) +_USER_ID = flags.DEFINE_integer("user_id", 1000, "User ID") +_USER = flags.DEFINE_string("user", os.getlogin(), "User") +_EXPERIMENT_ID = flags.DEFINE_string("experiment-id", None, "Experiment number") +_EXPERIMENT_NAME = flags.DEFINE_string("experiment-name", None, "Experiment name") +_INTERACTIVE = flags.DEFINE_bool( + "interactive", False, "Whether to run in interactive mode" +) +_SUBMISSION_GIN_CONFIG_PATH = flags.DEFINE_string( + "submission-gin-config-path", + None, + "Path to the gin configuration file", +) +_PARTICIPANT_MODULE_PATH = flags.DEFINE_string( + "participant-module-path", + None, + "Path to the participant training and inference Python modules", +) +_PARTICIPANT_ARGS = flags.DEFINE_string( + "participant-args", + None, + "Additional arguments to pass to the participant's train function", +) +_ROOT_DIR = flags.DEFINE_string( + "root-dir", + None, + "Root directory for the experiment", +) + + +def main(_): + """Main function to set up and run the experiment.""" + create_experiment = xm_local.create_experiment + + with create_experiment(experiment_title=_EXPERIMENT_NAME.value) as experiment: + experiment_id = _EXPERIMENT_ID.value or experiment.experiment_id + base_root_dir = os.path.join( + os.path.expanduser(_ROOT_DIR.value), + str(experiment_id), + _EXPERIMENT_NAME.value, + ) + + async def make_job(work_unit: xm.WorkUnit, **hparams: Dict[str, Any]) -> None: + work_unit_id = work_unit.work_unit_id + full_root_dir = os.path.join(base_root_dir, str(work_unit_id)) + os.makedirs(full_root_dir, exist_ok=True) + + participant_module_path = _PARTICIPANT_MODULE_PATH.value + + docker_gin_config_path = os.path.join( + full_root_dir, GENERIC_GIN_CONFIG_NAME + ) + try: + shutil.copy(_SUBMISSION_GIN_CONFIG_PATH.value, docker_gin_config_path) + except IOError as e: + raise IOError(f"Error copying gin config file: {e}") + + executor = xm_local.Local( + requirements=xm.JobRequirements( + resources={xm.ResourceType.LOCAL_GPU: _NUM_GPUS.value}, + ), + 
docker_options=xm_local.DockerOptions( + ports={}, + volumes={ + full_root_dir: DOCKER_EXPERIMENT_DIR, + participant_module_path: DOCKER_PARTICIPANT_DIR, + }, + interactive=_INTERACTIVE.value, + ), + experimental_stream_output=True, + ) + docker_instructions = get_docker_instructions( + uid=_USER_ID.value, env_name=_DOMAIN.value, user=_USER.value + ) + + base_image = ( + _GPU_BASE_IMAGE.value if _NUM_GPUS.value > 0 else _CPU_BASE_IMAGE.value + ) + + [executable] = experiment.package( + [ + xm.python_container( + executor_spec=executor.Spec(), + path=".", + use_deep_module=True, + base_image=base_image, + docker_instructions=docker_instructions, + entrypoint=get_entrypoint(_DOMAIN.value, _USER.value), + ) + ] + ) + + hparams.update( + { + "root-dir": DOCKER_EXPERIMENT_DIR, + "gin-config": os.path.join( + DOCKER_EXPERIMENT_DIR, GENERIC_GIN_CONFIG_NAME + ), + "participant-module-path": DOCKER_PARTICIPANT_DIR, + "participant-args": _PARTICIPANT_ARGS.value, + } + ) + + job = xm.Job(executable, args=hparams, executor=executor) + work_unit.add(job) + + experiment.add(make_job, args={}) + + +if __name__ == "__main__": + flags.mark_flags_as_required( + [ + _DOMAIN.name, + _EXPERIMENT_NAME.name, + _ROOT_DIR.name, + _SUBMISSION_GIN_CONFIG_PATH.name, + ] + ) + app.run(main)
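
For reference, a typical invocation of this launcher might look like the sketch below. It is based only on the flags defined above; the domain string, experiment name, and paths are hypothetical placeholders (the exact value accepted by `--domain` comes from `BenchmarkDomain`, which is not shown in this diff).

```bash
python xm_launch.py \
  --domain=quadruped_locomotion \
  --experiment-name=quadruped_ppo_baseline \
  --root-dir=~/a2perf_experiments \
  --submission-gin-config-path=a2perf/submission/configs/quadruped_locomotion/train.gin \
  --participant-module-path=./my_submission \
  --num-gpus=1
```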