Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
talikagupta committed Oct 18, 2024
0 parents commit 2ce58fb
Show file tree
Hide file tree
Showing 169 changed files with 443,421 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

35 changes: 35 additions & 0 deletions .github/workflows/cargo-test-cs511.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# CI workflow: builds the TPC-H examples in deepola/wake and runs the
# `tpch_polars` example in `test` mode for each graded query.
name: CS511 AutoGrader

# Runs on every push to any branch.
on: push

env:
  CARGO_TERM_COLOR: always

jobs:
  tpch:
    name: TPC-H Query
    runs-on: ubuntu-latest
    timeout-minutes: 30
    steps:
      # checkout@v2 targets a retired Node runtime; v4 is the maintained line.
      - uses: actions/checkout@v4
      # Build once up front so the per-query steps below only pay for the run.
      - name: Build TPCH examples
        run: cargo build --release --examples
        working-directory: deepola/wake
      - name: Test Provided Query Q1
        run: cargo run --release --example tpch_polars -- test q1
        working-directory: deepola/wake
      - name: Test Provided Query Q14
        run: cargo run --release --example tpch_polars -- test q14
        working-directory: deepola/wake
      - name: Test New Query a
        run: cargo run --release --example tpch_polars -- test qa
        working-directory: deepola/wake
      - name: Test New Query b
        run: cargo run --release --example tpch_polars -- test qb
        working-directory: deepola/wake
      - name: Test New Query c
        run: cargo run --release --example tpch_polars -- test qc
        working-directory: deepola/wake
      - name: Test New Query d
        run: cargo run --release --example tpch_polars -- test qd
        working-directory: deepola/wake
133 changes: 133 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
# Project-specific ignores
# Any file whose name ends in .lock.
# NOTE(review): this also matches Cargo.lock - confirm that leaving the
# lockfile untracked is intended.
*.lock
# Zip archives
*.zip
# The outputs/ directory under src/ (path is relative to this .gitignore)
src/outputs/
# macOS Finder metadata files
.DS_Store
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
# NOTE(review): this pattern matches any directory named target/, which also
# covers Cargo's default build output directory - presumably relied on by the
# Rust crates in this repository; confirm before removing.
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Git submodule checked out at ./tpch-kit from the upstream tpch-kit repository.
# NOTE(review): presumably the TPC-H generator used to produce the .tbl input
# files - confirm against the data-preparation steps.
[submodule "tpch-kit"]
path = tpch-kit
url = https://github.com/gregrahn/tpch-kit/
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# DeepOLA: Online Aggregation for Nested Queries
Online Aggregation (OLA) is a technique that incrementally improves query result estimates, allowing the user to observe the query's progress and control its execution on the fly. OLA provides the user with an approximate estimate of the query result as soon as it has processed a small partition of the data. With DeepOLA, we intend to speed up approximate (as well as exact) query computation when the available data is divided into chunks that can be processed online and merged to obtain the complete result.

## Setup Instructions
DeepOLA is implemented in Rust. The current implementation has been tested with `rustc 1.81.0`. You can install Rust by following https://www.rust-lang.org/tools/install. Once Rust is installed, follow these instructions to set up the repository.
- Clone the repository
`git clone https://github.com/illinoisdata/cs511-fall2024-p2.git && cd cs511-fall2024-p2/deepola/`
- Make sure the pre-generated TPC-H data is stored in the `resources/tpc-h/data/` directory (`scale=1/partition=10/*.tbl` files).
- To run the provided example queries, from `deepola/wake`, run `cargo run --release --example tpch_polars -- query q<query-no>`. Example: `cargo run --release --example tpch_polars -- query q1`.
22 changes: 22 additions & 0 deletions baselines/postgres/runPostgresQueries.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/usr/bin/env bash
# Times every query file found in QUERY_FILE against PostgreSQL.
# Each file is executed 10 times with psql; the elapsed real time of every
# run (plus anything psql prints on stderr) is appended to $MY_DIR/timeFile.
#
# Requires bash: the script relies on brace expansion ({1..10}) and on the
# `time` reserved word honouring TIMEFORMAT, neither of which plain sh provides.

#Update accordingly
MY_DIR='/home/awsuser/QueryResults'
# NOTE(review): this points at the presto query directory although the script
# runs the queries through psql - confirm this is the intended location.
QUERY_FILE='/home/awsuser/DeepOLA/baselines/presto/queries'

# The redirections below fail if the results directory does not exist yet.
mkdir -p -- "$MY_DIR" || exit 1

# Make `time` report only the elapsed real time, in long (minutes/seconds) form.
TIMEFORMAT=%lR

for FILE in "$QUERY_FILE"/*; do
  # An unmatched glob is left as the literal pattern; skip it (and anything
  # else that is not a regular file) instead of handing it to psql.
  [[ -f "$FILE" ]] || continue

  echo "This is the file we are running $FILE" >> "$MY_DIR/timeFile"

  for _ in {1..10}; do
    #This should clear the OS cache
    #sudo sh -c 'echo 1 > /proc/sys/vm/drop_caches'

    # Only stderr of the group (the timing line and psql diagnostics) goes to
    # timeFile; the query result itself goes to outputFile.
    { time /usr/local/pgsql/bin/psql -d test -U postgres -f "$FILE" > "$MY_DIR/outputFile"; } 2>> "$MY_DIR/timeFile"

    #Note have to clear outputFile otherwise the files can causes issues when trying to open it due to the size
    #Remove this line if you want the query output
    : > "$MY_DIR/outputFile"
  done
done

unset TIMEFORMAT
42 changes: 42 additions & 0 deletions baselines/postgres/setup_postgres.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/bin/bash
#setup_postgres.sh
#PostgreSQL version 14.2
#
# Starts a disposable PostgreSQL container, creates and loads the TPC-H
# tables, adds indexes, runs queries 1..22 with \timing enabled, writes each
# query's output to results/result_<n>.csv on the host, then kills the
# container.
# NOTE(review): the image is pulled without a tag, so the version above is not
# enforced by this script - confirm whether it should be pinned.

#pull docker image
docker pull postgres

#variables
container="postgres-db" #container name
password="docker" #container password
port="5563" #host port for PSQL
path="$HOME/DeepOLA" #path to main directory
scale=1 #scale of data
partition=1 #number of partitions of data
results="$path/baselines/postgres/results" #host directory mounted into the container

#create the results directory up front; if the bind-mount source is missing,
#docker creates it itself (typically owned by root) and the redirects in the
#query loop can then fail with a permission error
mkdir -p -- "$results" || exit 1

#run psql as the postgres role inside the container; arguments are passed through.
#No -it: a TTY is not needed, fails when the script runs without one, and adds
#carriage returns to redirected output.
run_psql() {
  docker exec "$container" psql -U postgres "$@"
}

#kill the container on every exit path so an aborted run does not leave it behind
cleanup() {
  docker kill "$container"
}

#run container mounted onto local directory
#The server inside the image listens on 5432, so the host port is published
#to 5432 rather than to the same number inside the container.
#NOTE(review): only the results directory is mounted at /deepola, yet the
#commands below read /deepola/baselines/resources/*.sql and
#/deepola/data/tpc-h/... - confirm those files exist under the results
#directory or that the mount source should be "$path" instead.
docker run --rm --name "$container" -e POSTGRES_PASSWORD="$password" -d \
  -p "$port:5432" -v "$results:/deepola" postgres \
  || { echo "failed to start container $container" >&2; exit 1; }
trap cleanup EXIT

#wait for the server to accept connections before issuing any SQL.
#Probe over TCP: during first-time initialisation the image runs a temporary
#server that is reachable only through the unix socket, which would otherwise
#report "ready" just before the real server is restarted.
ready=0
for _ in {1..60}; do
  if docker exec "$container" pg_isready -h 127.0.0.1 -U postgres > /dev/null 2>&1; then
    ready=1
    break
  fi
  sleep 1
done
if (( ! ready )); then
  echo "postgres in container $container did not become ready in time" >&2
  exit 1
fi

#No separate "su postgres" step: every docker exec starts a fresh process, so
#switching user in one exec cannot affect the next; psql -U postgres selects
#the role for each command instead.

#create tables in PSQL
run_psql -c "\i /deepola/baselines/resources/tpch-create.sql"
run_psql -c "\i /deepola/baselines/resources/execution_stats.sql"

#copy .tbl files into database
for tbl in nation region part customer supplier partsupp orders lineitem; do
  run_psql -c "\copy $tbl FROM '/deepola/data/tpc-h/scale=$scale/partition=$partition/$tbl.tbl' WITH (FORMAT csv, DELIMITER '|')"
done

#add indexes
run_psql -c "\i /deepola/baselines/resources/tpch-alter.sql"

#run the queries
for i in {1..22}; do
  run_psql -c "\pset pager off" -c "\timing" -c "\i /deepola/baselines/resources/$i.sql" -c "\timing" > "$results/result_$i.csv"
done

#kill container: handled by the EXIT trap installed above
Loading

0 comments on commit 2ce58fb

Please sign in to comment.