Skip to content

Commit

Permalink
First Version of Simulation
Browse files Browse the repository at this point in the history
  • Loading branch information
cloudray8580 committed Jul 17, 2020
1 parent 2b4e9fc commit 537a7e0
Show file tree
Hide file tree
Showing 47 changed files with 23,863 additions and 2,901 deletions.
884 changes: 884 additions & 0 deletions .ipynb_checkpoints/AQWA-checkpoint.ipynb

Large diffs are not rendered by default.

159 changes: 159 additions & 0 deletions .ipynb_checkpoints/BatchRead-checkpoint.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import time\n",
"from numpy import genfromtxt"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"- - - 0 - - - \n",
"load time: 0.7495675086975098\n",
"- - - 1 - - - \n",
"load time: 0.8139548301696777\n",
"- - - 2 - - - \n",
"load time: 0.855985164642334\n",
"- - - 3 - - - \n",
"load time: 0.9320805072784424\n",
"- - - 4 - - - \n",
"load time: 1.012078046798706\n",
"- - - 5 - - - \n",
"load time: 1.1780333518981934\n",
"- - - 6 - - - \n",
"load time: 1.167330026626587\n",
"- - - 7 - - - \n",
"load time: 1.2551357746124268\n",
"- - - 8 - - - \n",
"load time: 1.4044301509857178\n",
"- - - 9 - - - \n",
"load time: 1.4435057640075684\n"
]
}
],
"source": [
"batch_size = 100000\n",
"TOTAL_SIZE = 100000\n",
"for i in range(10):\n",
" s_time = time.time()\n",
" dataset = genfromtxt('C:/Users/Cloud/iCloudDrive/HUAWEI_LKD/Dataset/Legacy/data/TPCH_12M_8Field.csv', delimiter=',',\n",
" skip_header = i*batch_size, max_rows = batch_size)\n",
" e_time = time.time()\n",
" print('- - -',i,'- - - ')\n",
" print('load time: ', e_time - s_time)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# conclusion: numpy method is not good"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"process time: 1.2433969974517822\n",
"1000000\n",
"process time: 1.279536247253418\n",
"1000000\n",
"process time: 1.2443997859954834\n",
"1000000\n",
"process time: 1.2444000244140625\n",
"1000000\n",
"process time: 1.1829118728637695\n",
"1000000\n",
"process time: 1.1985292434692383\n",
"1000000\n",
"process time: 1.2190296649932861\n",
"1000000\n",
"process time: 1.2278022766113281\n",
"1000000\n",
"process time: 1.246352195739746\n",
"1000000\n",
"process time: 1.2473280429840088\n",
"1000000\n",
"process time: 1.2580626010894775\n",
"1000000\n",
"process time: 1.1985280513763428\n",
"997995\n"
]
}
],
"source": [
"last_time = time.time()\n",
"for chunk_df in pd.read_csv('C:/Users/Cloud/iCloudDrive/HUAWEI_LKD/Dataset/Legacy/data/TPCH_12M_8Field.csv', chunksize=1000000):\n",
" current_time = time.time()\n",
" print('process time: ',current_time - last_time)\n",
" print(len(chunk_df))\n",
" last_time = current_time"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# conclusion: pandas method for chunk processing is acceptable"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.10"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
65 changes: 65 additions & 0 deletions .ipynb_checkpoints/Future-checkpoint.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"class HistManager:\n",
" \n",
" def __init__(self, m, n):\n",
" self.query_hist\n",
" self.query_index_hist\n",
" \n",
" def "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"\n",
"# new kdnode format: [dims, l1,l2...ln,u1,u2,...,un, id, pid, cid1, cid2]\n",
"class PartitionLayout:\n",
" \n",
" def __init__(self):\n",
" self.idx = None # used to index the partitions\n",
" self.kdnodes = {} # id : partition dictionary\n",
" \n",
" def insert_kdnode(self, kdnode, kid):\n",
" pass\n",
" \n",
" def delete_kdnode(self, kdnode, kid):\n",
" pass"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.10"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1.5.1\n"
]
}
],
"source": [
"import numpy as np\n",
"import torch\n",
"print(torch.__version__)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# LSTM\n",
"rnn = nn.LSTM(10, 20, 2) # number of expected features in the input x, number of features in the hidden state h, Number of recurrent layers\n",
"input = torch.randn(5, 3, 10) # sequence length, batch, input size (features)\n",
"h0 = torch.randn(2, 3, 20) # num_layers * num_directions, batch, hidden_size\n",
"c0 = torch.randn(2, 3, 20) # num_layers * num_directions, batch, hidden_size\n",
"output, (hn, cn) = rnn(input, (h0, c0)) # output: seq_len, batch, num_directions * hidden_size"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tensor([1., 1., 1., 1., 1.])\n",
"[1. 1. 1. 1. 1.]\n",
"tensor([2., 2., 2., 2., 2.])\n",
"[2. 2. 2. 2. 2.]\n"
]
}
],
"source": [
"# convert torch to np\n",
"a = torch.ones(5)\n",
"print(a)\n",
"b = a.numpy()\n",
"print(b)\n",
"a.add_(1)\n",
"print(a)\n",
"print(b)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[1. 1. 1. 1. 1.]\n",
"tensor([1., 1., 1., 1., 1.], dtype=torch.float64)\n",
"[2. 2. 2. 2. 2.]\n",
"tensor([2., 2., 2., 2., 2.], dtype=torch.float64)\n"
]
}
],
"source": [
"# convert np to torch\n",
"a = np.ones(5)\n",
"b = torch.from_numpy(a)\n",
"print(a)\n",
"print(b)\n",
"np.add(a, 1, out=a)\n",
"print(a)\n",
"print(b)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# using GPU\n",
"if torch.cuda.is_available():\n",
" device = torch.device(\"cuda\") # a CUDA device object\n",
" y = torch.ones_like(x, device=device) # directly create a tensor on GPU\n",
" x = x.to(device) # or just use strings ``.to(\"cuda\")``\n",
" z = x + y\n",
" print(z)\n",
" print(z.to(\"cpu\", torch.double))"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"True\n",
"Using device: cuda\n"
]
}
],
"source": [
"print(torch.cuda.is_available())\n",
"device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
"print('Using device:', device)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.10"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading

0 comments on commit 537a7e0

Please sign in to comment.