From 90ad91cdbb329972900c145fc23e40dd84d1f449 Mon Sep 17 00:00:00 2001 From: lhbvvvvv <104072573+lhbvvvvv@users.noreply.github.com> Date: Mon, 25 Sep 2023 21:06:06 +0800 Subject: [PATCH] refactor: improve python kernel v2.0(#1375) * refactor: improve python kernel * organize open-digger python kernel code in a more object-oriented way * refactor:python->python_v2 * refactor: save python and create python_v2 Delete config file add __init__.py add __init__.py add .gitignore --- .gitignore | 6 +- python_v2/README.md | 52 ++ python_v2/config.py | 50 ++ python_v2/db/clickhouse_wrapper.py | 24 + python_v2/db/neo4j_wrapper.py | 23 + python_v2/label_data_utils.py | 175 +++++++ python_v2/metrics/__init__.py | 4 + python_v2/metrics/basic.py | 345 +++++++++++++ python_v2/metrics/chaoss.py | 795 +++++++++++++++++++++++++++++ python_v2/metrics/index.py | 275 ++++++++++ python_v2/metrics/related_users.py | 11 + python_v2/open_digger.py | 53 ++ 12 files changed, 1812 insertions(+), 1 deletion(-) create mode 100644 python_v2/README.md create mode 100644 python_v2/config.py create mode 100644 python_v2/db/clickhouse_wrapper.py create mode 100644 python_v2/db/neo4j_wrapper.py create mode 100644 python_v2/label_data_utils.py create mode 100644 python_v2/metrics/__init__.py create mode 100644 python_v2/metrics/basic.py create mode 100644 python_v2/metrics/chaoss.py create mode 100644 python_v2/metrics/index.py create mode 100644 python_v2/metrics/related_users.py create mode 100644 python_v2/open_digger.py diff --git a/.gitignore b/.gitignore index 702588245..f81b85efa 100644 --- a/.gitignore +++ b/.gitignore @@ -18,8 +18,12 @@ node_modules sample_data/data # Ignore python -*/**/__init__.py */**/__pycache__ python/workspace/* python/workspace.py python/local_config.py + +# Ignore python_v2 +python_v2/workspace/* +python_v2/workspace.py +python_v2/local_config.py diff --git a/python_v2/README.md b/python_v2/README.md new file mode 100644 index 000000000..e2621e6be --- /dev/null +++ b/python_v2/README.md @@ -0,0 +1,52 @@ +# Getting Start + +## If you want to do some data analysis work: +Start your ClickHouse container, which should be set up in [Clickhouse-sample-data](../sample_data/README.md) + +1. Clone OpenDigger `git clone https://github.com/X-lab2017/open-digger.git` + +2. Enter the repo path `cd open-digger` + +3. Go to the `python` folder in the open-digger root directory, create a file named 'local_config.py'(this file has already added into `.gitignore` file.) for Python Kernel with the following contents: + + ```python + local_config = { + 'db': { + 'clickhouse': { + 'host':'172.17.0.1', + 'user':'default' + }, + 'neo4j':{ + 'port': '7687', + } + } + } + ``` + the `host` above is the host of the ClickHouse server. We can find it using `docker inspect containert_name`, and copy the `Gateway` like this: + + ```shell + $ docker inspect container_name | grep Gateway + "Gateway": "172.17.0.1", + "IPv6Gateway": "", + "Gateway": "172.17.0.1", + "IPv6Gateway": "", + ``` + If you use your own data, you can also change `host` field to your own host IP +4. Use `docker build -t opendigger-jupyter-python:1.0 $(pwd)` to make a docker image, this image is based on `miniconda`. You can check the `Dockerfile` in root directory. + + > If you are using **Windows CMD**, all the `$(pwd)` here should be replaced by `%cd%`. And if you are using **Windows Powershell**, all the `$(pwd)` here should be replaced by `${pwd}`. 
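+    > For example, on Windows CMD the build command becomes `docker build -t opendigger-jupyter-python:1.0 %cd%`, and the run command in step 5 becomes `docker run -it --name python_notebook_name --rm -p 8888:8888 -v %cd%:/python_kernel/notebook opendigger-jupyter-python:1.0`.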
+ > + > **Notice:** Pathnames of directories like "pwd" may use `\` to join the directory in some versions of Windows. We recommend using absolute paths. + +5. Then we can use `docker run -it --name python_notebook_name --rm -p 8888:8888 -v $(pwd):/python_kernel/notebook opendigger-jupyter-python:1.0` to create and run the container. + +6. Open the link in console log like `http://127.0.0.1:8888/lab?token=xxxxx`. + +7. If the source code under `python` folder changed, you need to stop the notebook docker using `docker stop python_notebook_name` and restart the notebook kernel using `docker run -it --name python_notebook_name --rm -p 8888:8888 -v $(pwd):/python_kernel/notebook opendigger-jupyter-python:1.0` to reload the sorce code. + +8. You can find the notebook folder, where we provide demos in the handbook. You can create a new file, and happy data exploring! + Attention: you need to do this work in `notebook` or other parallel folder. If you run in root directory, it can't work because of python import rules. + +## If you are a developer: + +You can also make `workspace.py` in `python` folder. and run it. diff --git a/python_v2/config.py b/python_v2/config.py new file mode 100644 index 000000000..f2a10966f --- /dev/null +++ b/python_v2/config.py @@ -0,0 +1,50 @@ +inited = False +config = { + 'general': { + 'owner': 'X-lab2017', + 'repo': 'OpenDigger', + 'baseUrl': 'http://open-digger.opensource-service.cn/', + }, + 'db': { + 'clickhouse': { + 'host': 'localhost', #python里的clickhouse_driver用的tcp端口9000 + 'port': '9000', + 'user': '', + 'password': '', + 'protocol': 'http:', + 'format': 'JSON', + 'database': 'opensource', + }, + 'neo4j': { + 'host':'neo4j://localhost:7687', + } + }, + 'oss': { + 'ali': { + 'region': '', + 'accessKeyId': '', + 'accessKeySecret': '', + 'bucket': '', + } + }, + 'ci': { + 'token':'', + } +} +def mergeConfig(base_config, local_config): + for key, val in local_config.items(): + if isinstance(val, dict): + mergeConfig(base_config[key], val) + else: + base_config[key] = val + return base_config +def getConfig(): + global config + if not inited: + try: + from local_config import local_config + config = mergeConfig(config, local_config) + return config + except: + return config + return config diff --git a/python_v2/db/clickhouse_wrapper.py b/python_v2/db/clickhouse_wrapper.py new file mode 100644 index 000000000..e7e15afcf --- /dev/null +++ b/python_v2/db/clickhouse_wrapper.py @@ -0,0 +1,24 @@ +from easydict import EasyDict +from config import getConfig +from clickhouse_driver import Client + +class ClickhouseWrapper(object): + def __init__(self): + if not hasattr(ClickhouseWrapper, "_first_init"): + config = EasyDict(getConfig()).db.clickhouse + try: + self.client = Client(config.host, config.port, config.database, config.user, config.password) + except : + print("CLICKHOUSE INIT FAILED") + def __new__(cls, *args, **kwargs): + + if not hasattr(ClickhouseWrapper, "_instance" ): + ClickhouseWrapper._instance = object.__new__(cls) + return ClickhouseWrapper._instance + + + def query(self, q): + return self.client.execute(q) + + def queryDataframe(self,q): + return self.client.query_dataframe(q) diff --git a/python_v2/db/neo4j_wrapper.py b/python_v2/db/neo4j_wrapper.py new file mode 100644 index 000000000..39e0dcc2a --- /dev/null +++ b/python_v2/db/neo4j_wrapper.py @@ -0,0 +1,23 @@ +from py2neo import Graph +from easydict import EasyDict +from config import getConfig + +class Neo4jWrapper(object): + def __init__(self): + neo4j_config = EasyDict(getConfig()).db.neo4j 
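+        # As with ClickhouseWrapper, __new__ below caches a single _instance, so
+        # constructing the wrapper twice, e.g.
+        #     g1 = Neo4jWrapper()
+        #     g2 = Neo4jWrapper()
+        #     assert g1 is g2
+        # yields the same object (note that __init__ still re-runs on each construction).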
+ # self.driver = Graph(neo4j_config.host) + try: + self.driver = Graph(neo4j_config.host) + except Exception as e: + print(e) + print("NEO4J INIT ERROR") + + def __new__(cls, *args, **kwargs): + + if not hasattr(Neo4jWrapper, "_instance" ): + Neo4jWrapper._instance = object.__new__(cls) + return Neo4jWrapper._instance + + def query(self, query_sql): + result = self.driver.run(query_sql) # return a cursor object + return result.data() diff --git a/python_v2/label_data_utils.py b/python_v2/label_data_utils.py new file mode 100644 index 000000000..c4899addf --- /dev/null +++ b/python_v2/label_data_utils.py @@ -0,0 +1,175 @@ +import os +import yaml +import platform +from typing import List +labelInputDir = '../labeled_data' +labelInputPath = os.path.join(os.path.dirname(os.path.abspath(__file__)), labelInputDir) + +supportedTypes = set(['Region', 'Company', 'Community', 'Project', 'Foundation','Tech-0', 'Tech-1', 'Tech-2','Domain-0', 'Bot']) + +supportedKey = set(['label', 'github_repo', 'github_org', 'github_user']) +GitHubData = { + 'githubRepos': [], + 'githubOrgs': [], + 'githubUsers': [], +} + +emptyData = { + 'githubRepos': [], + 'githubOrgs': [], + 'githubUsers': [], +} + +LabelItem = { + 'identifier': '', + 'content': { + 'name': '', + 'type': '', + 'data': '', + }, + 'parsed': True +} +LabelItem.update(GitHubData) + +ParsedLabelItem = { + 'identifier': '', + 'type': '', + 'name': '' +} +ParsedLabelItem.update(GitHubData) + +def getLabelData(): + if not os.path.isdir(labelInputPath): + print('{} input path is not a directory.'.format(labelInputPath)) + return [] + labelMap = {} #() + indexFileName = '{}index.yml'.format(os.path.sep) + labelFileSuffix = '.yml' + def getfileProcessor(f): + if not f.endswith('.yml'): return + # convert windows favor path to linux favor path + + identifier = processLabelIdentifier(':{}'.format(f[0:f.find(indexFileName)] if f.endswith(indexFileName) else f[0:f.find(labelFileSuffix)])) + content = open(os.path.join(labelInputPath, f),encoding='utf-8').read() + content = yaml.load(content,Loader=yaml.FullLoader) + labelMap[identifier] = { + 'identifier':identifier, + 'content':content, + 'parsed': False, + 'githubOrgs': [], + 'githubRepos': [], + 'githubUsers': [], + } + + readPath(labelInputPath, '', getfileProcessor) + data = processLabelItems(labelMap) + return data + +def readPath(p, base, fileProcessor): + """_summary_ + + Args: + p (string): _description_ + base (string): _description_ + fileProcessor(f:string)->void. 
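+
+    Walks directory p recursively and calls fileProcessor with each file's path
+    relative to the starting directory. For a hypothetical label file
+    labeled_data/companies/foo.yml, the processor receives 'companies/foo.yml',
+    which getLabelData then turns into the label identifier ':companies/foo'.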
+ """ + if not os.path.isdir(p): + fileProcessor(base) + else: + for f in os.listdir(p): + readPath(os.path.join(p, f), os.path.join(base, f), fileProcessor) + +def processLabelItems(map_item)->List: + """_summary_ + + Args: + map_item (Map): _description_ + LabelItem (_type_): _description_ + + Returns: + ParsedLabelItem[]: _description_ + """ + for item in map_item.values(): + parseItem(item, map_item) + return list(map(lambda item: {'identifier': item.get('identifier'), + 'type': item.get('content').get('type'), + 'name': item.get('content').get('name'), + 'githubRepos': list(set(item.get('githubRepos'))), + 'githubOrgs': list(set(item.get('githubOrgs'))), + 'githubUsers': list(set(item.get('githubUsers'))), + }, list(map_item.values()))) + +def parseItem(item, map_item): + """_summary_ + + Args: + item (LabelItem): _description_ + map_item (Map): _description_ + """ + if item.get('parsed'): return + if item.get('content').get('type') and item.get('content').get('type') not in supportedTypes: + raise Exception('Not supported type {}'.format(item.get('content').get('type'))) + for key in item.get('content').get('data'): + if not key in supportedKey: + raise Exception('Not supported element={}, identifier={}').format(key, item.get('identifier')) + if key == 'github_repo': + item.get('githubRepos').extend(x for x in item.get('content').get('data')[key]) + elif key == 'github_org': + item.get('githubOrgs').extend(x for x in item.get('content').get('data')[key]) + elif key == 'github_user': + item.get('githubUsers').extend(x for x in item.get('content').get('data')[key]) + elif key == 'label': + labels = item.get('content').get('data')[key] + for label in labels: + identifier = label if label.startswith(':') else processLabelIdentifier(os.path.join(item.get('identifier'), label)) + innerItem = map_item.get(identifier) + if innerItem == None: + raise Exception('Can not find nest identifier {} for {}'.format(identifier, item.get('identifier'))) + if not innerItem.get('parsed'): + parseItem(innerItem, map_item) + item.get('githubOrgs').extend(x for x in innerItem.get('githubOrgs')) + item.get('githubRepos').extend(x for x in innerItem.get('githubRepos')) + item.get('githubUsers').extend(x for x in innerItem.get('githubUsers')) + item['parsed'] = True + +def processLabelIdentifier(identifier: str)-> str: + if platform.system() == 'Windows': + return os.path.altsep.join(identifier.split(os.path.sep)) + else: return identifier + +def labelDataToGitHubData(data)->GitHubData: + """_summary_ + + Args: + data (list of ParsedLabelItem): _description_ + + Returns: + GitHubData: _description_ + """ + repoSet = set([]) + orgSet = set([]) + userSet = set([]) + for item in data: + for r in item.get('githubRepos'): repoSet.add(r) + for o in item.get('githubOrgs'): orgSet.add(o) + for u in item.get('githubUsers'): userSet.add(u) + return { + "githubRepos": list(repoSet), + "githubOrgs": list(orgSet), + "githubUsers": list(userSet), + } + +def getGitHubData(typeOrIds: List)-> GitHubData: + """_summary_ + + Args: + typeOrIds (List): _description_ + + Returns: + GitHubData: _description_ + """ + if len(typeOrIds) == 0: return emptyData + data = getLabelData() + if data == None: return emptyData + arr = list(filter(lambda i: i.get('type') in typeOrIds or i.get('identifier') in typeOrIds, data)) + return labelDataToGitHubData(arr) diff --git a/python_v2/metrics/__init__.py b/python_v2/metrics/__init__.py new file mode 100644 index 000000000..bfc434d45 --- /dev/null +++ b/python_v2/metrics/__init__.py @@ -0,0 +1,4 
@@ +class Metric(object): + from metrics.chaoss import Chaoss as chaoss + from metrics.index import Index as index + from metrics.related_users import Relation as relation diff --git a/python_v2/metrics/basic.py b/python_v2/metrics/basic.py new file mode 100644 index 000000000..87ff913d3 --- /dev/null +++ b/python_v2/metrics/basic.py @@ -0,0 +1,345 @@ +from itertools import groupby +import db.clickhouse_wrapper as clickhouse_wrapper +from numpy import append +from label_data_utils import getGitHubData, getLabelData +import datetime +from easydict import EasyDict +import math + +QueryConfig = { + 'labelUnion': None, + 'labelIntersect': None, + 'repoIds': None, + 'orgIds': None, + 'repoNames': None, + 'orgNames': None, + 'userIds': None, + 'userLogins': None, + 'startYear': 2015, + 'startMonth': 1, + 'endYear': 2015, + 'endMonth': 12, + 'order': 'DESC', + 'limit': 10, + 'precision': 2, + 'groupBy': None, + 'groupTimeRange': None, + 'options': None +} + +def getMergedConfig(config): + defaultConfig = { + 'startYear': 2015, + 'startMonth': 1, + 'endYear': datetime.datetime.today().year, + 'endMonth': datetime.datetime.today().month, + 'orderOption': 'latest', + 'order': 'DESC', + 'limit': 10, + 'limitOption': 'all', + 'precision': 2, + } + defaultConfig.update(config) + return defaultConfig + + +def forEveryMonthByConfig(config, func): + return forEveryMonth(config.get('startYear'), config.get('startMonth'), config.get('endYear'), config.get('endMonth'), func) + +def forEveryMonth(startYear, startMonth, endYear, endMonth, func): + for y in range(startYear, endYear + 1): + begin_month = startMonth if y == startYear else 1 + end_month = endMonth if y == endYear else 12 + for m in range(begin_month, end_month + 1): + func(y, m) + +# Repo +def getRepoWhereClauseForNeo4j(config): + def process(l): + data = getGitHubData([l]) + data = EasyDict(data) + arr = [] + if len(data.githubRepos) > 0: arr.append('r.id IN {}'.format(data.githubRepos)) + if len(data.githubOrgs) > 0: arr.append('r.org_id IN {}'.format(data.githubOrgs)) + if len(arr) == 0: return None + return '({})'.format(' OR '.join(arr)) + repoWhereClauseArray = [] + if config.get('repoIds'): repoWhereClauseArray.append('r.id IN {}'.format(config.get('repoIds'))) + if config.get('repoNames'): repoWhereClauseArray.append('r.name IN {}'.format(config.get('repoNames'))) + if config.get('orgIds'): repoWhereClauseArray.append('r.org_id IN {}'.format(config.get('orgIds'))) + if config.get('orgNames'): repoWhereClauseArray.append('r.org_name IN {}'.format(config.get('orgNames'))) + if config.get('labelIntersect'): + return '(' + ' AND '.join(list(filter(lambda i: i != None, list(map(process, config.get('labelIntersect')))))) + ')' + if config.get('labelUnion'): + data = EasyDict(getGitHubData(config.get('labelUnion'))) + if len(data.githubRepos > 0): repoWhereClauseArray.append('r.id IN {}'.format(data.githubRepos)) + if len(data.githubOrgs > 0): repoWhereClauseArray.append('r.org_id IN {}'.format(data.githubOrgs)) + repoWhereClause = '({})'.format(' OR '.join(repoWhereClauseArray)) if len(repoWhereClauseArray) > 0 else None + return repoWhereClause + +def getRepoWhereClauseForClickhouse(config): + def process(l): + data = getGitHubData([l]) + data = EasyDict(data) + arr = [] + if len(data.githubRepos) > 0: arr.append('repo_id IN {}'.format(data.githubRepos)) + if len(data.githubOrgs) > 0: arr.append('org_id IN {}'.format(data.githubOrgs)) + if len(arr) == 0: return None + return '({})'.format(' OR '.join(arr)) + repoWhereClauseArray = [] + if 
config.get('repoIds'): repoWhereClauseArray.append('repo_id IN {}'.format(config.get('repoIds'))) + if config.get('repoNames'): + # find id first + sql = 'SELECT DISTINCT(repo_id) FROM opensource.gh_events WHERE repo_name IN {}'.format(config.get('repoNames')) + ids = clickhouse_wrapper.query(sql) + repoWhereClauseArray.append('repo_id IN {}'.format(list(map(lambda i: i[0], ids)))) + if config.get('orgIds'): repoWhereClauseArray.append('org_id IN {}'.format(config.get('orgIds'))) + if config.get('orgNames'): + # find id first + sql = 'SELECT DISTINCT(org_id) FROM opensource.gh_events WHERE org_login IN {}'.format(config.get('orgNames')) + ids = clickhouse_wrapper.query(sql) + repoWhereClauseArray.append('org_id IN {}'.format(list(map(lambda i: i[0], ids)))) + if config.get('labelIntersect'): + return '(' + ' AND '.join(list(filter(lambda i: i != None, list(map(process, config.get('labelIntersect')))))) + ')' + if config.get('labelUnion'): + data = EasyDict(getGitHubData(config.get('labelUnion'))) + if len(data.githubRepos > 0): repoWhereClauseArray.append('repo_idIN {}'.format(data.githubRepos)) + if len(data.githubOrgs > 0): repoWhereClauseArray.append('org_id IN {}'.format(data.githubOrgs)) + repoWhereClause = '({})'.format(' OR '.join(repoWhereClauseArray)) if len(repoWhereClauseArray) > 0 else None + return repoWhereClause + +# User +def getUserWhereClauseForNeo4j(config): + def process(l): + data = getGitHubData([l]) + data = EasyDict(data) + if len(data.githubUsers) > 0: return 'u.id IN {}'.format(data.githubUsers) + return None + userWhereClauseArray = [] + if config.get('userIds'): userWhereClauseArray.append('u.id IN {}'.format(config.get('userIds'))) + if config.get('userLogins'): userWhereClauseArray.append('u.login IN {}'.format(config.get('userLogins'))) + if config.get('labelIntersect'): + return '(' + ' AND '.join(list(filter(lambda i: i != None, list(map(process, config.get('labelIntersect')))))) + ')' + if config.get('labelUnion'): + data = EasyDict(getGitHubData(config.get('labelUnion'))) + if len(data.githubUsers > 0): userWhereClauseArray.append('u.id IN {}'.format(data.githubUsers)) + userWhereClause = '({})'.format(' OR '.join(userWhereClauseArray)) if len(userWhereClauseArray) > 0 else None + return userWhereClause + +def getUserWhereClauseForClickhouse(config): + def process(l): + data = getGitHubData([l]) + data = EasyDict(data) + if len(data.githubUsers) > 0: return 'actor_id IN {}'.format(data.githubUsers) + return None + userWhereClauseArray = [] + if config.get('userIds'): userWhereClauseArray.append('actor_id IN {}'.format(config.get('userIds'))) + if config.get('userLogins'): + # get id first + sql = 'SELECT DISTINCT(actor_id) FROM opensource.gh_events WHERE actor_login IN {}'.format(config.get('userLogins')) + ids = clickhouse_wrapper.query(sql) + userWhereClauseArray.append('actor_id IN {}'.format(list(map(lambda i: i[0], ids)))) + if config.get('labelIntersect'): + return '(' + ' AND '.join(list(filter(lambda i: i != None, list(map(process, config.get('labelIntersect')))))) + ')' + if config.get('labelUnion'): + data = EasyDict(getGitHubData(config.get('labelUnion'))) + if len(data.githubRepos > 0): userWhereClauseArray.append('actor_id IN {}'.format(data.githubUsers)) + userWhereClause = '({})'.format(' OR '.join(userWhereClauseArray)) if len(userWhereClauseArray) > 0 else None + return userWhereClause + +# Time +def getTimeRangeWhereClauseForNeo4j(config, type): + timeWhereClauseArray = [] + forEveryMonthByConfig(config, lambda y, m: 
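+        # Appends one activity check per month in the configured range; for a
+        # hypothetical range 2015-01..2015-03 with type 'r', the final clause is
+        # (r.activity_20151 > 0 OR r.activity_20152 > 0 OR r.activity_20153 > 0).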
timeWhereClauseArray.append('{}.activity_{}{} > 0'.format(type, y, m))) + if len(timeWhereClauseArray) == 0: raise Exception('Not valid time range.') + timeWhereClause = '({})'.format(' OR '.join(timeWhereClauseArray)) + return timeWhereClause + +def getTimeRangeSumClauseForNeo4j(config, type): + lastYear = 0 + lastQuarter = 0 + def process_quarter(y, m): + nonlocal lastQuarter + q = math.ceil(m / 3) + if q != lastQuarter: timeRangeSumClauseArray.append([]) + timeRangeSumClauseArray[len(timeRangeSumClauseArray) - 1].append('COALESCE({}_{}{}, 0.0)'.format(type, y, m)) + lastQuarter = q + def process_year(y, m): + nonlocal lastYear + if y != lastYear: timeRangeSumClauseArray.append([]) + timeRangeSumClauseArray[len(timeRangeSumClauseArray) - 1].append('COALESCE({}_{}{}, 0.0)'.format(type, y, m)) + lastYear = y + timeRangeSumClauseArray = [] + if config.get('groupTimeRange') == 'month': + # for every month individual, every element belongs to a individual element + forEveryMonthByConfig(config, lambda y, m: timeRangeSumClauseArray.append(['COALESCE({}_{}{}, 0.0)'.format(type, y, m)])) + elif config.get('groupTimeRange') == 'quarter': + # for every quarter, need to find out when to push a new element by quarter + forEveryMonthByConfig(config, process_quarter) + elif config.get('groupTimeRange') == 'year': + # for every year, need to find out when to push a new element by the year; + forEveryMonthByConfig(config, process_year) + else: + # for all to single one, push to the first element + timeRangeSumClauseArray.push([]) + forEveryMonthByConfig(config, lambda y, m: timeRangeSumClauseArray[0].append('COALESCE({}_{}{}, 0.0)'.format(type, y, m))) + if len(timeRangeSumClauseArray) == 0: raise Exception('Not valid time range.') + timeRangeSumClause = list(map(lambda i: 'round({}, {})'.format(' + '.join(i), config.get('percision')), timeRangeSumClauseArray)) + return timeRangeSumClause + +def getTimeRangeWhereClauseForClickhouse(config): + endDate = datetime.date(year = config.get('endYear')+1 if config.get('endMonth')+1>12 else config.get('endYear'), month = (config.get('endMonth')+1)%12, day = 1) + # endDate.setMonth(config.get('endMonth')) # find next month + return ' created_at >= toDate(\'{}-{}-1\') AND created_at < toDate(\'{}-{}-1\') '.format(config.get('startYear'), config.get('startMonth'), endDate.year, endDate.month) + +# clickhouse label group condition +def getLabelGroupConditionClauseForClickhouse(config): + labelData = list(filter(lambda l: l.get('type') == config.get('groupBy'), getLabelData())) if getLabelData() != None else None + if (labelData==None or len(labelData) == 0): raise Exception('Invalide group by label: {}'.format(config.get('groupBy'))) + idLabelRepoMap = {} + idLabelOrgMap = {} + idLabelUserMap = {} + def addToMap(my_map, id, label): + if not id in my_map: my_map[id] = [] + if my_map.get(id) != None: my_map.get(id).append(label) + + for l in labelData: + for id in l.get('githubOrgs'): addToMap(idLabelOrgMap, id, l.get('name')) + for id in l.get('githubRepos'): addToMap(idLabelRepoMap, id, l.get('name')) + for id in l.get('githubUsers'): addToMap(idLabelUserMap, id, l.get('name')) + + resultMap = {} # + def addToResultMap(my_map, id:int, labels:str, type): + """_summary_ + Args: + my_map (dict): dict + id (int): number + labels (str): string list + type (str): 'repo' | 'org' | 'user' + """ + key = str(labels) + if not key in my_map: my_map[key] = { 'labels':labels, 'repoIds': [], 'orgIds': [], 'userIds': [] } + if type == 'repo': + if my_map.get(key) != None: 
my_map.get(key).get('repoIds').append(id) + elif type == 'org': + if my_map.get(key) != None: my_map.get(key).get('orgIds').append(id) + elif type == 'user': + if my_map.get(key) != None: my_map.get(key).get('userIds').append(id) + + for id, labels in idLabelRepoMap.items(): addToResultMap(resultMap, id, labels, 'repo') + for id, labels in idLabelOrgMap.items(): addToResultMap(resultMap, id, labels, 'org') + for id, labels in idLabelUserMap.items(): addToResultMap(resultMap, id, labels, 'user') + + def process(v): + c = [] + if len(v.get('orgIds')) > 0: c.append('org_id IN ({})'.format(','.join(str(i) for i in v.get('orgIds')))) + if len(v.get('repoIds')) > 0: c.append('repo_id IN ({})'.format(','.join(str(i) for i in v.get('repoIds')))) + if len(v.get('userIds')) > 0: c.append('actor_id IN ({})'.format(','.join(str(i) for i in v.get('userIds')))) + return '({}),[{}]'.format(' OR '.join(c), ','.join(map(lambda l: '\'{}\''.format(l),v.get('labels')))) + conditions = ','.join(list(map(process, resultMap.values()))) + + return 'arrayJoin(multiIf({}, [\'Others\']))'.format(conditions) + +def getGroupArrayInsertAtClauseForClickhouse(config, option): + """_summary_ + Args: + config (dict): QueryConfig + option (_type_): { key: string; defaultValue?: string; value?: string; } + """ + start_time = f"toDate('{config['startYear']}-{config['startMonth']}-1')" + end_time = f"toDate('{config['endYear']}-{config['endMonth']}-1')" + + default_value = option.get('defaultValue', 0) + + total_length = "" + if config.get('groupTimeRange'): + total_length = f"toUInt32(dateDiff('{config['groupTimeRange']}', {start_time}, {end_time})) + 1" + else: + total_length = "1" + + fieldName = option.get('value', option['key']) + if config['precision'] > 0 and not option.get('noPrecision'): + group_key = f"ROUND({fieldName}, {config['precision']})" + else: + group_key = fieldName + + if not config.get('groupTimeRange'): + position = "0" + else: + if config['groupTimeRange'] == 'quarter': + start_time = f"toStartOfQuarter({start_time})" + elif config['groupTimeRange'] == 'year': + start_time = f"toStartOfYear({start_time})" + position = f"toUInt32(dateDiff('{config['groupTimeRange']}', {start_time}, time){'-1' if option.get('positionByEndTime') else ''})" + + return f'''groupArrayInsertAt( + {default_value}, + {total_length})({group_key}, + {position}) AS {option['key']}''' + +def getGroupTimeClauseForClickhouse(config, timeCol = 'created_at') -> str: + """_summary_ + Args: + config (_type_): _description_ + timeCol (str, optional): _description_. Defaults to 'created_at'. + + Returns: + str: _description_ + """ + groupEle = '1' # no time range, aggregate all data to a single value + if config.get('groupTimeRange') == 'month': groupEle = 'toStartOfMonth({})'.format(timeCol) + elif config.get('groupTimeRange') == 'quarter': groupEle = 'toStartOfQuarter({})'.format(timeCol) + elif config.get('groupTimeRange') == 'year': groupEle = 'toStartOfYear({})'.format(timeCol) + return '{} AS time'.format(groupEle) + +def getGroupIdClauseForClickhouse(config, type = 'repo', timeCol = 'created_at') -> str: + """_summary_ + Args: + config (_type_): _description_ + type (str, optional): _description_. Defaults to 'repo'. + timeCol (str, optional): _description_. Defaults to 'created_at'. 
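+
+        Note: with no groupBy this resolves to
+        'repo_id AS id, argMax(repo_name, time) AS name' (or the actor_id /
+        actor_login pair when type is not 'repo'); groupBy='org' selects
+        org_id / org_login, and any other groupBy value is treated as a label
+        group via getLabelGroupConditionClauseForClickhouse.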
+ + Returns: + str: _description_ + """ + if config.get('groupBy') == None: #group by repo' + if type == 'repo': + return 'repo_id AS id, argMax(repo_name, time) AS name' + else: + return 'actor_id AS id, argMax(actor_login, time) AS name' + elif config.get('groupBy') == 'org': + return 'org_id AS id, argMax(org_login, time) AS name' + else : # group by label + return '{} AS id, id AS name'.format(getLabelGroupConditionClauseForClickhouse(config)) + +def getInnerOrderAndLimit(config, col, index=None): + if config.get('limitOption') == 'each' and config.get('limit', 0) > 0: + order_by_clause = f"ORDER BY {col}[{index}] {config.get('order')}" if config.get('order') else '' + limit_clause = f"LIMIT {config.get('limit')} BY time" + return f"{order_by_clause} {limit_clause}" + else: + return '' + +def getOutterOrderAndLimit(config, col, index=None): + order_clause = "" + if config.get('order'): + if config.get('orderOption') == 'latest': + order_clause = f"ORDER BY {col}[-1]{f'[{index}]' if index is not None else ''}" + else: + index_clause = f"x -> x[{index}], " if index is not None else '' + order_clause = f"ORDER BY arraySum({index_clause}{col})" + limit_clause = f"LIMIT {config.get('limit')}" if config.get('limitOption') == 'all' and config.get('limit', 0) > 0 else '' + return f"{order_clause} {config.get('order', '')} {limit_clause}" + +def filterEnumType(value, types, defautlValue: str) -> str: + """_summary_ + Args: + value (_type_): _description_ + types (str list): _description_ + defautlValue (str): _description_ + + Returns: + str: _description_ + """ + if not value or not value in types: return defautlValue + return value diff --git a/python_v2/metrics/chaoss.py b/python_v2/metrics/chaoss.py new file mode 100644 index 000000000..63dcbf4c5 --- /dev/null +++ b/python_v2/metrics/chaoss.py @@ -0,0 +1,795 @@ +import datetime +from typing import Tuple,List +from .basic import filterEnumType,\ + getGroupArrayInsertAtClauseForClickhouse,\ + getGroupTimeClauseForClickhouse,\ + getGroupIdClauseForClickhouse,\ + getMergedConfig,\ + getRepoWhereClauseForClickhouse,\ + getTimeRangeWhereClauseForClickhouse,\ + getInnerOrderAndLimit,\ + getOutterOrderAndLimit,\ + getUserWhereClauseForClickhouse,\ + QueryConfig +from db.clickhouse_wrapper import ClickhouseWrapper +clickhouse = ClickhouseWrapper() + +class Chaoss(): + __ISSUE_COMMENT_WEIGHT = 1 + __OPEN_ISSUE_WEIGHT = 2 + __OPEN_PULL_WEIGHT = 3 + __REVIEW_COMMENT_WEIGHT = 4 + __PULL_MERGED_WEIGHT = 2 + __basicActivitySqlComponent = f''' + if(type=\'PullRequestEvent\' AND action=\'closed\' AND pull_merged=1, issue_author_id, actor_id) AS actor_id, + argMax(if(type=\'PullRequestEvent\' AND action=\'closed\' AND pull_merged=1, issue_author_login, actor_login), created_at) AS actor_login, + countIf(type=\'IssueCommentEvent\' AND action=\'created\') AS issue_comment, + countIf(type=\'IssuesEvent\' AND action=\'opened\') AS open_issue, + countIf(type=\'PullRequestEvent\' AND action=\'opened\') AS open_pull, + countIf(type=\'PullRequestReviewCommentEvent\' AND action=\'created\') AS review_comment, + countIf(type=\'PullRequestEvent\' AND action=\'closed\' AND pull_merged=1) AS merged_pull, + sqrt({__ISSUE_COMMENT_WEIGHT}*issue_comment + {__OPEN_ISSUE_WEIGHT}*open_issue + {__OPEN_PULL_WEIGHT}*open_pull + {__REVIEW_COMMENT_WEIGHT}*review_comment + {__PULL_MERGED_WEIGHT}*merged_pull) AS activity + ''' + + CodeChangeCommitsOptions= { + # a filter regular expression for commit message + 'messageFilter': 
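+        # When options['messageFilter'] is set, only commit messages matching this
+        # regular expression are counted; the value below matches Conventional
+        # Commits prefixes (feat:, fix:, docs:, ...).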
'^(build:|chore:|ci:|docs:|feat:|fix:|perf:|refactor:|revert:|style:|test:).*' + } + + timeDurationConstants = { + "unitArray": ['week', 'day', 'hour', 'minute'], + "sortByArray": ['avg', 'levels', 'quantile_0', 'quantile_1', 'quantile_2', 'quantile_3', 'quantile_4'], + "quantileArray": list(range(5)), + } + + def __bulidInnnerCountSql(config, whereClauses, type='repo'): + return f''' + SELECT + {getGroupTimeClauseForClickhouse(config)}, + {getGroupIdClauseForClickhouse(config, type)}, + COUNT() AS count + FROM opensource.gh_events + WHERE {' AND '.join(whereClauses)} + GROUP BY id, time + {getInnerOrderAndLimit(config, 'count')} + ''' + + def __bulidOuterCountSql(config, inner_sql, countColName): + return f''' + SELECT + id, + argMax(name, time) AS name, + SUM(count) AS total_count, + {getGroupArrayInsertAtClauseForClickhouse(config, { 'key': countColName, 'value':'count' })} + FROM + ({inner_sql}) + GROUP BY id + {getOutterOrderAndLimit(config, countColName)} + ''' + + def __executeInnnerSql(inner_sql, columns=['time','id','name','count']): + queryResult = clickhouse.query(inner_sql) + rst = list(map(lambda row: dict(zip(columns,row)), queryResult)) + return rst + + def __executeOuterSql(generated_sql, columns, processMethod): + queryResult = clickhouse.query(generated_sql) + rst = [processMethod(row, columns) for row in queryResult] + # rst = list(map(lambda row: dict(zip(columns,row)), queryResult)) + return rst + + def __process(row, cloumns): + processResult = dict(zip(cloumns,row)) + return processResult + + def __processAppendRatio(row, cloumns, countIndex = -1, totalCountIndex = -2): + processResult = dict(zip(cloumns,row)) + count = row[countIndex] + total_count = row[totalCountIndex] + processResult['ratio'] = list(map(lambda v: '{}%'.format(str(format((v*100/total_count), '.2f'))), count)) + return processResult + + def chaossCodeChangeCommits(config, mode='outer') -> (List,str): + """_summary_ + + Args: + config (QueryConfig): _description_ + """ + config = getMergedConfig(config) + whereClauses = ["type = \'PushEvent\' "] + repoWhereClause = getRepoWhereClauseForClickhouse(config) + if repoWhereClause != None: whereClauses.append(repoWhereClause) + whereClauses.append(getTimeRangeWhereClauseForClickhouse(config)) + if config.get('options') and config.get('options').get('messageFilter'): + arrayJoinMessage = 'arrayFilter(x -> match(x, \'{}\'), push_commits.message)'.format(config.get('options').get('messageFilter')) + else: + arrayJoinMessage = 'push_commits.message' + + rst = [] + inner_sql = f''' + SELECT + {getGroupTimeClauseForClickhouse(config)}, + {getGroupIdClauseForClickhouse(config, 'repo')}, + COUNT(arrayJoin({arrayJoinMessage})) AS count + FROM opensource.gh_events + WHERE {' AND '.join(whereClauses)} + GROUP BY id, time + {getInnerOrderAndLimit(config, 'count')} + ''' + + if mode == 'origin': + rst = Chaoss.__executeInnnerSql(inner_sql) + return rst, inner_sql + + generated_sql = Chaoss.__bulidOuterCountSql(config, inner_sql, 'commits_count') + columns = ['id', 'name', 'total_count', 'count'] + rst = Chaoss.__executeOuterSql(generated_sql, columns, Chaoss.__process) + return rst, generated_sql + + def __chaossCount(config, mode, whereClauses, countColName) -> (List,str): + config = getMergedConfig(config) + repoWhereClause = getRepoWhereClauseForClickhouse(config) + if repoWhereClause != None: whereClauses.append(repoWhereClause) + whereClauses.append(getTimeRangeWhereClauseForClickhouse(config)) + rst = [] + inner_sql = Chaoss.__bulidInnnerCountSql(config, 
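+            # Shared counting pipeline: the inner SQL yields one (time, id, name, count)
+            # row per group and period; mode='origin' returns those rows directly, while
+            # the default outer SQL pivots them with groupArrayInsertAt into a single row
+            # per id holding the per-period counts array (a percentage ratio is appended
+            # in Python afterwards by __processAppendRatio).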
whereClauses) + + if mode == 'origin': + rst = Chaoss.__executeInnnerSql(inner_sql) + return rst, inner_sql + generated_sql = Chaoss.__bulidOuterCountSql(config, inner_sql, countColName) + columns = ['id', 'name', 'total_count', 'count', 'ratio'] + rst = Chaoss.__executeOuterSql(generated_sql, columns, Chaoss.__processAppendRatio) + return rst, generated_sql + + def chaossIssuesNew(config, mode='outer') -> (List,str): + """_summary_ + + Args: + config (dict): QueryConfig + """ + whereClauses = ["type = \'IssuesEvent\' AND action IN (\'opened\', \'reopened\')"] + return Chaoss.__chaossCount(config, mode, whereClauses, 'issues_new_count') + + def chaossIssuesClosed(config, mode='outer') -> (List,str): + """_summary_ + + Args: + config (QueryConfig): _description_ + """ + whereClauses = ["type = \'IssuesEvent\' AND action = \'closed\'"] + return Chaoss.__chaossCount(config, mode, whereClauses, 'issues_close_count') + + def chaossChangeRequestsAccepted(config: QueryConfig, mode='outer') -> (List,str): + """_summary_ + + Args: + config (QueryConfig): _description_ + """ + whereClauses = ["type = \'PullRequestEvent\' AND action = \'closed\' AND pull_merged = 1"] + return Chaoss.__chaossCount(config, mode, whereClauses, 'change_requests_accepted') + + def chaossChangeRequestsDeclined(config: QueryConfig, mode='outer') -> (List,str): + """_summary_ + + Args: + config (QueryConfig): _description_ + """ + whereClauses = ["type = \'PullRequestEvent\' AND action = \'closed\' AND pull_merged = 0"] + return Chaoss.__chaossCount(config, mode, whereClauses, 'change_requests_declined') + + BusFactorOptions = { + # calculate bus factor by change request or git commit, or activity index. default: activity ('commit' | 'change request' | 'activity') + 'by': 'activity', + # the bus factor percentage thredhold, default: 0.5 + 'percentage': 0.5, + # include GitHub Apps account, default: false + 'withBot': False, + } + + def chaossBusFactor(config, mode='outer') -> (List,str): + """_summary_ + + Args: + config (QueryConfig): QueryConfig + + Returns: + _type_: _description_ + """ + config = getMergedConfig(config) + by = filterEnumType(config.get('options').get('by') if config.get('options') != None else None, ['commit', 'change request', 'activity'], 'activity') + whereClauses = [] + if by == 'commit': + whereClauses.append("type = \'PushEvent\'") + elif by == 'change request': + whereClauses.append("type = \'PullRequestEvent\' AND action = \'closed\' AND pull_merged = 1") + elif by == 'activity': + whereClauses.append("type IN (\'IssuesEvent\', \'IssueCommentEvent\', \'PullRequestEvent\', \'PullRequestReviewCommentEvent\')") + repoWhereClause = getRepoWhereClauseForClickhouse(config) + if repoWhereClause != None: whereClauses.append(repoWhereClause) + whereClauses.append(getTimeRangeWhereClauseForClickhouse(config)) + + percentage = str(1 - config.get('options').get('percentage')) if config.get('options') and 'percentage' in config.get('options') else '0.5' + + authorFieldName = 'actor_login' if by == 'activity' else 'author' + if config.get('options', {}).get('withBot') and by != 'commit': + botFilterHavingClause = "" + else: + botFilterHavingClause = f"HAVING {authorFieldName} NOT LIKE '%[bot]'" + + rst = [] + inner_sql = f''' + SELECT + time, + id, + any(name) AS name, + SUM(count) AS total_contributions, + length(detail) AS bus_factor, + arrayFilter(x -> tupleElement(x, 2) >= quantileExactWeighted({percentage}) (count, count), arrayMap((x, y) -> (x, y), groupArray({authorFieldName}), groupArray(count))) AS 
detail + FROM + ( + SELECT + {getGroupTimeClauseForClickhouse(config)}, + {getGroupIdClauseForClickhouse(config)}, + { + 'arrayJoin(push_commits.name) AS author, COUNT() AS count' if by == 'commit' else + 'issue_author_id AS actor_id, argMax(issue_author_login, created_at) AS author, COUNT() AS count' if by == 'change request' else + f'{Chaoss.__basicActivitySqlComponent}, toUInt32(ceil(activity)) AS count' + } + FROM opensource.gh_events + WHERE {' AND '.join(whereClauses)} + GROUP BY id, time, {('author' if by == 'commit' else 'actor_id')} + {botFilterHavingClause} + ) + GROUP BY id, time + {getInnerOrderAndLimit(config, 'bus_factor')} + ''' + if mode == 'origin': + columns = ['time', 'id', 'name', 'total_contributions', 'bus_factor', 'detail'] + rst = Chaoss.__executeInnnerSql(inner_sql, columns) + return rst, inner_sql + + generated_sql = f''' + SELECT + id, + argMax(name, time) AS name, + {getGroupArrayInsertAtClauseForClickhouse(config, {"key": "bus_factor"})}, + {getGroupArrayInsertAtClauseForClickhouse(config, {"key": "detail", "noPrecision": True, "defaultValue": "[]"})}, + {getGroupArrayInsertAtClauseForClickhouse(config, {"key": "total_contributions"})} + FROM + ({inner_sql}) + GROUP BY id + {getOutterOrderAndLimit(config, 'bus_factor')} + ''' + columns = ['id', 'name', 'bus_factor', 'detail', 'total_contributions'] + rst = Chaoss.__executeOuterSql(generated_sql, columns, Chaoss.__process) + return rst, generated_sql + + IssueResolutionDurationOptions = { + 'by': 'open', #'open' | 'close' + 'type': 'avg', #'avg' | 'median' + 'unit': 'week' #'week' | 'day' | 'hour' | 'minute' + } + + def __chaossResolutionDuration(config, type, mode) -> (List,str): + """_summary_ + + Args: + config (QueryConfig): _description_ + """ + config = getMergedConfig(config) + whereClauses = ["type = 'IssuesEvent'"] if type == 'issue' else ["type = 'PullRequestEvent'"] + repoWhereClause = getRepoWhereClauseForClickhouse(config) + if repoWhereClause: whereClauses.append(repoWhereClause) + + endDate = datetime.date(year = config.get('endYear')+1 if config.get('endMonth')+1>12 else config.get('endYear'), month = (config.get('endMonth')+1)%12, day = 1) + + by = filterEnumType(config.get("options", {}).get("by"), ['open', 'close'], 'open') + byCol = 'opened_at' if by == 'open' else 'closed_at' + unit = filterEnumType(config.get("options", {}).get("unit"), Chaoss.timeDurationConstants["unitArray"], 'day') + thresholds = config.get("options", {}).get("thresholds", [3, 7, 15]) + ranges = thresholds + [-1] + sortBy = filterEnumType(config.get("options", {}).get("sortBy"), Chaoss.timeDurationConstants["sortByArray"], 'avg') + + rst = [] + inner_sql = f''' + SELECT + {getGroupTimeClauseForClickhouse(config, byCol)}, + {getGroupIdClauseForClickhouse(config, 'repo')}, + avg(resolution_duration) AS avg, + {', '.join([f'quantile({q / 4})(resolution_duration) AS quantile_{q}' for q in Chaoss.timeDurationConstants["quantileArray"]])}, + [{', '.join([f'countIf(resolution_level = {i})' for i in range(len(ranges))])}] AS resolution_levels + FROM + ( + SELECT + repo_id, + argMax(repo_name, created_at) AS repo_name, + org_id, + argMax(org_login, created_at) AS org_login, + issue_number, + argMaxIf(action, created_at, action IN ('opened', 'closed' , 'reopened')) AS last_action, + argMax(issue_created_at, created_at) AS opened_at, + maxIf(created_at, action = 'closed') AS closed_at, + dateDiff('{unit}', opened_at, closed_at) AS resolution_duration, + multiIf({', '.join([f'resolution_duration <= {t}, {i}' for i, t in 
enumerate(thresholds)])}, {len(thresholds)}) AS resolution_level + FROM opensource.gh_events + WHERE {' AND '.join(whereClauses)} + GROUP BY repo_id, org_id, issue_number + HAVING {byCol} >= toDate('{config['startYear']}-{config['startMonth']}-1') AND {byCol} < toDate('{endDate.year}-{endDate.month}-1') AND last_action='closed' + ) + GROUP BY id, time + {getInnerOrderAndLimit(config, 'resolution_duration')} + ''' + if mode == 'origin': + columns = ['time','id','name','avg', 'quantile_0', 'quantile_1', 'quantile_2', 'quantile_3', 'quantile_4', 'resolution_levels'] + rst = Chaoss.__executeInnnerSql(inner_sql, columns) + return rst, inner_sql + + generated_sql = f''' + SELECT + id, + argMax(name, time) As name, + {getGroupArrayInsertAtClauseForClickhouse(config, { "key": "avg", "defaultValue": 'NaN' })}, + {getGroupArrayInsertAtClauseForClickhouse(config, { "key": 'levels', "value": 'resolution_levels', "defaultValue": "[]", "noPrecision": True })}, + {', '.join([getGroupArrayInsertAtClauseForClickhouse(config, { "key": f"quantile_{q}", "defaultValue": 'NaN' }) for q in Chaoss.timeDurationConstants["quantileArray"]])} + FROM + ({inner_sql}) + GROUP BY id + {getOutterOrderAndLimit(config, sortBy, 1 if sortBy == 'levels' else None)} + ''' + columns = ['id', 'name', 'resolution_duration_avg', 'levels', 'quantile_0', 'quantile_1', 'quantile_2', 'quantile_3', 'quantile_4'] + rst = Chaoss.__executeInnnerSql(generated_sql, columns) + return rst, generated_sql + + def chaossIssueResolutionDuration(config, mode='outer') -> (List,str): + return Chaoss.__chaossResolutionDuration(config, 'issue', mode) + + def chaossChangeRequestResolutionDuration(config, mode='outer') -> (List,str): + return Chaoss.__chaossResolutionDuration(config, 'change request', mode) + + def __chaossResponseTime(config, type, mode) -> (List,str): + config = getMergedConfig(config) + whereClauses = [] + + if type == 'issue': + whereClauses.append("type IN ('IssueCommentEvent', 'IssuesEvent') AND actor_login NOT LIKE '%[bot]'") + else: + whereClauses.append("type IN ('IssueCommentEvent', 'PullRequestEvent', 'PullRequestReviewCommentEvent', 'PullRequestReviewEvent') AND actor_login NOT LIKE '%[bot]'") + + repoWhereClause = getRepoWhereClauseForClickhouse(config) + if repoWhereClause: + whereClauses.append(repoWhereClause) + + endDate = datetime.date(year = config.get('endYear')+1 if config.get('endMonth')+1>12 else config.get('endYear'), month = (config.get('endMonth')+1)%12, day = 1) + unit = filterEnumType(config.get("options", {}).get("unit"), Chaoss.timeDurationConstants["unitArray"], 'day') + thresholds = config.get("options", {}).get("thresholds", [3, 7, 15]) + ranges = thresholds + [-1] + sortBy = filterEnumType(config.get("options", {}).get("sortBy"), Chaoss.timeDurationConstants["sortByArray"], 'avg') + + rst = [] + inner_sql = f''' + SELECT + {getGroupTimeClauseForClickhouse(config, 'issue_created_at')}, + {getGroupIdClauseForClickhouse(config)}, + avg(response_time) AS avg, + {', '.join([f'quantile({q / 4})(response_time) AS quantile_{q}' for q in Chaoss.timeDurationConstants["quantileArray"]])}, + [{', '.join([f'countIf(response_level = {i})' for i in range(len(ranges))])}] AS response_levels + FROM + ( + SELECT + repo_id, + argMax(repo_name, created_at) AS repo_name, + org_id, + argMax(org_login, created_at) AS org_login, + issue_number, + minIf(created_at, action = 'opened' AND issue_comments = 0) AS issue_created_at, + minIf(created_at, (action = 'created' AND actor_id != issue_author_id) OR (action = 'closed')) AS 
responded_at, + if(responded_at = toDate('1970-01-01'), now(), responded_at) AS first_responded_at, + dateDiff('{unit}', issue_created_at, first_responded_at) AS response_time, + multiIf({', '.join([f'response_time <= {t}, {i}' for i, t in enumerate(thresholds)])}, {len(thresholds)}) AS response_level + FROM opensource.gh_events + WHERE {' AND '.join(whereClauses)} + GROUP BY repo_id, org_id, issue_number + HAVING issue_created_at >= toDate('{config.get('startYear')}-{config.get('startMonth')}-1') + AND issue_created_at < toDate('{endDate.year}-{endDate.month}-1') + ) + GROUP BY id, time + {getInnerOrderAndLimit(config, 'resolution_duration')} + ''' + if mode == 'origin': + columns = ['time','id','name','avg', 'quantile_0', 'quantile_1', 'quantile_2', 'quantile_3', 'quantile_4', 'response_levels'] + rst = Chaoss.__executeInnnerSql(inner_sql,columns) + return rst, inner_sql + + generated_sql = f''' + SELECT + id, + argMax(name, time), + {getGroupArrayInsertAtClauseForClickhouse(config, { "key": "avg", "defaultValue": 'NaN' })}, + {getGroupArrayInsertAtClauseForClickhouse(config, { "key": 'levels', "value": 'response_levels', "defaultValue": "[]", "noPrecision": True })}, + {', '.join([getGroupArrayInsertAtClauseForClickhouse(config, { "key": f"quantile_{q}", "defaultValue": 'NaN' }) for q in Chaoss.timeDurationConstants["quantileArray"]])} + FROM + ({inner_sql}) + GROUP BY id + {getOutterOrderAndLimit(config, sortBy, 1 if sortBy == 'levels' else None)} + ''' + columns = ['id', 'name', 'response_time_avg', 'levels', 'quantile_0', 'quantile_1', 'quantile_2', 'quantile_3', 'quantile_4'] + rst = Chaoss.__executeOuterSql(generated_sql, columns, Chaoss.__process) + return rst, generated_sql + + def chaossIssueResponseTime(config, mode='outer') -> (List,str): + return Chaoss.__chaossResponseTime(config, 'issue', mode) + + def chaossChangeRequestResponseTime(config, mode='outer') -> (List,str): + return Chaoss.__chaossResponseTime(config, 'change request', mode) + + def __chaossAge(config, type, mode) -> (List,str): + config = getMergedConfig(config) + whereClauses = [] + + if type == 'issue': + whereClauses.append("type='IssuesEvent'") + else: + whereClauses.append("type='PullRequestEvent'") + + repoWhereClause = getRepoWhereClauseForClickhouse(config) + if repoWhereClause: + whereClauses.append(repoWhereClause) + + endDate = datetime.date(year = config.get('endYear')+1 if config.get('endMonth')+1>12 else config.get('endYear'), month = (config.get('endMonth')+1)%12, day = 1) + endTimeClause = f"toDate('{endDate.year}-{endDate.month}-1')" + whereClauses.append(f"created_at < {endTimeClause}") + if config['groupTimeRange']: + timeClause = f"arrayJoin(arrayMap(x -> dateAdd({config.get('groupTimeRange')}, x + 1, toDate('{config.get('startYear')}-{config.get('startMonth')}-1')), range(toUInt64(dateDiff('{config.get('groupTimeRange')}', toDate('{config.get('startYear')}-{config.get('startMonth')}-1'), {endTimeClause}))))) AS time" + else: + timeClause = f"{endTimeClause} AS time" + + unit = filterEnumType(config.get("options", {}).get("unit"), Chaoss.timeDurationConstants["unitArray"], 'day') + thresholds = config.get("options", {}).get("thresholds", [15, 30, 60]) + ranges = thresholds + [-1] + sortBy = filterEnumType(config.get("options", {}).get("sortBy"), Chaoss.timeDurationConstants["sortByArray"], 'avg') + + inner_sql = f''' + SELECT + {timeClause}, + {getGroupIdClauseForClickhouse(config)}, + avgIf(dateDiff('{unit}', opened_at, time), opened_at < time AND closed_at >= time) AS avg, + {', 
'.join([f"quantileIf({q / 4})(dateDiff('{unit}', opened_at, time), opened_at < time AND closed_at >= time) AS quantile_{q}" for q in Chaoss.timeDurationConstants["quantileArray"]])}, + [{', '.join([f"""countIf(multiIf({', '.join([f"dateDiff('{unit}', opened_at, time) <= {t}, {i}" for i, t in enumerate(thresholds)])}, {len(thresholds)}) = {i} AND opened_at < time AND closed_at >= time)""" for i in range(len(ranges))])}] AS age_levels + FROM + ( + SELECT + repo_id, + argMax(repo_name, created_at) AS repo_name, + org_id, + argMax(org_login, created_at) AS org_login, + issue_number, + minIf(created_at, action = 'opened') AS opened_at, + maxIf(created_at, action = 'closed') AS real_closed_at, + if(real_closed_at=toDate('1970-1-1'), {endTimeClause}, real_closed_at) AS closed_at + FROM opensource.gh_events + WHERE {' AND '.join(whereClauses)} + GROUP BY repo_id, org_id, issue_number + HAVING opened_at > toDate('1970-01-01') + ) + GROUP BY id, time + {getInnerOrderAndLimit(config, 'age')} + ''' + if mode == 'origin': + columns = ['time','id','name','avg', 'quantile_0', 'quantile_1', 'quantile_2', 'quantile_3', 'quantile_4', 'age_levels'] + rst = Chaoss.__executeInnnerSql(inner_sql,columns) + return rst, inner_sql + + generated_sql = f''' + SELECT + id, + argMax(name, time), + {getGroupArrayInsertAtClauseForClickhouse(config, { "key": "avg", "defaultValue": 'NaN', "positionByEndTime": True })}, + {getGroupArrayInsertAtClauseForClickhouse(config, { "key": 'levels', "value": 'if(arrayAll(x -> x = 0, age_levels), [], age_levels)', "defaultValue": "[]", "noPrecision": True, "positionByEndTime": True })}, + {', '.join([getGroupArrayInsertAtClauseForClickhouse(config, { "key": f'quantile_{q}', "defaultValue": 'NaN', "positionByEndTime": True}) for q in Chaoss.timeDurationConstants["quantileArray"]])} + FROM + ({inner_sql}) + GROUP BY id + {getOutterOrderAndLimit(config, sortBy, 1 if sortBy == 'levels' else None)} + ''' + columns = ['id', 'name', 'response_time_avg', 'levels', 'quantile_0', 'quantile_1', 'quantile_2', 'quantile_3', 'quantile_4'] + rst = Chaoss.__executeOuterSql(generated_sql, columns, Chaoss.__process) + return rst, generated_sql + + def chaossIssueAge(config, mode='outer') -> (List,str): + return Chaoss.__chaossAge(config, 'issue', mode) + + def chaossChangeRequestAge(config, mode='outer') -> (List,str): + return Chaoss.__chaossAge(config, 'change request', mode) + + #Evolution - Code Development Efficiency + def chaossChangeRequestsAccepted(config, mode='outer') -> (List,str): + whereClauses = ["type = 'PullRequestEvent' AND action = 'closed' AND pull_merged = 1"] + return Chaoss.__chaossCount(config, mode, whereClauses, 'change_requests_accepted') + + def chaossChangeRequestsDeclined(config, mode='outer') -> (List,str): + whereClauses = ["type = 'PullRequestEvent' AND action = 'closed' AND pull_merged = 0"] + return Chaoss.__chaossCount(config, mode, whereClauses, 'change_requests_accepted') + + def chaossChangeRequestsAcceptanceRatio(config, mode='outer') -> (List,str): + config = getMergedConfig(config) + whereClauses = ["type = 'PullRequestEvent' AND action = 'closed' "] + repoWhereClause = getRepoWhereClauseForClickhouse(config) + if repoWhereClause: + whereClauses.append(repoWhereClause) + whereClauses.append(getTimeRangeWhereClauseForClickhouse(config)) + + inner_sql = f''' + SELECT + {getGroupTimeClauseForClickhouse(config)}, + {getGroupIdClauseForClickhouse(config)}, + COUNT() AS count, + countIf(pull_merged = 1) AS accepted_count, + countIf(pull_merged = 0) AS declined_count, 
+ accepted_count / count AS ratio + FROM opensource.gh_events + WHERE {" AND ".join(whereClauses)} + GROUP BY id, time + {getInnerOrderAndLimit(config, 'ratio')} + ''' + if mode == 'origin': + columns = ['time','id','name', 'count', 'accepted_count', 'declined_count', 'ratio'] + rst = Chaoss.__executeInnnerSql(inner_sql,columns) + return rst, inner_sql + + generated_sql = f''' + SELECT + id, + argMax(name, time) AS name, + {getGroupArrayInsertAtClauseForClickhouse(config, {'key': 'change_requests_accepted_ratio', 'value': 'ratio'})}, + {getGroupArrayInsertAtClauseForClickhouse(config, {'key': 'change_requests_accepted', 'value': 'accepted_count'})}, + {getGroupArrayInsertAtClauseForClickhouse(config, {'key': 'change_requests_declined', 'value': 'declined_count'})} + FROM + ({inner_sql}) + GROUP BY id + {getOutterOrderAndLimit(config, 'change_requests_accepted_ratio')} + ''' + columns = ['id', 'name', 'ratio', 'accepted_count', 'declined_count'] + rst = Chaoss.__executeOuterSql(generated_sql, columns, Chaoss.__process) + return rst, generated_sql + + # Evolution - Code Development Process Quality + def chaossChangeRequests(config, mode='outer') -> (List,str): + whereClauses = ["type = 'PullRequestEvent' AND action = 'opened'"] + return Chaoss.__chaossCount(config, mode, whereClauses, 'change_requests_count') + + def chaossChangeRequestReviews(config, mode='outer') -> (List,str): + whereClauses = ["type = 'PullRequestReviewCommentEvent'"] + return Chaoss.__chaossCount(config, mode, whereClauses, 'change_requests_reviews_count') + + NewContributorsOptions = { + 'by': 'commit', #'commit' | 'change request' + 'withBot': False + } + + def chaossNewContributors(config, mode='outer') -> (List,str): + config = getMergedConfig(config) + by = filterEnumType(config.get('options').get('by') if config.get('options') != None else None, ['commit', 'change request'], 'change request') + whereClauses = [] + + endDate = datetime.date(year = config.get('endYear')+1 if config.get('endMonth')+1>12 else config.get('endYear'), month = (config.get('endMonth')+1)%12, day = 1) + + if by == 'commit': + whereClauses.append("type = 'PushEvent'") + elif by == 'change request': + whereClauses.append("type = 'PullRequestEvent' AND action = 'closed' AND pull_merged = 1") + + repoWhereClause = getRepoWhereClauseForClickhouse(config) + if repoWhereClause: + whereClauses.append(repoWhereClause) + + inner_sql = f''' + SELECT + {getGroupTimeClauseForClickhouse(config, 'first_time')}, + {getGroupIdClauseForClickhouse(config)}, + length(detail) AS new_contributor, + (arrayMap((x) -> (x), groupArray(author))) AS detail + FROM + ( + SELECT + min(created_at) AS first_time, + repo_id, + argMax(repo_name, created_at) AS repo_name, + org_id, + argMax(org_login, created_at) AS org_login, + {'author' if by == 'commit' else('actor_id, argMax(author,created_at) AS author' if by == 'change request' else '' )} + FROM + ( + SELECT + repo_id, + repo_name, + org_id, + org_login, + {'arrayJoin(push_commits.name) AS author' if by == 'commit' + else('issue_author_id AS actor_id, issue_author_login AS author' if by == 'change request' else '' )}, + created_at + FROM opensource.gh_events + WHERE {" AND ".join(whereClauses)} + {'' if config.get("options", {}).get("withBot") and by != 'commit' else "HAVING author NOT LIKE '%[bot]'"} + ) + GROUP BY repo_id, org_id, {'author' if by == 'commit' else 'actor_id'} + HAVING first_time >= toDate('{config.get('startYear')}-{config.get('startMonth')}-1') AND first_time < 
toDate('{endDate.year}-{endDate.month}-1') + ) + GROUP BY id, time + {getInnerOrderAndLimit(config, 'new_contributor')} + ''' + if mode == 'origin': + columns = ['time','id','name', 'new_contributor', 'detail'] + rst = Chaoss.__executeInnnerSql(inner_sql,columns) + return rst, inner_sql + + generated_sql = f''' + SELECT + id, + argMax(name, time) AS name, + {getGroupArrayInsertAtClauseForClickhouse(config, {'key': 'new_contributors', 'value': 'new_contributor'})}, + {getGroupArrayInsertAtClauseForClickhouse(config, {'key': 'detail', 'noPrecision': True, 'defaultValue': '[]'})}, + SUM(new_contributor) AS total_new_contributors + FROM + ({inner_sql}) + GROUP BY id + {getOutterOrderAndLimit(config, 'new_contributors')} + ''' + columns = ['id', 'name', 'new_contributors', 'detail', 'total_new_contributors'] + rst = Chaoss.__executeOuterSql(generated_sql, columns, Chaoss.__process) + return rst, generated_sql + + InactiveContributorsOptions = { + # time interval to determine inactive contributor, default: 6 + 'timeInterval': 6, + # time interval unit, default: month + 'timeIntervalUnit': 'month', + # determine contributor by commit or by change request + 'by': 'commit', # 'commit'| 'change request', + # min count of contributions to determine inactive contributor + 'minCount': 0, + 'withBot': False + } + + def chaossInactiveContributors(config, mode='outer') -> (List,str): + config = getMergedConfig(config) + by = filterEnumType(config.get("options", {}).get('by'), ['commit', 'change request'], 'change request') + timeInterval = config.get("options", {}).get('timeInterval', 6) + timeIntervalUnit = filterEnumType(config.get("options", {}).get('timeIntervalUnit'), ['month', 'quarter', 'year'], 'month') + minCount = config.get("options", {}).get('minCount', 0) + whereClauses = [] + + endDate = datetime.date(year = config.get('endYear')+1 if config.get('endMonth')+1>12 else config.get('endYear'), month = (config.get('endMonth')+1)%12, day = 1) + endTimeClause = f"toDate('{endDate.year}-{endDate.month}-1')" + + if by == 'commit': + whereClauses.append("type = 'PushEvent'") + elif by == 'change request': + whereClauses.append("type = 'PullRequestEvent' AND action = 'closed' AND pull_merged = 1") + + repoWhereClause = getRepoWhereClauseForClickhouse(config) + if repoWhereClause: + whereClauses.append(repoWhereClause) + + whereClauses.append(f"created_at < {endTimeClause}") + + inner_sql = f''' + SELECT + id, + argMax(name, time) AS name, + time, + countIf(first_time < time AND contributions <= {minCount}) AS inactive_contributors, + groupArrayIf(author, first_time < time AND contributions <= {minCount}) AS detail + FROM + ( + SELECT + {( + f"arrayJoin(arrayMap(x -> dateAdd({config['groupTimeRange']}, x + 1, toDate('{config['startYear']}-{config['startMonth']}-1')), " + + f"range(toUInt64(dateDiff('{config['groupTimeRange']}', toDate('{config['startYear']}-{config['startMonth']}-1'), {endTimeClause})))))" + ) if config.get('groupTimeRange') else endTimeClause} AS time, + {getGroupIdClauseForClickhouse(config)}, + {('author' if by == 'commit' else 'actor_id, argMax(author, created_at) AS author')}, + min(created_at) AS first_time, + countIf(created_at >= dateSub({timeIntervalUnit}, {timeInterval}, time) AND created_at <= time) AS contributions + FROM + ( + SELECT + repo_id, + repo_name, + org_id, + org_login, + {('arrayJoin(push_commits.name) AS author' if by == 'commit' else 'issue_author_id AS actor_id, issue_author_login AS author')}, + created_at + FROM opensource.gh_events + WHERE {' AND 
'.join(whereClauses)} + {(config.get('options', {}).get('withBot') and by != 'commit') and '' or "HAVING author NOT LIKE '%[bot]'"} + ) + GROUP BY id, {('author' if by == 'commit' else 'actor_id')}, time + ) + GROUP BY id, time + {getInnerOrderAndLimit(config, 'inactive_contributors')} + ''' + if mode == 'origin': + columns = ['id','name', 'time', 'inactive_contributors', 'detail'] + rst = Chaoss.__executeInnnerSql(inner_sql,columns) + return rst, inner_sql + + generated_sql = f''' + SELECT + id, + argMax(name, time) AS name, + {getGroupArrayInsertAtClauseForClickhouse(config, {'key': 'inactive_contributors', 'positionByEndTime': True})}, + {getGroupArrayInsertAtClauseForClickhouse(config, {'key': 'detail', 'noPrecision': True, 'defaultValue': '[]', 'positionByEndTime': True})} + FROM + ({inner_sql}) + GROUP BY id + {getOutterOrderAndLimit(config, 'inactive_contributors')} + ''' + columns = ['id', 'name', 'inactive_contributors', 'detail'] + rst = Chaoss.__executeOuterSql(generated_sql, columns, Chaoss.__process) + return rst, generated_sql + + InactiveContributorsOptions = { + # normalize the results by this option as max value + 'normalize': 100 + } + + def __chaossActiveDatesAndTimes(config, type, mode='outer') -> (List,str): + config = getMergedConfig(config) + whereClauses = [getTimeRangeWhereClauseForClickhouse(config)] + + if type == 'user': + userWhereClause = getUserWhereClauseForClickhouse(config) + if userWhereClause: + whereClauses.append(userWhereClause) + elif type == 'repo': + repoWhereClause = getRepoWhereClauseForClickhouse(config) + if repoWhereClause: + whereClauses.append(repoWhereClause) + else: + raise ValueError(f"Not supported type: {type}") + + inner_sql = f''' + SELECT id, argMax(name, time) AS name, time, arrayMap(x -> {f"round(x*{config.get('options', {}).get('normalize')} * max(count))" if config.get('options', {}).get('normalize') else 'x'}, + groupArrayInsertAt(0, 168)(count, toUInt32((day - 1) * 24 + hour))) AS count + FROM + ( + SELECT + {getGroupTimeClauseForClickhouse(config)}, + {getGroupIdClauseForClickhouse(config, type)}, + toHour(created_at) AS hour, + toDayOfWeek(created_at) AS day, + COUNT() AS count + FROM opensource.gh_events + WHERE {' AND '.join(whereClauses)} + GROUP BY id, time, hour, day + ORDER BY day, hour + ) + GROUP BY id, time + {getInnerOrderAndLimit(config, 'count', 1)} + ''' + if mode == 'origin': + columns = ['id','name', 'time', 'list', 'count'] + rst = Chaoss.__executeInnnerSql(inner_sql,columns) + return rst, inner_sql + + generated_sql = f''' + SELECT + id, + argMax(name, time) AS name, + {getGroupArrayInsertAtClauseForClickhouse(config, {'key': 'count', 'noPrecision': True, 'defaultValue': '[]'})} + FROM + ({inner_sql}) + GROUP BY id + {getOutterOrderAndLimit(config, 'count', 1)} + ''' + columns = ['id', 'name', 'count'] + rst = Chaoss.__executeOuterSql(generated_sql, columns, Chaoss.__process) + return rst, generated_sql + + def chaossUserActiveDatesAndTimes(config, mode='outer') -> (List,str): + return Chaoss.__chaossActiveDatesAndTimes(config, 'user', mode) + + def chaossRepoActiveDatesAndTimes(config, mode='outer') -> (List,str): + return Chaoss.__chaossActiveDatesAndTimes(config, 'repo', mode) + \ No newline at end of file diff --git a/python_v2/metrics/index.py b/python_v2/metrics/index.py new file mode 100644 index 000000000..4a3942041 --- /dev/null +++ b/python_v2/metrics/index.py @@ -0,0 +1,275 @@ +from .basic import QueryConfig, \ + getMergedConfig, \ + getRepoWhereClauseForNeo4j, \ + getTimeRangeWhereClauseForNeo4j, 
\ + getTimeRangeSumClauseForNeo4j, \ + getUserWhereClauseForNeo4j, \ + getRepoWhereClauseForClickhouse,\ + getUserWhereClauseForClickhouse,\ + getTimeRangeWhereClauseForClickhouse,\ + getGroupArrayInsertAtClauseForClickhouse,\ + getGroupTimeClauseForClickhouse,\ + getGroupIdClauseForClickhouse +from label_data_utils import getLabelData +from db.neo4j_wrapper import Neo4jWrapper +from db.clickhouse_wrapper import ClickhouseWrapper +from functools import cmp_to_key +import numpy as np +clickhouse = ClickhouseWrapper() +neo4j = Neo4jWrapper() + +class Index(): + def getRepoOpenrank(config): + """_summary_ + + Args: + config (QueryConfig): config of query. + Returns: + neo4j cursor: query results of neo4j + """ + config = getMergedConfig(config) + calType = 'open_rank' + repoWhereClause = getRepoWhereClauseForNeo4j(config) + timeWhereClause = getTimeRangeWhereClauseForNeo4j(config, 'r') + timeActivityOrOpenrankClause = getTimeRangeSumClauseForNeo4j(config, 'r.{}'.format(calType)) + if not config.get('groupBy'): + query = 'MATCH (r:Repo) WHERE {} {} RETURN r.name AS repo_name, r.org_login AS org, [{}] AS {} ORDER BY reverse({}) {} {};'.format(repoWhereClause+' AND ' if repoWhereClause else '', timeWhereClause, ','.join(timeActivityOrOpenrankClause), calType, calType, config.get('order'), 'LIMIT {}'.format(config.get('limit')) if config.get('limit') > 0 else '') + return neo4j.query(query) + elif config.get('groupBy') == 'org': + query = 'MATCH (r:Repo) WHERE {} {} RETURN r.org_login AS org_login, count(r.id) AS repo_count, [{}] AS {} ORDER BY reverse({}) {} {};'.format(repoWhereClause+' AND ' if repoWhereClause else '', timeWhereClause, list(map(lambda i:'round(SUM({}), {})'.format(i, config.get('percision')), timeActivityOrOpenrankClause)), calType, calType, config.get('order'), 'LIMIT {}'.format(config.get('limit')) if config.get('limit') > 0 else '') + return neo4j.query(query) + else: + query = 'MATCH (r:Repo) WHERE {} {} RETURN r.id AS repo_id, r.org_id AS org_id, [{}] AS {};'.format(repoWhereClause + ' AND ' if repoWhereClause else '', timeWhereClause, ','.join(timeActivityOrOpenrankClause), calType) + queryResult = neo4j.query(query) + labelData = list(filter(lambda l: l.get('type') == config.get('groupBy'), getLabelData())) if getLabelData() != None else None + result = {} + if labelData == None: return None + for row in queryResult: + labels = list(filter(lambda l: int(row.get('repo_id')) in l.get('githubRepos') or int(row.get('org_id')) in l.get('githubOrgs'),labelData)) + for label in labels: + if not label.get('name') in result.keys(): values = row[calType] + else: + values = result.get(label.get('name'))[calType] + for i in range(len(values)): + values[i] += row[calType][i] + result[label.get('name')] = { + 'label': label.get('name'), + 'repo_count': (result.get(label.get('name'))['repo_count'] if label.get('name') in result else 0) + 1, + } + result[label.get('name')][calType] = values + resultArr = list(result.values()) + if config.get('order') == 'ASC': resultArr.sort(key = cmp_to_key(lambda a, b: a[calType][len(a[calType]) - 1] - b[calType][len(b[calType]) - 1])) + if config.get('order') == 'DESC': resultArr.sort(key = cmp_to_key(lambda a, b: b[calType][len(b[calType]) - 1] - a[calType][len(a[calType]) - 1])) + for i in resultArr: + i[calType] = np.around(i[calType]) + return resultArr[0:config.get('limit')] + + def getUserOpenrank(config): + """_summary_ + + Args: + config (QueryConfig): config of query. 
+ Returns: + neo4j cursor: query results of neo4j + """ + config = getMergedConfig(config) + calType = 'open_rank' + userWhereClause = getUserWhereClauseForNeo4j(config) + timeWhereClause = getTimeRangeWhereClauseForNeo4j(config, 'u') + timeActivityClause = getTimeRangeSumClauseForNeo4j(config, 'u.{}'.format(calType)) + query = 'MATCH (u:User) WHERE {} {} RETURN u.login AS user_login, [{}] AS {} ORDER BY {} {} {};'.format(userWhereClause +' AND ' if userWhereClause else '', timeWhereClause, ','.join(timeActivityClause), calType, calType, config.get('order'), 'LIMIT {}'.format(config.get('limit')) if config.get('limit') > 0 else '') + return neo4j.query(query) + + def getRepoActivity(config): + config = getMergedConfig(config) + whereClauses = ["type IN ('IssuesEvent', 'IssueCommentEvent', 'PullRequestEvent', 'PullRequestReviewCommentEvent')"] # specify types to reduce memory usage and calculation + repoWhereClause = getRepoWhereClauseForClickhouse(config) + if repoWhereClause: whereClauses.append(repoWhereClause) + whereClauses.append(getTimeRangeWhereClauseForClickhouse(config)) + sql = "SELECT id, argMax(name, time) AS name, \ + {}, \ + {}, \ + {}, \ + {}, \ + {}, \ + {} \ + FROM \ + (".format(getGroupArrayInsertAtClauseForClickhouse(config, { 'key': 'activity', 'defaultValue': '0' }), + getGroupArrayInsertAtClauseForClickhouse(config, { 'key': 'issue_comment', 'defaultValue': '0' }), + getGroupArrayInsertAtClauseForClickhouse(config, { 'key': 'open_issue', 'defaultValue': '0' }), + getGroupArrayInsertAtClauseForClickhouse(config, { 'key': 'open_pull', 'defaultValue': '0' }), + getGroupArrayInsertAtClauseForClickhouse(config, { 'key': 'review_comment', 'defaultValue': '0' }), + getGroupArrayInsertAtClauseForClickhouse(config, { 'key': 'merged_pull', 'defaultValue': '0' }), + ) + \ + "SELECT \ + {}, \ + {}, \ + ROUND(SUM(activity), 2) AS activity, \ + SUM(issue_comment) AS issue_comment, \ + SUM(open_issue) AS open_issue, \ + SUM(open_pull) AS open_pull, \ + SUM(review_comment) AS review_comment, \ + SUM(merged_pull) AS merged_pull \ + FROM \ + (".format(getGroupTimeClauseForClickhouse(config, 'month'), getGroupIdClauseForClickhouse(config, 'repo', 'month')) + \ + "SELECT \ + toStartOfMonth(created_at) AS month, \ + repo_id, argMax(repo_name, created_at) AS repo_name, \ + org_id, argMax(org_login, created_at) AS org_login, \ + if(type='PullRequestEvent' AND action='closed' AND pull_merged=1, issue_author_id, actor_id) AS actor_id, \ + countIf(type='IssueCommentEvent' AND action='created') AS issue_comment, \ + countIf(type='IssuesEvent' AND action='opened') AS open_issue, \ + countIf(type='PullRequestEvent' AND action='opened') AS open_pull, \ + countIf(type='PullRequestReviewCommentEvent' AND action='created') AS review_comment, \ + countIf(type='PullRequestEvent' AND action='closed' AND pull_merged=1) AS merged_pull, \ + sqrt({}*issue_comment + {}*open_issue + {}*open_pull + {}*review_comment + {}*merged_pull) AS activity \ + FROM opensource.gh_events \ + WHERE {} \ + GROUP BY repo_id, org_id, actor_id, month \ + HAVING activity > 0 \ + ) \ + GROUP BY id, time\ + {}\ + ) \ + GROUP BY id \ + ORDER BY activity[-1] {} \ + FORMAT JSONCompact".format(Index.ISSUE_COMMENT_WEIGHT, Index.OPEN_ISSUE_WEIGHT, + Index.OPEN_PULL_WEIGHT, Index.REVIEW_COMMENT_WEIGHT, Index.PULL_MERGED_WEIGHT, + ' AND '.join(whereClauses), + 'ORDER BY activity DESC LIMIT {} BY time'.format(config.get('limit')) if config.get('limit') > 0 else '', + config.get('order') + ) # use JSONCompact to reduce network I/O + + 
result = clickhouse.query(sql) + def return_row(row): + id, name, activity, issue_comment, open_issue, open_pull, review_comment, merged_pull = row + return { + 'id':id, + 'name':name, + 'activity':activity, + 'issue_comment':issue_comment, + 'open_issue':open_issue, + 'open_pull':open_pull, + 'review_comment':review_comment, + 'merged_pull':merged_pull, + } + return list(map(return_row, result)) + + def getUserActivity(config = QueryConfig, withBot = True): + config = getMergedConfig(config) + whereClauses = ["type IN ('IssuesEvent', 'IssueCommentEvent', 'PullRequestEvent', 'PullRequestReviewCommentEvent')"] # specify types to reduce memory usage and calculation + userWhereClause = getUserWhereClauseForClickhouse(config) + if userWhereClause != None: whereClauses.append(userWhereClause) + whereClauses.append(getTimeRangeWhereClauseForClickhouse(config)) + sql = "SELECT id, argMax(name, time) AS name, \ + {}, \ + {}, \ + {}, \ + {}, \ + {}, \ + {} \ + FROM \ + (".format(getGroupArrayInsertAtClauseForClickhouse(config, { 'key': 'activity', 'defaultValue': '0' }), + getGroupArrayInsertAtClauseForClickhouse(config, { 'key': 'issue_comment', 'defaultValue': '0' }), + getGroupArrayInsertAtClauseForClickhouse(config, { 'key': 'open_issue', 'defaultValue': '0' }), + getGroupArrayInsertAtClauseForClickhouse(config, { 'key': 'open_pull', 'defaultValue': '0' }), + getGroupArrayInsertAtClauseForClickhouse(config, { 'key': 'review_comment', 'defaultValue': '0' }), + getGroupArrayInsertAtClauseForClickhouse(config, { 'key': 'merged_pull', 'defaultValue': '0' }) + ) + \ + "SELECT \ + {}, \ + {}, \ + ROUND(SUM(activity), 2) AS activity, \ + SUM(issue_comment) AS issue_comment, \ + SUM(open_issue) AS open_issue, \ + SUM(open_pull) AS open_pull, \ + SUM(review_comment) AS review_comment, \ + SUM(merged_pull) AS merged_pull \ + FROM \ + (".format(getGroupTimeClauseForClickhouse(config, 'month'), getGroupIdClauseForClickhouse(config, 'actor', 'month')) + \ + "SELECT \ + toStartOfMonth(created_at) AS month, \ + repo_id, \ + if(type='PullRequestEvent' AND action='closed' AND pull_merged=1, issue_author_id, actor_id) AS actor_id, \ + argMax(if(type='PullRequestEvent' AND action='closed' AND pull_merged=1, issue_author_login, actor_login), created_at) AS actor_login, \ + countIf(type='IssueCommentEvent' AND action='created') AS issue_comment, \ + countIf(type='IssuesEvent' AND action='opened') AS open_issue, \ + countIf(type='PullRequestEvent' AND action='opened') AS open_pull, \ + countIf(type='PullRequestReviewCommentEvent' AND action='created') AS review_comment, \ + countIf(type='PullRequestEvent' AND action='closed' AND pull_merged=1) AS merged_pull, \ + sqrt({}*issue_comment + {}*open_issue + {}*open_pull + {}*review_comment + {}*merged_pull) AS activity \ + FROM opensource.gh_events \ + WHERE {} \ + GROUP BY repo_id, actor_id, month \ + HAVING activity > 0 {} \ + ) \ + GROUP BY id, time \ + {} \ + ) \ + GROUP BY id \ + ORDER BY activity[-1] {} \ + FORMAT JSONCompact".format(Index.ISSUE_COMMENT_WEIGHT, Index.OPEN_ISSUE_WEIGHT, Index.OPEN_PULL_WEIGHT, Index.REVIEW_COMMENT_WEIGHT, Index.PULL_MERGED_WEIGHT, + ' AND '.join(whereClauses), '' if withBot else 'AND actor_login NOT LIKE \'%[bot]\'', + 'ORDER BY activity DESC LIMIT {} BY time'.format(config.get('limit')) if config.get('limit') > 0 else '', + config.get('order')) + + result = clickhouse.query(sql) + def return_row(row): + id, name, activity, issue_comment, open_issue, open_pull, review_comment, merged_pull = row + return { + 'id':id, + 'name':name, + 
'activity':activity, + 'issue_comment':issue_comment, + 'open_issue':open_issue, + 'open_pull':open_pull, + 'review_comment':review_comment, + 'merged_pull':merged_pull, + } + return list(map(return_row, result)) + + def getAttention(config: QueryConfig): + """_summary_ + + Args: + config (QueryConfig): _description_ + """ + config = getMergedConfig(config) + whereClauses = ["type IN (\'WatchEvent\', \'ForkEvent\')"] + repoWhereClause = getRepoWhereClauseForClickhouse(config) + if repoWhereClause != None: whereClauses.append(repoWhereClause) + whereClauses.append(getTimeRangeWhereClauseForClickhouse(config)) + + sql = ' \ + SELECT \ + id, \ + argMax(name, time) AS name, \ + {} \ + FROM \ + ('.format(getGroupArrayInsertAtClauseForClickhouse(config, { 'key': 'attention' })) + \ + 'SELECT \ + {}, \ + {}, \ + countIf(type=\'WatchEvent\') AS stars, \ + countIf(type=\'ForkEvent\') AS forks, \ + stars + 2 * forks AS attention \ + FROM opensource.gh_events \ + WHERE {} \ + GROUP BY id, time \ + {} \ + ) \ + GROUP BY id \ + ORDER BY attention[-1] {} \ + FORMAT JSONCompact'.format(getGroupTimeClauseForClickhouse(config), getGroupIdClauseForClickhouse(config), ' AND '.join(whereClauses), + 'ORDER BY attention DESC LIMIT {} BY time'.format(config.get('limit')) if config.get('limit') > 0 else '', + config.get('order')) + + result = clickhouse.query(sql) + def getResult(row): + id, name, attention = row + return { + 'id':id, + 'name':name, + 'attention':attention, + } + return list(map(getResult, result)) diff --git a/python_v2/metrics/related_users.py b/python_v2/metrics/related_users.py new file mode 100644 index 000000000..96eb2e368 --- /dev/null +++ b/python_v2/metrics/related_users.py @@ -0,0 +1,11 @@ +from .basic import getMergedConfig, getRepoWhereClauseForNeo4j, getTimeRangeWhereClauseForNeo4j +from db.neo4j_wrapper import Neo4jWrapper +neo4j = Neo4jWrapper() + +class Relation(): + def getRelatedUsers(config): + config = getMergedConfig(config) + repoWhereClause = getRepoWhereClauseForNeo4j(config) + timeWhereClause = getTimeRangeWhereClauseForNeo4j(config, 'a') + query = 'MATCH (r:Repo)<-[a:ACTION]-(u:User) WHERE {} {} RETURN DISTINCT u.login AS user_login {};'.format(repoWhereClause + ' AND ' if repoWhereClause != None else '', timeWhereClause, 'LIMIT {}'.format(config.get('limit')) if config.get('limit') > 0 else '') + return neo4j.query(query) diff --git a/python_v2/open_digger.py b/python_v2/open_digger.py new file mode 100644 index 000000000..4a583716d --- /dev/null +++ b/python_v2/open_digger.py @@ -0,0 +1,53 @@ +import label_data_utils as label +from functools import cmp_to_key +import metrics +from db.clickhouse_wrapper import ClickhouseWrapper +from db.neo4j_wrapper import Neo4jWrapper +import plotly.graph_objs as go +from plotly.subplots import make_subplots + +class openDigger(object): + def __init__(self): + self.label = label + self.render = go + self.metric = metrics.Metric() + self.clickhouse = ClickhouseWrapper() + self.neo4j = Neo4jWrapper() + + class quick(): + @classmethod + def showAll(self, repoName, startYear = 2015, endYear = 2021): + config = { 'repoNames': [repoName], 'startYear': startYear, 'endYear': endYear, 'groupTimeRange': 'month' } + activity = self.index.getRepoActivity(config) + openrank = self.index.getRepoOpenrank(config) + for year in range(startYear, endYear + 1): + for month in range(1, 13): + k = '{}{}'.format(year, month) + fig = make_subplots(specs=[[{"secondary_y": True}]]) + fig.add_trace( + openDigger().render.Scatter( + y = 
activity[0].get('activity'),
+                        mode="markers+lines",
+                        name='activity'
+                    ))
+                fig.add_trace(
+                    openDigger().render.Scatter(
+                        y = openrank[0].get('open_rank'),
+                        mode="markers+lines",
+                        name='openrank'
+                    ), secondary_y=True)
+                fig.update_layout(
+                    title="Activity/OpenRank for {} from {} to {}".format(repoName, startYear, endYear),
+                )
+                fig.show()
+
+    def getRank(self, values, nameGetter, valueGetter):
+        # Rank every entity at each time point: for each index i of the value
+        # arrays, sort entities by that value (descending) and record the
+        # 1-based rank, or None when the value is 0 (no activity in that period).
+        resultMap = {}
+        for v in values:
+            resultMap[nameGetter(v)] = []
+        valueLength = len(valueGetter(values[0]))
+        for i in range(valueLength):
+            values.sort(key = cmp_to_key(lambda a, b: valueGetter(b)[i] - valueGetter(a)[i]))
+            for index, v in enumerate(values):
+                resultMap.get(nameGetter(v)).append(None if valueGetter(v)[i] == 0 else index + 1)
+        return list(map(lambda e: {'name': e[0], 'values': e[1],}, resultMap.items()))
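
For reviewers who want to try the new kernel, below is a minimal usage sketch; it is not part of the patch. It assumes the ClickHouse sample data and `local_config.py` described in `python_v2/README.md` are in place and that the `python_v2` directory is on `sys.path`; the repository name, date range, and printed slices are placeholders for illustration only.

```python
# Minimal sketch (not part of this patch): calling the metrics added in
# python_v2/metrics from a notebook or workspace.py inside python_v2.
# Assumes ClickHouse sample data is loaded and local_config.py points at it.
from metrics.chaoss import Chaoss
from metrics.index import Index

config = {
    'repoNames': ['X-lab2017/open-digger'],   # placeholder repository
    'startYear': 2021, 'startMonth': 1,
    'endYear': 2021, 'endMonth': 12,
    'groupTimeRange': 'month',
    'order': 'DESC', 'limit': 10,
}

# CHAOSS metric from metrics/chaoss.py: new contributors per month.
rows, sql = Chaoss.chaossNewContributors(config)
print(sql)        # the generated ClickHouse SQL
print(rows[:3])   # first few result rows

# Activity index from metrics/index.py.
print(Index.getRepoActivity(config)[:3])
```

Passing `mode='origin'` to the CHAOSS functions returns the un-aggregated inner query results (and the inner SQL) instead of the grouped arrays.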