From 50a9a314b0e6ed2dc181ec107a1fbddd4e73b1b2 Mon Sep 17 00:00:00 2001 From: Amanda Goldani Date: Thu, 11 Nov 2021 02:00:25 +0100 Subject: [PATCH 01/17] Set custom logger in McMClient --- src/python/Services/McM/McMClient.py | 3 +-- src/python/Utilities/DataTools.py | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/python/Services/McM/McMClient.py b/src/python/Services/McM/McMClient.py index 879d6be69..ed2bb4c2b 100644 --- a/src/python/Services/McM/McMClient.py +++ b/src/python/Services/McM/McMClient.py @@ -5,7 +5,7 @@ from logging import Logger import logging -from logging import getLogger # from Utilities.Logging import getLogger +from Utilities.Logging import getLogger from typing import Optional @@ -19,7 +19,6 @@ class McMClient(object): def __init__(self, logger: Optional[Logger] = None, **kwargs) -> None: try: super().__init__() - logging.basicConfig(level=logging.INFO) self.logger = logger or getLogger(self.__class__.__name__) self.devMode = os.getenv("UNIFIED_MCM") == "dev" or kwargs.get("dev") or False diff --git a/src/python/Utilities/DataTools.py b/src/python/Utilities/DataTools.py index 826391f96..68c65d608 100644 --- a/src/python/Utilities/DataTools.py +++ b/src/python/Utilities/DataTools.py @@ -254,3 +254,17 @@ def flattenTaskTree(task: str, **selectParam) -> list: allTasks.extend(flattenTaskTree(childSpec, **selectParam)) return allTasks + +def unnestList(lst: list) -> list: + """ + The function to unnest a list of lists + :lst: list of lists + :return: unnested list + """ + allItems = [] + for item in lst: + if isinstance(item, lst): + allItems.extend(unnestList(item)) + elif item is not None: + allItems.extend(item) + return allItems \ No newline at end of file From 6a796415b7ca9743c257f1046f125d4f589bbfc9 Mon Sep 17 00:00:00 2001 From: Amanda Goldani Date: Mon, 6 Dec 2021 04:32:56 +0100 Subject: [PATCH 02/17] Migrate checkor --- src/python/Utilities/DataTools.py | 17 +- src/python/Utilities/Logging.py | 32 +- 
src/python/WorkflowMgmt/WorkflowController.py | 19 + unified/Checkor.py | 1466 +++++++++++++++++ 4 files changed, 1518 insertions(+), 16 deletions(-) create mode 100644 unified/Checkor.py diff --git a/src/python/Utilities/DataTools.py b/src/python/Utilities/DataTools.py index 68c65d608..1be3842f8 100644 --- a/src/python/Utilities/DataTools.py +++ b/src/python/Utilities/DataTools.py @@ -255,6 +255,21 @@ def flattenTaskTree(task: str, **selectParam) -> list: return allTasks + +def flattenDictKeys(dct: dict, values: set) -> set: + """ + The function to flatten dict keys + :param dct: a dict + :param values: values + :return: set of keys + """ + allValues = set(values) + for value in allValues: + if value in dct: + allValues.update(flattenDictKeys(dct, dct[value])) + return allValues + + def unnestList(lst: list) -> list: """ The function to unnest a list of lists @@ -267,4 +282,4 @@ def unnestList(lst: list) -> list: allItems.extend(unnestList(item)) elif item is not None: allItems.extend(item) - return allItems \ No newline at end of file + return allItems diff --git a/src/python/Utilities/Logging.py b/src/python/Utilities/Logging.py index 54f1ccbf1..accd56eb2 100644 --- a/src/python/Utilities/Logging.py +++ b/src/python/Utilities/Logging.py @@ -240,25 +240,27 @@ def getLogger(name: str, level: str = "INFO", flushEveryLog: bool = True, **kwar :return: a logger """ logger = logging.getLogger(name) - logger.setLevel(level) - streamHandler = logging.StreamHandler() - streamHandler.setLevel(level=level) - streamHandler.setFormatter(logging.Formatter("[%(asctime)s:%(name)s:%(module)s] %(levelname)s: %(message)s")) - logger.addHandler(streamHandler) + if not len(logger.handlers): + logger.setLevel(level) - if kwargs.get("addWfLevel"): - logging.addLevelName(100, "WORKFLOW") - logging.workflow = getWorkflowLogLevel - logging.Logger.workflow = getWorkflowLogLevel + streamHandler = logging.StreamHandler() + streamHandler.setLevel(level=level) + 
streamHandler.setFormatter(logging.Formatter("[%(asctime)s:%(name)s:%(module)s] %(levelname)s: %(message)s")) + logger.addHandler(streamHandler) - if kwargs.get("elasticSearch"): - esHandler = ElasticSearchHandler(level=kwargs.get("elasticSearch") or level, flushEveryLog=flushEveryLog) - logger.addHandler(esHandler) + if kwargs.get("addWfLevel"): + logging.addLevelName(100, "WORKFLOW") + logging.workflow = getWorkflowLogLevel + logging.Logger.workflow = getWorkflowLogLevel - if kwargs.get("email"): - emailHandler = EmailHandler(level=kwargs.get("email") or level, flushEveryLog=flushEveryLog) - logger.addHandler(emailHandler) + if kwargs.get("elasticSearch"): + esHandler = ElasticSearchHandler(level=kwargs.get("elasticSearch") or level, flushEveryLog=flushEveryLog) + logger.addHandler(esHandler) + + if kwargs.get("email"): + emailHandler = EmailHandler(level=kwargs.get("email") or level, flushEveryLog=flushEveryLog) + logger.addHandler(emailHandler) return logger diff --git a/src/python/WorkflowMgmt/WorkflowController.py b/src/python/WorkflowMgmt/WorkflowController.py index 74a92d5a8..d392eb9a6 100644 --- a/src/python/WorkflowMgmt/WorkflowController.py +++ b/src/python/WorkflowMgmt/WorkflowController.py @@ -458,6 +458,25 @@ def getOutputDatasetsPerTask(self) -> dict: """ return self.request.getOutputDatasetsPerTask(self.getWorkTasks()) + def getCampaignsFromOutputDatasets(self) -> dict: + """ + The function to get the campaigns based on the output datasets + :return: a dict of campaigns by dataset names + """ + campaigns = {} + for dataset in self.request.get("OutputDatasets", []): + campaign = self.request.get("Campaign") + if not self.isRelVal(): + try: + era = dataset.split('/')[2].split('-')[0] + except: + era = None + campaign = era if era else campaign + + campaigns[dataset] = campaign + + return campaigns + def getCampaignByTask(self, task: str) -> str: """ The function to get the campaign for a given task diff --git a/unified/Checkor.py b/unified/Checkor.py 
new file mode 100644 index 000000000..800b8870e --- /dev/null +++ b/unified/Checkor.py @@ -0,0 +1,1466 @@ +import os +import re +import optparse +import random +import json +import math +from logging import Logger +from collections import defaultdict +from time import mktime, asctime, gmtime, struct_time +from jinja2.runtime import F + +from sqlalchemy.sql.expression import true + +from Utilities.DataTools import unnestList, flattenDictKeys +from Utilities.Decorators import runWithMultiThreading, runWithRetries +from Utilities.Logging import getLogger +from Utilities.ConfigurationHandler import ConfigurationHandler +from Cache.DataCacheLoader import DataCacheLoader +from Databases.Oracle.OracleClient import OracleClient +from Databases.Oracle.OracleDB import Workflow +from MongoControllers.CampaignController import CampaignController +from MongoControllers.CloseoutController import CloseoutController +from MongoControllers.SiteController import SiteController +from MongoControllers.WTCController import WTCController +from Services.ServicesChecker import ServicesChecker +from Services.McM.McMClient import McMClient +from Services.ReqMgr.ReqMgrReader import ReqMgrReader +from Services.ACDC.ACDCReader import ACDCReader +from Services.DBS.DBSReader import DBSReader +from Services.DBS.DBSWriter import DBSWriter +from Services.Rucio.RucioReader import RucioReader +from WorkflowMgmt.UserLockChecker import UserLockChecker +from WorkflowMgmt.WorkflowController import WorkflowController +from WorkflowMgmt.WorkflowStatusEnforcer import WorkflowStatusEnforcer + +from typing import Optional, Tuple, Union + + +class Checkor(OracleClient): + """ + __Checkor__ + General API for checking workflows in completed status + """ + + def __init__(self, logger: Optional[Logger] = None, **kwargs) -> None: + try: + super().__init__(self) + self.logger = logger or getLogger(self.__class__.__name__) + + self.options, self.specificWf = kwargs.get("options"), kwargs.get("specificWf") + if 
self.options is None: + self.options, self.specificWf = self.parseOptions() + + now = mktime(gmtime()) + self.timePoint = {"lap": now, "subLap": now, "start": now} + + self.unifiedConfiguration = ConfigurationHandler("config/unifiedConfiguration.json") + + self.reqMgrReader = ReqMgrReader() + self.acdcReader = ACDCReader() + self.dbs = {"reader": DBSReader(), "writer": DBSWriter()} + + self.dataCacheLoader = DataCacheLoader() + self.mcmClient = McMClient() if self.useMcM else None + + self.campaignController = CampaignController() + self.closeoutController = CloseoutController() + self.siteController = SiteController() + self.wtcController = WTCController() + + self.logMsg = { + "strict": "Strict option is on: checking workflows that freshly completed", + "update": "Update option is on: checking workflows that have not completed yet", + "clear": "Clear option is on: checking workflows that are ready to toggle closed-out", + "reviewRecover": "Review-recovering option is on: checking only the workflows that had been already acted on", + "reviewManual": "Review-manual option is on: checking the workflows to be acted on", + "timeCheck": "Time check (%s) point at: %s", + "sinceStart": "Since start: %s [s]", + "lap": "%s: %s [s]", + "notAllowed": "%s is not allowed to %s", + "allowed": "%s allowed to %s %s", + "nWfs": "Considering %s workflows (before any limitation)", + "limitOption": "Command line to limit workflows to %s", + "limit": "Limiting workflows to %s this round", + "startCheck": "Starting checkor with {}", + "setClose": "%s is already %s, setting as close", + "setForget": "%s is %s, but will not be set in trouble to find a replacement", + "setTrouble": "%s is %s, setting as trouble", + "setOnHold": "%s is %s, setting as on hold", + "notRunning": "%s is %s, it is not running yet", + "skipCheck": "No need to check %s in status %s", + "bypassChecks": "Bypassing checks on %s because of keyword %s", + "bypassForceComplete": "Bypassing checks and force completing 
%s because of %s %s", + "failedCheck": "Failed on checking %s", + "onHoldSince": "On hold since %s, timeout at %s", + "bypassOnHold": "%s is on hold and stopped for %.2f days, letting this through with current statistics", + "onHold": "%s is on hold", + "campaigns": "Campaigns: %s", + "checkSummary": "Initial outputs: %s\nWill check on: %s\nTiers out: %s\n, Tiers no custodials: %s", + "inconsistentAcdc": "Inconsistent ACDC %s", + "acdc": "%s still has an ACDC running", + "failedAcdc": "For %s, ACDC %s failed", + "badAcdc": "For %s, ACDC %s is inconsistent, preventing from closing or will create a mess.", + "forceRecovering": "%s is being force completed while recovering", + "forceComplete": "The workflow %s was force completed", + "missingTotalEvents": "TotalInputEvents is missing from the workload of %s", + "sinceCompleted": "%s days since completed", + "earlyAnnounce": "Allowed to announce the output %s over %.2f by campaign requirement", + "overrideFraction": "Overriding fraction to %s for %s by %s", + "truncateRecovery": "Allowed to truncate recovery of %s over %.2f by campaign requirement", + "fractionDamping": "The passing fraction could be reduced to %s given it has been in for long", + "reducePass": "Reducing pass threshold by %.3f%% for long lasting workflow %s", + "truncatePass": "Truncating at pass threshold because of ACDC at rank %d", + } + + self.useMcM = False + self.exceptions = set() + self._setWfs() + # TODO: update JIRA tickets + + except Exception as error: + raise Exception(f"Error initializing Checkor\n{str(error)}") + + @staticmethod + def parseOptions() -> Tuple[dict, Optional[str]]: + """ + The function to parse the Checkor's options and specific workflow + :return: options and the specific workflow, if any + """ + parser = optparse.OptionParser() + + parser.add_option("--go", help="Does not check on duplicate process", action="store_true", default=False) + parser.add_option( + "--update", + help="Running workflows that have not yet reached 
completed", + action="store_true", + default=False, + ) + parser.add_option( + "--strict", help="Only running workflows that reached completed", action="store_true", default=False + ) + parser.add_option( + "--clear", help="Only running workflows that have reached custodial", action="store_true", default=False + ) + parser.add_option( + "--review", + help="Look at the workflows that have already completed and had required actions", + action="store_true", + default=False, + ) + parser.add_option( + "--recovering", + help="Look at the workflows that already have on-going acdc", + action="store_true", + default=False, + ) + parser.add_option("--manual", help='Look at the workflows in "manual"', action="store_true", default=False) + parser.add_option("--limit", help="The number of workflows to consider for checking", default=0, type=int) + parser.add_option("--threads", help="The number of threads for processing workflows", default=10, type=int) + parser.add_option( + "--fractionPass", help="The completion fraction that is permitted", default=0.0, type="float" + ) + parser.add_option("--lumiSize", help="Force the upper limit on lumisection", default=0, type="float") + parser.add_option( + "--ignoreFiles", help="Force ignoring dbs/rucio differences", action="store_true", default=False + ) + parser.add_option( + "--ignoreInvalid", help="Force ignoring high level of invalid files", action="store_true", default=False + ) + parser.add_option( + "--ignoreDuplicates", help="Force ignoring lumi duplicates", action="store_true", default=False + ) + parser.add_option("--tapeSizeLimit", help="The limit in size of all outputs", default=0, type=int) + parser.add_option("--html", help="Build the the monitor page", action="store_true", default=False) + parser.add_option( + "--noReport", help="Prevent from making the error report", action="store_true", default=False + ) + parser.add_option( + "--backfill", help="To run in test mode (only with backfill workflows)", action="store_true", 
default=False + ) + + options, args = parser.parse_args() + options = vars(options) + + options["manual"] = not options.get("recovering") + actions = ["strict", "update", "clear", "review"] + if all(not options.get(option) for option in actions): + for option in actions + ["recovering", "manual"]: + options[option] = True + + return options, args[0] if args else None + + def _setWfs(self) -> None: + """ + The function to set workflows to override, on hold, bypass and force complete. + """ + self.overrideWfs = self._getWorkflowsByAction("force-complete", details=True) + self.onHoldWfs = self._getWorkflowsByAction("hold") + self.bypassWfs = self._getWorkflowsByAction("bypass") + unnestList(self.overrideWfs) + self.forceCompleteWfs = self.mcmClient.get("/restapi/requests/forcecomplete") if self.useMcM else {} + + def _checkPoint(self, label: str = "", subLap: bool = False, now: struct_time = gmtime()) -> None: + """ + The function to get the check points + :label: log message label + :subLap: True if for sub lap, False o/w + :now: time now + """ + self.logger.info(self.logMsg["timeCheck"], label, asctime(now)) + self.logger.info(self.logMsg["sinceStart"], now - self.timePoint.get("start", now)) + + self.logger.info( + self.logMsg["lap"], + "Sub lap" if subLap else "Lap", + now - self.timePoint.get("subLap" if subLap else "lap", now), + ) + self.timePoint["subLap"] = now + if not subLap: + self.timePoint["lap"] = now + + def _filterBackfills(self, workflows: list) -> list: + """ + The function to filter only backfill workflows + :workflows: workflows + :return: backfill workflows + """ + if self.options.get("backfill"): + self.logger.info(self.logMsg["backfill"]) + return [wf for wf in workflows if "backfill" in wf.name.lower()] + return workflows + + def _getWorkflowsToCheck(self) -> list: + """ + The function to get the workflows to check + :return: workflows + """ + workflows = set() + + awayWfs = self.session.query(Workflow).filter(Workflow.status == "away").all() 
+ assistanceWfs = self.session.query(Workflow).filter(Workflow.status.startswith("assistance")).all() + completedWfs = self.reqMgrReader.getWorkflowsByStatus("completed") + + if self.options.get("strict"): + self.logger.info(self.logMsg["strict"]) + workflows.update(filter(lambda wf: wf.name in completedWfs, awayWfs)) + + if self.options.get("update"): + self.logger.info(self.logMsg["update"]) + workflows.update(filter(lambda wf: wf.name not in completedWfs, awayWfs)) + + if self.options.get("clear"): + self.logger.info(self.logMsg["clear"]) + workflows.update(filter(lambda wf: "custodial" in wf.status, assistanceWfs)) + + if self.options.get("review"): + nonCustodialWfs = [*filter(lambda wf: "custodial" not in wf.status, assistanceWfs)] + if self.options.get("recovering"): + self.logger.info(self.logMsg["reviewRecover"]) + workflows.update(filter(lambda wf: "manual" not in wf.status, nonCustodialWfs)) + if self.options.get("manual"): + self.logger.info(self.logMsg["reviewManual"]) + workflows.update(filter(lambda wf: "manual" in wf.status, nonCustodialWfs)) + + return list(workflows) + + def _getWorkflowsByAction(self, action: str, details: bool = False) -> Union[list, dict]: + """ + The function to get workflows for a given user action + :action: user action + :details: if True return dict of workflows by user. 
O/w return workflows names
+        :return: workflows
+        """
+        workflows = defaultdict(set)
+
+        allowedUsers = [*self.unifiedConfiguration.get("allowed_bypass", {}).keys()]
+
+        userWfs = (
+            self.wtcController.getHold()
+            if action == "hold"
+            else self.wtcController.getBypass()
+            if action == "bypass"
+            else self.wtcController.getForce()
+            if action == "force-complete"
+            else {}
+        )
+        for user, wf in userWfs.items():
+            if user not in allowedUsers:
+                self.logger.info(self.logMsg["notAllowed"], user, action)
+                continue
+
+            self.logger.info(self.logMsg["allowed"], user, action, wf)
+            workflows[user].add(wf)
+
+        if details:
+            return workflows
+        return unnestList(workflows)
+
+    def _filterMaxNumberOfWorkflows(self, workflows) -> list:
+        maxPerRound = self.unifiedConfiguration.get("max_per_round", {}).get("checkor")
+        if self.options.get("limit"):
+            self.logger.info(self.logMsg["limitOption"], self.options.get("limit"))
+            maxPerRound = self.options.get("limit")
+
+        if maxPerRound and not self.specificWf:
+            self.logger.info(self.logMsg["limit"], maxPerRound)
+
+            workflows = self._rankWorkflows(workflows)
+            if self.options.get("update"):
+                random.shuffle(workflows)
+            return workflows[:maxPerRound]
+
+        return workflows
+
+    def _rankWorkflows(self, workflows: list) -> list:
+        """
+        The function to rank the workflows by their priority
+        :param workflows: workflows
+        :return: sorted workflows
+        """
+        completedWfs = self.reqMgrReader.getWorkflowsByStatus("completed", details=True)
+        completedWfs = sorted(completedWfs, key=lambda wf: wf.get("RequestPriority", 0))
+        completedWfs = [wf.get("RequestName") for wf in completedWfs]
+
+        return sorted(workflows, key=lambda wf: completedWfs.index(wf) if wf in completedWfs else 0, reverse=True)
+
+    def _updateWorkflowsRecords(self, wfsRecords: dict) -> None:
+        for wf in wfsRecords:
+            if wf.get("record"):
+                self.closeoutController.set(wf.get("wf"), wf.get("record"))
+
+    def _updateWorkflowsStatus(self, wfsStatus) -> None:
+        for wf in wfsStatus:
+
if wf.get("newStatus"): + newStatus = wf.get("newStatus") + wf["workflow"].status = newStatus + self.session.commit() + + if newStatus == "close": + self.closeoutController.clean(wf.get("wf")) + if self.useMcM and wf.get("mcmForceComplete"): + for prepId in wf.get("prepIds"): + self.mcmClient.clean(f"/restapi/requests/forcecomplete/{prepId}") + + def _checkExecutionTime(self, nWfs: int, now: struct_time = mktime(gmtime())) -> None: + if nWfs: + avgExecutionTime = (now - self.timePoint.get("start")) / nWfs + self.logger.info("Average time spend per workflow: %s s", avgExecutionTime) + + if avgExecutionTime > 120: + self.logger.critical("Checkor took %s [s] per workflow", avgExecutionTime) + + def _countAssistanceWorkflowsByStatus(self) -> dict: + status = defaultdict(int) + for wf in self.session.query(Workflow).filter(Workflow.status.startswith("assistance")).all(): + status[wf.status] += 1 + + return status + + def _writeSummary(self) -> str: + if not self.specificWf: + msg = "" + if self.options.get("strict"): + msg += "Workflows which just got in completed were looked at. Look in manual.\n" + if self.options.get("update"): + msg += "Workflows that are still running (and not completed) got looked at.\n" + if self.options.get("clear"): + msg += "Workflows that just need to close-out were verified. 
Nothing too new a-priori.\n" + if self.options.get("review"): + msg += "Workflows under intervention got review.\n" + + msg += "\n".join([f"{count} in status {status}" for status, count in self._countAssistanceWorkflowsByStatus()]) + + self.logger.info(msg) + + def go(self) -> bool: + """ + The function to check if the checkor can go + :return: True if it can go, False o/w + """ + try: + userLockChecker = UserLockChecker() + servicesChecker = ServicesChecker(softServices=["mcm", "wtc"]) + + if not userLockChecker.isLocked() and servicesChecker.check(): + self.useMcM = servicesChecker.status.get("mcm") + return True + + return False + + except Exception as error: + self.logger.error("Failed to check if Checkor can go") + self.logger.error(str(error)) + + def run(self) -> None: + """ + The function to run checkor + """ + try: + wfsToCheck = self._filterBackfills(self._getWorkflowsToCheck()) + random.shuffle(wfsToCheck) + + self.logger.info(self.logMsg["nWfs"], len(wfsToCheck)) + + wfsToCheck = self._filterMaxNumberOfWorkflows(wfsToCheck) + self._check(wfsToCheck) + + self._writeSummary() + + except Exception as error: + self.logger.error("Failed to run checkor") + self.logger.error(str(error)) + + def _check(self, wfsToCheck: list) -> None: + """ + The wrapper function to check the workflows + :param wfsToCheck: workflows to check + """ + + @runWithMultiThreading(mtParam="wfsToCheck", maxThreads=len(wfsToCheck)) + def _checkWorkflow(self, wfsToCheck: list) -> dict: + return WorkflowCheckor(wfsToCheck, checkor=self).check() + + checkResponses = _checkWorkflow(wfsToCheck) + self._updateWorkflowsRecords(checkResponses) + self._updateWorkflowsStatus(checkResponses) + + self._checkExecutionTime(len(wfsToCheck)) + +class WorkflowCheckor: + """ + __WorkflowCheckor__ + General API for checking a fiven workflow + """ + + def __init__(self, wfToCheck: Workflow, logger: Optional[Logger] = None, **kwargs) -> None: + try: + super().__init__(self) + self.logger = logger or 
getLogger(self.__class__.__name__)
+            self.now = mktime(gmtime())
+
+            self.wf = wfToCheck.name
+            self.wfToCheck = wfToCheck
+            self.wfController = WorkflowController(wfToCheck.name)
+            self.wfToCheck.wm_status = self.wfController.request.get("RequestStatus")
+
+            self.checkor = kwargs.get("checkor")
+            self.rucioReader = RucioReader()
+
+            self.existingAssistaceTags = set(wfToCheck.status.split("-")[1:])
+            self.assistanceTags = set()
+            self.outputDatasetsToCheck = list()
+            self.acdcWfs = dict()
+            self.fractions = dict()
+            self.campaigns = dict()
+            self.eventsPerLumi = dict()
+            self.percentCompletions, self.percentAvgCompletions = dict(), dict()
+            self.expectedLumis, self.expectedEvents = dict(), dict()
+            self.producedLumis, self.producedEvents = dict(), dict()
+            self.passStatsCheck, self.passStatsCheckToAnnounce, self.passStatsCheckOverCompletion = dict(), dict(), dict()
+            self.lumiUpperLimit = dict()
+            self.rucioPresence = dict()
+            self.dbsPresence, self.dbsInvalid = dict(), dict()
+
+            self.failed = False
+            self.isClosing = False
+            self.newStatus = None
+            self._setBypassChecks()
+            self._setBypassChecksByMcMForceComplete(self.wfController.getPrepIDs())
+            self._setBypassChecksByUserForceComplete()
+
+            self.record = {}
+
+        except Exception as error:
+            raise Exception(f"Error initializing WorkflowCheckor\n{str(error)}")
+
+    def _skipWorkflow(self) -> bool:
+        """
+        The function to check if a given workflow should be skipped or not
+        :return: True if workflow should be skipped, False o/w
+        """
+        if (
+            (self.checkor.specificWf and self.checkor.specificWf not in self.wf)
+            or os.path.isfile(".checkor_stop")
+            or self.wfToCheck.wm_status in ["assigned", "acquired"]
+            or (self.wfToCheck.wm_status != "completed" and self.wf not in self.checkor.exceptions)
+        ):
+            self.logger.info("Skipping workflow %s", self.wf)
+            return True
+        return False
+
+    def _setWorkflowToClose(self) -> bool:
+        """
+        The function to check if a given workflow should be closed
+        :return: True if workflow should be closed,
False o/w
+        """
+        if self.wfToCheck.wm_status in ["closed-out", "announced"] and self.wf not in self.checkor.exceptions:
+            self.logger.info(self.checkor.logMsg["setClose"], self.wf, self.wfToCheck.wm_status)
+            self.newStatus = "close"
+            return True
+        return False
+
+    def _setWorkflowToForget(self) -> bool:
+        """
+        The function to check if a given workflow should be forgotten
+        :return: True if workflow should be forgotten, False o/w
+        """
+        if self.wfController.request.isRelVal() and self.wfToCheck.wm_status in [
+            "failed",
+            "aborted",
+            "aborted-archived",
+            "rejected",
+            "rejected-archived",
+            "aborted-completed",
+        ]:
+            self.logger.info(self.checkor.logMsg["setForget"], self.wf, self.wfToCheck.wm_status)
+            self.newStatus = "forget"
+            return True
+        return False
+
+    def _setWorkflowToTrouble(self) -> bool:
+        """
+        The function to check if a given workflow should be set as trouble
+        :return: True if workflow should be set as trouble, False o/w
+        """
+        if not self.wfController.request.isRelVal() and self.wfToCheck.wm_status in [
+            "failed",
+            "aborted",
+            "aborted-archived",
+            "rejected",
+            "rejected-archived",
+            "aborted-completed",
+        ]:
+            self.logger.info(self.checkor.logMsg["setTrouble"], self.wf, self.wfToCheck.wm_status)
+            self.newStatus = "trouble"
+            return True
+        return False
+
+    def _setBypassChecks(self) -> None:
+        """
+        The function to check if bypass checks for a given workflow.
+        """
+        self.bypassChecks = False
+        for bypassWf in self.checkor.bypassWfs:
+            if bypassWf == self.wf:
+                self.logger.info(self.checkor.logMsg["bypassChecks"], self.wf, bypassWf)
+                self.bypassChecks = True
+
+    def _setBypassChecksByMcMForceComplete(self, prepIds: list) -> None:
+        """
+        The function to check if bypass checks for a given workflow because of McM force-complete status.
+        :param prepIds: list of prep ids
+        """
+        self.bypassChecksByMcMForceComplete = False
+        for forceCompleteWf in self.checkor.forceCompleteWfs:
+            if forceCompleteWf in prepIds:
+                self.logger.info(self.checkor.logMsg["bypassForceComplete"], self.wf, "prepid", forceCompleteWf)
+                self.bypassChecksByMcMForceComplete = True
+
+    def _setBypassChecksByUserForceComplete(self) -> None:
+        """
+        The function to check if bypass checks for a given workflow because of user force-complete status.
+        """
+        self.bypassChecksByUserForceComplete = False
+        for user, userOverrideWfs in self.checkor.overrideWfs.items():
+            forceCompleteWfs = [overrideWf for overrideWf in userOverrideWfs if overrideWf in self.wf]
+            if forceCompleteWfs:
+                self.logger.info(
+                    self.checkor.logMsg["bypassForceComplete"], self.wf, "user/keyword", f"{user}/{forceCompleteWfs[0]}"
+                )
+                self.bypassChecksByUserForceComplete = True
+
+    def _isWorkflowOnHold(self) -> bool:
+        """
+        The function to check if a given workflow is on hold
+        :return: True if workflow is on hold, False o/w
+        """
+        return "-onhold" in self.wfToCheck.wm_status or self.wf in self.checkor.onHoldWfs
+
+    def _getMinFamilyCompletedDelay(self, family: list) -> float:
+        """
+        The function to get how long a given workflow in the family has been requested to complete.
:param family: workflow family
+        :return: how long a given family workflow has been requested to complete
+        """
+        completedFamily = [
+            [*filter(lambda wf: wf.get("Status") == "completed", member["RequestTransition"])] for member in family
+        ]
+        return min(
+            [
+                (self.now - completed[-1].get("UpdateTime", self.now)) / 86400.0 if completed else 0
+                for completed in completedFamily
+            ]
+        )
+
+
+    def _setNewOnHoldStatus(self, family: list) -> bool:
+        """
+        The function to get on hold status for a given workflow
+        :param family: workflow family
+        :return: if set new status
+        """
+        onHoldTimeout = self.checkor.unifiedConfiguration.get("onhold_timeout")
+        minFamilyCompletedDelay = self._getMinFamilyCompletedDelay(family)
+
+        self.logger.info(self.checkor.logMsg["onHoldSince"], minFamilyCompletedDelay, onHoldTimeout)
+
+        if onHoldTimeout > 0 and onHoldTimeout < minFamilyCompletedDelay:
+            self.logger.info(self.checkor.logMsg["bypassOnHold"], self.wf, minFamilyCompletedDelay)
+            self.bypassChecks = True
+            return False
+
+        if "-onhold" in self.wfToCheck.wm_status and self.wf in self.checkor.onHoldWfs and not self.bypassChecks:
+            self.logger.info(self.checkor.logMsg["onHold"], self.wf)
+            return True
+
+        if self.wf in self.checkor.onHoldWfs and not self.bypassChecks:
+            self.logger.info(self.checkor.logMsg["setOnHold"], self.wf, self.wfToCheck.wm_status)
+            self.newStatus = "assistance-onhold"
+            return True
+
+        return False
+
+    def _updateWorkflowStatus(self) -> bool:
+        """
+        The function to check if the status of a given workflow should be updated
+        :return: True if update status, False o/w
+        """
+        if self._setWorkflowToClose() or self._setWorkflowToForget() or self._setWorkflowToTrouble():
+            return True
+        if self._isWorkflowOnHold() and self._setNewOnHoldStatus(self.wfController.getFamily(includeItself=True)):
+            return True
+
+        return False
+
+    def _getTiersWithNoCheckAndCustodial(self) -> Tuple[list, list]:
+        tiersWithNoCheck =
set(self.checkor.unifiedConfiguration.get("tiers_with_no_check"))
+        tiersWithNoCustodial = set(
+            self.checkor.unifiedConfiguration.get("tiers_with_no_custodial") if not self.wfController.request.isRelVal() else []
+        )
+
+        for campaign in self.wfController.getCampaignsFromOutputDatasets().values():
+            if (
+                campaign in self.checkor.campaignController.campaigns
+                and "custodial_override" in self.checkor.campaignController.campaigns[campaign]
+            ):
+                if isinstance(self.checkor.campaignController.campaigns[campaign]["custodial_override"], list):
+                    tiersWithNoCheck.difference_update(self.checkor.campaignController.campaigns[campaign]["custodial_override"])
+                elif self.checkor.campaignController.campaigns[campaign]["custodial_override"] == "notape":
+                    tiersWithNoCustodial = set(
+                        sorted([dataset.split("/")[-1] for dataset in self.wfController.request.get("OutputDatasets")])
+                    )
+
+        return tiersWithNoCheck, tiersWithNoCustodial
+
+    def _setOutputDatasetsToCheck(self) -> None:
+        tiersWithNoCheck, tiersWithNoCustodial = self._getTiersWithNoCheckAndCustodial()
+
+        expectedOutputsDatasets = self.wfController.request.get("OutputDatasets")
+        self.outputDatasetsToCheck = [
+            dataset
+            for dataset in expectedOutputsDatasets
+            if all([dataset.split("/")[-1] != tier for tier in tiersWithNoCheck])
+        ]
+
+        self.wfController.logger.info(
+            self.checkor.logMsg["checkSummary"],
+            ", ".join(sorted(expectedOutputsDatasets)),
+            ", ".join(sorted(self.outputDatasetsToCheck)),
+            ", ".join(sorted(tiersWithNoCheck)),
+            ", ".join(sorted(tiersWithNoCustodial)),
+        )
+
+    def _skipFamilyWorkflow(self, wfSchema: dict) -> bool:
+        return (
+            wfSchema.get("RequestType") != "Resubmission"
+            or wfSchema.get("RequestDate") < self.wfController.request.get("RequestDate")
+            or wfSchema.get("PrepID") != self.wfController.request.get("PrepID")
+            or wfSchema.get("RequestStatus") is None
+        )
+
+
+    def _getWorkflowFamily(self) -> list:
+        family = []
+        for member in self.checkor.reqMgrReader.getWorkflowsByPrepId(self.wfController.request.get("PrepID"), details=True):
+            if (
+                member.get("RequestName") ==
self.wf + or self._skipFamilyWorkflow(member) + or not set(member.get("OutputDatasets", [])).issubset(set(self.wfController.request.get("OutputDatasets"))) + ): + continue + + family.append(member) + + return family + + def _setBadAcdcs(self) -> None: + badAcdcs = [] + for member in self.reqmgrReader.getWorkflowsByPrepId(self.wfController.request.get("PrepID"), details=True): + if member.get("RequestName") == self.wf or self._skipFamilyWorkflow(member): + continue + + if not set(member.get("OutputDatasets", [])).issubset(set(self.wfController.request.get("OutputDatasets"))): + if member.get("RequestStatus") not in [ + "rejected-archived", + "rejected", + "aborted", + "aborted-archived", + ]: + badAcdcs.append(member.get("RequestName")) + self.wfController.logger.info(self.logMsg["inconsistentAcdc"], member.get("RequestName")) + + self.acdcWfs["bad"] = badAcdcs + if badAcdcs: + self.assistanceTags.update(["manual", "assistance"]) + + def _setHealthyAcdcs(self, family: list) -> None: + healthyAcdcs = [] + for member in family: + if member.get("RequestStatus") in [ + "running-open", + "running-closed", + "assigned", + "acquired", + "staging", + "staged", + ]: + self.logger.info(self.logMsg["acdc"], member.get("RequestName")) + healthyAcdcs.append(member.get("RequestName")) + + self.acdcWfs["healthy"] = healthyAcdcs + if healthyAcdcs: + self.assistanceTags.add("recovering") + + def _setInactiveAcdcs(self, family: list) -> None: + inactiveAcdcs = [] + for member in family: + if member.get("RequestStatus") not in [ + "running-open", + "running-closed", + "assigned", + "acquired", + "staging", + "staged", + "failed", + ]: + inactiveAcdcs.append(member.get("RequestName")) + + self.acdcWfs["inactive"] = inactiveAcdcs + if inactiveAcdcs: + self.assistanceTags.add("recovered") + + def _setFailedAcdcs(self, family) -> None: + failedAcdcs = [] + for member in family: + if member.get("RequestStatus") in ["failed"]: + failedAcdcs.append(member.get("RequestName")) + + 
self.acdcWfs["failed"] = failedAcdcs + + def _setAcdcsOrder(self, family: list) -> None: + order = -1 + for member in family: + memberOrder = sorted(filter(re.compile(f"^ACDC\d+$").search, member.get("RequestName"))) + if memberOrder: + order = max(order, int(memberOrder[-1].split("ACDC")[1])) + + self.acdcWfs["order"] = order + + def _checkWorkflowFamily(self) -> None: + family = self._getWorkflowFamily() + + self._setBadAcdcs() + self._setHealthyAcdcs(family) + self._setInactiveAcdcs(family) + self._setFailedAcdcs(family) + self._setAcdcsOrder(family) + + if (self.bypassChecksByMcMForceComplete or self.bypassChecksByUserForceComplete) and len(self.acdcWfs.get("healthy")): + self.wfController.logger.info(self.logMsg["forceRecovering"], self.wf) + self.wfController.logger.critical(self.logMsg["forceComplete"], self.wf) + WorkflowStatusEnforcer(self.wf).forceComplete() + + if self.acdcWfs.get("failed"): + self.logger.critical(self.logMsg["failedAcdc"], self.wf, ", ".join(self.acdcWfs.get("failed"))) + if self.acdcWfs.get("bad"): + self.logger.critical(self.logMsg["badAcdc"], self.wf, ", ".join(self.acdcWfs.get("bad"))) + + def _getWorkflowCompletedDelay(self) -> float: + """ + The function to get how long a given workflow has been requested to complete + :return: how long a given workflow has been requested to complete + """ + completed = [*filter(lambda wf: wf.get("Status") == "completed", self.wfController.request.get("RequestTransition"))] + delay = (self.now - completed[-1].get("UpdateTime", self.now)) / 86400.0 if completed else 0 + + self.logger.info(self.logMsg["sinceCompleted"], delay) + + return delay + + def _getFractionDumping(self) -> float: + wfCompletedDelay = self._getWorkflowCompletedDelay() + fractionDamping = min( + 0.01 + * ( + max(wfCompletedDelay - self.unifiedConfiguration.get("damping_fraction_pass"), 0) + / self.unifiedConfiguration.get("damping_fraction_pass_rate") + ), + self.unifiedConfiguration.get("damping_fraction_pass_max") / 100.0, + 
) + + self.logger.info(self.logMsg["fractionDamping"], fractionDamping) + + return fractionDamping + + def _setWorkflowCampaigns(self) -> None: + campaigns = {} + wfCampaigns = self.wfController.request.getCampaigns(details=False) + if len(wfCampaigns) == 1: + for dataset in self.wfController.request.get("OutputDatasets"): + campaigns[dataset] = wfCampaigns[0] + else: + campaigns = self.wfController.getCampaignsFromOutputDatasets() + + self.logger.info(self.logMsg["campaigns"], campaigns) + self.campaigns = campaigns + + def _setDatasetsFractionsToAnnounce(self) -> None: + fractionsAnnounce = {} + for dataset in self.outputDatasetsToCheck: + fractionsAnnounce[dataset] = 1.0 + + self.fractions["announce"] = fractionsAnnounce + + def _setDatasetsFractionsToPass(self) -> None: + fractionsPass = {} + defaultPass = self.unifiedConfiguration.get("default_fraction_pass") + + campaigns = self.campaigns or self._setWorkflowCampaigns() + + for dataset in self.outputDatasetsToCheck: + campaign = campaigns.get(dataset) + + if self.options.get("fractionPass"): + fractionsPass[dataset] = self.options.get("fractionPass") + self.wfController.logger.info( + self.logMsg["overrideFraction"], fractionsPass[dataset], dataset, "command line" + ) + elif self.campaignController.campaigns.get(campaign, {}).get("fractionpass"): + fractionPass = self.campaignController.campaigns.get(campaign).get("fractionpass") + if isinstance(fractionPass, dict): + tier = dataset.split("/")[-1] + priority = str(self.wfController.request.get("RequestPriority")) + fractionsPass[dataset] = fractionPass.get("all", defaultPass) + if fractionPass.get(tier): + tierFractionPass = fractionPass.get(tier) + if isinstance(tierFractionPass, dict): + fractionsPass[dataset] = tierFractionPass.get("all", defaultPass) + for key, passValue in tierFractionPass.items(): + if dataset.startswith(key): + fractionsPass[dataset] = passValue + else: + fractionsPass[dataset] = tierFractionPass + if fractionPass.get(priority): + 
fractionsPass[dataset] = fractionPass.get("priority") + else: + fractionsPass[dataset] = fractionPass + self.wfController.logger.info( + self.logMsg["overrideFraction"], fractionsPass[dataset], dataset, "campaign requirement" + ) + else: + fractionsPass[dataset] = defaultPass + + for key, passValue in self.unifiedConfiguration.get("pattern_fraction_pass").items(): + if key in dataset: + fractionsPass[dataset] = passValue + self.wfController.logger.info(self.logMsg["overrideFraction"], passValue, dataset, "dataset key") + + self.fractions["pass"] = fractionsPass + + def _setDatasetsFractionsToTruncateRecovery(self) -> None: + fractionsTruncateRecovery = {} + + weightFull = 7.0 + weightUnderPass = 0.0 + weightPass = self._getWorkflowCompletedDelay() + + campaigns = self.campaigns or self._setWorkflowCampaigns() + + for dataset in self.outputDatasetsToCheck: + passPercentBelow = self.fractions["pass"][dataset] - 0.02 + fractionsTruncateRecovery[dataset] = ( + self.fractions["pass"][dataset] * weightPass + weightFull + passPercentBelow * weightUnderPass + ) / (weightPass + weightFull * weightUnderPass) + + campaign = campaigns.get(dataset) + if self.campaignController.campaigns.get(campaign, {}).get("truncaterecovery"): + fractionsTruncateRecovery[dataset] = self.campaignController.campaigns.get(campaign).get( + "truncaterecovery" + ) + self.wfController.logger.info( + self.logMsg["truncateRecovery"], + dataset, + self.campaignController.campaigns.get(campaign).get("truncaterecovery"), + ) + + if fractionsTruncateRecovery[dataset] < self.fractions["pass"][dataset]: + fractionsTruncateRecovery[dataset] = self.fractions["pass"][dataset] + + self.fractions["truncate"] = fractionsTruncateRecovery + + def _setStatisticsThresholds(self) -> None: + self._setDatasetsFractionsToAnnounce() + self._setDatasetsFractionsToPass() + self._setDatasetsFractionsToTruncateRecovery() + + fractionDamping = self._getFractionDumping() + for dataset, value in self.fractions["pass"].items(): + 
if value != 1.0 and fractionDamping and self.unifiedConfiguration.get("timeout_for_damping_fraction"):
+                self.fractions["pass"][dataset] -= fractionDamping
+                self.fractions["truncate"][dataset] -= fractionDamping
+
+            if self.acdcWfs.get("order") > self.unifiedConfiguration.get("acdc_rank_for_truncate"):
+                self.wfController.logger.info(self.logMsg["truncatePass"], self.acdcWfs.get("order"))
+                self.fractions["truncate"][dataset] = self.fractions["pass"][dataset]
+
+        self._updateFractionsToPassAndToTruncateRecovery()
+
+    def _updateFractionsToPassAndToTruncateRecovery(self) -> None:
+        family = dict([(dataset, self.dbs["reader"].getDatasetParent(dataset)) for dataset in self.fractions["pass"]])
+
+        for dataset, value in self.fractions["pass"].items():
+            ancestors = flattenDictKeys(family, family.get(dataset, []))
+
+            descendingTruncate = self.fractions["truncate"][dataset]
+            descendingPass = value
+            for ancestor in ancestors:
+                descendingPass *= self.fractions["pass"].get(ancestor, 1.0)
+                descendingTruncate *= self.fractions["truncate"].get(ancestor, 1.0)
+
+            if self.unifiedConfiguration.get("cumulative_fraction_pass"):
+                self.fractions["pass"][dataset] = descendingPass
+                self.fractions["truncate"][dataset] = descendingTruncate
+                self.logger.info(
+                    "For %s, previously passing at %s, is now passing at %s", dataset, value, descendingPass
+                )
+            else:
+                self.logger.info(
+                    "For %s, instead of passing at %s, could be passing at %s", dataset, value, descendingPass
+                )
+
+    def _getExpectedEvents(self) -> float:
+        if self.wfController.request.get("RequestType") in ["TaskChain", "StepChain"]:
+            return self.wfController.getRequestNumEvents()
+
+        expectedEvents = self.wfController.request.get("TotalInputEvents")
+        if expectedEvents is None:
+            self.wfController.logger.critical(self.logMsg["missingTotalEvents"], self.wf)
+            return 0
+        return expectedEvents
+
+    def _getTaskOutputDatasets(self) -> dict:
+        taskOutputDatasets = {}
+        for task, outputs in 
self.wfController.getOutputDatasetsPerTask().items(): + for output in outputs: + taskOutputDatasets[output] = self.wfController.request.get(task, {}).get("TaskName", task) + return taskOutputDatasets + + def _checkCompletionStatistics(self) -> None: + lumisExpected = self.wfController.request.get("TotalInputLumis") + eventsExpected = self._getExpectedEvents() + eventsExpectedPerTask = self.wfController.request.getExpectedEventsPerTask() + taskOutputs = self._getTaskOutputDatasets() + + for dataset in self.outputDatasetsToCheck: + events, lumis = self.dbs["reader"].getDatasetEventsAndLumis(dataset) + self.producedEvents[dataset] = events + self.producedLumis[dataset] = lumis + self.eventsPerLumi[dataset] = events / float(lumis) if lumis else 100 + self.percentCompletions[dataset] = 0.0 + + if lumisExpected: + self.wfController.logger.info("Lumi completion %s expected for %s", lumis, lumisExpected, dataset) + self.percentCompletions[dataset] = lumis / float(lumisExpected) + self.expectedLumis[dataset] = lumisExpected + + outputEventsExpected = eventsExpectedPerTask.get(taskOutputs.get(dataset, "NoTaskFound"), eventsExpected) + if outputEventsExpected: + self.expectedEvents[dataset] = outputEventsExpected + eventsFraction = float(events) / float(outputEventsExpected) + if eventsFraction > self.percentCompletions[dataset]: + self.percentCompletions[dataset] = eventsFraction + self.wfController.logger.info( + "Overriding: event completion real %s expected %s for %s", + events, + outputEventsExpected, + dataset, + ) + + def _setPassStatisticsCheck(self) -> None: + self.passStatsCheck = dict( + [ + (dataset, self.bypassChecks or self.percentCompletions[dataset] >= passValue) + for dataset, passValue in self.fractions["pass"].items() + ] + ) + + def _setPassStatisticsCheckToAnnounce(self) -> None: + self.passStatsCheckToAnnounce = dict( + [ + (dataset, self.percentAvgCompletions[dataset] >= passValue) for dataset, passValue in self.fractions["pass"].items() + ] + ) + + def 
_setPassStatisticsCheckOverCompletion(self) -> None: + defaultFractionOverdoing = self.unifiedConfiguration.get("default_fraction_overdoing") + self.passStatsCheckOverCompletion = dict([(dataset, value >= defaultFractionOverdoing) for dataset, value in self.percentCompletions.items()]) + + + def _checkAvgCompletionStatistics(self) -> None: + percentAvgCompletions = {} + + _, primaries, _, _ = self.wfController.request.getIO() + runWhiteList = self.wfController.getRunWhiteList() + lumiWhiteList = self.wfController.getLumiWhiteList() + + lumisPerRun = {} + if not all(self.passStatsCheck.values()): + nRuns = 1 + for primary in primaries: + if len(self.dbs["reader"].getDatasetRuns(primary)) > 1: + self.logger.info("Fetching input lumis and files for %s", primary) + lumisPerRun[primary], _ = self.dbs["reader"].getDatasetLumisAndFiles( + primary, runs=runWhiteList, lumiList=lumiWhiteList + ) + nRuns = len(set(lumisPerRun[primary].keys())) + + for dataset in self.passStatsCheck: + if primaries and nRuns > 1: + lumisPerRun[dataset], _ = self.dbs["reader"].getDatasetLumisAndFiles(dataset) + + fractionPerRun = {} + primary = primaries[0] + allRuns = sorted(set(lumisPerRun[primary].keys() + lumisPerRun[dataset].keys())) + for run in allRuns: + if lumisPerRun[primary].get(run, []): + fractionPerRun[run] = float(len(lumisPerRun[dataset].get(run, []))) / lumisPerRun[ + primary + ].get(run, []) + + if fractionPerRun: + avgFraction = sum(fractionPerRun.values()) / len(fractionPerRun.values()) + percentAvgCompletions[dataset] = avgFraction + + self.logger.info("The average completion fraction per run for %s is %s", dataset, avgFraction) + + self.percentAvgCompletions = percentAvgCompletions + + def _getAnnounceAssistanceTags(self) -> list: + if self.passStatsCheckToAnnounce and all(self.passStatsCheckToAnnounce.values()): + self.wfController.logger.info( + "The output of this workflow are essentially good to be announced while we work on the rest" + ) + return ["announced" if 
"announced" in self.wfToCheck.status else "announce"] + return [] + + def _getRecoveryAssistanceTags(self) -> list: + if not all(self.passStatsCheck.values()): + possibleRecoveries = self.acdcReader.getRecoveryDocs() + if possibleRecoveries: + self.wfController.logger.info( + "The workflow has missing statistics, but nothing is recoverable. Passing through to announcement" + ) + bypassChecks = True + else: + self.wfController.logger.info( + "The workflow is not completed, but nothing is recoverable. Passing through to announcement" + ) + + if not bypassChecks: + return ["recovery" if self.unifiedConfiguration.get("use_recoveror") else "manual"] + + return [] + + def _passOver100(self) -> bool: + lhe, primaries, _, _ = self.wfController.request.getIO() + return False if (lhe or primaries) else True + + def _forceCompleteWorkflow(self) -> bool: + if self.acdcWfs.get("healthy") and all(self.passStatsCheck.values()) and all(self.passStatsCheckToAnnounce.values()): + self.logger.info("This is essentially good to truncate, setting to force-complete") + return True + + return False + + + def _checkOutputSize(self,) -> None: + self.assistanceTags += set(self._getAnnounceAssistanceTags()) + + recoveryAssistanceTags = self._getRecoveryAssistanceTags() + if recoveryAssistanceTags: + self.assistanceTags += set(recoveryAssistanceTags) + self.bypassChecks, self.isClosing = False, False + + if self._passOver100() and all(self.passStatsCheckOverCompletion.values()): + self.assistanceTags.add("over100") + + if self._forceCompleteWorkflow(): + WorkflowStatusEnforcer(self.wf).forceComplete() + + def _hasSmallLumis(self) -> bool: + lumiLowerLimit = self.unifiedConfiguration.get("min_events_per_lumi_output") + _, primaries, _, _ = self.wfController.request.getIO() + + if ( + not self.wfController.isRelVal() + and not primaries + and any( + [ + self.eventsPerLumi[dataset] <= lumiLowerLimit + for dataset in self.eventsPerLumi + if not dataset.endswith(("DQMIO", "ALCARECO")) + ] + ) + ): 
+ self.wfController.logger.info("The workflow has very small lumisections") + return True + + return False + + def _hasBigLumis(self) -> bool: + if any( + [ + self.lumiUpperLimit[dataset] > 0 and self.eventsPerLumi[dataset] >= self.lumiUpperLimit[dataset] + for dataset in self.eventsPerLumi + ] + ): + self.wfController.logger.info("The has large lumisections") + return True + + return False + + + def _setLumiUpperLimit(self) -> None: + lumiUpperLimit = {} + campaigns = self.campaigns or self._getWorkflowCampaigns() + + for dataset in self.outputDatasetsToCheck: + campaign = campaigns[dataset] + + if self.wfController.request.get("RequestType") in ["ReDigi", "ReReco"]: + upperLimit = -1 + elif self.options.get("lumisize"): + upperLimit = self.options.get("lumisize") + self.logger.info("Overriding the upper lumi size to %s for %s", upperLimit, campaign) + elif self.closeoutController.campaigns.get(campaign, {}).get("lumisize"): + upperLimit = self.closeoutController.campaigns.get(campaign, {}).get("lumisize") + self.logger.info("Overriding the upper lumi size to %s for %s", upperLimit, campaign) + else: + upperLimit = 1001 + + lumiUpperLimit[dataset] = upperLimit + + self.lumiUpperLimit = lumiUpperLimit + + + + def _checkLumiSize(self) -> None: + if self._hasSmallLumis(): + self.assistanceTags.add("smalllumi") + self.isClosing = False + + self._setLumiUpperLimit() + if self._hasBigLumis(): + self.assistanceTags.add("biglumi") + self.isClosing = False + + def _checkRucioCounts(self) -> None: + rucioPresence = {} + + for dataset in self.wfController.request.get("OutputDatasets"): + filesPerBlock = set(self.rucioReader.countDatasetFilesPerBlock(dataset)) + allBlocks = set([*map(lambda x: x[0], filesPerBlock)]) + if len(allBlocks) == len(set(filesPerBlock)): + rucioPresence[dataset] = sum(map(lambda x: x[1], filesPerBlock)) + else: + self.wfController.logger.info( + "There are inconsistences of number of files per block for dataset: %s", dataset + ) + 
rucioPresence[dataset] = 0 + + if any([nFiles == 0 for nFiles in rucioPresence.values()]) and "announce" in self.assistanceTags: + self.wfController.logger.info("No files in rucio yet, no good to announce") + self.assistanceTags.remove("announce") + + self.rucioPresence = rucioPresence + + def _checkDBSCounts(self) -> None: + dbsPresence, dbsInvalid = {}, {} + for dataset in self.wfController.request.get("OutputDatasets"): + #TODO: migrate dbs3Client.getFileCountDataset + dbsPresence[dataset] = self.checkor.dbs["reader"].countDatasetFiles(dataset) + dbsInvalid[dataset] = self.checkor.dbs["reader"].countDatasetFiles(dataset, onlyInvalid=True) + + self.dbsPresence, self.dbsInvalid = dbsPresence, dbsInvalid + + def _hasFileMismatch(self) -> bool: + if not self.options.get("ignoreFiles") and not all( + [self.dbsPresence[dataset] == self.dbsInvalid[dataset] + self.rucioPresence[dataset] for dataset in self.outputDatasetsToCheck] + ): + self.logger.info("The workflow has a dbs/rucio mismatch") + return True + + return False + + def _checkFileCounts(self) -> list: + showOnlyN = 10 + + for dataset in self.dbsPresence: + dbsFilenames = set( + [ + file.get("logical_file_name") + for file in self.dbs["reader"].getDatasetFiles(dataset, validFileOnly=True, details=True) + ] + ) + rucioReader = RucioReader() + rucioFilenames = set(rucioReader.getDatasetFileNames(dataset)) + + missingRucioFiles = dbsFilenames - rucioFilenames + missingDBSFiles = rucioFilenames - dbsFilenames + + if missingRucioFiles: + self.wfController.logger.info( + "These %d files are missing in Rucio, or extra in DBS, showing %s only.\n %s", + len(missingRucioFiles), + showOnlyN, + "\n".join(missingRucioFiles[:showOnlyN]), + ) + wereInvalidated = sorted(missingRucioFiles & set(self.dataCacheLoader.load("file_invalidation"))) + if wereInvalidated: + self.wfController.logger.info( + "These %d files were invalidated globally, showing %d only.\n %s", + len(wereInvalidated), + showOnlyN, + 
"\n".join(wereInvalidated[:showOnlyN]), + ) + #TODO: migrate dbs3Client.setFileStatus + self.dbs["writer"].setFileStatus(wereInvalidated, validate=False) + + if missingDBSFiles: + self.wfController.logger.info( + "These %d files are missing in DBS, or extra in Rucio, showing %s only.\n %s", + len(missingDBSFiles), + showOnlyN, + "\n".join(missingDBSFiles[:showOnlyN]), + ) + wereInvalidated = sorted(missingDBSFiles & set(self.dataCacheLoader.load("file_invalidation"))) + if wereInvalidated: + self.wfController.logger.info( + "These %d files were invalidated globally, showing %d only.\n %s", + len(wereInvalidated), + showOnlyN, + "\n".join(wereInvalidated[:showOnlyN]), + ) + + minFamilyCompletedDelay = self._getMinFamilyCompletedDelay(self.wfController.getFamily(includeItself=True)) + self.assistanceTags.add("agentfilemismatch" if minFamilyCompletedDelay < 2 else "filemismatch") + + self.isClosing = False + + def _checkInvalidations(self) -> list: + fractionInvalid = 0.2 + if not self.options.get("ignoreinvalid") and not all( + [ + self.dbsInvalid[dataset] <= int(fractionInvalid * self.dbsPresence[dataset]) + for dataset in self.wfController.request.get("OutputDatasets") + ] + ): + self.wfController.logger.info("The workflow has a DBS invalid file level too high") + self.assistanceTags.add("invalidfiles") + + def _setRecord(self) -> None: + putRecord = { + "datasets": {}, + "name": self.wf, + "closeOutWorkflow": self.isClosing, + "priority": self.wfController.request.get("RequestPriority"), + "prepid": self.wfController.request.get("PrepId"), + } + for dataset in self.outputDatasetsToCheck: + record = putRecord["datasets"].get(dataset, {}) + record["expectedL"] = self.expectedLumis[dataset] + record["expectedN"] = self.expectedEvents[dataset] + record["producedL"] = self.producedLumis[dataset] + record["producedN"] = self.producedEvents[dataset] + record["percentage"] = math.round(self.percentCompletions[dataset], 2) + record["fractionpass"] = 
math.round(self.fractions["pass"][dataset], 2) + record["duplicate"] = "N/A" + record["closeOutDataset"] = self.isClosing + record["correctLumis"] = ( + int(self.eventsPerLumi[dataset]) if self.eventsPerLumi[dataset] > self.lumiUpperLimit[dataset] else True + ) + record["dbsFiles"] = self.dbsPresence[dataset] + record["dbsInvFiles"] = self.dbsInvalid[dataset] + record["rucioFiles"] = set(RucioReader().getDatasetFileNames(dataset)) + record[ + "acdc" + ] = f"{len(self.acdcWfs.get('healthy', []))} / {len(self.acdcWfs.get('healthy', []) + self.acdcWfs.get('inactive', []))}" + record["family"] = self._getWorkflowFamily(self.wf, self.wfController) + + now = gmtime() + record["timestamp"] = mktime(now) + record["updated"] = asctime(now) + " (GMT)" + + putRecord["datasets"][dataset] = record + + self.record = record + + def _closeWorkflow(self) -> None: + self.wfController.logger.info("Setting %s as closed-out", self.wf) + + if self.wfToCheck.status in ["closed-out", "announced", "normal-archived"]: + self.logger.info( + "%s is already %s, not trying to close-out as assuming it does", + self.wf, + self.wfToCheck.status, + ) + self.newStatus = "close" + return + + # TODO: migrate reqMgrClient.closeOutWorkflowCascade + response = self.checkor.ReqMgrReader.closeoutWorkflow(self.wf) + if response is None: + self.newStatus = "close" + return + + self.logger.info("Could not close-out, will try again next time") + + + def _checkAssistanceTags(self) -> None: + self.logger.info("%s was tagged with: %s", self.wf, self.assistanceTags) + if "recovering" in self.assistanceTags: + self.assistanceTags -= set(["recovery", "filemismatch", "manual"]) + if "recovery" in self.assistanceTags and "recovered" in self.assistanceTags: + self.assistanceTags -= set(["recovery", "recovered"]) + self.assistanceTags.add("manual") + if "recovery" in self.assistanceTags and "manual" in self.assistanceTags: + self.assistanceTags -= set(["recovery"]) + if "custodial" in assistanceTags: + 
self.assistanceTags -= set(["announce", "announced"]) + if any([tag in self.assistanceTags for tag in ["duplicates", "filemismatch", "agentfilemismatch"]]): + self.assistanceTags -= set(["announce"]) + + self.logger.info("%s needs assistance with: %s", self.wf, self.assistanceTags) + self.logger.info("%s has existing conditions: %s", self.wf, self.existingAssistaceTags) + + def _warnRequestor(self) -> bool: + if self.assistanceTags and "manual" not in self.existingAssistaceTags and self.existingAssistaceTags != self.assistanceTags and any(tag in self.assistanceTags for tag in ["recovery", "biglumi"]): + + msg = "The request PREPID (WORKFLOW) is facing issue in production.\n" + + if "recovery" in self.assistanceTags: + msg += f"Samples completed with missing statistics\n{'\n'.join([f'{round(self.percentCompletions[dataset]*100, 2)}%% complete for {dataset}' for dataset in self.outputDatasetsToCheck ])}\nhttps://cmsweb.cern.ch/report/{wf}\n" + if "biglumi" in self.assistanceTags: + msg += f"Samples completed with large luminosity blocks:\n{'\n'.join([f'{self.eventsPerLumi[dataset]} > {self.lumiUpperLimit[dataset]} for {dataset}' for dataset in self.outputDatasetsToCheck])}\nhttps://cmsweb.cern.ch/reqmgr/view/splitting/{wf}\n" + + msg += "You are invited to check, while this is being taken care of by Comp-Ops.\n" + msg += "This is an automated message from Comp-Ops.\n" + + self.wfController.logger.critical(msg) + + def _getAssistanceStatus(self) -> str: + if self.assistanceTags: + return "assistance-" + "-".join(sorted(self.assistanceTags)) + return "assistance" + + def _setWorkflowToAssistance(self) -> None: + assistanceStatus = self._getAssistanceStatus() + if "manual" not in self.wfToCheck.status or assistanceStatus != "assistance-recovery": + self.newStatus = assistanceStatus + + def check(self) -> dict: + try: + self.checkor._checkPoint(self.checkor.logMsg["startCheck"].format(self.wf)) + if self._skipWorkflow() or self._updateWorkflowStatus(): + return 
self._writeResponse()
+            self.checkor._checkPoint("Checked workflow status", subLap=True)
+
+            self._setOutputDatasetsToCheck()
+            self._checkWorkflowFamily()
+            self.checkor._checkPoint("Checked workflow family", subLap=True)
+
+            self._setStatisticsThresholds()
+            self.checkor._checkPoint("Checked statistics threshold", subLap=True)
+
+            self._checkCompletionStatistics()
+            self.checkor._checkPoint("Checked observed statistics", subLap=True)
+
+            self._setPassStatisticsCheck()
+            self._checkAvgCompletionStatistics()
+            self.checkor._checkPoint("Checked more detailed observed statistics", subLap=True)
+
+            self._setPassStatisticsCheckToAnnounce()
+            self._setPassStatisticsCheckOverCompletion()
+            self._checkOutputSize()
+            self.checkor._checkPoint("Checked output size", subLap=True)
+
+            self._checkLumiSize()
+            self.checkor._checkPoint("Checked lumi size", subLap=True)
+
+            self._checkRucioCounts()
+            self.checkor._checkPoint("Checked Rucio count", subLap=True)
+
+            self._checkDBSCounts()
+            self.checkor._checkPoint("DBS file count", subLap=True)
+
+            if self._hasFileMismatch() and "recovering" not in self.assistanceTags:
+                self._checkFileCounts()
+            self.checkor._checkPoint("Checked file count", subLap=True)
+
+            self._checkInvalidations()
+            self.checkor._checkPoint("Checked invalidation", subLap=True)
+
+            self.checkor._checkPoint(f"Done with {self.wf}")
+            self._setRecord()
+
+            ## TODO: lumi summary if re reco
+
+            if self.isClosing:
+                self._closeWorkflow()
+            else:
+                self._checkAssistanceTags()
+                self._warnRequestor()
+                self._setWorkflowToAssistance()
+
+            ## TODO: update jira
+
+            return self._writeResponse()
+
+        except Exception as error:
+            self.logger.error(self.logMsg["failedCheck"], self.wf)
+            self.logger.error(str(error))
+            self.failed = True
+            return self._writeResponse()
+
+    def _writeResponse(self) -> dict:
+        response = {
+            "workflow": self.wfToCheck,
+            "wf": self.wf,
+            "failed": self.failed,
+            "isClosing": self.isClosing,
+            "newStatus": self.newStatus,
+            "prepIds": 
self.wfController.getPrepIDs(), + "mcmForceComplete": self.bypassChecksByMcMForceComplete, + "record": self.record + } + return response + + +if __name__ == "__main__": + options, specificWf = Checkor.parseOptions() + checkor = Checkor(options=options, specificWf=specificWf) + if checkor.go(): + checkor.run() From 7f6cc9b111e3372213ab85bc151cc1e7c098b728 Mon Sep 17 00:00:00 2001 From: Amanda Goldani Date: Thu, 16 Dec 2021 23:48:38 +0100 Subject: [PATCH 03/17] Add fix to handle both old and new dbs server --- src/python/Services/DBS/DBSReader.py | 43 +++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/src/python/Services/DBS/DBSReader.py b/src/python/Services/DBS/DBSReader.py index f5a5d6975..623551578 100644 --- a/src/python/Services/DBS/DBSReader.py +++ b/src/python/Services/DBS/DBSReader.py @@ -93,7 +93,48 @@ def _getBlockFileLumis(self, blocks: List[str], validFileOnly: bool = True) -> L This function runs by default with multithreading on the param blocks, which is a list of block names. 
""" - return self.dbs.listFileLumis(block_name=blocks, validFileOnly=int(validFileOnly)) + response = self.dbs.listFileLumis(block_name=blocks, validFileOnly=int(validFileOnly)) + + if isinstance(response[0]["lumi_section_num"], list): + self.logger.debug("Handling dbsapi.listFileLumis response from NEW DBS server") + return self._aggregateListFileLumis(response) + + self.logger.debug("Handling dbsapi.listFileLumis response from OLD DBS server") + return response + + def _aggregateListFileLumis(self, response: dict) -> List[dict]: + """ + The function to aggregate the list of file lumis + :param response: raw response of listFileLumis + :return: lumi section files + """ + aggregatedResponse = [] + tmpDict = {} + + for entry in response: + runNum = entry["run_num"] + lumiSectionNum = entry["lumi_section_num"] + logicalFileName = entry["logical_file_name"] + eventCount = entry["event_count"] + + key = (runNum, logicalFileName) + + if key in tmpDict: + tmpDict[key]["event_count"].append(eventCount) + else: + tmpDict[key] = {"event_count": [eventCount], "lumi_section_num": [lumiSectionNum]} + + for key, value in tmpDict.iteritems(): + aggregatedResponse.append( + { + "run_num": key[0], + "lumi_section_num": value["lumi_section_num"], + "logical_file_name": key[1], + "event_count": value["event_count"], + } + ) + + return aggregatedResponse def getDBSStatus(self, dataset: str) -> str: """ From dbfa67fae5cd65705c671fb82e165740f5438f02 Mon Sep 17 00:00:00 2001 From: Amanda Goldani Date: Fri, 17 Dec 2021 04:30:38 +0100 Subject: [PATCH 04/17] Add missing functions and comments --- src/python/Services/DBS/DBSReader.py | 26 ++ src/python/Services/ReqMgr/ReqMgrWriter.py | 23 +- unified/Checkor.py | 489 +++++++++++++-------- 3 files changed, 365 insertions(+), 173 deletions(-) diff --git a/src/python/Services/DBS/DBSReader.py b/src/python/Services/DBS/DBSReader.py index 623551578..d320f4455 100644 --- a/src/python/Services/DBS/DBSReader.py +++ 
b/src/python/Services/DBS/DBSReader.py @@ -518,3 +518,29 @@ def getBlocksLumisAndFilesForCaching(self, blocks: List[dict], validFileOnly: bo except Exception as error: self.logger.error("Failed to get lumi sections and files from DBS for given blocks") self.logger.error(str(error)) + + def countDatasetFiles(self, dataset: str, skipInvalid: bool = False, onlyInvalid: bool = False) -> int: + """ + The function to count the number of files in a given dataset + :param dataset: dataset name + :param skipInvalid: if True skip invalid files, o/w include them + :param onlyInvalid: if True include only invalid files, o/w include all + :return: number of files + """ + try: + files = self.dbs.listFiles(dataset=dataset, detail=(skipInvalid or onlyInvalid)) + + mainLFN = "/".join(files[0]["logical_file_name"].split("/")[:3]) if files else "" + + if skipInvalid: + files = [*filter(lambda file: file["is_file_valid"] == 1, files)] + elif onlyInvalid: + files = [*filter(lambda file: file["is_file_valid"] == 0, files)] + + if (skipInvalid or onlyInvalid) and mainLFN: + return [*filter(lambda file: file["logical_file_name"].startswith(mainLFN), files)] + return files + + except Exception as error: + self.logger.error("Failed to count files in dataset") + self.logger.error(str(error)) diff --git a/src/python/Services/ReqMgr/ReqMgrWriter.py b/src/python/Services/ReqMgr/ReqMgrWriter.py index b9408ee9b..b5c29b889 100644 --- a/src/python/Services/ReqMgr/ReqMgrWriter.py +++ b/src/python/Services/ReqMgr/ReqMgrWriter.py @@ -115,10 +115,31 @@ def approveWorkflow(self, wf: str) -> bool: """ try: result = sendResponse( - url=self.reqmgrUrl, endpoint=f"{self.reqmgrEndpoint['request']}/{wf}", param={"RequestStatus": "assignment-approved"} + url=self.reqmgrUrl, + endpoint=f"{self.reqmgrEndpoint['request']}/{wf}", + param={"RequestStatus": "assignment-approved"}, ) return result["result"][0]["ok"] except Exception as error: self.logger.error("Failed to approve workflow in reqmgr") 
self.logger.error(str(error)) + + def closeoutWorkflow(self, wf: str, cascade: bool = False) -> bool: + """ + The function to close out a given workflow + :param wf: workflow name + :param cascade: if cascade or not + :return: True if succeeded, False o/w + """ + try: + result = sendResponse( + url=self.reqmgrUrl, + endpoint=f"{self.reqmgrEndpoint['request']}/{wf}", + param={"RequestStatus": "closed-out", "cascade": cascade}, + ) + return result["result"][0][wf] == "OK" + + except Exception as error: + self.logger.error("Failed to close out workflow in reqmgr") + self.logger.error(str(error)) diff --git a/unified/Checkor.py b/unified/Checkor.py index 800b8870e..3c434cfa6 100644 --- a/unified/Checkor.py +++ b/unified/Checkor.py @@ -2,17 +2,13 @@ import re import optparse import random -import json import math from logging import Logger from collections import defaultdict from time import mktime, asctime, gmtime, struct_time -from jinja2.runtime import F - -from sqlalchemy.sql.expression import true from Utilities.DataTools import unnestList, flattenDictKeys -from Utilities.Decorators import runWithMultiThreading, runWithRetries +from Utilities.Decorators import runWithMultiThreading from Utilities.Logging import getLogger from Utilities.ConfigurationHandler import ConfigurationHandler from Cache.DataCacheLoader import DataCacheLoader @@ -20,15 +16,16 @@ from Databases.Oracle.OracleDB import Workflow from MongoControllers.CampaignController import CampaignController from MongoControllers.CloseoutController import CloseoutController -from MongoControllers.SiteController import SiteController from MongoControllers.WTCController import WTCController from Services.ServicesChecker import ServicesChecker from Services.McM.McMClient import McMClient from Services.ReqMgr.ReqMgrReader import ReqMgrReader +from Services.ReqMgr.ReqMgrWriter import ReqMgrWriter from Services.ACDC.ACDCReader import ACDCReader from Services.DBS.DBSReader import DBSReader from 
Services.DBS.DBSWriter import DBSWriter from Services.Rucio.RucioReader import RucioReader +from WorkflowMgmt.SiteController import SiteController from WorkflowMgmt.UserLockChecker import UserLockChecker from WorkflowMgmt.WorkflowController import WorkflowController from WorkflowMgmt.WorkflowStatusEnforcer import WorkflowStatusEnforcer @@ -56,67 +53,24 @@ def __init__(self, logger: Optional[Logger] = None, **kwargs) -> None: self.unifiedConfiguration = ConfigurationHandler("config/unifiedConfiguration.json") - self.reqMgrReader = ReqMgrReader() self.acdcReader = ACDCReader() self.dbs = {"reader": DBSReader(), "writer": DBSWriter()} + self.reqMgr = {"reader": ReqMgrReader(), "writer": ReqMgrWriter()} self.dataCacheLoader = DataCacheLoader() - self.mcmClient = McMClient() if self.useMcM else None self.campaignController = CampaignController() self.closeoutController = CloseoutController() self.siteController = SiteController() self.wtcController = WTCController() - self.logMsg = { - "strict": "Strict option is on: checking workflows that freshly completed", - "update": "Update option is on: checking workflows that have not completed yet", - "clear": "Clear option is on: checking workflows that are ready to toggle closed-out", - "reviewRecover": "Review-recovering option is on: checking only the workflows that had been already acted on", - "reviewManual": "Review-manual option is on: checking the workflows to be acted on", - "timeCheck": "Time check (%s) point at: %s", - "sinceStart": "Since start: %s [s]", - "lap": "%s: %s [s]", - "notAllowed": "%s is not allowed to %s", - "allowed": "%s allowed to %s %s", - "nWfs": "Considering %s workflows (before any limitation)", - "limitOption": "Command line to limit workflows to %s", - "limit": "Limiting workflows to %s this round", - "startCheck": "Starting checkor with {}", - "setClose": "%s is already %s, setting as close", - "setForget": "%s is %s, but will not be set in trouble to find a replacement", - "setTrouble": "%s 
is %s, setting as trouble", - "setOnHold": "%s is %s, setting as on hold", - "notRunning": "%s is %s, it is not running yet", - "skipCheck": "No need to check %s in status %s", - "bypassChecks": "Bypassing checks on %s because of keyword %s", - "bypassForceComplete": "Bypassing checks and force completing %s because of %s %s", - "failedCheck": "Failed on checking %s", - "onHoldSince": "On hold since %s, timeout at %s", - "bypassOnHold": "%s is on hold and stopped for %.2f days, letting this through with current statistics", - "onHold": "%s is on hold", - "campaigns": "Campaigns: %s", - "checkSummary": "Initial outputs: %s\nWill check on: %s\nTiers out: %s\n, Tiers no custodials: %s", - "inconsistentAcdc": "Inconsistent ACDC %s", - "acdc": "%s still has an ACDC running", - "failedAcdc": "For %s, ACDC %s failed", - "badAcdc": "For %s, ACDC %s is inconsistent, preventing from closing or will create a mess.", - "forceRecovering": "%s is being force completed while recovering", - "forceComplete": "The workflow %s was force completed", - "missingTotalEvents": "TotalInputEvents is missing from the workload of %s", - "sinceCompleted": "%s days since completed", - "earlyAnnounce": "Allowed to announce the output %s over %.2f by campaign requirement", - "overrideFraction": "Overriding fraction to %s for %s by %s", - "truncateRecovery": "Allowed to truncate recovery of %s over %.2f by campaign requirement", - "fractionDamping": "The passing fraction could be reduced to %s given it has been in for long", - "reducePass": "Reducing pass threshold by %.3f%% for long lasting workflow %s", - "truncatePass": "Truncating at pass threshold because of ACDC at rank %d", - } - - self.useMcM = False - self.exceptions = set() - self._setWfs() - # TODO: update JIRA tickets + self.overrideWfs = None + self.onHoldWfs = None + self.bypassWfs = None + self.forceCompleteWfs = None + + self.jiraClient = None + self.mcmClient = None except Exception as error: raise Exception(f"Error initializing 
Checkor\n{str(error)}") @@ -197,7 +151,11 @@ def _setWfs(self) -> None: self.overrideWfs = self._getWorkflowsByAction("force-complete", details=True) self.onHoldWfs = self._getWorkflowsByAction("hold") self.bypassWfs = self._getWorkflowsByAction("bypass") + unnestList(self.overrideWfs) - self.forceCompleteWfs = self.mcmClient.get("/restapi/requests/forcecomplete") if self.useMcM else {} + self.forceCompleteWfs = self.mcmClient.get("/restapi/requests/forcecomplete") if self.mcmClient is not None else {} + + if self.jiraClient is not None: + # TODO: update wfs by JIRA tickets / migrate JiraClient + pass def _checkPoint(self, label: str = "", subLap: bool = False, now: struct_time = gmtime()) -> None: """ @@ -206,11 +164,11 @@ def _checkPoint(self, label: str = "", subLap: bool = False, now: struct_time = :subLap: True if for sub lap, False o/w :now: time now """ - self.logger.info(self.logMsg["timeCheck"], label, asctime(now)) - self.logger.info(self.logMsg["sinceStart"], now - self.timePoint.get("start", now)) + self.logger.info("Time check (%s) point at: %s", label, asctime(now)) + self.logger.info("Since start: %s [s]", now - self.timePoint.get("start", now)) self.logger.info( - self.logMsg["lap"], + "%s: %s [s]", "Sub lap" if subLap else "Lap", now - self.timePoint.get("subLap" if subLap else "lap", now), ) @@ -238,27 +196,27 @@ def _getWorkflowsToCheck(self) -> list: awayWfs = self.session.query(Workflow).filter(Workflow.status == "away").all() assistanceWfs = self.session.query(Workflow).filter(Workflow.status.startswith("assistance")).all() - completedWfs = self.reqMgrReader.getWorkflowsByStatus("completed") + completedWfs = self.reqMgr["reader"].getWorkflowsByStatus("completed") if self.options.get("strict"): - self.logger.info(self.logMsg["strict"]) + self.logger.info("Strict option is on: checking workflows that freshly completed") workflows.update(filter(lambda wf: wf.name in completedWfs, awayWfs)) if self.options.get("update"): - 
self.logger.info(self.logMsg["update"]) + self.logger.info("Update option is on: checking workflows that have not completed yet") workflows.update(filter(lambda wf: wf.name not in completedWfs, awayWfs)) if self.options.get("clear"): - self.logger.info(self.logMsg["clear"]) + self.logger.info("Clear option is on: checking workflows that are ready to toggle closed-out") workflows.update(filter(lambda wf: "custodial" in wf.status, assistanceWfs)) if self.options.get("review"): nonCustodialWfs = [*filter(lambda wf: "custodial" not in wf.status, assistanceWfs)] if self.options.get("recovering"): - self.logger.info(self.logMsg["reviewRecover"]) + self.logger.info("Review-recovering option is on: checking only the workflows that had been already acted on") workflows.update(filter(lambda wf: "manual" not in wf.status, nonCustodialWfs)) if self.options.get("manual"): - self.logger.info(self.logMsg["reviewManual"]) + self.logger.info("Review-manual option is on: checking the workflows to be acted on") workflows.update(filter(lambda wf: "manual" in wf.status, nonCustodialWfs)) return list(workflows) @@ -285,24 +243,29 @@ def _getWorkflowsByAction(self, action: str, details: bool = False) -> Union[lis ) for user, wf in userWfs.items(): if user not in allowedUsers: - self.logger.info(self.logMsg["notAllowed"], user, action) + self.logger.info("%s is not allowed to %s", user, action) continue - self.logger.info(self.logMsg["allowed"], user, action, wf) + self.logger.info("%s allowed to %s %s", user, action, wf) workflows[user].add(wf) if details: return workflows return unnestList(workflows) - def _filterMaxNumberOfWorkflows(self, workflows) -> list: + def _filterMaxNumberOfWorkflows(self, workflows: list) -> list: + """ + The function to filter the max number of workflows per round + :param workflows: list of workflows + :return: filtered list of workflows + """ maxPerRound = self.unifiedConfiguration.get("max_per_round", {}).get("checkor") if self.options.get("limit"): - 
self.logger.info(self.logMsg["limitOption"], self.options.get("limit")) + self.logger.info("Command line to limit workflows to %s", self.options.get("limit")) maxPerRound = self.options.get("limit") if maxPerRound and not self.specific: - self.logger.info(self.logMsg["limit"], maxPerRound) + self.logger.info("Limiting workflows to %s this round", maxPerRound) workflows = self._rankWorkflows(workflows) if self.option.get("update"): @@ -317,18 +280,26 @@ def _rankWorkflows(self, workflows: list) -> list: :param workflows: workflows :return: sorted workflows """ - completedWfs = self.reqMgrReader.getWorkflowsByStatus("completed", details=True) + completedWfs = self.reqMgr["reader"].getWorkflowsByStatus("completed", details=True) completedWfs = sorted(completedWfs, key=lambda wf: wf.get("RequestPriority", 0)) completedWfs = [wf.get("RequestName") for wf in completedWfs] return sorted(workflows, key=lambda wf: completedWfs.index(wf) if wf in completedWfs else 0, reverse=True) def _updateWorkflowsRecords(self, wfsRecords: dict) -> None: + """ + The function to update the workflows records + :param wfsRecords: records + """ for wf in wfsRecords: if wf.get("record"): self.closeoutController.set(wf.get("wf"), wf.get("record")) - def _updateWorkflowsStatus(self, wfsStatus) -> None: + def _updateWorkflowsStatus(self, wfsStatus: list) -> None: + """ + The function to update the workflows status + :param wfsStatus: status + """ for wf in wfsStatus: if wf.get("newStatus"): newStatus = wf.get("newStatus") @@ -337,11 +308,16 @@ def _updateWorkflowsStatus(self, wfsStatus) -> None: if newStatus == "close": self.closeoutController.clean(wf.get("wf")) - if self.useMcM and wf.get("mcmForceComplete"): + if self.mcmClient is not None and wf.get("mcmForceComplete"): for prepId in wf.get("prepIds"): self.mcmClient.clean(f"/restapi/requests/forcecomplete/{prepId}") def _checkExecutionTime(self, nWfs: int, now: struct_time = mktime(gmtime())) -> None: + """ + The function to check the 
execution time of the module + :param nWfs: number of workflows + :param now: time now + """ if nWfs: avgExecutionTime = (now - self.timePoint.get("start")) / nWfs self.logger.info("Average time spend per workflow: %s s", avgExecutionTime) @@ -350,13 +326,20 @@ def _checkExecutionTime(self, nWfs: int, now: struct_time = mktime(gmtime())) -> self.logger.critical("Checkor took %s [s] per workflow", avgExecutionTime) def _countAssistanceWorkflowsByStatus(self) -> dict: + """ + The function to count the number of assistance workflows by status + :return: count of workflows by status + """ status = defaultdict(int) for wf in self.session.query(Workflow).filter(Workflow.status.startswith("assistance")).all(): status[wf.status] += 1 return status - def _writeSummary(self) -> str: + def _writeSummary(self) -> None: + """ + The function to write a summary of checkor + """ if not self.specificWf: msg = "" if self.options.get("strict"): @@ -382,7 +365,8 @@ def go(self) -> bool: servicesChecker = ServicesChecker(softServices=["mcm", "wtc"]) if not userLockChecker.isLocked() and servicesChecker.check(): - self.useMcM = servicesChecker.status.get("mcm") + self.mcmClient = McMClient() if servicesChecker.status.get("mcm") else None + self.jiraClient = None #JiraClient() if servicesChecker.status.get("jira") else None TODO: migrate JiraClient return True return False @@ -396,10 +380,12 @@ def run(self) -> None: The function to run checkor """ try: + self._setWfs() + wfsToCheck = self._filterBackfills(self._getWorkflowsToCheck()) random.shuffle(wfsToCheck) - self.logger.info(self.logMsg["nWfs"], len(wfsToCheck)) + self.logger.info("Considering %s workflows (before any limitation)", len(wfsToCheck)) wfsToCheck = self._filterMaxNumberOfWorkflows(wfsToCheck) self._check(wfsToCheck) @@ -426,7 +412,7 @@ def _checkWorkflow(self, wfsToCheck: list) -> dict: self._checkExecutionTime(len(wfsToCheck)) -class WorkflowCheckor: +class WorkflowCheckor(object): """ __WorkflowCheckor__ General API 
for checking a fiven workflow @@ -439,25 +425,29 @@ def __init__(self, wfToCheck: Workflow, logger: Optional[Logger] = None, **kwarg self.now = mktime(gmtime()) self.wf = wfToCheck.name + self.wfController = WorkflowController(self.wf) + self.wfToCheck = wfToCheck - self.wfController = WorkflowController(wfToCheck.name) - self.wfsToCheck.wm_status = self.wfController.request.get("RequestStatus") + self.wfToCheck.wm_status = self.wfController.request.get("RequestStatus") self.checkor = kwargs.get("checkor") self.rucioReader = RucioReader() - self.existingAssistaceTags = set(wfToCheck.status.split("-")[1:]) - self.assistanceTags = set() - self.outputDatasetsToCheck = list() - self.acdcWfs = dict() - self.fractions = dict() + self.assistanceTags, self.existingAssistaceTags = set(), set(wfToCheck.status.split("-")[1:]) + + self.acdcs = dict() self.campaigns = dict() - self.eventsPerLumi = dict() + self.fractions = dict() self.percentCompletions, self.percentAvgCompletions = dict(),dict() + + self.eventsPerLumi = dict() + self.lumiUpperLimit = dict() self.expectedLumis, self.expectedEvents =dict(),dict() self.producedLumis, self.producedEvents = dict(),dict() + + self.outputDatasetsToCheck = list() self.passStatsCheck, self.passStatsCheckToAnnounce, self.passStatsCheckOverCompletion = dict(), dict(), dict() - self.lumiUpperLimit = dict() + self.rucioPresence = dict() self.dbsPresence, self.dbsInvalid = dict(), dict() @@ -486,6 +476,7 @@ def _skipWorkflow(self) -> bool: ): self.logger.info("Skipping workflow %s", self.wf) return True + return False def _setWorkflowToClose(self) -> bool: @@ -494,9 +485,10 @@ def _setWorkflowToClose(self) -> bool: :return: True if workflow should be closed, False o/w """ if self.wfToCheck.wm_status in ["closed-out", "announced"] and self.wf not in self.checkor.exceptions: - self.logger.info(self.logMsg["setClose"], self.wf, self.wfToCheck.wm_status) + self.logger.info("%s is already %s, setting as close", self.wf, self.wfToCheck.wm_status) 
self.newStatus = "close" return True + return False def _setWorkflowToForget(self) -> bool: @@ -512,9 +504,10 @@ def _setWorkflowToForget(self) -> bool: "rejected-archived", "aborted-completed", ]: - self.logger.info(self.logMsg["setForget"], self.wf, self.wfToCheck.wm_status) + self.logger.info("%s is %s, but will not be set in trouble to find a replacement", self.wf, self.wfToCheck.wm_status) self.newStatus = "forget" return True + return False def _setWorkflowToTrouble(self) -> bool: @@ -530,21 +523,21 @@ def _setWorkflowToTrouble(self) -> bool: "rejected-archived", "aborted-completed", ]: - self.logger.info(self.logMsg["setTrouble"], self.wf, self.wfToCheck.wm_status) + self.logger.info("%s is %s, setting as trouble", self.wf, self.wfToCheck.wm_status) self.newStatus = "trouble" return True + return False def _setBypassChecks(self) -> None: """ - The function to check if bypass checks for a given workflow. - :param wf: workflow name - :return: True if bypass checks, False o/w + The function to check if bypass checks for a given workflow """ for bypassWf in self.checkor.bypassWfs: if bypassWf == self.wf: self.logger.info(self.logMsg["bypassCheck"], self.wf, bypassWf) self.bypassChecks = True + self.bypassChecks = False def _setBypassChecksByMcMForceComplete(self, prepIds: list) -> None: @@ -554,8 +547,9 @@ def _setBypassChecksByMcMForceComplete(self, prepIds: list) -> None: """ for forceCompleteWf in self.checkor.forceCompleteWfs: if forceCompleteWf in prepIds: - self.logger.info(self.logMsg["bypassForceComplete"], self.wf, "prepid", forceCompleteWf) + self.logger.info("Bypassing checks and force completing %s because of prep id %s", self.wf, forceCompleteWf) self.bypassChecksByMcMForceComplete = True + self.bypassChecksByMcMForceComplete = False def _setBypassChecksByUserForceComplete(self) -> None: @@ -566,9 +560,10 @@ def _setBypassChecksByUserForceComplete(self) -> None: forceCompleteWfs = [overrideWf for overrideWf in userOverrideWfs if overrideWf in 
self.wf] if forceCompleteWfs: self.logger.info( - self.logMsg["bypassForceComplete"], self.wf, "user/keyword", f"{user}/{forceCompleteWfs[0]}" + "Bypassing checks and force completing %s because of user/keyword %s/%s", self.wf, user, forceCompleteWfs[0] ) self.bypassChecksByUserForceComplete = True + self.bypassChecksByUserForceComplete = False def _isWorkflowOnHold(self) -> bool: @@ -605,19 +600,19 @@ def _setNewOnHoldStatus(self, family: list) -> bool: onHoldTimeout = self.unifiedConfiguration.get("onhold_timeout") minFamilyCompletedDelay = self._getMinFamilyCompletedDelay(family, self.now) - self.logger.info(self.logMsg["onHoldSince"], minFamilyCompletedDelay, onHoldTimeout) + self.logger.info("On hold since %s, timeout at %s", minFamilyCompletedDelay, onHoldTimeout) if onHoldTimeout > 0 and onHoldTimeout < minFamilyCompletedDelay: - self.logger.info(self.logMsg["bypassOnHold"], self.wf, minFamilyCompletedDelay) + self.logger.info("%s is on hold and stopped for %.2f days, letting this through with current statistics", self.wf, minFamilyCompletedDelay) self.bypassChecks = True return False if "-onhold" in self.wfToCheck.wm_status and self.wf in self.checkor.onHoldWfs and not self.bypassChecks: - self.logger.info(self.logMsg["onHold"], self.wf) + self.logger.info("%s is on hold", self.wf) return True if self.wf in self.checkor.onHoldWfs and not self.bypassChecks: - self.logger.info(self.logMsg["setOnHold"], self.wf, self.wfToCheck.wm_status) + self.logger.info("%s is %s, setting as on hold", self.wf, self.wfToCheck.wm_status) self.newStatus = "assistance-onhold" return True @@ -636,6 +631,10 @@ def _updateWorkflowStatus(self) -> bool: return False def _getTiersWithNoCheckAndCustodial(self) -> Tuple[list, list]: + """ + The function yo get the tiers with no check and custodial + :return: list of tiers with no check, and list of tiers with no custodial + """ tiersWithNoCheck = set(self.unifiedConfiguration.get("tiers_with_no_check")) tiersWithNoCustodial = set( 
self.unifiedConfiguration.get("tiers_with_no_custodial") if not self.wfController.request.isRelVal() else [] @@ -656,6 +655,9 @@ def _getTiersWithNoCheckAndCustodial(self) -> Tuple[list, list]: return tiersWithNoCheck, tiersWithNoCustodial def _setOutputDatasetsToCheck(self) -> None: + """ + The function to set the datasets to check + """ tiersWithNoCheck, tiersWithNoCustodial = self._getTiersWithNoCheckAndCustodial(self.wfController) expectedOutputsDatasets = self.wfController.request.get("OutputDatasets") @@ -665,8 +667,8 @@ def _setOutputDatasetsToCheck(self) -> None: if all([dataset.split("/")[-1] != tier for tier in tiersWithNoCheck]) ] - self.wfController.logger.info( - self.logMsg["checkSummary"], + self.wfController.logger.debug( + "Initial outputs: %s\nWill check on: %s\nTiers out: %s\nTiers no custodials: %s", ", ".join(sorted(expectedOutputsDatasets)), ", ".join(sorted(self.outputDatasetsToCheck)), ", ".join(sorted(tiersWithNoCheck)), @@ -674,6 +676,11 @@ def _setOutputDatasetsToCheck(self) -> None: ) def _skipFamilyWorkflow(self, wfSchema: dict) -> bool: + """ + The function to check if the workflow family should be skipped or not + :param wfSchema: workflow schema + :return: True if to skip, False o/w + """ return ( wfSchema.get("RequestType") != "Ressubmission" or wfSchema.get("RequestDate") < wfSchema.request.get("RequestDate") @@ -683,8 +690,12 @@ def _skipFamilyWorkflow(self, wfSchema: dict) -> bool: def _getWorkflowFamily(self) -> list: + """ + The function to get the workflow family + :return: workflow family + """ family = [] - for member in self.reqmgrReader.getWorkflowsByPrepId(self.wfController.request.get("PrepID"), details=True): + for member in self.reqMgr["reader"].getWorkflowsByPrepId(self.wfController.request.get("PrepID"), details=True): if ( member.get("RequestName") == self.wf or self._skipFamilyWorkflow(member) @@ -697,8 +708,11 @@ def _getWorkflowFamily(self) -> list: return family def _setBadAcdcs(self) -> None: + """ + The 
function to set the bad acdcs + """ badAcdcs = [] - for member in self.reqmgrReader.getWorkflowsByPrepId(self.wfController.request.get("PrepID"), details=True): + for member in self.reqMgr["reader"].getWorkflowsByPrepId(self.wfController.request.get("PrepID"), details=True): if member.get("RequestName") == self.wf or self._skipFamilyWorkflow(member): continue @@ -710,13 +724,17 @@ def _setBadAcdcs(self) -> None: "aborted-archived", ]: badAcdcs.append(member.get("RequestName")) - self.wfController.logger.info(self.logMsg["inconsistentAcdc"], member.get("RequestName")) + self.wfController.logger.info("Inconsistent ACDC %s", member.get("RequestName")) - self.acdcWfs["bad"] = badAcdcs + self.acdcs["bad"] = badAcdcs if badAcdcs: self.assistanceTags.update(["manual", "assistance"]) def _setHealthyAcdcs(self, family: list) -> None: + """ + The function to set healthy acdcs + :param family: workflow family + """ healthyAcdcs = [] for member in family: if member.get("RequestStatus") in [ @@ -727,14 +745,18 @@ def _setHealthyAcdcs(self, family: list) -> None: "staging", "staged", ]: - self.logger.info(self.logMsg["acdc"], member.get("RequestName")) + self.logger.info("%s still has an ACDC running", member.get("RequestName")) healthyAcdcs.append(member.get("RequestName")) - self.acdcWfs["healthy"] = healthyAcdcs + self.acdcs["healthy"] = healthyAcdcs if healthyAcdcs: self.assistanceTags.add("recovering") def _setInactiveAcdcs(self, family: list) -> None: + """ + The function to set the inactive acdcs + :param family: workflow family + """ inactiveAcdcs = [] for member in family: if member.get("RequestStatus") not in [ @@ -748,28 +770,39 @@ def _setInactiveAcdcs(self, family: list) -> None: ]: inactiveAcdcs.append(member.get("RequestName")) - self.acdcWfs["inactive"] = inactiveAcdcs + self.acdcs["inactive"] = inactiveAcdcs if inactiveAcdcs: self.assistanceTags.add("recovered") - def _setFailedAcdcs(self, family) -> None: + def _setFailedAcdcs(self, family: list) -> None: + """ 
+ The function to set the failed acdcs + :param family: workflow family + """ failedAcdcs = [] for member in family: if member.get("RequestStatus") in ["failed"]: failedAcdcs.append(member.get("RequestName")) - self.acdcWfs["failed"] = failedAcdcs + self.acdcs["failed"] = failedAcdcs def _setAcdcsOrder(self, family: list) -> None: + """ + The function to set the acdcs order + :param family: workflow family + """ order = -1 for member in family: memberOrder = sorted(filter(re.compile(f"^ACDC\d+$").search, member.get("RequestName"))) if memberOrder: order = max(order, int(memberOrder[-1].split("ACDC")[1])) - self.acdcWfs["order"] = order + self.acdcs["order"] = order def _checkWorkflowFamily(self) -> None: + """ + The function to check the workflow family + """ family = self._getWorkflowFamily() self._setBadAcdcs() @@ -778,15 +811,15 @@ def _checkWorkflowFamily(self) -> None: self._setFailedAcdcs(family) self._setAcdcsOrder(family) - if (self.bypassChecksByMcMForceComplete or self.bypassChecksByUserForceComplete) and len(self.acdcWfs.get("healthy")): - self.wfController.logger.info(self.logMsg["forceRecovering"], self.wf) - self.wfController.logger.critical(self.logMsg["forceComplete"], self.wf) + if (self.bypassChecksByMcMForceComplete or self.bypassChecksByUserForceComplete) and len(self.acdcs.get("healthy")): + self.wfController.logger.info("%s is being force completed while recovering", self.wf) + self.wfController.logger.critical("The workflow %s was force completed", self.wf) WorkflowStatusEnforcer(self.wf).forceComplete() - if self.acdcWfs.get("failed"): - self.logger.critical(self.logMsg["failedAcdc"], self.wf, ", ".join(self.acdcWfs.get("failed"))) - if self.acdcWfs.get("bad"): - self.logger.critical(self.logMsg["badAcdc"], self.wf, ", ".join(self.acdcWfs.get("bad"))) + if self.acdcs.get("failed"): + self.logger.critical("For %s, ACDC %s failed", self.wf, ", ".join(self.acdcs.get("failed"))) + if self.acdcs.get("bad"): + self.logger.critical("For %s, ACDC %s 
is inconsistent, preventing from closing or will create a mess.", self.wf, ", ".join(self.acdcs.get("bad"))) def _getWorkflowCompletedDelay(self) -> float: """ @@ -796,11 +829,15 @@ def _getWorkflowCompletedDelay(self) -> float: completed = [*filter(lambda wf: wf.get("Status") == "completed", self.wfController.request.get("RequestTransition"))] delay = (self.now - completed[-1].get("UpdateTime", self.now)) / 86400.0 if completed else 0 - self.logger.info(self.logMsg["sinceCompleted"], delay) + self.logger.info("%s days since completed", delay) return delay def _getFractionDumping(self) -> float: + """ + The function to compute the fraction dumping + :return: fraction dumping + """ wfCompletedDelay = self._getWorkflowCompletedDelay() fractionDamping = min( 0.01 @@ -811,11 +848,14 @@ def _getFractionDumping(self) -> float: self.unifiedConfiguration.get("damping_fraction_pass_max") / 100.0, ) - self.logger.info(self.logMsg["fractionDamping"], fractionDamping) + self.logger.info("The passing fraction could be reduced to %s given it has been in for long", fractionDamping) return fractionDamping def _setWorkflowCampaigns(self) -> None: + """ + The function to set the campaigns + """ campaigns = {} wfCampaigns = self.wfController.request.getCampaigns(details=False) if len(wfCampaigns) == 1: @@ -824,10 +864,13 @@ def _setWorkflowCampaigns(self) -> None: else: campaigns = self.wfController.getCampaignsFromOutputDatasets() - self.logger.info(self.logMsg["campaigns"], campaigns) + self.logger.info("Campaigns: %s", campaigns) self.campaigns = campaigns def _setDatasetsFractionsToAnnounce(self) -> None: + """ + The function to set the dataset fractions to announce + """ fractionsAnnounce = {} for dataset in self.outputDatasetsToCheck: fractionsAnnounce[dataset] = 1.0 @@ -835,6 +878,9 @@ def _setDatasetsFractionsToAnnounce(self) -> None: self.fractions["announce"] = fractionsAnnounce def _setDatasetsFractionsToPass(self) -> None: + """ + The function to set the dataset fractions 
to pass + """ fractionsPass = {} defaultPass = self.unifiedConfiguration.get("default_fraction_pass") @@ -846,7 +892,7 @@ def _setDatasetsFractionsToPass(self) -> None: if self.options.get("fractionPass"): fractionsPass[dataset] = self.options.get("fractionPass") self.wfController.logger.info( - self.logMsg["overrideFraction"], fractionsPass[dataset], dataset, "command line" + "Overriding fraction to %s for %s by command line", fractionsPass[dataset], dataset ) elif self.campaignController.campaigns.get(campaign, {}).get("fractionpass"): fractionPass = self.campaignController.campaigns.get(campaign).get("fractionpass") @@ -868,7 +914,7 @@ def _setDatasetsFractionsToPass(self) -> None: else: fractionsPass[dataset] = fractionPass self.wfController.logger.info( - self.logMsg["overrideFraction"], fractionsPass[dataset], dataset, "campaign requirement" + "Overriding fraction to %s for %s by campaign requirement", fractionsPass[dataset], dataset ) else: fractionsPass[dataset] = defaultPass @@ -876,11 +922,14 @@ def _setDatasetsFractionsToPass(self) -> None: for key, passValue in self.unifiedConfiguration.get("pattern_fraction_pass").items(): if key in dataset: fractionsPass[dataset] = passValue - self.wfController.logger.info(self.logMsg["overrideFraction"], passValue, dataset, "dataset key") + self.wfController.logger.info("Overriding fraction to %s for %s by dataset key", passValue, dataset) self.fractions["pass"] = fractionsPass def _setDatasetsFractionsToTruncateRecovery(self) -> None: + """ + The function to set the dataset fractions to truncate recovery + """ fractionsTruncateRecovery = {} weightFull = 7.0 @@ -901,7 +950,7 @@ def _setDatasetsFractionsToTruncateRecovery(self) -> None: "truncaterecovery" ) self.wfController.logger.info( - self.logMsg["truncateRecovery"], + "Allowed to truncate recovery of %s over %.2f by campaign requirement", dataset, self.campaignController.campaigns.get(campaign).get("truncaterecovery"), ) @@ -912,6 +961,9 @@ def 
_setDatasetsFractionsToTruncateRecovery(self) -> None: self.fractions["truncate"] = fractionsTruncateRecovery def _setStatisticsThresholds(self) -> None: + """ + The function to set the statistics thresholds + """ self._setDatasetsFractionsToAnnounce() self._setDatasetsFractionsToPass() self._setDatasetsFractionsToTruncateRecovery() @@ -923,12 +975,15 @@ def _setStatisticsThresholds(self) -> None: self.fractions["truncate"][dataset] -= fractionDamping if self.acdsWfs.get("order") > self.unifiedConfiguration.get("acdc_rank_for_truncate"): - self.wfController.logger.info(self.logMsg["truncatePass"], self.acdsWfs.get("order")) + self.wfController.logger.info("Truncating at pass threshold because of ACDC at rank %d", self.acdcs.get("order")) self.fractions["truncate"][dataset] = self.fractions["pass"][dataset] self._updateFractionsToPassAndToTruncateRecovery() def _updateFractionsToPassAndToTruncateRecovery(self) -> None: + """ + The function to update the fractions to pass and to truncate recovery + """ family = dict([(dataset, self.dbs["reader"].getDatasetParent(dataset)) for dataset in self.fractions["pass"]]) for dataset, value in self.fractions["pass"].items(): @@ -952,27 +1007,40 @@ def _updateFractionsToPassAndToTruncateRecovery(self) -> None: ) def _getExpectedEvents(self) -> float: + """ + The function to get the expected events + :return: number of expected events + """ if self.wfController.request.get("RequestType") in ["TaskChain", "StepChain"]: return self.wfController.getRequestNumEvents() expectedEvents = self.wfController.request.get("TotalInputEvents") if expectedEvents is None: - self.wfController.logger.critical(self.logMsg["missingTotalEvents"], wf) + self.wfController.logger.critical("TotalInputEvents is missing from the workload of %s", self.wf) return 0 + return expectedEvents - def _getTaskOutputDatasets(self) -> dict: - taskOutputDatasets = {} + def _getTasksByOutputDataset(self) -> dict: + """ + The function to get the task by output 
datasets + :return: tasks by output datasets + """ + tasksByDataset = {} for task, outputs in self.wfController.getOutputDatasetsPerTask().items(): for output in outputs: - taskOutputDatasets[output] = self.wfController.request.get(task, {}).get("TaskName", task) - return taskOutputDatasets + tasksByDataset[output] = self.wfController.request.get(task, {}).get("TaskName", task) + + return tasksByDataset def _checkCompletionStatistics(self) -> None: + """ + The function to check the completion statistics + """ lumisExpected = self.wfController.request.get("TotalInputLumis") eventsExpected = self._getExpectedEvents() eventsExpectedPerTask = self.wfController.request.getExpectedEventsPerTask() - taskOutputs = self._getTaskOutputDatasets() + taskOutputs = self._getTasksByOutputDataset() for dataset in self.outputDatasetsToCheck: events, lumis = self.dbs["reader"].getDatasetEventsAndLumis(dataset) @@ -1000,6 +1068,9 @@ def _checkCompletionStatistics(self) -> None: ) def _setPassStatisticsCheck(self) -> None: + """ + The function to set pass statistics + """ self.passStatsCheck = dict( [ (dataset, self.bypassChecks or self.percentCompletions[dataset] >= passValue) @@ -1008,6 +1079,9 @@ def _setPassStatisticsCheck(self) -> None: ) def _setPassStatisticsCheckToAnnounce(self) -> None: + """ + The function to set the pass statistics to announce + """ self.passStatsCheckToAnnounce = dict( [ (dataset, self.percentAvgCompletions[dataset] >= passValue) for dataset, passValue in self.fractions["pass"].items() @@ -1015,11 +1089,17 @@ def _setPassStatisticsCheckToAnnounce(self) -> None: ) def _setPassStatisticsCheckOverCompletion(self) -> None: + """ + The function to set the pass statistics to over complete + """ defaultFractionOverdoing = self.unifiedConfiguration.get("default_fraction_overdoing") self.passStatsCheckOverCompletion = dict([(dataset, value >= defaultFractionOverdoing) for dataset, value in self.percentCompletions.items()]) def _checkAvgCompletionStatistics(self) -> 
None: + """ + The function to check the average completion statistics + """ percentAvgCompletions = {} _, primaries, _, _ = self.wfController.request.getIO() @@ -1059,37 +1139,50 @@ def _checkAvgCompletionStatistics(self) -> None: self.percentAvgCompletions = percentAvgCompletions def _getAnnounceAssistanceTags(self) -> list: + """ + The function to get announce assistance tags + :return: announce assistance tags + """ if self.passStatsCheckToAnnounce and all(self.passStatsCheckToAnnounce.values()): self.wfController.logger.info( "The output of this workflow are essentially good to be announced while we work on the rest" ) return ["announced" if "announced" in self.wfToCheck.status else "announce"] + return [] def _getRecoveryAssistanceTags(self) -> list: + """ + The function to get recovery assistance tags + :return: recovery assistance tags + """ if not all(self.passStatsCheck.values()): possibleRecoveries = self.acdcReader.getRecoveryDocs() - if possibleRecoveries: - self.wfController.logger.info( - "The workflow has missing statistics, but nothing is recoverable. Passing through to announcement" - ) - bypassChecks = True - else: - self.wfController.logger.info( - "The workflow is not completed, but nothing is recoverable. Passing through to announcement" + if not possibleRecoveries: + self.logger.info( + "The workflow is not completed/has missing statistics, but nothing is recoverable. 
Passing through to announcement" ) + self.bypassChecks = True - if not bypassChecks: + if not self.bypassChecks: return ["recovery" if self.unifiedConfiguration.get("use_recoveror") else "manual"] return [] def _passOver100(self) -> bool: + """ + The function to check if passing over 100 + :return: True if pass over 100, False o/w + """ lhe, primaries, _, _ = self.wfController.request.getIO() return False if (lhe or primaries) else True def _forceCompleteWorkflow(self) -> bool: - if self.acdcWfs.get("healthy") and all(self.passStatsCheck.values()) and all(self.passStatsCheckToAnnounce.values()): + """ + The function to check if workflow should be force completed or not + :return: True to force complete, False o/w + """ + if self.acdcs.get("healthy") and all(self.passStatsCheck.values()) and all(self.passStatsCheckToAnnounce.values()): self.logger.info("This is essentially good to truncate, setting to force-complete") return True @@ -1097,6 +1190,9 @@ def _forceCompleteWorkflow(self) -> bool: def _checkOutputSize(self,) -> None: + """ + The function check the output size + """ self.assistanceTags += set(self._getAnnounceAssistanceTags()) recoveryAssistanceTags = self._getRecoveryAssistanceTags() @@ -1111,6 +1207,10 @@ def _checkOutputSize(self,) -> None: WorkflowStatusEnforcer(self.wf).forceComplete() def _hasSmallLumis(self) -> bool: + """ + The function to check if the workflow has small lumi sections + :return: True if it has small lumis, False o/w + """ lumiLowerLimit = self.unifiedConfiguration.get("min_events_per_lumi_output") _, primaries, _, _ = self.wfController.request.getIO() @@ -1131,6 +1231,10 @@ def _hasSmallLumis(self) -> bool: return False def _hasBigLumis(self) -> bool: + """ + The function to check if the workflow has big lumi sections + :return: True if it has big lumis, False o/w + """ if any( [ self.lumiUpperLimit[dataset] > 0 and self.eventsPerLumi[dataset] >= self.lumiUpperLimit[dataset] @@ -1144,6 +1248,9 @@ def _hasBigLumis(self) -> bool: 
def _setLumiUpperLimit(self) -> None: + """ + The function to set the lumi sections upper limit + """ lumiUpperLimit = {} campaigns = self.campaigns or self._getWorkflowCampaigns() @@ -1155,8 +1262,8 @@ def _setLumiUpperLimit(self) -> None: elif self.options.get("lumisize"): upperLimit = self.options.get("lumisize") self.logger.info("Overriding the upper lumi size to %s for %s", upperLimit, campaign) - elif self.closeoutController.campaigns.get(campaign, {}).get("lumisize"): - upperLimit = self.closeoutController.campaigns.get(campaign, {}).get("lumisize") + elif self.campaignController.campaigns.get(campaign, {}).get("lumisize"): + upperLimit = self.campaignController.campaigns.get(campaign, {}).get("lumisize") self.logger.info("Overriding the upper lumi size to %s for %s", upperLimit, campaign) else: upperLimit = 1001 @@ -1168,6 +1275,9 @@ def _setLumiUpperLimit(self) -> None: def _checkLumiSize(self) -> None: + """ + The function to check the lumi sections sizes + """ if self._hasSmallLumis(): self.assistanceTags.add("smalllumi") self.isClosing = False @@ -1177,7 +1287,10 @@ def _checkLumiSize(self) -> None: self.assistanceTags.add("biglumi") self.isClosing = False - def _checkRucioCounts(self) -> None: + def _checkRucioFileCounts(self) -> None: + """ + The function to check the number of files in Rucio + """ rucioPresence = {} for dataset in self.wfController.request.get("OutputDatasets"): @@ -1197,16 +1310,22 @@ def _checkRucioCounts(self) -> None: self.rucioPresence = rucioPresence - def _checkDBSCounts(self) -> None: + def _checkDBSFileCounts(self) -> None: + """ + The function to check the number of files in DBS + """ dbsPresence, dbsInvalid = {}, {} for dataset in self.wfController.request.get("OutputDatasets"): - #TODO: migrate dbs3Client.getFileCountDataset dbsPresence[dataset] = self.checkor.dbs["reader"].countDatasetFiles(dataset) dbsInvalid[dataset] = self.checkor.dbs["reader"].countDatasetFiles(dataset, onlyInvalid=True) self.dbsPresence, 
self.dbsInvalid = dbsPresence, dbsInvalid def _hasFileMismatch(self) -> bool: + """ + The function to check if there is any file mismatch + :return: if number of files in DBS in different from Rucio + """ if not self.options.get("ignoreFiles") and not all( [self.dbsPresence[dataset] == self.dbsInvalid[dataset] + self.rucioPresence[dataset] for dataset in self.outputDatasetsToCheck] ): @@ -1215,7 +1334,10 @@ def _hasFileMismatch(self) -> bool: return False - def _checkFileCounts(self) -> list: + def _checkFileCounts(self) -> None: + """ + The function to check the number of files + """ showOnlyN = 10 for dataset in self.dbsPresence: @@ -1246,7 +1368,6 @@ def _checkFileCounts(self) -> list: showOnlyN, "\n".join(wereInvalidated[:showOnlyN]), ) - #TODO: migrate dbs3Client.setFileStatus self.dbs["writer"].setFileStatus(wereInvalidated, validate=False) if missingDBSFiles: @@ -1270,7 +1391,10 @@ def _checkFileCounts(self) -> list: self.isClosing = False - def _checkInvalidations(self) -> list: + def _checkInvalidations(self) -> None: + """ + The function to check the invalidations + """ fractionInvalid = 0.2 if not self.options.get("ignoreinvalid") and not all( [ @@ -1282,7 +1406,10 @@ def _checkInvalidations(self) -> list: self.assistanceTags.add("invalidfiles") def _setRecord(self) -> None: - putRecord = { + """ + The function to check set the record + """ + wfRecord = { "datasets": {}, "name": self.wf, "closeOutWorkflow": self.isClosing, @@ -1290,13 +1417,13 @@ def _setRecord(self) -> None: "prepid": self.wfController.request.get("PrepId"), } for dataset in self.outputDatasetsToCheck: - record = putRecord["datasets"].get(dataset, {}) + record = wfRecord["datasets"].get(dataset, {}) record["expectedL"] = self.expectedLumis[dataset] record["expectedN"] = self.expectedEvents[dataset] record["producedL"] = self.producedLumis[dataset] record["producedN"] = self.producedEvents[dataset] - record["percentage"] = math.round(self.percentCompletions[dataset], 2) - 
record["fractionpass"] = math.round(self.fractions["pass"][dataset], 2) + record["percentage"] = round(self.percentCompletions[dataset], 2) + record["fractionpass"] = round(self.fractions["pass"][dataset], 2) record["duplicate"] = "N/A" record["closeOutDataset"] = self.isClosing record["correctLumis"] = ( @@ -1307,18 +1434,21 @@ def _setRecord(self) -> None: record["rucioFiles"] = set(RucioReader().getDatasetFileNames(dataset)) record[ "acdc" - ] = f"{len(self.acdcWfs.get('healthy', []))} / {len(self.acdcWfs.get('healthy', []) + self.acdcWfs.get('inactive', []))}" + ] = f"{len(self.acdcs.get('healthy', []))} / {len(self.acdcs.get('healthy', []) + self.acdcs.get('inactive', []))}" record["family"] = self._getWorkflowFamily(self.wf, self.wfController) now = gmtime() record["timestamp"] = mktime(now) record["updated"] = asctime(now) + " (GMT)" - putRecord["datasets"][dataset] = record + wfRecord["datasets"][dataset] = record - self.record = record + self.record = wfRecord def _closeWorkflow(self) -> None: + """ + The function to close the workflow + """ self.wfController.logger.info("Setting %s as closed-out", self.wf) if self.wfToCheck.status in ["closed-out", "announced", "normal-archived"]: @@ -1330,9 +1460,8 @@ def _closeWorkflow(self) -> None: self.newStatus = "close" return - # TODO: migrate reqMgrClient.closeOutWorkflowCascade - response = self.checkor.ReqMgrReader.closeoutWorkflow(self.wf) - if response is None: + response = self.checkor.reqMgr["writer"].closeoutWorkflow(self.wf, cascade=True) + if response: self.newStatus = "close" return @@ -1340,6 +1469,9 @@ def _closeWorkflow(self) -> None: def _checkAssistanceTags(self) -> None: + """ + The function to check the assistance tags + """ self.logger.info("%s was tagged with: %s", self.wf, self.assistanceTags) if "recovering" in self.assistanceTags: self.assistanceTags -= set(["recovery", "filemismatch", "manual"]) @@ -1348,7 +1480,7 @@ def _checkAssistanceTags(self) -> None: self.assistanceTags.add("manual") 
if "recovery" in self.assistanceTags and "manual" in self.assistanceTags: self.assistanceTags -= set(["recovery"]) - if "custodial" in assistanceTags: + if "custodial" in self.assistanceTags: self.assistanceTags -= set(["announce", "announced"]) if any([tag in self.assistanceTags for tag in ["duplicates", "filemismatch", "agentfilemismatch"]]): self.assistanceTags -= set(["announce"]) @@ -1356,15 +1488,18 @@ def _checkAssistanceTags(self) -> None: self.logger.info("%s needs assistance with: %s", self.wf, self.assistanceTags) self.logger.info("%s has existing conditions: %s", self.wf, self.existingAssistaceTags) - def _warnRequestor(self) -> bool: + def _warnRequestor(self) -> None: + """ + The function to warn the requestor about the workflow progress + """ if self.assistanceTags and "manual" not in self.existingAssistaceTags and self.existingAssistaceTags != self.assistanceTags and any(tag in self.assistanceTags for tag in ["recovery", "biglumi"]): msg = "The request PREPID (WORKFLOW) is facing issue in production.\n" if "recovery" in self.assistanceTags: - msg += f"Samples completed with missing statistics\n{'\n'.join([f'{round(self.percentCompletions[dataset]*100, 2)}%% complete for {dataset}' for dataset in self.outputDatasetsToCheck ])}\nhttps://cmsweb.cern.ch/report/{wf}\n" + msg += f"Samples completed with missing statistics\n{'\n'.join([f'{round(self.percentCompletions[dataset]*100, 2)}%% complete for {dataset}' for dataset in self.outputDatasetsToCheck ])}\nhttps://cmsweb.cern.ch/report/{self.wf}\n" if "biglumi" in self.assistanceTags: - msg += f"Samples completed with large luminosity blocks:\n{'\n'.join([f'{self.eventsPerLumi[dataset]} > {self.lumiUpperLimit[dataset]} for {dataset}' for dataset in self.outputDatasetsToCheck])}\nhttps://cmsweb.cern.ch/reqmgr/view/splitting/{wf}\n" + msg += f"Samples completed with large luminosity blocks:\n{'\n'.join([f'{self.eventsPerLumi[dataset]} > {self.lumiUpperLimit[dataset]} for {dataset}' for dataset in 
self.outputDatasetsToCheck])}\nhttps://cmsweb.cern.ch/reqmgr/view/splitting/{self.wf}\n" msg += "You are invited to check, while this is being taken care of by Comp-Ops.\n" msg += "This is an automated message from Comp-Ops.\n" @@ -1372,18 +1507,27 @@ def _warnRequestor(self) -> bool: self.wfController.logger.critical(msg) def _getAssistanceStatus(self) -> str: + """ + The function to get the assistance status from the tags + """ if self.assistanceTags: return "assistance-" + "-".join(sorted(self.assistanceTags)) return "assistance" def _setWorkflowToAssistance(self) -> None: + """ + The function to set the workflow to assistance + """ assistanceStatus = self._getAssistanceStatus() if "manual" not in self.wfToCheck.status or assistanceStatus != "assistance-recovery": self.newStatus = assistanceStatus def check(self) -> dict: + """ + The function to check the workflow + """ try: - self.checkor._checkPoint(self.checkor.logMsg["startCheck"].format(self.wf)) + self.checkor._checkPoint(f"Starting checkor with {self.wf}") if self._skipWorkflow() or self._updateWorkflowStatus(): return self._writeResponse() self.checkor._checkPoint("Checked workflow status", subLap=True) @@ -1410,10 +1554,10 @@ def check(self) -> dict: self._checkLumiSize() self.checkor._checkPoint("Checked lumi size", subLap=True) - self._checkRucioCounts() + self._checkRucioFileCounts() self.checkor_checkPoint("Checked Rucio count", subLap=True) - self._checkDBSCounts() + self._checkDBSFileCounts() self.checkor._checkPoint("DBS file count", subLap=True) if self._hasFileMismatch() and "recovering" not in self.assistanceTags: @@ -1425,8 +1569,6 @@ def check(self) -> dict: self.checkor._checkPoint(f"Done with {self.wf}") self._setRecord() - - ## TODO: lumi summary if re reco if self.isClosing: self._closeWorkflow() @@ -1435,17 +1577,20 @@ def check(self) -> dict: self._warnRequestor() self._setWorkflowToAssistance() - ## TODO: update jira + ## TODO: update JIRA tickets / migrate JiraClient return 
self._writeResponse() except Exception as error: - self.logger.error(self.logMsg["failedCheck"], self.wf) + self.logger.error("Failed on checking %s", self.wf) self.logger.error(str(error)) self.failed = True return self._writeResponse() def _writeResponse(self) -> dict: + """ + The function to write the check response + """ response = { "workflow": self.wfToCheck, "wf": self.wf, From 5fa27823c400efae1ce17f00bea3e27a20b643fe Mon Sep 17 00:00:00 2001 From: Amanda Goldani Date: Wed, 5 Jan 2022 04:42:20 +0100 Subject: [PATCH 05/17] Add dry-run flag and fix some issues --- src/python/Unified/Checkor.py | 446 +++++++++++++ .../python/Unified/Helpers/WorkflowCheckor.py | 628 ++++-------------- .../python/Unified/Helpers}/__init__.py | 0 {unified => src/python/Unified}/Injector.py | 0 .../python/Unified}/Invalidator.py | 0 {unified => src/python/Unified}/Rejector.py | 0 .../python/Unified/__init__.py | 0 7 files changed, 579 insertions(+), 495 deletions(-) create mode 100644 src/python/Unified/Checkor.py rename unified/Checkor.py => src/python/Unified/Helpers/WorkflowCheckor.py (65%) rename {unified => src/python/Unified/Helpers}/__init__.py (100%) rename {unified => src/python/Unified}/Injector.py (100%) rename {unified => src/python/Unified}/Invalidator.py (100%) rename {unified => src/python/Unified}/Rejector.py (100%) rename unified/.gitkeep => src/python/Unified/__init__.py (100%) diff --git a/src/python/Unified/Checkor.py b/src/python/Unified/Checkor.py new file mode 100644 index 000000000..eefc5a5e7 --- /dev/null +++ b/src/python/Unified/Checkor.py @@ -0,0 +1,446 @@ +import optparse +import random +from logging import Logger +from collections import defaultdict +from time import mktime, asctime, gmtime, struct_time + +from Utilities.DataTools import unnestList +from Utilities.Decorators import runWithMultiThreading +from Utilities.Logging import getLogger +from Utilities.ConfigurationHandler import ConfigurationHandler +from Cache.DataCacheLoader import 
DataCacheLoader +from Databases.Oracle.OracleClient import OracleClient +from Databases.Oracle.OracleDB import Workflow +from MongoControllers.CampaignController import CampaignController +from MongoControllers.CloseoutController import CloseoutController +from MongoControllers.WTCController import WTCController +from Services.ServicesChecker import ServicesChecker +from Services.McM.McMClient import McMClient +from Services.ReqMgr.ReqMgrReader import ReqMgrReader +from Services.ReqMgr.ReqMgrWriter import ReqMgrWriter +from Services.ACDC.ACDCReader import ACDCReader +from Services.DBS.DBSReader import DBSReader +from Services.DBS.DBSWriter import DBSWriter +from WorkflowMgmt.SiteController import SiteController +from WorkflowMgmt.UserLockChecker import UserLockChecker + +from Unified.Helpers.WorkflowCheckor import WorkflowCheckor + +from typing import Optional, Tuple, Union + + +class Checkor(OracleClient): + """ + __Checkor__ + General API for checking workflows in completed status + """ + + def __init__(self, logger: Optional[Logger] = None, **kwargs) -> None: + try: + super().__init__(self) + self.logger = logger or getLogger(self.__class__.__name__) + + self.options, self.specificWf = kwargs.get("options"), kwargs.get("specificWf") + if self.options is None: + self.options, self.specificWf = self.parseOptions() + + now = mktime(gmtime()) + self.timePoint = {"lap": now, "subLap": now, "start": now} + + self.unifiedConfiguration = ConfigurationHandler("config/unifiedConfiguration.json") + + self.acdcReader = ACDCReader() + self.dbs = {"reader": DBSReader(), "writer": DBSWriter()} + self.reqMgr = {"reader": ReqMgrReader(), "writer": ReqMgrWriter()} + + self.dataCacheLoader = DataCacheLoader() + + self.campaignController = CampaignController() + self.closeoutController = CloseoutController() + self.siteController = SiteController() + self.wtcController = WTCController() + + self.overrideWfs = None + self.onHoldWfs = None + self.bypassWfs = None + 
self.forceCompleteWfs = None + + self.jiraClient = None + self.mcmClient = None + + except Exception as error: + raise Exception(f"Error initializing Checkor\n{str(error)}") + + @staticmethod + def parseOptions() -> Tuple[dict, Optional[str]]: + """ + The function to parse the Checkor's options and specific workflow + :return: options and the specific workflow, if any + """ + parser = optparse.OptionParser() + + parser.add_option("--go", help="Does not check on duplicate process", action="store_true", default=False) + parser.add_option( + "--update", + help="Running workflows that have not yet reached completed", + action="store_true", + default=False, + ) + parser.add_option( + "--strict", help="Only running workflows that reached completed", action="store_true", default=False + ) + parser.add_option( + "--clear", help="Only running workflows that have reached custodial", action="store_true", default=False + ) + parser.add_option( + "--review", + help="Look at the workflows that have already completed and had required actions", + action="store_true", + default=False, + ) + parser.add_option( + "--recovering", + help="Look at the workflows that already have on-going acdc", + action="store_true", + default=False, + ) + parser.add_option("--manual", help='Look at the workflows in "manual"', action="store_true", default=False) + parser.add_option("--limit", help="The number of workflows to consider for checking", default=0, type=int) + parser.add_option("--threads", help="The number of threads for processing workflows", default=10, type=int) + parser.add_option( + "--fractionPass", help="The completion fraction that is permitted", default=0.0, type="float" + ) + parser.add_option("--lumiSize", help="Force the upper limit on lumisection", default=0, type="float") + parser.add_option( + "--ignoreFiles", help="Force ignoring dbs/rucio differences", action="store_true", default=False + ) + parser.add_option( + "--ignoreInvalid", help="Force ignoring high level of invalid 
files", action="store_true", default=False
+        )
+        parser.add_option(
+            "--ignoreDuplicates", help="Force ignoring lumi duplicates", action="store_true", default=False
+        )
+        parser.add_option("--tapeSizeLimit", help="The limit in size of all outputs", default=0, type=int)
+        parser.add_option("--html", help="Build the monitor page", action="store_true", default=False)
+        parser.add_option(
+            "--noReport", help="Prevent from making the error report", action="store_true", default=False
+        )
+        parser.add_option(
+            "--backfill", help="To run in test mode (only with backfill workflows)", action="store_true", default=False
+        )
+        parser.add_option(
+            "--dryRun",
+            help="To run in test mode (with production but without making any changes)",
+            action="store_true",
+            default=False,
+        )
+
+        options, args = parser.parse_args()
+        options = vars(options)
+
+        options["manual"] = not options.get("recovering")
+        actions = ["strict", "update", "clear", "review"]
+        if all(not options.get(option) for option in actions):
+            for option in actions + ["recovering", "manual"]:
+                options[option] = True
+
+        return options, args[0] if args else None
+
+    def _setWfs(self) -> None:
+        """
+        The function to set workflows to override, on hold, bypass and force complete.
+ """ + self.overrideWfs = self._getWorkflowsByAction("force-complete", details=True) + self.onHoldWfs = self._getWorkflowsByAction("hold") + self.bypassWfs = self._getWorkflowsByAction("bypass") + unnestList(self.overrideWfs) + self.forceCompleteWfs = ( + self.mcmClient.get("/restapi/requests/forcecomplete") if self.mcmClient is not None else {} + ) + + if self.jiraClient is not None: + # TODO: update wfs by JIRA tickets / migrate JiraClient + pass + + def _checkPoint(self, label: str = "", subLap: bool = False, now: struct_time = gmtime()) -> None: + """ + The function to get the check points + :label: log message label + :subLap: True if for sub lap, False o/w + :now: time now + """ + self.logger.info("Time check (%s) point at: %s", label, asctime(now)) + self.logger.info("Since start: %s [s]", now - self.timePoint.get("start", now)) + + self.logger.info( + "%s: %s [s]", + "Sub lap" if subLap else "Lap", + now - self.timePoint.get("subLap" if subLap else "lap", now), + ) + self.timePoint["subLap"] = now + if not subLap: + self.timePoint["lap"] = now + + def _filterBackfills(self, workflows: list) -> list: + """ + The function to filter only backfill workflows + :workflows: workflows + :return: backfill workflows + """ + if self.options.get("backfill"): + self.logger.info(self.logMsg["backfill"]) + return [wf for wf in workflows if "backfill" in wf.name.lower()] + return workflows + + def _getWorkflowsToCheck(self) -> list: + """ + The function to get the workflows to check + :return: workflows + """ + workflows = set() + + awayWfs = self.session.query(Workflow).filter(Workflow.status == "away").all() + assistanceWfs = self.session.query(Workflow).filter(Workflow.status.startswith("assistance")).all() + completedWfs = self.reqMgr["reader"].getWorkflowsByStatus("completed") + + if self.options.get("strict"): + self.logger.info("Strict option is on: checking workflows that freshly completed") + workflows.update(filter(lambda wf: wf.name in completedWfs, awayWfs)) + 
+ if self.options.get("update"): + self.logger.info("Update option is on: checking workflows that have not completed yet") + workflows.update(filter(lambda wf: wf.name not in completedWfs, awayWfs)) + + if self.options.get("clear"): + self.logger.info("Clear option is on: checking workflows that are ready to toggle closed-out") + workflows.update(filter(lambda wf: "custodial" in wf.status, assistanceWfs)) + + if self.options.get("review"): + nonCustodialWfs = [*filter(lambda wf: "custodial" not in wf.status, assistanceWfs)] + if self.options.get("recovering"): + self.logger.info( + "Review-recovering option is on: checking only the workflows that had been already acted on" + ) + workflows.update(filter(lambda wf: "manual" not in wf.status, nonCustodialWfs)) + if self.options.get("manual"): + self.logger.info("Review-manual option is on: checking the workflows to be acted on") + workflows.update(filter(lambda wf: "manual" in wf.status, nonCustodialWfs)) + + return list(workflows) + + def _getWorkflowsByAction(self, action: str, details: bool = False) -> Union[list, dict]: + """ + The function to get workflows for a given user action + :action: user action + :details: if True return dict of workflows by user. 
O/w return workflows names
+        :return: workflows
+        """
+        workflows = defaultdict(set)
+
+        allowedUsers = [*self.unifiedConfiguration.get("allowed_bypass", {}).keys()]
+
+        userWfs = (
+            self.wtcController.getHold()
+            if action == "hold"
+            else self.wtcController.getBypass()
+            if action == "bypass"
+            else self.wtcController.getForce()
+            if action == "force-complete"
+            else {}
+        )
+        for user, wf in userWfs.items():
+            if user not in allowedUsers:
+                self.logger.info("%s is not allowed to %s", user, action)
+                continue
+
+            self.logger.info("%s allowed to %s %s", user, action, wf)
+            workflows[user].add(wf)
+
+        if details:
+            return workflows
+        return unnestList(workflows)
+
+    def _filterMaxNumberOfWorkflows(self, workflows: list) -> list:
+        """
+        The function to filter the max number of workflows per round
+        :param workflows: list of workflows
+        :return: filtered list of workflows
+        """
+        maxPerRound = self.unifiedConfiguration.get("max_per_round", {}).get("checkor")
+        if self.options.get("limit"):
+            self.logger.info("Command line to limit workflows to %s", self.options.get("limit"))
+            maxPerRound = self.options.get("limit")
+
+        if maxPerRound and not self.specificWf:
+            self.logger.info("Limiting workflows to %s this round", maxPerRound)
+
+            workflows = self._rankWorkflows(workflows)
+            if self.options.get("update"):
+                random.shuffle(workflows)
+            return workflows[:maxPerRound]
+
+        return workflows
+
+    def _rankWorkflows(self, workflows: list) -> list:
+        """
+        The function to rank the workflows by their priority
+        :param workflows: workflows
+        :return: sorted workflows
+        """
+        completedWfs = self.reqMgr["reader"].getWorkflowsByStatus("completed", details=True)
+        completedWfs = sorted(completedWfs, key=lambda wf: wf.get("RequestPriority", 0))
+        completedWfs = [wf.get("RequestName") for wf in completedWfs]
+
+        return sorted(workflows, key=lambda wf: completedWfs.index(wf) if wf in completedWfs else 0, reverse=True)
+
+    def _updateWorkflowsRecords(self, wfsRecords: list) -> None:
+        """
+        The function to update the workflows records
+        :param wfsRecords: records
+        """
+        for wf in wfsRecords:
+            if wf.get("record"):
+                if self.options.get("dryRun"):
+                    self.logger.debug("Dry run: updated workflow %s records to %s", wf.get("wf"), wf.get("record"))
+                else:
+                    self.closeoutController.set(wf.get("wf"), wf.get("record"))
+
+    def _updateWorkflowsStatus(self, wfsStatus: list) -> None:
+        """
+        The function to update the workflows status
+        :param wfsStatus: status
+        """
+        for wf in wfsStatus:
+            if wf.get("newStatus"):
+                newStatus = wf.get("newStatus")
+                wf["workflow"].status = newStatus
+
+                if self.options.get("dryRun"):
+                    self.logger.debug("Dry run: updated workflow %s status to %s", wf.get("wf"), newStatus)
+                else:
+                    self.session.commit()
+
+                if newStatus == "close":
+                    if self.options.get("dryRun"):
+                        self.logger.debug("Dry run: cleaned workflow %s because set as close", wf.get("wf"))
+                    else:
+                        self.closeoutController.clean(wf.get("wf"))
+
+                if self.mcmClient is not None and wf.get("mcmForceComplete"):
+                    if self.options.get("dryRun"):
+                        self.logger.debug(
+                            "Dry run: cleaned workflow %s prep ids because of force-complete", wf.get("wf")
+                        )
+                    else:
+                        for prepId in wf.get("prepIds"):
+                            self.mcmClient.clean(f"/restapi/requests/forcecomplete/{prepId}")
+
+    def _checkExecutionTime(self, nWfs: int, now: Optional[float] = None) -> None:
+        """
+        The function to check the execution time of the module
+        :param nWfs: number of workflows
+        :param now: time now as a Unix timestamp (defaults to the current time at call, not at import)
+        """
+        if nWfs:
+            avgExecutionTime = ((now if now is not None else mktime(gmtime())) - self.timePoint.get("start")) / nWfs
+            self.logger.info("Average time spend per workflow: %s s", avgExecutionTime)
+
+            if avgExecutionTime > 120:
+                self.logger.critical("Checkor took %s [s] per workflow", avgExecutionTime)
+
+    def _countAssistanceWorkflowsByStatus(self) -> dict:
+        """
+        The function to count the number of assistance workflows by status
+        :return: count of workflows by status
+        """
+        status = defaultdict(int)
+        for wf in
self.session.query(Workflow).filter(Workflow.status.startswith("assistance")).all():
+            status[wf.status] += 1
+
+        return status
+
+    def _writeSummary(self) -> None:
+        """
+        The function to write a summary of checkor
+        """
+        if not self.specificWf:
+            msg = ""
+            if self.options.get("strict"):
+                msg += "Workflows which just got in completed were looked at. Look in manual.\n"
+            if self.options.get("update"):
+                msg += "Workflows that are still running (and not completed) got looked at.\n"
+            if self.options.get("clear"):
+                msg += "Workflows that just need to close-out were verified. Nothing too new a-priori.\n"
+            if self.options.get("review"):
+                msg += "Workflows under intervention got review.\n"
+
+            msg += "\n".join(
+                [f"{count} in status {status}" for status, count in self._countAssistanceWorkflowsByStatus().items()]
+            )
+
+            self.logger.info(msg)
+
+    def go(self) -> bool:
+        """
+        The function to check if the checkor can go
+        :return: True if it can go, False o/w
+        """
+        try:
+            userLockChecker = UserLockChecker()
+            servicesChecker = ServicesChecker(softServices=["mcm", "wtc"])
+
+            if not userLockChecker.isLocked() and servicesChecker.check():
+                self.mcmClient = McMClient() if servicesChecker.status.get("mcm") else None
+                self.jiraClient = (
+                    None  # JiraClient() if servicesChecker.status.get("jira") else None TODO: migrate JiraClient
+                )
+                return True
+
+            return False
+
+        except Exception as error:
+            self.logger.error("Failed to check if Checkor can go")
+            self.logger.error(str(error))
+ + def _check(self, wfsToCheck: list) -> None: + """ + The wrapper function to check the workflows + :param wfsToCheck: workflows to check + """ + + @runWithMultiThreading(mtParam="wfsToCheck", maxThreads=len(wfsToCheck)) + def _checkWorkflow(self, wfsToCheck: list) -> dict: + return WorkflowCheckor(wfsToCheck, checkor=self).check() + + checkResponses = _checkWorkflow(wfsToCheck) + self._updateWorkflowsRecords(checkResponses) + self._updateWorkflowsStatus(checkResponses) + + self._checkExecutionTime(len(wfsToCheck)) + + +if __name__ == "__main__": + options, specificWf = Checkor.parseOptions() + checkor = Checkor(options=options, specificWf=specificWf) + if checkor.go(): + checkor.run() diff --git a/unified/Checkor.py b/src/python/Unified/Helpers/WorkflowCheckor.py similarity index 65% rename from unified/Checkor.py rename to src/python/Unified/Helpers/WorkflowCheckor.py index 3c434cfa6..a72e46814 100644 --- a/unified/Checkor.py +++ b/src/python/Unified/Helpers/WorkflowCheckor.py @@ -1,421 +1,21 @@ import os import re -import optparse -import random -import math from logging import Logger -from collections import defaultdict -from time import mktime, asctime, gmtime, struct_time +from time import mktime, asctime, gmtime -from Utilities.DataTools import unnestList, flattenDictKeys -from Utilities.Decorators import runWithMultiThreading +from Utilities.DataTools import flattenDictKeys from Utilities.Logging import getLogger -from Utilities.ConfigurationHandler import ConfigurationHandler -from Cache.DataCacheLoader import DataCacheLoader -from Databases.Oracle.OracleClient import OracleClient from Databases.Oracle.OracleDB import Workflow -from MongoControllers.CampaignController import CampaignController -from MongoControllers.CloseoutController import CloseoutController -from MongoControllers.WTCController import WTCController -from Services.ServicesChecker import ServicesChecker -from Services.McM.McMClient import McMClient -from Services.ReqMgr.ReqMgrReader 
import ReqMgrReader -from Services.ReqMgr.ReqMgrWriter import ReqMgrWriter -from Services.ACDC.ACDCReader import ACDCReader -from Services.DBS.DBSReader import DBSReader -from Services.DBS.DBSWriter import DBSWriter from Services.Rucio.RucioReader import RucioReader -from WorkflowMgmt.SiteController import SiteController -from WorkflowMgmt.UserLockChecker import UserLockChecker from WorkflowMgmt.WorkflowController import WorkflowController from WorkflowMgmt.WorkflowStatusEnforcer import WorkflowStatusEnforcer -from typing import Optional, Tuple, Union - - -class Checkor(OracleClient): - """ - __Checkor__ - General API for checking workflows in completed status - """ - - def __init__(self, logger: Optional[Logger] = None, **kwargs) -> None: - try: - super().__init__(self) - self.logger = logger or getLogger(self.__class__.__name__) - - self.options, self.specificWf = kwargs.get("options"), kwargs.get("specificWf") - if self.options is None: - self.options, self.specificWf = self.parseOptions() - - now = mktime(gmtime()) - self.timePoint = {"lap": now, "subLap": now, "start": now} - - self.unifiedConfiguration = ConfigurationHandler("config/unifiedConfiguration.json") - - self.acdcReader = ACDCReader() - self.dbs = {"reader": DBSReader(), "writer": DBSWriter()} - self.reqMgr = {"reader": ReqMgrReader(), "writer": ReqMgrWriter()} - - self.dataCacheLoader = DataCacheLoader() - - self.campaignController = CampaignController() - self.closeoutController = CloseoutController() - self.siteController = SiteController() - self.wtcController = WTCController() - - self.overrideWfs = None - self.onHoldWfs = None - self.bypassWfs = None - self.forceCompleteWfs = None - - self.jiraClient = None - self.mcmClient = None - - except Exception as error: - raise Exception(f"Error initializing Checkor\n{str(error)}") - - @staticmethod - def parseOptions() -> Tuple[dict, Optional[str]]: - """ - The function to parse the Checkor's options and specific workflow - :return: options and the 
specific workflow, if any - """ - parser = optparse.OptionParser() - - parser.add_option("--go", help="Does not check on duplicate process", action="store_true", default=False) - parser.add_option( - "--update", - help="Running workflows that have not yet reached completed", - action="store_true", - default=False, - ) - parser.add_option( - "--strict", help="Only running workflows that reached completed", action="store_true", default=False - ) - parser.add_option( - "--clear", help="Only running workflows that have reached custodial", action="store_true", default=False - ) - parser.add_option( - "--review", - help="Look at the workflows that have already completed and had required actions", - action="store_true", - default=False, - ) - parser.add_option( - "--recovering", - help="Look at the workflows that already have on-going acdc", - action="store_true", - default=False, - ) - parser.add_option("--manual", help='Look at the workflows in "manual"', action="store_true", default=False) - parser.add_option("--limit", help="The number of workflows to consider for checking", default=0, type=int) - parser.add_option("--threads", help="The number of threads for processing workflows", default=10, type=int) - parser.add_option( - "--fractionPass", help="The completion fraction that is permitted", default=0.0, type="float" - ) - parser.add_option("--lumiSize", help="Force the upper limit on lumisection", default=0, type="float") - parser.add_option( - "--ignoreFiles", help="Force ignoring dbs/rucio differences", action="store_true", default=False - ) - parser.add_option( - "--ignoreInvalid", help="Force ignoring high level of invalid files", action="store_true", default=False - ) - parser.add_option( - "--ignoreDuplicates", help="Force ignoring lumi duplicates", action="store_true", default=False - ) - parser.add_option("--tapeSizeLimit", help="The limit in size of all outputs", default=0, type=int) - parser.add_option("--html", help="Build the the monitor page", 
action="store_true", default=False) - parser.add_option( - "--noReport", help="Prevent from making the error report", action="store_true", default=False - ) - parser.add_option( - "--backfill", help="To run in test mode (only with backfill workflows)", action="store_true", default=False - ) - - options, args = parser.parse_args() - options = vars(options) - - options["manual"] = not options.get("recovering") - actions = ["strict", "update", "clear", "review"] - if all(not options.get(option) for option in actions): - for option in actions + ["recovering", "manual"]: - options[option] = True - - return options, args[0] if args else None - - def _setWfs(self) -> None: - """ - The function to set workflows to override, on hold, bypass and force complete. - """ - self.overrideWfs = self._getWorkflowsByAction("force-complete", details=True) - self.onHoldWfs = self._getWorkflowsByAction("hold") - self.bypassWfs = self._getWorkflowsByAction("bypass") + unnestList(self.overrideWfs) - self.forceCompleteWfs = self.mcmClient.get("/restapi/requests/forcecomplete") if self.mcmClient is not None else {} - - if self.jiraClient is not None: - # TODO: update wfs by JIRA tickets / migrate JiraClient - pass - - def _checkPoint(self, label: str = "", subLap: bool = False, now: struct_time = gmtime()) -> None: - """ - The function to get the check points - :label: log message label - :subLap: True if for sub lap, False o/w - :now: time now - """ - self.logger.info("Time check (%s) point at: %s", label, asctime(now)) - self.logger.info("Since start: %s [s]", now - self.timePoint.get("start", now)) - - self.logger.info( - "%s: %s [s]", - "Sub lap" if subLap else "Lap", - now - self.timePoint.get("subLap" if subLap else "lap", now), - ) - self.timePoint["subLap"] = now - if not subLap: - self.timePoint["lap"] = now - - def _filterBackfills(self, workflows: list) -> list: - """ - The function to filter only backfill workflows - :workflows: workflows - :return: backfill workflows - """ - if 
self.options.get("backfill"): - self.logger.info(self.logMsg["backfill"]) - return [wf for wf in workflows if "backfill" in wf.name.lower()] - return workflows - - def _getWorkflowsToCheck(self) -> list: - """ - The function to get the workflows to check - :return: workflows - """ - workflows = set() - - awayWfs = self.session.query(Workflow).filter(Workflow.status == "away").all() - assistanceWfs = self.session.query(Workflow).filter(Workflow.status.startswith("assistance")).all() - completedWfs = self.reqMgr["reader"].getWorkflowsByStatus("completed") - - if self.options.get("strict"): - self.logger.info("Strict option is on: checking workflows that freshly completed") - workflows.update(filter(lambda wf: wf.name in completedWfs, awayWfs)) - - if self.options.get("update"): - self.logger.info("Update option is on: checking workflows that have not completed yet") - workflows.update(filter(lambda wf: wf.name not in completedWfs, awayWfs)) - - if self.options.get("clear"): - self.logger.info("Clear option is on: checking workflows that are ready to toggle closed-out") - workflows.update(filter(lambda wf: "custodial" in wf.status, assistanceWfs)) - - if self.options.get("review"): - nonCustodialWfs = [*filter(lambda wf: "custodial" not in wf.status, assistanceWfs)] - if self.options.get("recovering"): - self.logger.info("Review-recovering option is on: checking only the workflows that had been already acted on") - workflows.update(filter(lambda wf: "manual" not in wf.status, nonCustodialWfs)) - if self.options.get("manual"): - self.logger.info("Review-manual option is on: checking the workflows to be acted on") - workflows.update(filter(lambda wf: "manual" in wf.status, nonCustodialWfs)) - - return list(workflows) - - def _getWorkflowsByAction(self, action: str, details: bool = False) -> Union[list, dict]: - """ - The function to get workflows for a given user action - :action: user action - :details: if True return dict of workflows by user. 
O/w return workflows names - :return: workflows - """ - workflows = defaultdict(set) - - allowedUsers = [*self.unifiedConfiguration.get("allowed_bypass", {}).keys()] - - userWfs = ( - self.wtcController.getHold() - if action == "hold" - else self.wtcController.getBypass() - if action == "bypass" - else self.wtcController.getForce() - if action == "force-complete" - else {} - ) - for user, wf in userWfs.items(): - if user not in allowedUsers: - self.logger.info("%s is not allowed to %s", user, action) - continue - - self.logger.info("%s allowed to %s %s", user, action, wf) - workflows[user].add(wf) - - if details: - return workflows - return unnestList(workflows) - - def _filterMaxNumberOfWorkflows(self, workflows: list) -> list: - """ - The function to filter the max number of workflows per round - :param workflows: list of workflows - :return: filtered list of workflows - """ - maxPerRound = self.unifiedConfiguration.get("max_per_round", {}).get("checkor") - if self.options.get("limit"): - self.logger.info("Command line to limit workflows to %s", self.options.get("limit")) - maxPerRound = self.options.get("limit") - - if maxPerRound and not self.specific: - self.logger.info("Limiting workflows to %s this round", maxPerRound) - - workflows = self._rankWorkflows(workflows) - if self.option.get("update"): - random.shuffle(workflows) - return workflows[:maxPerRound] - - return workflows - - def _rankWorkflows(self, workflows: list) -> list: - """ - The function to rank the workflows by their priority - :param workflows: workflows - :return: sorted workflows - """ - completedWfs = self.reqMgr["reader"].getWorkflowsByStatus("completed", details=True) - completedWfs = sorted(completedWfs, key=lambda wf: wf.get("RequestPriority", 0)) - completedWfs = [wf.get("RequestName") for wf in completedWfs] - - return sorted(workflows, key=lambda wf: completedWfs.index(wf) if wf in completedWfs else 0, reverse=True) - - def _updateWorkflowsRecords(self, wfsRecords: dict) -> None: - 
""" - The function to update the workflows records - :param wfsRecords: records - """ - for wf in wfsRecords: - if wf.get("record"): - self.closeoutController.set(wf.get("wf"), wf.get("record")) - - def _updateWorkflowsStatus(self, wfsStatus: list) -> None: - """ - The function to update the workflows status - :param wfsStatus: status - """ - for wf in wfsStatus: - if wf.get("newStatus"): - newStatus = wf.get("newStatus") - wf["workflow"].status = newStatus - self.session.commit() - - if newStatus == "close": - self.closeoutController.clean(wf.get("wf")) - if self.mcmClient is not None and wf.get("mcmForceComplete"): - for prepId in wf.get("prepIds"): - self.mcmClient.clean(f"/restapi/requests/forcecomplete/{prepId}") - - def _checkExecutionTime(self, nWfs: int, now: struct_time = mktime(gmtime())) -> None: - """ - The function to check the execution time of the module - :param nWfs: number of workflows - :param now: time now - """ - if nWfs: - avgExecutionTime = (now - self.timePoint.get("start")) / nWfs - self.logger.info("Average time spend per workflow: %s s", avgExecutionTime) - - if avgExecutionTime > 120: - self.logger.critical("Checkor took %s [s] per workflow", avgExecutionTime) - - def _countAssistanceWorkflowsByStatus(self) -> dict: - """ - The function to count the number of assistance workflows by status - :return: count of workflows by status - """ - status = defaultdict(int) - for wf in self.session.query(Workflow).filter(Workflow.status.startswith("assistance")).all(): - status[wf.status] += 1 - - return status - - def _writeSummary(self) -> None: - """ - The function to write a summary of checkor - """ - if not self.specificWf: - msg = "" - if self.options.get("strict"): - msg += "Workflows which just got in completed were looked at. 
Look in manual.\n" - if self.options.get("update"): - msg += "Workflows that are still running (and not completed) got looked at.\n" - if self.options.get("clear"): - msg += "Workflows that just need to close-out were verified. Nothing too new a-priori.\n" - if self.options.get("review"): - msg += "Workflows under intervention got review.\n" - - msg += "\n".join([f"{count} in status {status}" for status, count in self._countAssistanceWorkflowsByStatus()]) - - self.logger.info(msg) - - def go(self) -> bool: - """ - The function to check if the checkor can go - :return: True if it can go, False o/w - """ - try: - userLockChecker = UserLockChecker() - servicesChecker = ServicesChecker(softServices=["mcm", "wtc"]) - - if not userLockChecker.isLocked() and servicesChecker.check(): - self.mcmClient = McMClient() if servicesChecker.status.get("mcm") else None - self.jiraClient = None #JiraClient() if servicesChecker.status.get("jira") else None TODO: migrate JiraClient - return True - - return False - - except Exception as error: - self.logger.error("Failed to check if Checkor can go") - self.logger.error(str(error)) - - def run(self) -> None: - """ - The function to run checkor - """ - try: - self._setWfs() - - wfsToCheck = self._filterBackfills(self._getWorkflowsToCheck()) - random.shuffle(wfsToCheck) - - self.logger.info("Considering %s workflows (before any limitation)", len(wfsToCheck)) - - wfsToCheck = self._filterMaxNumberOfWorkflows(wfsToCheck) - self._check(wfsToCheck) - - self._writeSummary() - - except Exception as error: - self.logger.error("Failed to run checkor") - self.logger.error(str(error)) - - def _check(self, wfsToCheck: list) -> None: - """ - The wrapper function to check the workflows - :param wfsToCheck: workflows to check - """ - - @runWithMultiThreading(mtParam="wfsToCheck", maxThreads=len(wfsToCheck)) - def _checkWorkflow(self, wfsToCheck: list) -> dict: - return WorkflowCheckor(wfsToCheck, checkor=self).check() - - checkResponses = 
_checkWorkflow(wfsToCheck) - self._updateWorkflowsRecords(checkResponses) - self._updateWorkflowsStatus(checkResponses) - - self._checkExecutionTime(len(wfsToCheck)) +from typing import Optional, Tuple class WorkflowCheckor(object): """ __WorkflowCheckor__ - General API for checking a fiven workflow + General API for checking a given workflow """ def __init__(self, wfToCheck: Workflow, logger: Optional[Logger] = None, **kwargs) -> None: @@ -469,10 +69,10 @@ def _skipWorkflow(self) -> bool: :return: True if workflow should be skipped, False o/w """ if ( - (self.checkor.specific and self.checkor.specific not in self.wf) + (self.checkor.specificWf and self.checkor.specificWf not in self.wf) or os.path.isfile(".checkor_stop") or self.wfToCheck.wm_status in ["assigned", "acquired"] - or (self.wfToCheck.wm_status != "completed" and self.wf not in self.checkor.exceptions) + or self.wfToCheck.wm_status != "completed" ): self.logger.info("Skipping workflow %s", self.wf) return True @@ -484,7 +84,7 @@ def _setWorkflowToClose(self) -> bool: The function to check if a given workflow should be closed :return: True if workflow should be closed, False o/w """ - if self.wfToCheck.wm_status in ["closed-out", "announced"] and self.wf not in self.checkor.exceptions: + if self.wfToCheck.wm_status in ["closed-out", "announced"]: self.logger.info("%s is already %s, setting as close", self.wf, self.wfToCheck.wm_status) self.newStatus = "close" return True @@ -535,7 +135,7 @@ def _setBypassChecks(self) -> None: """ for bypassWf in self.checkor.bypassWfs: if bypassWf == self.wf: - self.logger.info(self.logMsg["bypassCheck"], self.wf, bypassWf) + self.logger.info("Bypassing checks %s because of keyword %s", self.wf, bypassWf) self.bypassChecks = True self.bypassChecks = False @@ -597,7 +197,7 @@ def _setNewOnHoldStatus(self, family: list) -> bool: :param family: workflow family :return: if set new status """ - onHoldTimeout = self.unifiedConfiguration.get("onhold_timeout") + onHoldTimeout = 
self.checkor.unifiedConfiguration.get("onhold_timeout") minFamilyCompletedDelay = self._getMinFamilyCompletedDelay(family, self.now) self.logger.info("On hold since %s, timeout at %s", minFamilyCompletedDelay, onHoldTimeout) @@ -635,19 +235,21 @@ def _getTiersWithNoCheckAndCustodial(self) -> Tuple[list, list]: The function yo get the tiers with no check and custodial :return: list of tiers with no check, and list of tiers with no custodial """ - tiersWithNoCheck = set(self.unifiedConfiguration.get("tiers_with_no_check")) + tiersWithNoCheck = set(self.checkor.unifiedConfiguration.get("tiers_with_no_check")) tiersWithNoCustodial = set( - self.unifiedConfiguration.get("tiers_with_no_custodial") if not self.wfController.request.isRelVal() else [] + self.checkor.unifiedConfiguration.get("tiers_with_no_custodial") if not self.wfController.request.isRelVal() else [] ) - for campaign in self._getWorkflowCampaigns(self.wfController).values(): + campaigns = self.campaigns or self._setWorkflowCampaigns() + + for campaign in campaigns.values(): if ( - campaign in self.campaignController.campaigns - and "custodial_override" in self.campaignController.campaigns[campaign] + campaign in self.checkor.campaignController.campaigns + and "custodial_override" in self.checkor.campaignController.campaigns[campaign] ): - if isinstance(self.campaignController.campaigns[campaign]["custodial_override"], list): - tiersWithNoCheck -= self.campaignController.campaigns[campaign]["custodial_override"] - elif self.campaignController.campaigns[campaign]["custodial_override"] == "notape": + if isinstance(self.checkor.campaignController.campaigns[campaign]["custodial_override"], list): + tiersWithNoCheck -= self.checkor.campaignController.campaigns[campaign]["custodial_override"] + elif self.checkor.campaignController.campaigns[campaign]["custodial_override"] == "notape": tiersWithNoCustodial = set( sorted([dataset.split("/")[-1] for dataset in self.wfController.request.get("OutputDatasets")]) ) @@ 
-682,9 +284,9 @@ def _skipFamilyWorkflow(self, wfSchema: dict) -> bool: :return: True if to skip, False o/w """ return ( - wfSchema.get("RequestType") != "Ressubmission" - or wfSchema.get("RequestDate") < wfSchema.request.get("RequestDate") - or wfSchema.get("PrepID") != wfSchema.request.get("PrepID") + wfSchema.get("RequestType") != "Resubmission" + or wfSchema.get("RequestDate") < self.wfController.request.get("RequestDate") + or wfSchema.get("PrepID") != self.wfController.request.get("PrepID") or wfSchema.get("RequestStatus") == None ) @@ -814,7 +416,11 @@ def _checkWorkflowFamily(self) -> None: if (self.bypassChecksByMcMForceComplete or self.bypassChecksByUserForceComplete) and len(self.acdcs.get("healthy")): self.wfController.logger.info("%s is being force completed while recovering", self.wf) self.wfController.logger.critical("The workflow %s was force completed", self.wf) - WorkflowStatusEnforcer(self.wf).forceComplete() + + if self.checkor.options.get("dryRun"): + self.checkor.logger.debug("Dry run: force completed workflow %s", self.wf) + else: + WorkflowStatusEnforcer(self.wf).forceComplete() if self.acdcs.get("failed"): self.logger.critical("For %s, ACDC %s failed", self.wf, ", ".join(self.acdcs.get("failed"))) @@ -842,19 +448,20 @@ def _getFractionDumping(self) -> float: fractionDamping = min( 0.01 * ( - max(wfCompletedDelay - self.unifiedConfiguration.get("damping_fraction_pass"), 0) - / self.unifiedConfiguration.get("damping_fraction_pass_rate") + max(wfCompletedDelay - self.checkor.unifiedConfiguration.get("damping_fraction_pass"), 0) + / self.checkor.unifiedConfiguration.get("damping_fraction_pass_rate") ), - self.unifiedConfiguration.get("damping_fraction_pass_max") / 100.0, + self.checkor.unifiedConfiguration.get("damping_fraction_pass_max") / 100.0, ) self.logger.info("The passing fraction could be reduced to %s given it has been in for long", fractionDamping) return fractionDamping - def _setWorkflowCampaigns(self) -> None: + def 
_setWorkflowCampaigns(self) -> dict: """ The function to set the campaigns + :return: campaigns """ campaigns = {} wfCampaigns = self.wfController.request.getCampaigns(details=False) @@ -867,6 +474,8 @@ def _setWorkflowCampaigns(self) -> None: self.logger.info("Campaigns: %s", campaigns) self.campaigns = campaigns + return campaigns + def _setDatasetsFractionsToAnnounce(self) -> None: """ The function to set the dataset fractions to announce @@ -877,49 +486,73 @@ def _setDatasetsFractionsToAnnounce(self) -> None: self.fractions["announce"] = fractionsAnnounce + def _getFractionPassByCommandLine(self) -> float: + """ + The function to get the fraction pass by the command line + :return: fractions to pass if any, else None + """ + if self.checkor.options.get("fractionPass"): + fractionPass = self.checkor.options.get("fractionPass") + self.wfController.logger.info("Overriding fraction to pass to %s by command line", fractionPass) + return fractionPass + + def _getFractionPassByCampaignRequirement(self, campaign: str, dataset: str) -> float: + """ + The function to get the fraction pass by the campaign requirement + :param campaign: campaign name + :param dataset: dataset name + :return: fractions to pass if any, else None + """ + fractionPass = None + campaignFractionPass = self.checkor.campaignController.campaigns.get(campaign, {}).get("fractionpass") + + if campaignFractionPass: + if isinstance(campaignFractionPass, dict): + tier = dataset.split("/")[-1] + priority = str(self.wfController.request.get("RequestPriority")) + fractionPass = campaignFractionPass.get("all") + if campaignFractionPass.get(tier): + tierFractionPass = campaignFractionPass.get(tier) + if isinstance(tierFractionPass, dict): + fractionPass = tierFractionPass.get("all") + for key, passValue in tierFractionPass.items(): + if dataset.startswith(key): + fractionPass = passValue + else: + fractionPass = tierFractionPass + + if campaignFractionPass.get(priority): + fractionPass = 
campaignFractionPass.get("priority") + + else: + fractionPass = campaignFractionPass + + if fractionPass is not None: + self.wfController.logger.info( + "Overriding fraction to pass to %s by campaign requirement", fractionPass + ) + return fractionPass + + def _setDatasetsFractionsToPass(self) -> None: """ The function to set the dataset fractions to pass """ fractionsPass = {} - defaultPass = self.unifiedConfiguration.get("default_fraction_pass") + defaultPass = self.checkor.unifiedConfiguration.get("default_fraction_pass") campaigns = self.campaigns or self._setWorkflowCampaigns() for dataset in self.outputDatasetsToCheck: campaign = campaigns.get(dataset) - if self.options.get("fractionPass"): - fractionsPass[dataset] = self.options.get("fractionPass") - self.wfController.logger.info( - "Overriding fraction to %s for %s by command line", fractionsPass[dataset], dataset - ) - elif self.campaignController.campaigns.get(campaign, {}).get("fractionpass"): - fractionPass = self.campaignController.campaigns.get(campaign).get("fractionpass") - if isinstance(fractionPass, dict): - tier = dataset.split("/")[-1] - priority = str(self.wfController.request.get("RequestPriority")) - fractionsPass[dataset] = fractionPass.get("all", defaultPass) - if fractionPass.get(tier): - tierFractionPass = fractionPass.get(tier) - if isinstance(tierFractionPass, dict): - fractionsPass[dataset] = tierFractionPass.get("all", defaultPass) - for key, passValue in tierFractionPass.items(): - if dataset.startswith(key): - fractionsPass[dataset] = passValue - else: - fractionsPass[dataset] = tierFractionPass - if fractionPass.get(priority): - fractionsPass[dataset] = fractionPass.get("priority") - else: - fractionsPass[dataset] = fractionPass - self.wfController.logger.info( - "Overriding fraction to %s for %s by campaign requirement", fractionsPass[dataset], dataset - ) - else: + fractionsPass[dataset] = self._getFractionPassByCommandLine() + if not fractionsPass[dataset]: + 
fractionsPass[dataset] = self._getFractionPassByCampaignRequirement(campaign, dataset) + if not fractionsPass[dataset]: fractionsPass[dataset] = defaultPass - for key, passValue in self.unifiedConfiguration.get("pattern_fraction_pass").items(): + for key, passValue in self.checkor.unifiedConfiguration.get("pattern_fraction_pass").items(): if key in dataset: fractionsPass[dataset] = passValue self.wfController.logger.info("Overriding fraction to %s for %s by dataset key", passValue, dataset) @@ -945,14 +578,14 @@ def _setDatasetsFractionsToTruncateRecovery(self) -> None: ) / (weightPass + weightFull * weightUnderPass) campaign = campaigns.get(dataset) - if self.campaignController.campaigns.get(campaign, {}).get("truncaterecovery"): - fractionsTruncateRecovery[dataset] = self.campaignController.campaigns.get(campaign).get( + if self.checkor.campaignController.campaigns.get(campaign, {}).get("truncaterecovery"): + fractionsTruncateRecovery[dataset] = self.checkor.campaignController.campaigns.get(campaign).get( "truncaterecovery" ) self.wfController.logger.info( "Allowed to truncate recovery of %s over %.2f by campaign requirement", dataset, - self.campaignController.campaigns.get(campaign).get("truncaterecovery"), + self.checkor.campaignController.campaigns.get(campaign).get("truncaterecovery"), ) if fractionsTruncateRecovery[dataset] < self.fractions["pass"][dataset]: @@ -970,12 +603,12 @@ def _setStatisticsThresholds(self) -> None: fractionDamping = self._getFractionDumping() for dataset, value in self.fractions["pass"].items(): - if value != 1.0 and fractionDamping and self.unifiedConfiguration.get("timeout_for_damping_fraction"): + if value != 1.0 and fractionDamping and self.checkor.unifiedConfiguration.get("timeout_for_damping_fraction"): self.fractions["pass"][dataset] -= fractionDamping self.fractions["truncate"][dataset] -= fractionDamping - if self.acdsWfs.get("order") > self.unifiedConfiguration.get("acdc_rank_for_truncate"): - 
self.wfController.logger.info("Truncating at pass threshold because of ACDC at rank %d", self.acdsWfs.get("order")) + if self.checkor.acdsWfs.get("order") > self.checkor.unifiedConfiguration.get("acdc_rank_for_truncate"): + self.wfController.logger.info("Truncating at pass threshold because of ACDC at rank %d", self.checkor.acdsWfs.get("order")) self.fractions["truncate"][dataset] = self.fractions["pass"][dataset] self._updateFractionsToPassAndToTruncateRecovery() @@ -995,7 +628,7 @@ def _updateFractionsToPassAndToTruncateRecovery(self) -> None: descendingPass *= self.fractions["pass"].get(ancestor, 1.0) descendingTruncate *= self.fractions["truncate"].get(ancestor, 1.0) - if self.unifiedConfiguration.get("cumulative_fraction_pass"): + if self.checkor.unifiedConfiguration.get("cumulative_fraction_pass"): self.fractions["pass"][dataset] = descendingPass self.fractions["truncate"][dataset] = descendingTruncate self.logger.info( @@ -1012,7 +645,7 @@ def _getExpectedEvents(self) -> float: :return: number of expected events """ if self.wfController.request.get("RequestType") in ["TaskChain", "StepChain"]: - return self.wfController.getRequestNumEvents() + return self.wfController.request.getRequestNumEvents() expectedEvents = self.wfController.request.get("TotalInputEvents") if expectedEvents is None: @@ -1092,8 +725,10 @@ def _setPassStatisticsCheckOverCompletion(self) -> None: """ The function to set the pass statistics to over complete """ - defaultFractionOverdoing = self.unifiedConfiguration.get("default_fraction_overdoing") - self.passStatsCheckOverCompletion = dict([(dataset, value >= defaultFractionOverdoing) for dataset, value in self.percentCompletions.items()]) + defaultFractionOverdoing = self.checkor.unifiedConfiguration.get("default_fraction_overdoing") + self.passStatsCheckOverCompletion = dict( + [(dataset, value >= defaultFractionOverdoing) for dataset, value in self.percentCompletions.items()] + ) def _checkAvgCompletionStatistics(self) -> None: @@ 
-1104,7 +739,7 @@ def _checkAvgCompletionStatistics(self) -> None: _, primaries, _, _ = self.wfController.request.getIO() runWhiteList = self.wfController.getRunWhiteList() - lumiWhiteList = self.wfController.getLumiWhiteList() + lumiWhiteList = self.wfController.request.getLumiWhiteList() lumisPerRun = {} if not all(self.passStatsCheck.values()): @@ -1157,7 +792,7 @@ def _getRecoveryAssistanceTags(self) -> list: :return: recovery assistance tags """ if not all(self.passStatsCheck.values()): - possibleRecoveries = self.acdcReader.getRecoveryDocs() + possibleRecoveries = self.checkor.acdcReader.getRecoveryDocs(self.wf) if not possibleRecoveries: self.logger.info( "The workflow is not completed/has missing statistics, but nothing is recoverable. Passing through to announcement" @@ -1165,7 +800,7 @@ def _getRecoveryAssistanceTags(self) -> list: self.bypassChecks = True if not self.bypassChecks: - return ["recovery" if self.unifiedConfiguration.get("use_recoveror") else "manual"] + return ["recovery" if self.checkor.unifiedConfiguration.get("use_recoveror") else "manual"] return [] @@ -1204,18 +839,21 @@ def _checkOutputSize(self,) -> None: self.assistanceTags.add("over100") if self._forceCompleteWorkflow(): - WorkflowStatusEnforcer(self.wf).forceComplete() + if self.checkor.options.get("dryRun"): + self.checkor.logger.debug("Dry run: force completed workflow %s", self.wf) + else: + WorkflowStatusEnforcer(self.wf).forceComplete() def _hasSmallLumis(self) -> bool: """ The function to check if the workflow has small lumi sections :return: True if it has small lumis, False o/w """ - lumiLowerLimit = self.unifiedConfiguration.get("min_events_per_lumi_output") + lumiLowerLimit = self.checkor.unifiedConfiguration.get("min_events_per_lumi_output") _, primaries, _, _ = self.wfController.request.getIO() if ( - not self.wfController.isRelVal() + not self.wfController.request.isRelVal() and not primaries and any( [ @@ -1252,18 +890,18 @@ def _setLumiUpperLimit(self) -> None: The 
function to set the lumi sections upper limit """ lumiUpperLimit = {} - campaigns = self.campaigns or self._getWorkflowCampaigns() + campaigns = self.campaigns or self._setWorkflowCampaigns() for dataset in self.outputDatasetsToCheck: campaign = campaigns[dataset] if self.wfController.request.get("RequestType") in ["ReDigi", "ReReco"]: upperLimit = -1 - elif self.options.get("lumisize"): - upperLimit = self.options.get("lumisize") + elif self.checkor.options.get("lumisize"): + upperLimit = self.checkor.options.get("lumisize") self.logger.info("Overriding the upper lumi size to %s for %s", upperLimit, campaign) - elif self.campaignController.campaigns.get(campaign, {}).get("lumisize"): - upperLimit = self.campaignController.campaigns.get(campaign, {}).get("lumisize") + elif self.checkor.campaignController.campaigns.get(campaign, {}).get("lumisize"): + upperLimit = self.checkor.campaignController.campaigns.get(campaign, {}).get("lumisize") self.logger.info("Overriding the upper lumi size to %s for %s", upperLimit, campaign) else: upperLimit = 1001 @@ -1326,7 +964,7 @@ def _hasFileMismatch(self) -> bool: The function to check if there is any file mismatch :return: if number of files in DBS in different from Rucio """ - if not self.options.get("ignoreFiles") and not all( + if not self.checkor.options.get("ignoreFiles") and not all( [self.dbsPresence[dataset] == self.dbsInvalid[dataset] + self.rucioPresence[dataset] for dataset in self.outputDatasetsToCheck] ): self.logger.info("The workflow has a dbs/rucio mismatch") @@ -1347,8 +985,7 @@ def _checkFileCounts(self) -> None: for file in self.dbs["reader"].getDatasetFiles(dataset, validFileOnly=True, details=True) ] ) - rucioReader = RucioReader() - rucioFilenames = set(rucioReader.getDatasetFileNames(dataset)) + rucioFilenames = set(self.rucioReader.getDatasetFileNames(dataset)) missingRucioFiles = dbsFilenames - rucioFilenames missingDBSFiles = rucioFilenames - dbsFilenames @@ -1360,7 +997,7 @@ def 
_checkFileCounts(self) -> None: showOnlyN, "\n".join(missingRucioFiles[:showOnlyN]), ) - wereInvalidated = sorted(missingRucioFiles & set(self.dataCacheLoader.load("file_invalidation"))) + wereInvalidated = sorted(missingRucioFiles & set(self.checkor.dataCacheLoader.load("file_invalidation"))) if wereInvalidated: self.wfController.logger.info( "These %d files were invalidated globally, showing %d only.\n %s", @@ -1368,7 +1005,11 @@ def _checkFileCounts(self) -> None: showOnlyN, "\n".join(wereInvalidated[:showOnlyN]), ) - self.dbs["writer"].setFileStatus(wereInvalidated, validate=False) + + if self.checkor.options.get("dryRun"): + self.checkor.logger.debug("Dry run: invalidated %s files", len(wereInvalidated)) + else: + self.dbs["writer"].setFileStatus(wereInvalidated, validate=False) if missingDBSFiles: self.wfController.logger.info( @@ -1377,7 +1018,7 @@ def _checkFileCounts(self) -> None: showOnlyN, "\n".join(missingDBSFiles[:showOnlyN]), ) - wereInvalidated = sorted(missingDBSFiles & set(self.dataCacheLoader.load("file_invalidation"))) + wereInvalidated = sorted(missingDBSFiles & set(self.checkor.dataCacheLoader.load("file_invalidation"))) if wereInvalidated: self.wfController.logger.info( "These %d files were invalidated globally, showing %d only.\n %s", @@ -1396,7 +1037,7 @@ def _checkInvalidations(self) -> None: The function to check the invalidations """ fractionInvalid = 0.2 - if not self.options.get("ignoreinvalid") and not all( + if not self.checkor.options.get("ignoreinvalid") and not all( [ self.dbsInvalid[dataset] <= int(fractionInvalid * self.dbsPresence[dataset]) for dataset in self.wfController.request.get("OutputDatasets") @@ -1431,7 +1072,7 @@ def _setRecord(self) -> None: ) record["dbsFiles"] = self.dbsPresence[dataset] record["dbsInvFiles"] = self.dbsInvalid[dataset] - record["rucioFiles"] = set(RucioReader().getDatasetFileNames(dataset)) + record["rucioFiles"] = set(self.rucioReader.getDatasetFileNames(dataset)) record[ "acdc" ] = 
f"{len(self.acdcs.get('healthy', []))} / {len(self.acdcs.get('healthy', []) + self.acdcs.get('inactive', []))}" @@ -1460,12 +1101,15 @@ def _closeWorkflow(self) -> None: self.newStatus = "close" return - response = self.checkor.reqMgr["writer"].closeoutWorkflow(self.wf, cascade=True) - if response: - self.newStatus = "close" - return + if self.checkor.options.get("dryRun"): + self.checkor.logger.debug("Dry run: closed out workflow %s", self.wf) + else: + response = self.checkor.reqMgr["writer"].closeoutWorkflow(self.wf, cascade=True) + if response: + self.newStatus = "close" + return - self.logger.info("Could not close-out, will try again next time") + self.logger.info("Could not close-out, will try again next time") def _checkAssistanceTags(self) -> None: @@ -1603,9 +1247,3 @@ def _writeResponse(self) -> dict: } return response - -if __name__ == "__main__": - options, specificWf = Checkor.parseOptions() - checkor = Checkor(options=options, specificWf=specificWf) - if checkor.go(): - checkor.run() diff --git a/unified/__init__.py b/src/python/Unified/Helpers/__init__.py similarity index 100% rename from unified/__init__.py rename to src/python/Unified/Helpers/__init__.py diff --git a/unified/Injector.py b/src/python/Unified/Injector.py similarity index 100% rename from unified/Injector.py rename to src/python/Unified/Injector.py diff --git a/unified/Invalidator.py b/src/python/Unified/Invalidator.py similarity index 100% rename from unified/Invalidator.py rename to src/python/Unified/Invalidator.py diff --git a/unified/Rejector.py b/src/python/Unified/Rejector.py similarity index 100% rename from unified/Rejector.py rename to src/python/Unified/Rejector.py diff --git a/unified/.gitkeep b/src/python/Unified/__init__.py similarity index 100% rename from unified/.gitkeep rename to src/python/Unified/__init__.py From 384cca05ff57406c51b6ba4212359a82b6687c84 Mon Sep 17 00:00:00 2001 From: haozturk Date: Tue, 25 Jan 2022 13:01:52 +0300 Subject: [PATCH 06/17] Update the 
modes of checkor --- src/python/Unified/Checkor.py | 73 ++++++++++++++++------------------- 1 file changed, 34 insertions(+), 39 deletions(-) diff --git a/src/python/Unified/Checkor.py b/src/python/Unified/Checkor.py index eefc5a5e7..6f97ac1d4 100644 --- a/src/python/Unified/Checkor.py +++ b/src/python/Unified/Checkor.py @@ -81,32 +81,29 @@ def parseOptions() -> Tuple[dict, Optional[str]]: parser.add_option("--go", help="Does not check on duplicate process", action="store_true", default=False) parser.add_option( - "--update", + "--checkRunning", help="Running workflows that have not yet reached completed", action="store_true", default=False, ) parser.add_option( - "--strict", help="Only running workflows that reached completed", action="store_true", default=False + "--checkNewlyCompleted", help="Only running workflows that reached completed", action="store_true", default=False ) parser.add_option( - "--clear", help="Only running workflows that have reached custodial", action="store_true", default=False - ) - parser.add_option( - "--review", + "--checkAssistance", help="Look at the workflows that have already completed and had required actions", action="store_true", default=False, ) parser.add_option( - "--recovering", + "--checkAssistanceRecovering", help="Look at the workflows that already have on-going acdc", action="store_true", default=False, ) - parser.add_option("--manual", help='Look at the workflows in "manual"', action="store_true", default=False) - parser.add_option("--limit", help="The number of workflows to consider for checking", default=0, type=int) - parser.add_option("--threads", help="The number of threads for processing workflows", default=10, type=int) + parser.add_option("--checkAssistanceManual", help='Look at the workflows in "manual"', action="store_true", default=False) + parser.add_option("--maxPerRound", help="The number of workflows to consider for checking", default=0, type=int) + parser.add_option("--nThreads", help="The number of 
threads for processing workflows", default=1, type=int) parser.add_option( "--fractionPass", help="The completion fraction that is permitted", default=0.0, type="float" ) @@ -138,10 +135,10 @@ def parseOptions() -> Tuple[dict, Optional[str]]: options, args = parser.parse_args() options = vars(options) - options["manual"] = not options.get("recovering") - actions = ["strict", "update", "clear", "review"] + options["checkAssistanceManual"] = not options.get("checkAssistanceRecovering") + actions = ["checkNewlyCompleted", "checkRunning", "checkAssistance"] if all(not options.get(option) for option in actions): - for option in actions + ["recovering", "manual"]: + for option in actions + ["checkAssistanceRecovering", "checkAssistanceManual"]: options[option] = True return options, args[0] if args else None @@ -202,27 +199,23 @@ def _getWorkflowsToCheck(self) -> list: assistanceWfs = self.session.query(Workflow).filter(Workflow.status.startswith("assistance")).all() completedWfs = self.reqMgr["reader"].getWorkflowsByStatus("completed") - if self.options.get("strict"): - self.logger.info("Strict option is on: checking workflows that freshly completed") + if self.options.get("checkNewlyCompleted"): + self.logger.info("checkNewlyCompleted option is on: checking workflows that freshly completed") workflows.update(filter(lambda wf: wf.name in completedWfs, awayWfs)) - if self.options.get("update"): - self.logger.info("Update option is on: checking workflows that have not completed yet") + if self.options.get("checkRunning"): + self.logger.info("checkRunning option is on: checking workflows that have not completed yet") workflows.update(filter(lambda wf: wf.name not in completedWfs, awayWfs)) - if self.options.get("clear"): - self.logger.info("Clear option is on: checking workflows that are ready to toggle closed-out") - workflows.update(filter(lambda wf: "custodial" in wf.status, assistanceWfs)) - - if self.options.get("review"): + if self.options.get("checkAssistance"): 
nonCustodialWfs = [*filter(lambda wf: "custodial" not in wf.status, assistanceWfs)] - if self.options.get("recovering"): + if self.options.get("checkAssistanceRecovering"): self.logger.info( - "Review-recovering option is on: checking only the workflows that had been already acted on" + "checkAssistanceRecovering option is on: checking only the workflows that had been already acted on" ) workflows.update(filter(lambda wf: "manual" not in wf.status, nonCustodialWfs)) - if self.options.get("manual"): - self.logger.info("Review-manual option is on: checking the workflows to be acted on") + if self.options.get("checkAssistanceManual"): + self.logger.info("checkAssistanceManual option is on: checking the workflows to be acted on") workflows.update(filter(lambda wf: "manual" in wf.status, nonCustodialWfs)) return list(workflows) @@ -266,15 +259,15 @@ def _filterMaxNumberOfWorkflows(self, workflows: list) -> list: :return: filtered list of workflows """ maxPerRound = self.unifiedConfiguration.get("max_per_round", {}).get("checkor") - if self.options.get("limit"): - self.logger.info("Command line to limit workflows to %s", self.options.get("limit")) - maxPerRound = self.options.get("limit") + if self.options.get("maxPerRound"): + self.logger.info("Command line to limit workflows to %s", self.options.get("maxPerRound")) + maxPerRound = self.options.get("maxPerRound") if maxPerRound and not self.specificWf: - self.logger.info("Limiting workflows to %s this round", maxPerRound) + self.logger.info("Number of workflows to check after limitation: %s", maxPerRound) workflows = self._rankWorkflows(workflows) - if self.option.get("update"): + if self.option.get("checkRunning"): random.shuffle(workflows) return workflows[:maxPerRound] @@ -364,13 +357,11 @@ def _writeSummary(self) -> None: """ if not self.specificWf: msg = "" - if self.options.get("strict"): + if self.options.get("checkNewlyCompleted"): msg += "Workflows which just got in completed were looked at. 
Look in manual.\n" - if self.options.get("update"): + if self.options.get("checkRunning"): msg += "Workflows that are still running (and not completed) got looked at.\n" - if self.options.get("clear"): - msg += "Workflows that just need to close-out were verified. Nothing too new a-priori.\n" - if self.options.get("review"): + if self.options.get("checkAssistance"): msg += "Workflows under intervention got review.\n" msg += "\n".join( @@ -406,14 +397,18 @@ def run(self) -> None: The function to run checkor """ try: - self._setWfs() + # Review this later. Consider moving JIRA functionalities to a different module. + #self._setWfs() wfsToCheck = self._filterBackfills(self._getWorkflowsToCheck()) random.shuffle(wfsToCheck) - self.logger.info("Considering %s workflows (before any limitation)", len(wfsToCheck)) + self.logger.info("Number of workflows to check before any limitation: %s", len(wfsToCheck)) wfsToCheck = self._filterMaxNumberOfWorkflows(wfsToCheck) + self.logger.info("Workflows to check: ") + for w in wfsToCheck: + self.logger.info(w) self._check(wfsToCheck) self._writeSummary() @@ -428,7 +423,7 @@ def _check(self, wfsToCheck: list) -> None: :param wfsToCheck: workflows to check """ - @runWithMultiThreading(mtParam="wfsToCheck", maxThreads=len(wfsToCheck)) + @runWithMultiThreading(mtParam="wfsToCheck", maxThreads=self.options.get("nThreads")) def _checkWorkflow(self, wfsToCheck: list) -> dict: return WorkflowCheckor(wfsToCheck, checkor=self).check() From 060964514bc84b2fbe0e586c3a1f093089321a81 Mon Sep 17 00:00:00 2001 From: haozturk Date: Tue, 15 Feb 2022 12:55:06 +0300 Subject: [PATCH 07/17] Checkor dry run mode --- src/python/Unified/Checkor.py | 13 +- src/python/Unified/Helpers/WorkflowCheckor.py | 191 ++++++++++++------ 2 files changed, 143 insertions(+), 61 deletions(-) diff --git a/src/python/Unified/Checkor.py b/src/python/Unified/Checkor.py index 6f97ac1d4..d92090481 100644 --- a/src/python/Unified/Checkor.py +++ b/src/python/Unified/Checkor.py @@ 
-302,13 +302,14 @@ def _updateWorkflowsStatus(self, wfsStatus: list) -> None: The function to update the workflows status :param wfsStatus: status """ + # TODO: Do the ReqMgr update here as well: WorkflowCheckor:_closeOutWorkflow for wf in wfsStatus: if wf.get("newStatus"): newStatus = wf.get("newStatus") wf["workflow"].status = newStatus if wf.options.get("dryRun"): - self.logger.debug("Dry run: updated workflow %s status to %s", wf.get("wf"), newStatus) + self.logger.debug("Dry run: Unified status update of %s to %s", wf.get("wf"), newStatus) else: self.session.commit() @@ -427,9 +428,15 @@ def _check(self, wfsToCheck: list) -> None: def _checkWorkflow(self, wfsToCheck: list) -> dict: return WorkflowCheckor(wfsToCheck, checkor=self).check() + # TODO: Workflow statuses have been already updated in WorkflowCheckor???? checkResponses = _checkWorkflow(wfsToCheck) - self._updateWorkflowsRecords(checkResponses) - self._updateWorkflowsStatus(checkResponses) + self.logger.critical("Response to Checkor:") + self.logger.critical(checkResponses) + # TODO: The following function updates closeoutInfo table of MongoDB. + #self._updateWorkflowsRecords(checkResponses) + # TODO: This does unified status update, mongodb record update and McM force-completion. + # TODO: Why not do all the operations here, especially ReqMgr2 status update?? 
+ #self._updateWorkflowsStatus(checkResponses) self._checkExecutionTime(len(wfsToCheck)) diff --git a/src/python/Unified/Helpers/WorkflowCheckor.py b/src/python/Unified/Helpers/WorkflowCheckor.py index a72e46814..84393d100 100644 --- a/src/python/Unified/Helpers/WorkflowCheckor.py +++ b/src/python/Unified/Helpers/WorkflowCheckor.py @@ -29,10 +29,13 @@ def __init__(self, wfToCheck: Workflow, logger: Optional[Logger] = None, **kwarg self.wfToCheck = wfToCheck self.wfToCheck.wm_status = self.wfController.request.get("RequestStatus") + # TODO: Check this + self.unifiedStatus = wfToCheck.status self.checkor = kwargs.get("checkor") self.rucioReader = RucioReader() + # TODO: What's the difference between assistanceTags & existingAssistaceTags? self.assistanceTags, self.existingAssistaceTags = set(), set(wfToCheck.status.split("-")[1:]) self.acdcs = dict() @@ -84,10 +87,13 @@ def _setWorkflowToClose(self) -> bool: The function to check if a given workflow should be closed :return: True if workflow should be closed, False o/w """ - if self.wfToCheck.wm_status in ["closed-out", "announced"]: + self.logger.info("Checking if the unified status should be 'close' already, but not") + if self.wfToCheck.wm_status in ["closed-out", "announced"] and self.wfToCheck.unifiedStatus != "close": self.logger.info("%s is already %s, setting as close", self.wf, self.wfToCheck.wm_status) self.newStatus = "close" return True + else: + self.logger.info("Unified status is already okay, skipping the request.") return False @@ -96,6 +102,7 @@ def _setWorkflowToForget(self) -> bool: The function to check if a given workflow should be forgotten :return: True if workflow should be forgotten, False o/w """ + self.logger.info("Checking if the unified status should be 'forget'") if self.wfController.request.isRelVal() and self.wfToCheck.wm_status in [ "failed", "aborted", @@ -104,9 +111,11 @@ def _setWorkflowToForget(self) -> bool: "rejected-archived", "aborted-completed", ]: - self.logger.info("%s is 
%s, but will not be set in trouble to find a replacement", self.wf, self.wfToCheck.wm_status) + self.logger.info("%s is %s, setting the unified status as 'forget'", self.wf, self.wfToCheck.wm_status) self.newStatus = "forget" return True + else: + self.logger.info("%s is %s, not setting it as 'forget'", self.wf, self.wfToCheck.wm_status) return False @@ -223,25 +232,29 @@ def _updateWorkflowStatus(self) -> bool: The function to check if the status of a given workflow should be updated :return: True if update status, False o/w """ - if self._setWorkflowToClose() or self._setWorkflowToForget() or self._setWorkflowToTrouble(): + if self._setWorkflowToClose(): return True - if self._isWorkflowOnHold() and self._setNewOnHoldStatus(self.wfController.getFamily(includeItself=True)): + if self._setWorkflowToForget(): return True + # TODO Check this + #if self._setWorkflowToTrouble(): + # return True + + # TODO Check this later + #if self._isWorkflowOnHold() and self._setNewOnHoldStatus(self.wfController.getFamily(includeItself=True)): + # return True return False - def _getTiersWithNoCheckAndCustodial(self) -> Tuple[list, list]: + def _getTiersWithNoCheck(self) -> Tuple[list, list]: """ The function yo get the tiers with no check and custodial :return: list of tiers with no check, and list of tiers with no custodial """ tiersWithNoCheck = set(self.checkor.unifiedConfiguration.get("tiers_with_no_check")) - tiersWithNoCustodial = set( - self.checkor.unifiedConfiguration.get("tiers_with_no_custodial") if not self.wfController.request.isRelVal() else [] - ) campaigns = self.campaigns or self._setWorkflowCampaigns() - + # TODO custodial_override is not good name anymore, change it! 
for campaign in campaigns.values(): if ( campaign in self.checkor.campaignController.campaigns @@ -249,19 +262,15 @@ def _getTiersWithNoCheckAndCustodial(self) -> Tuple[list, list]: ): if isinstance(self.checkor.campaignController.campaigns[campaign]["custodial_override"], list): tiersWithNoCheck -= self.checkor.campaignController.campaigns[campaign]["custodial_override"] - elif self.checkor.campaignController.campaigns[campaign]["custodial_override"] == "notape": - tiersWithNoCustodial = set( - sorted([dataset.split("/")[-1] for dataset in self.wfController.request.get("OutputDatasets")]) - ) - return tiersWithNoCheck, tiersWithNoCustodial + return tiersWithNoCheck def _setOutputDatasetsToCheck(self) -> None: """ The function to set the datasets to check """ - tiersWithNoCheck, tiersWithNoCustodial = self._getTiersWithNoCheckAndCustodial(self.wfController) - + self.logger.info("Checking which output datasets to be handled") + tiersWithNoCheck = self._getTiersWithNoCheck(self.wfController) expectedOutputsDatasets = self.wfController.request.get("OutputDatasets") self.outputDatasetsToCheck = [ dataset @@ -269,12 +278,11 @@ def _setOutputDatasetsToCheck(self) -> None: if all([dataset.split("/")[-1] != tier for tier in tiersWithNoCheck]) ] - self.wfController.logger.debug( + self.wfController.logger.info( "Initial outputs: %s\nWill check on: %s\nTiers out: %s\nTiers no custodials: %s", ", ".join(sorted(expectedOutputsDatasets)), ", ".join(sorted(self.outputDatasetsToCheck)), - ", ".join(sorted(tiersWithNoCheck)), - ", ".join(sorted(tiersWithNoCustodial)), + ", ".join(sorted(tiersWithNoCheck)) ) def _skipFamilyWorkflow(self, wfSchema: dict) -> bool: @@ -534,7 +542,7 @@ def _getFractionPassByCampaignRequirement(self, campaign: str, dataset: str) -> return fractionPass - def _setDatasetsFractionsToPass(self) -> None: + def _setDatasetsFractionPass(self) -> None: """ The function to set the dataset fractions to pass """ @@ -557,6 +565,7 @@ def 
_setDatasetsFractionsToPass(self) -> None: fractionsPass[dataset] = passValue self.wfController.logger.info("Overriding fraction to %s for %s by dataset key", passValue, dataset) + self.logger.info("Expected stats (Fraction pass): %s", str(fractionsPass)) self.fractions["pass"] = fractionsPass def _setDatasetsFractionsToTruncateRecovery(self) -> None: @@ -593,14 +602,18 @@ def _setDatasetsFractionsToTruncateRecovery(self) -> None: self.fractions["truncate"] = fractionsTruncateRecovery - def _setStatisticsThresholds(self) -> None: + def _setExpectedStats(self) -> None: """ The function to set the statistics thresholds """ - self._setDatasetsFractionsToAnnounce() - self._setDatasetsFractionsToPass() - self._setDatasetsFractionsToTruncateRecovery() + # TODO: # Sets it 1 regardless of the dataset and it's not used? + #self._setDatasetsFractionsToAnnounce() + self._setDatasetsFractionPass() + + # TODO: Disabling truncation for now. Check that later. + """ + self._setDatasetsFractionsToTruncateRecovery() fractionDamping = self._getFractionDumping() for dataset, value in self.fractions["pass"].items(): if value != 1.0 and fractionDamping and self.checkor.unifiedConfiguration.get("timeout_for_damping_fraction"): @@ -612,6 +625,7 @@ def _setStatisticsThresholds(self) -> None: self.fractions["truncate"][dataset] = self.fractions["pass"][dataset] self._updateFractionsToPassAndToTruncateRecovery() + """ def _updateFractionsToPassAndToTruncateRecovery(self) -> None: """ @@ -666,7 +680,7 @@ def _getTasksByOutputDataset(self) -> dict: return tasksByDataset - def _checkCompletionStatistics(self) -> None: + def _setActualStats(self) -> None: """ The function to check the completion statistics """ @@ -699,17 +713,20 @@ def _checkCompletionStatistics(self) -> None: outputEventsExpected, dataset, ) + self.logger.info("Actual stats: %s", str(self.percentCompletions)) - def _setPassStatisticsCheck(self) -> None: + def _compareAndSetExpectedAndActualStats(self) -> None: """ The function 
to set pass statistics """ + # TODO: The way bypassChecks is introduced seems problematic, review it. self.passStatsCheck = dict( [ (dataset, self.bypassChecks or self.percentCompletions[dataset] >= passValue) for dataset, passValue in self.fractions["pass"].items() ] ) + self.logger.info("Compared actual and expected stats: %s", str(self.passStatsCheck)) def _setPassStatisticsCheckToAnnounce(self) -> None: """ @@ -916,21 +933,26 @@ def _checkLumiSize(self) -> None: """ The function to check the lumi sections sizes """ + self.logger.info("Checking lumi size") if self._hasSmallLumis(): self.assistanceTags.add("smalllumi") self.isClosing = False + # TODO: Give more details?? + self.logger.info("Output has small lumisections, not closing out.") self._setLumiUpperLimit() if self._hasBigLumis(): self.assistanceTags.add("biglumi") self.isClosing = False + # TODO: Give more details?? + self.logger.info("Output has big lumisections, not closing out.") def _checkRucioFileCounts(self) -> None: """ The function to check the number of files in Rucio """ rucioPresence = {} - + # TODO: Check the algorithm of this function for dataset in self.wfController.request.get("OutputDatasets"): filesPerBlock = set(self.rucioReader.countDatasetFilesPerBlock(dataset)) allBlocks = set([*map(lambda x: x[0], filesPerBlock)]) @@ -947,6 +969,7 @@ def _checkRucioFileCounts(self) -> None: self.assistanceTags.remove("announce") self.rucioPresence = rucioPresence + self.logger.info("Checked the rucio presence: %s", str(self.rucioPresence)) def _checkDBSFileCounts(self) -> None: """ @@ -959,6 +982,9 @@ def _checkDBSFileCounts(self) -> None: self.dbsPresence, self.dbsInvalid = dbsPresence, dbsInvalid + self.logger.info("Checked DBS Presence: %s", str(self.dbsPresence)) + self.logger.info("Checked INVALID DBS Presence: %s", str(self.dbsInvalid)) + def _hasFileMismatch(self) -> bool: """ The function to check if there is any file mismatch @@ -972,12 +998,16 @@ def _hasFileMismatch(self) -> bool: return 
False - def _checkFileCounts(self) -> None: + def _handleFileMismatch(self) -> None: """ The function to check the number of files """ + # TODO: Make this configurable! showOnlyN = 10 + # TODO: Check the algorithm of this function + self.logger.info("There is a RUCIO/DBS filemismatch. Checking the details") + for dataset in self.dbsPresence: dbsFilenames = set( [ @@ -998,6 +1028,7 @@ def _checkFileCounts(self) -> None: "\n".join(missingRucioFiles[:showOnlyN]), ) wereInvalidated = sorted(missingRucioFiles & set(self.checkor.dataCacheLoader.load("file_invalidation"))) + # TODO: Check this invalidation!! if wereInvalidated: self.wfController.logger.info( "These %d files were invalidated globally, showing %d only.\n %s", @@ -1019,6 +1050,7 @@ def _checkFileCounts(self) -> None: "\n".join(missingDBSFiles[:showOnlyN]), ) wereInvalidated = sorted(missingDBSFiles & set(self.checkor.dataCacheLoader.load("file_invalidation"))) + # TODO: Check this invalidation if wereInvalidated: self.wfController.logger.info( "These %d files were invalidated globally, showing %d only.\n %s", @@ -1032,10 +1064,11 @@ def _checkFileCounts(self) -> None: self.isClosing = False - def _checkInvalidations(self) -> None: + def _checkInvalidFiles(self) -> None: """ The function to check the invalidations """ + self.logger.info("Checking if the output(s) has/have a significant amount of invalid files") fractionInvalid = 0.2 if not self.checkor.options.get("ignoreinvalid") and not all( [ @@ -1045,6 +1078,8 @@ def _checkInvalidations(self) -> None: ): self.wfController.logger.info("The workflow has a DBS invalid file level too high") self.assistanceTags.add("invalidfiles") + else: + self.logger.info("The outputs don't have a significant amount of invalid files") def _setRecord(self) -> None: """ @@ -1084,9 +1119,11 @@ def _setRecord(self) -> None: wfRecord["datasets"][dataset] = record + self.logger.info("Following record has been produced for MongoDB update: %s", record) + self.record = wfRecord - def 
_closeWorkflow(self) -> None: + def _closeOutWorkflow(self) -> None: """ The function to close the workflow """ @@ -1112,10 +1149,11 @@ def _closeWorkflow(self) -> None: self.logger.info("Could not close-out, will try again next time") - def _checkAssistanceTags(self) -> None: + def _updateAssistanceStatus(self) -> None: """ The function to check the assistance tags """ + # TODO: Needs dry-run mode update self.logger.info("%s was tagged with: %s", self.wf, self.assistanceTags) if "recovering" in self.assistanceTags: self.assistanceTags -= set(["recovery", "filemismatch", "manual"]) @@ -1141,9 +1179,9 @@ def _warnRequestor(self) -> None: msg = "The request PREPID (WORKFLOW) is facing issue in production.\n" if "recovery" in self.assistanceTags: - msg += f"Samples completed with missing statistics\n{'\n'.join([f'{round(self.percentCompletions[dataset]*100, 2)}%% complete for {dataset}' for dataset in self.outputDatasetsToCheck ])}\nhttps://cmsweb.cern.ch/report/{self.wf}\n" + msg += f"Samples completed with missing statistics\n{''.join([f'{round(self.percentCompletions[dataset]*100, 2)}%% complete for {dataset}' for dataset in self.outputDatasetsToCheck ])}\nhttps://cmsweb.cern.ch/report/{self.wf}\n" if "biglumi" in self.assistanceTags: - msg += f"Samples completed with large luminosity blocks:\n{'\n'.join([f'{self.eventsPerLumi[dataset]} > {self.lumiUpperLimit[dataset]} for {dataset}' for dataset in self.outputDatasetsToCheck])}\nhttps://cmsweb.cern.ch/reqmgr/view/splitting/{self.wf}\n" + msg += f"Samples completed with large luminosity blocks:\n{''.join([f'{self.eventsPerLumi[dataset]} > {self.lumiUpperLimit[dataset]} for {dataset}' for dataset in self.outputDatasetsToCheck])}\nhttps://cmsweb.cern.ch/reqmgr/view/splitting/{self.wf}\n" msg += "You are invited to check, while this is being taken care of by Comp-Ops.\n" msg += "This is an automated message from Comp-Ops.\n" @@ -1158,7 +1196,7 @@ def _getAssistanceStatus(self) -> str: return "assistance-" + 
"-".join(sorted(self.assistanceTags)) return "assistance" - def _setWorkflowToAssistance(self) -> None: + def _setAssistanceStatus(self) -> None: """ The function to set the workflow to assistance """ @@ -1166,63 +1204,96 @@ def _setWorkflowToAssistance(self) -> None: if "manual" not in self.wfToCheck.status or assistanceStatus != "assistance-recovery": self.newStatus = assistanceStatus + self.logger.info("Ultimate assistance status is: %s", str(assistanceStatus)) + def check(self) -> dict: """ The function to check the workflow """ try: self.checkor._checkPoint(f"Starting checkor with {self.wf}") - if self._skipWorkflow() or self._updateWorkflowStatus(): + + # Investigate this later: There should not be a case to skip a workflow + #if self._skipWorkflow(): + # return self._writeResponse() + + # TODO: The following function checks workflows whose unified status should have been updated, but not. + # TODO: Workflows to close & Workflows to forget + # TODO: I think we can keep it for a while and see if it really has a use + # TODO: I also think that it needs a renaming. + if self._updateWorkflowStatus(): + # TODO: Check this function!! return self._writeResponse() self.checkor._checkPoint("Checked workflow status", subLap=True) self._setOutputDatasetsToCheck() - self._checkWorkflowFamily() - self.checkor._checkPoint("Checked workflow family", subLap=True) - self._setStatisticsThresholds() - self.checkor._checkPoint("Checked statistics threshold", subLap=True) + # TODO: Check this function. It does more than checking + #self._checkWorkflowFamily() + #self.checkor._checkPoint("Checked workflow family", subLap=True) - self._checkCompletionStatistics() - self.checkor._checkPoint("Checked observed statistics", subLap=True) + # TODO: I did simplifications. Review it later. 
+ self._setExpectedStats() + self.checkor._checkPoint("Checked expected stats", subLap=True) - self._setPassStatisticsCheck() - self._checkAvgCompletionStatistics() - self.checkor._checkPoint("Checked more detailed observed statistics", subLap=True) + # TODO: "_set" prefix seems confusing. The function gets and sets + # TODO: Perhaps, break it down into 2 function: getActualStats & setActualStats + self._setActualStats() + self.checkor._checkPoint("Checked actual stats", subLap=True) - self._setPassStatisticsCheckToAnnounce() - self._setPassStatisticsCheckOverCompletion() - self._checkOutputSize() - self.checkor._checkPoint("Checked output size", subLap=True) + # TODO: Rename this after understanding the following functions + self._compareAndSetExpectedAndActualStats() + # TODO: I don't understand these two functions. Could be related to over100 and announce tags. Disabling now + #self._checkAvgCompletionStatistics() + #self._setPassStatisticsCheckToAnnounce() + #self.checkor._checkPoint("Checked more detailed observed statistics", subLap=True) + + # TODO: This might be a good feature. Disabling for now. Review later. + #self._setPassStatisticsCheckOverCompletion() + + # TODO: Function does more than output size checking, which I don't understand. Disabling for now. + #self._checkOutputSize() + #self.checkor._checkPoint("Checked output size", subLap=True) + + # TODO: We might disable smalllumi check. Review later. Keep it for now. 
self._checkLumiSize() self.checkor._checkPoint("Checked lumi size", subLap=True) self._checkRucioFileCounts() - self.checkor_checkPoint("Checked Rucio count", subLap=True) + self.checkor_checkPoint("Checked Rucio file count", subLap=True) self._checkDBSFileCounts() - self.checkor._checkPoint("DBS file count", subLap=True) + self.checkor._checkPoint("Checked DBS file count", subLap=True) if self._hasFileMismatch() and "recovering" not in self.assistanceTags: - self._checkFileCounts() + # TODO: Test this function carefully: filemismatch, agentfilemismatch, etc. + self._handleFileMismatch() self.checkor._checkPoint("Checked file count", subLap=True) - self._checkInvalidations() - self.checkor._checkPoint("Checked invalidation", subLap=True) + # TODO: Test this function + self._checkInvalidFiles() + self.checkor._checkPoint("Checked invalid files", subLap=True) self.checkor._checkPoint(f"Done with {self.wf}") + # TODO: Log this properly and understand how it is used self._setRecord() if self.isClosing: - self._closeWorkflow() + self.logger.info("The workflow is okay to be closed-out. Perform the action later.") + #self._closeOutWorkflow() else: - self._checkAssistanceTags() - self._warnRequestor() - self._setWorkflowToAssistance() + self.logger.info("The workflow is not okay to be closed-out.") + # TODO: Rename and update dry run mode + self._updateAssistanceStatus() + # TODO: What's the difference from the function above? Clear here. + self._setAssistanceStatus() + + # TODO: Disabling for now, check later. + # self._warnRequestor() ## TODO: update JIRA tickets / migrate JiraClient - + return self._writeResponse() except Exception as error: @@ -1235,6 +1306,7 @@ def _writeResponse(self) -> dict: """ The function to write the check response """ + # TODO: What's the difference between workflow and wf??? 
response = { "workflow": self.wfToCheck, "wf": self.wf, @@ -1245,5 +1317,8 @@ def _writeResponse(self) -> dict: "mcmForceComplete": self.bypassChecksByMcMForceComplete, "record": self.record } + + self.logger.info("An update is required: ") + self.logger.info(response) return response From 3eb374200993cd8a97d6c46c28dca42b73ddb900 Mon Sep 17 00:00:00 2001 From: haozturk Date: Tue, 15 Feb 2022 13:00:53 +0300 Subject: [PATCH 08/17] Fix the type in option handling --- src/python/Unified/Checkor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/Unified/Checkor.py b/src/python/Unified/Checkor.py index d92090481..8cba527e3 100644 --- a/src/python/Unified/Checkor.py +++ b/src/python/Unified/Checkor.py @@ -128,7 +128,7 @@ def parseOptions() -> Tuple[dict, Optional[str]]: parser.add_option( "--dryRun", help="To run in test mode (with production but without making any changes)", - actions="store_true", + action="store_true", default=False, ) From 0b12632e325b7c00e869cfd82814d570aecf4b75 Mon Sep 17 00:00:00 2001 From: haozturk Date: Tue, 15 Feb 2022 13:09:35 +0300 Subject: [PATCH 09/17] Fix configuration retrival for max_per_round --- src/python/Unified/Checkor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/Unified/Checkor.py b/src/python/Unified/Checkor.py index 8cba527e3..e1887b51c 100644 --- a/src/python/Unified/Checkor.py +++ b/src/python/Unified/Checkor.py @@ -258,7 +258,7 @@ def _filterMaxNumberOfWorkflows(self, workflows: list) -> list: :param workflows: list of workflows :return: filtered list of workflows """ - maxPerRound = self.unifiedConfiguration.get("max_per_round", {}).get("checkor") + maxPerRound = self.unifiedConfiguration.get("max_per_round").get("checkor") if self.options.get("maxPerRound"): self.logger.info("Command line to limit workflows to %s", self.options.get("maxPerRound")) maxPerRound = self.options.get("maxPerRound") From fea6fc6233114b63efbf766a45f0bedb209930ce Mon Sep 17 00:00:00 2001 
From: haozturk Date: Tue, 15 Feb 2022 13:10:26 +0300 Subject: [PATCH 10/17] Fix typo --- src/python/Unified/Checkor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/Unified/Checkor.py b/src/python/Unified/Checkor.py index e1887b51c..502d8ab7e 100644 --- a/src/python/Unified/Checkor.py +++ b/src/python/Unified/Checkor.py @@ -267,7 +267,7 @@ def _filterMaxNumberOfWorkflows(self, workflows: list) -> list: self.logger.info("Number of workflows to check after limitation: %s", maxPerRound) workflows = self._rankWorkflows(workflows) - if self.option.get("checkRunning"): + if self.options.get("checkRunning"): random.shuffle(workflows) return workflows[:maxPerRound] From df9963b86f76f0f9cbaa9cbf316bb6512cfc1beb Mon Sep 17 00:00:00 2001 From: haozturk Date: Tue, 15 Feb 2022 13:15:21 +0300 Subject: [PATCH 11/17] Fix typo --- src/python/Unified/Checkor.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/python/Unified/Checkor.py b/src/python/Unified/Checkor.py index 502d8ab7e..534c3e65d 100644 --- a/src/python/Unified/Checkor.py +++ b/src/python/Unified/Checkor.py @@ -27,6 +27,7 @@ from Unified.Helpers.WorkflowCheckor import WorkflowCheckor from typing import Optional, Tuple, Union +import traceback class Checkor(OracleClient): @@ -408,8 +409,8 @@ def run(self) -> None: wfsToCheck = self._filterMaxNumberOfWorkflows(wfsToCheck) self.logger.info("Workflows to check: ") - for w in wfsToCheck: - self.logger.info(w) + for workflow in wfsToCheck: + self.logger.info(workflow.name) self._check(wfsToCheck) self._writeSummary() @@ -417,6 +418,7 @@ def run(self) -> None: except Exception as error: self.logger.error("Failed to run checkor") self.logger.error(str(error)) + self.logger.error(traceback.format_exc()) def _check(self, wfsToCheck: list) -> None: """ From f824fdb5f8990874804e121565ef068cd6a832d9 Mon Sep 17 00:00:00 2001 From: haozturk Date: Tue, 15 Feb 2022 13:37:17 +0300 Subject: [PATCH 12/17] Disable multithreading for 
now --- src/python/Unified/Checkor.py | 30 +++++++++++-------- src/python/Unified/Helpers/WorkflowCheckor.py | 1 + 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/src/python/Unified/Checkor.py b/src/python/Unified/Checkor.py index 534c3e65d..c89065dc1 100644 --- a/src/python/Unified/Checkor.py +++ b/src/python/Unified/Checkor.py @@ -426,19 +426,23 @@ def _check(self, wfsToCheck: list) -> None: :param wfsToCheck: workflows to check """ - @runWithMultiThreading(mtParam="wfsToCheck", maxThreads=self.options.get("nThreads")) - def _checkWorkflow(self, wfsToCheck: list) -> dict: - return WorkflowCheckor(wfsToCheck, checkor=self).check() - - # TODO: Workflow statuses have been already updated in WorkflowCheckor???? - checkResponses = _checkWorkflow(wfsToCheck) - self.logger.critical("Response to Checkor:") - self.logger.critical(checkResponses) - # TODO: The following function updates closeoutInfo table of MongoDB. - #self._updateWorkflowsRecords(checkResponses) - # TODO: This does unified status update, mongodb record update and McM force-completion. - # TODO: Why not do all the operations here, especially ReqMgr2 status update?? - #self._updateWorkflowsStatus(checkResponses) + # TODO: Fix the multithreading! + #@runWithMultiThreading(mtParam="wfsToCheck", maxThreads=self.options.get("nThreads")) + #def _checkWorkflow(self, wfsToCheck: list) -> dict: + #return WorkflowCheckor(wfsToCheck, checkor=self).check() + def _checkWorkflow(workflow: list) -> dict: + return WorkflowCheckor(workflow).check() + + for workflow in wfsToCheck: + # TODO: Workflow statuses have been already updated in WorkflowCheckor???? + checkResponses = _checkWorkflow(wfsToCheck) + self.logger.critical("Response to Checkor:") + self.logger.critical(checkResponses) + # TODO: The following function updates closeoutInfo table of MongoDB. + #self._updateWorkflowsRecords(checkResponses) + # TODO: This does unified status update, mongodb record update and McM force-completion. 
+ # TODO: Why not do all the operations here, especially ReqMgr2 status update?? + #self._updateWorkflowsStatus(checkResponses) self._checkExecutionTime(len(wfsToCheck)) diff --git a/src/python/Unified/Helpers/WorkflowCheckor.py b/src/python/Unified/Helpers/WorkflowCheckor.py index 84393d100..d9e6ced19 100644 --- a/src/python/Unified/Helpers/WorkflowCheckor.py +++ b/src/python/Unified/Helpers/WorkflowCheckor.py @@ -22,6 +22,7 @@ def __init__(self, wfToCheck: Workflow, logger: Optional[Logger] = None, **kwarg try: super().__init__(self) self.logger = logger or getLogger(self.__class__.__name__) + self.logger.info("Initializing Workflow Checkor") self.now = mktime(gmtime()) self.wf = wfToCheck.name From 1262237de7f2ebfeb108647a555f4cb336bfd29b Mon Sep 17 00:00:00 2001 From: haozturk Date: Tue, 15 Feb 2022 13:41:32 +0300 Subject: [PATCH 13/17] Update arguments of WorkflowCheckor --- src/python/Unified/Helpers/WorkflowCheckor.py | 51 ++++++++++--------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/src/python/Unified/Helpers/WorkflowCheckor.py b/src/python/Unified/Helpers/WorkflowCheckor.py index d9e6ced19..c50c8fdda 100644 --- a/src/python/Unified/Helpers/WorkflowCheckor.py +++ b/src/python/Unified/Helpers/WorkflowCheckor.py @@ -12,32 +12,33 @@ from typing import Optional, Tuple -class WorkflowCheckor(object): +#class WorkflowCheckor(object): +class WorkflowCheckor(): """ __WorkflowCheckor__ General API for checking a given workflow """ - def __init__(self, wfToCheck: Workflow, logger: Optional[Logger] = None, **kwargs) -> None: + def __init__(self, workflow: Workflow, logger: Optional[Logger] = None, **kwargs) -> None: try: - super().__init__(self) + #super().__init__(self) self.logger = logger or getLogger(self.__class__.__name__) self.logger.info("Initializing Workflow Checkor") self.now = mktime(gmtime()) - self.wf = wfToCheck.name + self.wf = workflow.name self.wfController = WorkflowController(self.wf) - self.wfToCheck = wfToCheck - 
self.wfToCheck.wm_status = self.wfController.request.get("RequestStatus") + self.workflow = workflow + self.workflow.wm_status = self.wfController.request.get("RequestStatus") # TODO: Check this - self.unifiedStatus = wfToCheck.status + self.unifiedStatus = workflow.status self.checkor = kwargs.get("checkor") self.rucioReader = RucioReader() # TODO: What's the difference between assistanceTags & existingAssistaceTags? - self.assistanceTags, self.existingAssistaceTags = set(), set(wfToCheck.status.split("-")[1:]) + self.assistanceTags, self.existingAssistaceTags = set(), set(workflow.status.split("-")[1:]) self.acdcs = dict() self.campaigns = dict() @@ -75,8 +76,8 @@ def _skipWorkflow(self) -> bool: if ( (self.checkor.specificWf and self.checkor.specificWf not in self.wf) or os.path.isfile(".checkor_stop") - or self.wfToCheck.wm_status in ["assigned", "acquired"] - or self.wfToCheck.wm_status != "completed" + or self.workflow.wm_status in ["assigned", "acquired"] + or self.workflow.wm_status != "completed" ): self.logger.info("Skipping workflow %s", self.wf) return True @@ -89,8 +90,8 @@ def _setWorkflowToClose(self) -> bool: :return: True if workflow should be closed, False o/w """ self.logger.info("Checking if the unified status should be 'close' already, but not") - if self.wfToCheck.wm_status in ["closed-out", "announced"] and self.wfToCheck.unifiedStatus != "close": - self.logger.info("%s is already %s, setting as close", self.wf, self.wfToCheck.wm_status) + if self.workflow.wm_status in ["closed-out", "announced"] and self.workflow.unifiedStatus != "close": + self.logger.info("%s is already %s, setting as close", self.wf, self.workflow.wm_status) self.newStatus = "close" return True else: @@ -104,7 +105,7 @@ def _setWorkflowToForget(self) -> bool: :return: True if workflow should be forgotten, False o/w """ self.logger.info("Checking if the unified status should be 'forget'") - if self.wfController.request.isRelVal() and self.wfToCheck.wm_status in [ + if 
self.wfController.request.isRelVal() and self.workflow.wm_status in [ "failed", "aborted", "aborted-archived", @@ -112,11 +113,11 @@ def _setWorkflowToForget(self) -> bool: "rejected-archived", "aborted-completed", ]: - self.logger.info("%s is %s, setting the unified status as 'forget'", self.wf, self.wfToCheck.wm_status) + self.logger.info("%s is %s, setting the unified status as 'forget'", self.wf, self.workflow.wm_status) self.newStatus = "forget" return True else: - self.logger.info("%s is %s, not setting it as 'forget'", self.wf, self.wfToCheck.wm_status) + self.logger.info("%s is %s, not setting it as 'forget'", self.wf, self.workflow.wm_status) return False @@ -125,7 +126,7 @@ def _setWorkflowToTrouble(self) -> bool: The function to check if a given workflow should be set as trouble :return: True if workflow should be set as trouble, False o/w """ - if not self.wfController.request.isRelVal() and self.wfToCheck.wm_status in [ + if not self.wfController.request.isRelVal() and self.workflow.wm_status in [ "failed", "aborted", "aborted-archived", @@ -133,7 +134,7 @@ def _setWorkflowToTrouble(self) -> bool: "rejected-archived", "aborted-completed", ]: - self.logger.info("%s is %s, setting as trouble", self.wf, self.wfToCheck.wm_status) + self.logger.info("%s is %s, setting as trouble", self.wf, self.workflow.wm_status) self.newStatus = "trouble" return True @@ -181,7 +182,7 @@ def _isWorkflowOnHold(self) -> bool: The function to check if a given workfloe is on hold :return: True if workflow is on hold, False o/w """ - return "-onhold" in self.wfToCheck.wm_status or self.wf in self.checkor.onHoldWfs + return "-onhold" in self.workflow.wm_status or self.wf in self.checkor.onHoldWfs def _getMinFamilyCompletedDelay(self, family: list) -> float: """ @@ -217,12 +218,12 @@ def _setNewOnHoldStatus(self, family: list) -> bool: self.bypassChecks = True return False - if "-onhold" in self.wfToCheck.wm_status and self.wf in self.checkor.onHoldWfs and not self.bypassChecks: 
+ if "-onhold" in self.workflow.wm_status and self.wf in self.checkor.onHoldWfs and not self.bypassChecks: self.logger.info("%s is on hold", self.wf) return True if self.wf in self.checkor.onHoldWfs and not self.bypassChecks: - self.logger.info("%s is %s, setting as on hold", self.wf, self.wfToCheck.wm_status) + self.logger.info("%s is %s, setting as on hold", self.wf, self.workflow.wm_status) self.newStatus = "assistance-onhold" return True @@ -800,7 +801,7 @@ def _getAnnounceAssistanceTags(self) -> list: self.wfController.logger.info( "The output of this workflow are essentially good to be announced while we work on the rest" ) - return ["announced" if "announced" in self.wfToCheck.status else "announce"] + return ["announced" if "announced" in self.workflow.status else "announce"] return [] @@ -1130,11 +1131,11 @@ def _closeOutWorkflow(self) -> None: """ self.wfController.logger.info("Setting %s as closed-out", self.wf) - if self.wfToCheck.status in ["closed-out", "announced", "normal-archived"]: + if self.workflow.status in ["closed-out", "announced", "normal-archived"]: self.logger.info( "%s is already %s, not trying to close-out as assuming it does", self.wf, - self.wfToCheck.status, + self.workflow.status, ) self.newStatus = "close" return @@ -1202,7 +1203,7 @@ def _setAssistanceStatus(self) -> None: The function to set the workflow to assistance """ assistanceStatus = self._getAssistanceStatus() - if "manual" not in self.wfToCheck.status or assistanceStatus != "assistance-recovery": + if "manual" not in self.workflow.status or assistanceStatus != "assistance-recovery": self.newStatus = assistanceStatus self.logger.info("Ultimate assistance status is: %s", str(assistanceStatus)) @@ -1309,7 +1310,7 @@ def _writeResponse(self) -> dict: """ # TODO: What's the difference between workflow and wf??? 
response = { - "workflow": self.wfToCheck, + "workflow": self.workflow, "wf": self.wf, "failed": self.failed, "isClosing": self.isClosing, From b7ff5dee5294f572b47b93e238b0fc8a5386b462 Mon Sep 17 00:00:00 2001 From: haozturk Date: Tue, 15 Feb 2022 13:43:29 +0300 Subject: [PATCH 14/17] Fix typo --- src/python/Unified/Checkor.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/python/Unified/Checkor.py b/src/python/Unified/Checkor.py index c89065dc1..1ae0bc5fc 100644 --- a/src/python/Unified/Checkor.py +++ b/src/python/Unified/Checkor.py @@ -433,9 +433,13 @@ def _check(self, wfsToCheck: list) -> None: def _checkWorkflow(workflow: list) -> dict: return WorkflowCheckor(workflow).check() + #checkResponses = _checkWorkflow(wfsToCheck) + # self._updateWorkflowsRecords(checkResponses) + # self._updateWorkflowsStatus(checkResponses) + for workflow in wfsToCheck: # TODO: Workflow statuses have been already updated in WorkflowCheckor???? - checkResponses = _checkWorkflow(wfsToCheck) + checkResponses = _checkWorkflow(workflow) self.logger.critical("Response to Checkor:") self.logger.critical(checkResponses) # TODO: The following function updates closeoutInfo table of MongoDB. From c2749c8451eae8ca4a7fc904d7b92c2bee9514ea Mon Sep 17 00:00:00 2001 From: haozturk Date: Tue, 15 Feb 2022 13:46:29 +0300 Subject: [PATCH 15/17] Add a new argument --- src/python/Unified/Checkor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/Unified/Checkor.py b/src/python/Unified/Checkor.py index 1ae0bc5fc..bfbb0cb9f 100644 --- a/src/python/Unified/Checkor.py +++ b/src/python/Unified/Checkor.py @@ -439,7 +439,7 @@ def _checkWorkflow(workflow: list) -> dict: for workflow in wfsToCheck: # TODO: Workflow statuses have been already updated in WorkflowCheckor???? 
- checkResponses = _checkWorkflow(workflow) + checkResponses = _checkWorkflow(workflow=workflow, checkor=self) self.logger.critical("Response to Checkor:") self.logger.critical(checkResponses) # TODO: The following function updates closeoutInfo table of MongoDB. From 7396a5ad0612b494cbcbab3537c68db6efc0689e Mon Sep 17 00:00:00 2001 From: haozturk Date: Tue, 15 Feb 2022 13:50:21 +0300 Subject: [PATCH 16/17] Add a new argument --- src/python/Unified/Helpers/WorkflowCheckor.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/python/Unified/Helpers/WorkflowCheckor.py b/src/python/Unified/Helpers/WorkflowCheckor.py index c50c8fdda..beccfd595 100644 --- a/src/python/Unified/Helpers/WorkflowCheckor.py +++ b/src/python/Unified/Helpers/WorkflowCheckor.py @@ -9,6 +9,7 @@ from Services.Rucio.RucioReader import RucioReader from WorkflowMgmt.WorkflowController import WorkflowController from WorkflowMgmt.WorkflowStatusEnforcer import WorkflowStatusEnforcer +from Unified.Checkor import Checkor from typing import Optional, Tuple @@ -19,7 +20,7 @@ class WorkflowCheckor(): General API for checking a given workflow """ - def __init__(self, workflow: Workflow, logger: Optional[Logger] = None, **kwargs) -> None: + def __init__(self, workflow: Workflow, checkor: Checkor, logger: Optional[Logger] = None, **kwargs) -> None: try: #super().__init__(self) self.logger = logger or getLogger(self.__class__.__name__) @@ -34,7 +35,8 @@ def __init__(self, workflow: Workflow, logger: Optional[Logger] = None, **kwargs # TODO: Check this self.unifiedStatus = workflow.status - self.checkor = kwargs.get("checkor") + #self.checkor = kwargs.get("checkor") + self.checkor = checkor self.rucioReader = RucioReader() # TODO: What's the difference between assistanceTags & existingAssistaceTags? 
From e2dbd07e5155f56e3a3ac1aa37e221b3ed3c36c5 Mon Sep 17 00:00:00 2001 From: haozturk Date: Tue, 5 Apr 2022 15:27:57 +0200 Subject: [PATCH 17/17] Adjust dry run mode --- src/python/Unified/Checkor.py | 8 +++++--- src/python/Unified/Helpers/WorkflowCheckor.py | 3 +-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/python/Unified/Checkor.py b/src/python/Unified/Checkor.py index bfbb0cb9f..580c71df2 100644 --- a/src/python/Unified/Checkor.py +++ b/src/python/Unified/Checkor.py @@ -430,8 +430,8 @@ def _check(self, wfsToCheck: list) -> None: #@runWithMultiThreading(mtParam="wfsToCheck", maxThreads=self.options.get("nThreads")) #def _checkWorkflow(self, wfsToCheck: list) -> dict: #return WorkflowCheckor(wfsToCheck, checkor=self).check() - def _checkWorkflow(workflow: list) -> dict: - return WorkflowCheckor(workflow).check() + def _checkWorkflow(workflow: Workflow, checkor: Checkor) -> dict: + return WorkflowCheckor(workflow, checkor).check() #checkResponses = _checkWorkflow(wfsToCheck) # self._updateWorkflowsRecords(checkResponses) @@ -454,5 +454,7 @@ def _checkWorkflow(workflow: list) -> dict: if __name__ == "__main__": options, specificWf = Checkor.parseOptions() checkor = Checkor(options=options, specificWf=specificWf) - if checkor.go(): + # TODO: Update this in production + #if checkor.go(): + if True: checkor.run() diff --git a/src/python/Unified/Helpers/WorkflowCheckor.py b/src/python/Unified/Helpers/WorkflowCheckor.py index beccfd595..4f5ab3af7 100644 --- a/src/python/Unified/Helpers/WorkflowCheckor.py +++ b/src/python/Unified/Helpers/WorkflowCheckor.py @@ -9,7 +9,6 @@ from Services.Rucio.RucioReader import RucioReader from WorkflowMgmt.WorkflowController import WorkflowController from WorkflowMgmt.WorkflowStatusEnforcer import WorkflowStatusEnforcer -from Unified.Checkor import Checkor from typing import Optional, Tuple @@ -20,7 +19,7 @@ class WorkflowCheckor(): General API for checking a given workflow """ - def __init__(self, workflow: 
Workflow, checkor: Checkor, logger: Optional[Logger] = None, **kwargs) -> None: + def __init__(self, workflow: Workflow, checkor, logger: Optional[Logger] = None, **kwargs) -> None: try: #super().__init__(self) self.logger = logger or getLogger(self.__class__.__name__)