# Source code for buildtest.system

"""
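This module detects the system configuration (platform, operating system,
user, Python interpreter, module tool and batch schedulers) through the class
BuildTestSystem.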
"""

import getpass
import json
import logging
import os
import platform
import shutil
import sys

import distro
from buildtest.defaults import BUILDTEST_ROOT
from buildtest.utils.command import BuildTestCommand


class BuildTestSystem:
    """BuildTestSystem is a class that detects system configuration.

    The detected values are stored in the dictionary ``system``. It is empty
    until :meth:`check` is called, which fills in the keys ``platform``,
    ``os``, ``user``, ``python``, ``pyver``, ``processor``, ``host``,
    ``machine``, ``moduletool`` and ``scheduler``.
    """

    # NOTE: this is a class-level dictionary, so it is shared by every
    # instance of BuildTestSystem rather than being per-instance state.
    system = {}

    def __init__(self):
        """Constructor method for BuildTestSystem(). It only sets up the
        logger; the system configuration is stored in class variable
        **system**, a dictionary that is populated by :meth:`check`.
        """

        self.logger = logging.getLogger(__name__)

    def get(self):
        """Return the ``system`` dictionary holding the detected configuration.

        :return: the dictionary itself (not a copy)
        :rtype: dict
        """
        return self.system

    def check(self):
        """Detect system details such as platform, linux distro (via the
        module "distro"), user, python interpreter, processor, hostname and
        machine type, then detect the module tool and batch schedulers.

        The process is terminated with exit code 1 if the platform is neither
        Linux nor Darwin.
        """

        self.logger.debug("Starting System Compatibility Check")

        self.system["platform"] = platform.system()
        if self.system["platform"] not in ["Linux", "Darwin"]:
            print("System must be Linux or Darwin")
            sys.exit(1)

        self.system["os"] = distro.id()
        self.system["user"] = getpass.getuser()
        # shutil.which returns None when no "python" executable is in $PATH
        self.system["python"] = shutil.which("python")
        self.system["pyver"] = platform.python_version()
        self.system["processor"] = platform.processor()
        self.system["host"] = platform.node()
        self.system["machine"] = platform.machine()

        self.logger.debug(f"Machine: {self.system['machine']}")
        self.logger.debug(f"Host: {self.system['host']}")
        self.logger.debug(f"User: {self.system['user']}")
        self.logger.debug(f"Operating System: {self.system['os']}")
        self.logger.debug(f"Python Path: {self.system['python']}")
        self.logger.debug(f"Python Version: {self.system['pyver']}")
        self.logger.debug(f"BUILDTEST_ROOT: {BUILDTEST_ROOT}")
        self.logger.debug(f"Path to Buildtest: {shutil.which('buildtest')}")

        self.detect_module_tool()
        self.check_scheduler()

        self.logger.debug("Finished System Compatibility Check")

    def check_scheduler(self):
        """Check existence of batch scheduler and if so determine which scheduler
        it is. Currently we support Slurm, LSF, Cobalt and PBS; we instantiate
        each class and inspect its ``state``. The checks determine if the
        scheduler binaries exist in $PATH.

        The names of all detected schedulers are stored as a list in
        ``system["scheduler"]``; the list is empty when none is detected.
        """

        # Instantiate every scheduler class up front; each constructor checks
        # $PATH for its binaries and records the outcome in ``state``.
        schedulers = [
            ("slurm", "Slurm", Slurm()),
            ("lsf", "LSF", LSF()),
            ("cobalt", "Cobalt", Cobalt()),
            ("pbs", "PBS", PBS()),
        ]

        # the "scheduler" property is used with run_only section in buildspecs for
        # running test based on scheduler.
        self.system["scheduler"] = []

        for key, label, scheduler in schedulers:
            if scheduler.state:
                self.logger.debug(f"Detected {label} Scheduler")
                self.system["scheduler"].append(key)

    def detect_module_tool(self):
        """Check if module tool exists, we check for Lmod or environment-modules by
        checking if environment variable ``LMOD_VERSION``, ``MODULE_VERSION`` or
        ``MODULES_CMD`` exist. We check this with input specification in buildtest
        configuration. If user specifies lmod as the module tool but detected
        environment-modules, buildtest should pick this up and report this as part
        of configuration check.

        The result is stored in ``system["moduletool"]`` as ``"lmod"``,
        ``"environment-modules"`` or ``None`` when neither is detected.
        """

        self.system["moduletool"] = None
        lmod_version = os.getenv("LMOD_VERSION")
        environmodules_version = os.getenv("MODULE_VERSION") or os.getenv("MODULES_CMD")

        if lmod_version:
            self.system["moduletool"] = "lmod"
            self.logger.debug(f"Detected Lmod with version: {lmod_version}")
        # 3.x module versions define MODULE_VERSION while 4.5 version has MODULES_CMD, it doesn't have MODULE_VERSION set
        # NOTE(review): this is a second independent ``if``, so when the Lmod
        # and environment-modules variables are both set, "environment-modules"
        # overrides "lmod" -- confirm this precedence is intended.
        if environmodules_version:
            self.system["moduletool"] = "environment-modules"
            self.logger.debug(
                f"Detected environment-modules with version: {environmodules_version}"
            )


class Scheduler:
    """This is a base Scheduler class used for implementing common methods for
    detecting Scheduler details. The subclasses implement the queries that
    are scheduler specific. The ``Slurm``, ``LSF``, ``PBS`` and ``Cobalt`` class inherit
    from Base Class ``Scheduler``.

    Subclasses must define a ``binaries`` attribute listing the commands that
    :meth:`check` looks up; the base class does not define it.
    """

    logger = logging.getLogger(__name__)

    def check(self):
        """Check if all scheduler binaries exist in $PATH.

        :return: ``True`` if every command in ``self.binaries`` is found,
            ``False`` as soon as one command is missing
        :rtype: bool
        """

        for command in self.binaries:
            if not shutil.which(command):
                self.logger.debug(f"Cannot find {command} command in $PATH")
                return False

        return True


class Slurm(Scheduler):
    """The Slurm class implements common functions to query Slurm cluster
    including partitions, qos, cluster. We check existence of slurm binaries
    in $PATH and record in ``state`` whether slurm was detected.

    The attributes ``partitions``, ``clusters`` and ``qos`` are only set when
    ``state`` is ``True``.
    """

    # specify a set of Slurm commands to check for file existence
    binaries = ["sbatch", "sacct", "sacctmgr", "sinfo", "scancel"]

    def __init__(self):

        self.logger = logging.getLogger(__name__)

        self.state = self.check()

        # retrieve slurm partitions, qos, and cluster only if slurm is detected.
        if self.state:
            self.partitions = self._get_partitions()
            self.clusters = self._get_clusters()
            self.qos = self._get_qos()

    def _run_query(self, query, description):
        """Run ``query`` and return its output with trailing whitespace
        stripped from every entry.

        :param query: the shell command to execute
        :param description: name of what is queried, used in the debug message
        :return: list of output entries
        :rtype: list
        """
        cmd = BuildTestCommand(query)
        cmd.execute()
        # presumably get_output() yields one string per output line -- verify
        # against BuildTestCommand
        out = cmd.get_output()

        self.logger.debug(f"Get all {description} by running: {query}")
        return [entry.rstrip() for entry in out]

    def _get_partitions(self):
        """Get list of all slurm partitions using ``sinfo -a -h -O partitionname``. The output
        is a list of queue names

        .. code-block:: console

             $ sinfo -a -h -O partitionname
             system
             system_shared
             debug_hsw
             debug_knl
             jupyter

        """
        return self._run_query("sinfo -a -h -O partitionname", "Slurm Partitions")

    def _get_clusters(self):
        """Get list of slurm clusters by running ``sacctmgr list cluster -P -n format=Cluster``.
        The output is a list of slurm clusters something as follows

        .. code-block:: console

             $ sacctmgr list cluster -P -n format=Cluster
             cori
             escori

        """
        return self._run_query(
            "sacctmgr list cluster -P -n format=Cluster", "Slurm Clusters"
        )

    def _get_qos(self):
        """Retrieve a list of all slurm qos by running ``sacctmgr list qos -P -n  format=Name``. The output
        is a list of qos. Shown below is an example output

        .. code-block:: console

            $ sacctmgr list qos -P -n  format=Name
            normal
            premium
            low
            serialize
            scavenger

        """
        return self._run_query(
            "sacctmgr list qos -P -n  format=Name", "Slurm Quality of Service (QOS)"
        )


class LSF(Scheduler):
    """The LSF class checks for LSF binaries and retrieves the LSF queues.

    The attribute ``queues`` is only set when ``state`` is ``True``.
    """

    # specify a set of LSF commands to check for file existence
    binaries = ["bsub", "bqueues", "bkill", "bjobs"]

    def __init__(self):

        self.logger = logging.getLogger(__name__)

        self.state = self.check()

        # retrieve LSF queues if LSF is detected
        if self.state:
            self.queues = self._get_queues()

    def _get_queues(self):
        """Return json dictionary of available LSF Queues and their queue states.
        The command we run is the following: ``bqueues -o 'queue_name status' -json`` which
        returns a JSON record of all queue details.

        .. code-block:: console

            $ bqueues -o 'queue_name status' -json
                {
                  "COMMAND":"bqueues",
                  "QUEUES":2,
                  "RECORDS":[
                    {
                      "QUEUE_NAME":"batch",
                      "STATUS":"Open:Active"
                    },
                    {
                      "QUEUE_NAME":"test",
                      "STATUS":"Open:Active"
                    }
                  ]
                }

        :return: the decoded JSON document, or ``None`` if the command
            produced no output
        """

        query = "bqueues -o 'queue_name status' -json"
        cmd = BuildTestCommand(query)
        cmd.execute()
        out = cmd.get_output()

        self.logger.debug(f"Get all LSF Queues by running {query}")
        if not out:
            return None

        # join the output entries into one string and decode it as JSON
        return json.loads("".join(out).rstrip())


class Cobalt(Scheduler):
    """The Cobalt class checks for Cobalt binaries and gets a list of Cobalt queues.

    The attribute ``queues`` is only set when ``state`` is ``True``.
    """

    # specify a set of Cobalt commands to check for file existence
    binaries = ["qsub", "qstat", "qdel", "nodelist", "showres", "partlist"]

    def __init__(self):
        self.logger = logging.getLogger(__name__)

        self.state = self.check()

        if self.state:
            self.queues = self._get_queues()

    def _get_queues(self):
        """Get all Cobalt queues by running ``qstat -Ql`` and parsing output.

        Every output line starting with ``Name`` contributes one queue: the
        text after the first ``:`` with surrounding whitespace stripped.

        :return: list of queue names
        :rtype: list
        """

        query = "qstat -Ql"
        cmd = BuildTestCommand(query)
        cmd.execute()
        content = cmd.get_output()

        self.logger.debug(f"Get all Cobalt Queues by running {query}")
        # skip falsy entries (None and empty strings) before inspecting a line
        return [
            line.partition(":")[2].strip()
            for line in content
            if line and line.startswith("Name")
        ]


class PBS(Scheduler):
    """The PBS class checks for PBS binaries and gets a list of PBS queues.

    The attributes ``queue_summary`` and ``queues`` are only set when
    ``state`` is ``True``.
    """

    # specify a set of PBS commands to check for file existence
    binaries = ["qsub", "qstat", "qdel", "qstart", "qhold", "qmgr"]

    def __init__(self):
        self.logger = logging.getLogger(__name__)

        self.state = self.check()
        if self.state:
            self._get_queues()

    def _get_queues(self):
        """Get queue configuration using ``qstat -Q -f -F json`` and retrieve a
        list of queues.

        The decoded JSON document is stored in ``self.queue_summary`` and the
        keys of its ``"Queue"`` entry are stored as a list in ``self.queues``.
        Nothing is returned.
        """
        query = "qstat -Q -f -F json"
        cmd = BuildTestCommand(query)
        cmd.execute()
        content = cmd.get_output()

        self.logger.debug(f"Get PBS Queues details by running {query}")
        self.queue_summary = json.loads(" ".join(content))
        self.logger.debug(json.dumps(self.queue_summary, indent=2))

        # iterating a dictionary yields its keys, i.e. the queue names
        queues = list(self.queue_summary["Queue"])
        self.logger.debug(f"Available Queues: {queues}")

        self.queues = queues


# Module-level BuildTestSystem instance. The check runs as a side effect of
# importing this module, so ``system`` is already populated for importers.
# Note that check() exits the process on platforms other than Linux or Darwin.
system = BuildTestSystem()
system.check()