Site Examples

NERSC

NERSC provides High Performance Computing systems to support research in the Office of Science program offices. NERSC has one production HPC system, Perlmutter, and muller, which is the test system for Perlmutter.

Shown below is the buildtest configuration at NERSC. We have defined multiple Slurm executors, along with settings for configuring the compilers that are available on Perlmutter.

system:
  perlmutter:
    description: Cray Shasta system with AMD CPU and NVIDIA A100 GPUs
    hostnames:
    - login(0[3-9]|[1-3][0-9]|40)
    moduletool: lmod
    poolsize: 8
    buildspecs:
      rebuild: false
      count: 15
      format: name,description
      terse: false
    report:
      count: 25
      terse: false
      format: name,id,state,runtime,returncode
    executors:
      defaults:
        pollinterval: 30
        maxpendtime: 86400
      local:
        bash:
          description: submit jobs on local machine using bash shell
          shell: bash
        sh:
          description: submit jobs on local machine using sh shell
          shell: sh
        csh:
          description: submit jobs on local machine using csh shell
          shell: csh
        zsh:
          description: submit jobs on local machine using zsh shell
          shell: zsh
        python:
          description: submit jobs on local machine using python shell
          shell: python
      slurm:
        regular:
          qos: regular
        debug:
          qos: debug
        xfer:
          qos: xfer
        preempt:
          qos: preempt
    compilers:
      purge: false
      enable_prgenv: true
      prgenv_modules:
        gcc: PrgEnv-gnu
        cray: PrgEnv-cray
        nvhpc: PrgEnv-nvidia
      find:
        gcc: ^gcc\/.*
        cray: ^cce\/.*
        nvhpc: ^nvhpc\/.*
      compiler:
        gcc:
          builtin_gcc:
            cc: /usr/bin/gcc
            cxx: /usr/bin/g++
            fc: /usr/bin/gfortran
          gcc/11.2.0:
            cc: cc
            cxx: CC
            fc: ftn
            module:
              load:
              - PrgEnv-gnu
              - gcc/11.2.0
              purge: false
          gcc/10.3.0:
            cc: cc
            cxx: CC
            fc: ftn
            module:
              load:
              - PrgEnv-gnu
              - gcc/10.3.0
              purge: false
          gcc/12.2.0:
            cc: cc
            cxx: CC
            fc: ftn
            module:
              load:
              - PrgEnv-gnu
              - gcc/12.2.0
              purge: false
        cray:
          cce/15.0.1:
            cc: cc
            cxx: CC
            fc: ftn
            module:
              load:
              - PrgEnv-cray
              - cce/15.0.1
              purge: false
          cce/16.0.0:
            cc: cc
            cxx: CC
            fc: ftn
            module:
              load:
              - PrgEnv-cray
              - cce/16.0.0
              purge: false
          cce/16.0.1:
            cc: cc
            cxx: CC
            fc: ftn
            module:
              load:
              - PrgEnv-cray
              - cce/16.0.1
              purge: false
          cce/17.0.0:
            cc: cc
            cxx: CC
            fc: ftn
            module:
              load:
              - PrgEnv-cray
              - cce/17.0.0
              purge: false
        nvhpc:
          nvhpc/22.7:
            cc: cc
            cxx: CC
            fc: ftn
            module:
              load:
              - PrgEnv-nvidia
              - nvhpc/22.7
              purge: false
          nvhpc/23.1:
            cc: cc
            cxx: CC
            fc: ftn
            module:
              load:
              - PrgEnv-nvidia
              - nvhpc/23.1
              purge: false
          nvhpc/23.9:
            cc: cc
            cxx: CC
            fc: ftn
            module:
              load:
              - PrgEnv-nvidia
              - nvhpc/23.9
              purge: false
    cdash:
      url: https://my.cdash.org
      project: buildtest-nersc
      site: perlmutter
  muller:
    description: Muller is TDS system for Perlmutter
    hostnames:
    - login01|login02
    moduletool: lmod
    poolsize: 8
    buildspecs:
      rebuild: false
      count: 15
      format: name,description
      terse: false
    report:
      count: 25
      terse: false
      format: name,id,state,runtime,returncode
    executors:
      defaults:
        pollinterval: 30
        maxpendtime: 86400
      local:
        bash:
          description: submit jobs on local machine using bash shell
          shell: bash
        sh:
          description: submit jobs on local machine using sh shell
          shell: sh
        csh:
          description: submit jobs on local machine using csh shell
          shell: csh
        zsh:
          description: submit jobs on local machine using zsh shell
          shell: zsh
        python:
          description: submit jobs on local machine using python shell
          shell: python
      slurm:
        regular:
          qos: regular
        debug:
          qos: debug
        xfer:
          qos: xfer
        preempt:
          qos: preempt
    compilers:
      purge: false
      enable_prgenv: true
      prgenv_modules:
        gcc: PrgEnv-gnu
        cray: PrgEnv-cray
        nvhpc: PrgEnv-nvidia
      find:
        gcc: ^gcc\/.*
        cray: ^cce\/.*
        nvhpc: ^nvhpc\/.*
      compiler:
        gcc:
          builtin_gcc:
            cc: /usr/bin/gcc
            cxx: /usr/bin/g++
            fc: /usr/bin/gfortran
          gcc/11.2.0:
            cc: cc
            cxx: CC
            fc: ftn
            module:
              load:
              - PrgEnv-gnu
              - gcc/11.2.0
              purge: false
          gcc/10.3.0:
            cc: cc
            cxx: CC
            fc: ftn
            module:
              load:
              - PrgEnv-gnu
              - gcc/10.3.0
              purge: false
          gcc/12.2.0:
            cc: cc
            cxx: CC
            fc: ftn
            module:
              load:
              - PrgEnv-gnu
              - gcc/12.2.0
              purge: false
        cray:
          cce/15.0.1:
            cc: cc
            cxx: CC
            fc: ftn
            module:
              load:
              - PrgEnv-cray
              - cce/15.0.1
              purge: false
          cce/16.0.1:
            cc: cc
            cxx: CC
            fc: ftn
            module:
              load:
              - PrgEnv-cray
              - cce/16.0.1
              purge: false
          cce/16.0.0:
            cc: cc
            cxx: CC
            fc: ftn
            module:
              load:
              - PrgEnv-cray
              - cce/16.0.0
              purge: false
          cce/17.0.0:
            cc: cc
            cxx: CC
            fc: ftn
            module:
              load:
              - PrgEnv-cray
              - cce/17.0.0
              purge: false
        nvhpc:
          nvhpc/22.7:
            cc: cc
            cxx: CC
            fc: ftn
            module:
              load:
              - PrgEnv-nvidia
              - nvhpc/22.7
              purge: false
          nvhpc/23.9:
            cc: cc
            cxx: CC
            fc: ftn
            module:
              load:
              - PrgEnv-nvidia
              - nvhpc/23.9
              purge: false
          nvhpc/23.1:
            cc: cc
            cxx: CC
            fc: ftn
            module:
              load:
              - PrgEnv-nvidia
              - nvhpc/23.1
              purge: false
    cdash:
      url: https://my.cdash.org
      project: buildtest-nersc
      site: muller
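
The executors defined above are referenced in buildspecs by the name <system>.<type>.<name>, for example perlmutter.slurm.debug. Shown below is a minimal buildspec sketch, assuming buildtest's script schema; the test name, sbatch options, and run command are illustrative and not part of the site configuration.

buildspecs:
  hostname_debug_qos:
    type: script
    executor: perlmutter.slurm.debug
    description: run hostname on one node through the debug qos
    sbatch:
    - -N 1
    - -t 10
    run: hostname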

Oak Ridge National Laboratory

Ascent is a training system for Summit at OLCF, which uses IBM Load Sharing Facility (LSF) as its batch scheduler. Ascent has two queues: batch and test. To declare LSF executors, we define them under the lsf section within the executors section.

The default launcher is bsub, which can be defined under defaults. The pollinterval setting controls how often buildtest polls LSF jobs using bjobs (30 seconds in this configuration). The pollinterval accepts a value between 10 and 300 seconds as defined in the schema. In order to avoid polling the scheduler excessively, pick a value that is best suited for your site.

system:
  ascent:
    hostnames:
    - login1.ascent.olcf.ornl.gov
    moduletool: lmod
    poolsize: 8
    max_jobs: 10
    pager: False
    buildspecs:
      rebuild: False
      count: 15
      format: "name,description"
      terse: False
    report:
      count: 25
      terse: False
      format: "name,id,state,runtime,returncode"
    executors:
      defaults:
        pollinterval: 30
        maxpendtime: 300
        account: gen014ecpci
      local:
        bash:
          description: submit jobs on local machine using bash shell
          shell: bash
        sh:
          description: submit jobs on local machine using sh shell
          shell: sh
        csh:
          description: submit jobs on local machine using csh shell
          shell: csh
        python:
          description: submit jobs on local machine using python shell
          shell: python
      lsf:
        batch:
          queue: batch
    compilers:
      find:
        gcc: '^(gcc)'
      compiler:
        gcc:
          builtin_gcc:
            cc: /usr/bin/gcc
            cxx: /usr/bin/g++
            fc: /usr/bin/gfortran
          gcc/9.3.0:
            cc: gcc
            cxx: g++
            fc: gfortran
            module:
              load:
              - gcc/9.3.0
              purge: false
          gcc/11.1.0:
            cc: gcc
            cxx: g++
            fc: gfortran
            module:
              load:
              - gcc/11.1.0
              purge: false
          gcc/7.5.0:
            cc: gcc
            cxx: g++
            fc: gfortran
            module:
              load:
              - gcc/7.5.0
              purge: false
          gcc/12.1.0:
            cc: gcc
            cxx: g++
            fc: gfortran
            module:
              load:
              - gcc/12.1.0
              purge: false
          gcc/11.2.0:
            cc: gcc
            cxx: g++
            fc: gfortran
            module:
              load:
              - gcc/11.2.0
              purge: false
          gcc/10.2.0:
            cc: gcc
            cxx: g++
            fc: gfortran
            module:
              load:
              - gcc/10.2.0
              purge: false
          gcc/9.1.0:
            cc: gcc
            cxx: g++
            fc: gfortran
            module:
              load:
              - gcc/9.1.0
              purge: false
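
A buildspec can target the LSF executor defined above by referencing it as ascent.lsf.batch. The following is a minimal sketch, assuming buildtest's script schema; the test name, bsub options, and run command are illustrative.

buildspecs:
  hostname_ascent_batch:
    type: script
    executor: ascent.lsf.batch
    description: run hostname through the batch queue
    bsub:
    - -W 10
    - -nnodes 1
    run: hostname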

Argonne National Laboratory

The Joint Laboratory for System Evaluation (JLSE) provides a testbed of emerging HPC systems. The default scheduler is Cobalt, which is configured in the cobalt section within the executors field.

The default launcher for Cobalt executors is qsub, which is inherited by all batch executors. In each cobalt executor the queue property specifies the queue to which jobs are submitted; for instance, the executor testing with queue: testing will submit jobs using qsub -q testing.

system:
  jlse:
    # hostnames on JLSE where jobs are run are jlsebatch[1-2]
    hostnames: ['^jlsebatch\d{1}$']
    moduletool: environment-modules
    poolsize: 8
    max_jobs: 10
    pager: False
    buildspecs:
      rebuild: False
      count: 15
      format: "name,description"
      terse: False
    report:
      count: 25
      terse: False
      format: "name,id,state,runtime,returncode"
    executors:
      defaults:
        pollinterval: 30
        maxpendtime: 300
      local:
        bash:
          description: submit jobs on local machine using bash shell
          shell: bash
        sh:
          description: submit jobs on local machine using sh shell
          shell: sh
        csh:
          description: submit jobs on local machine using csh shell
          shell: csh
        python:
          description: submit jobs on local machine using python shell
          shell: python
      cobalt:
        testing:
          queue: testing
    compilers:
      find:
        gcc: "^(gcc)"
      compiler:
        gcc:
          builtin_gcc:
            cc: /usr/bin/gcc
            cxx: /usr/bin/g++
            fc: /usr/bin/gfortran
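
A buildspec can target the Cobalt executor defined above by referencing it as jlse.cobalt.testing. The following is a minimal sketch, assuming buildtest's script schema; the test name and run command are illustrative.

buildspecs:
  hostname_jlse_testing:
    type: script
    executor: jlse.cobalt.testing
    description: run hostname through the testing queue
    run: hostname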