!--------------------------------------------------------------------------------------------------!
!   CP2K: A general program to perform molecular dynamics simulations                              !
!   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
!                                                                                                  !
!   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
!--------------------------------------------------------------------------------------------------!

! **************************************************************************************************
MODULE cp2k_runs
   USE atom,                            ONLY: atom_code
   USE bibliography,                    ONLY: Hutter2014,&
                                              cite_reference
   USE bsse,                            ONLY: do_bsse_calculation
   USE cell_opt,                        ONLY: cp_cell_opt
   USE cp2k_debug,                      ONLY: cp2k_debug_energy_and_forces
   USE cp2k_info,                       ONLY: compile_date,&
                                              compile_revision,&
                                              cp2k_version,&
                                              cp2k_year
   USE cp_control_types,                ONLY: dft_control_type
   USE cp_dbcsr_api,                    ONLY: dbcsr_finalize_lib,&
                                              dbcsr_init_lib,&
                                              dbcsr_print_config,&
                                              dbcsr_print_statistics
   USE cp_dbcsr_cp2k_link,              ONLY: cp_dbcsr_config
   USE cp_files,                        ONLY: close_file,&
                                              open_file
   USE cp_log_handling,                 ONLY: cp_get_default_logger,&
                                              cp_logger_get_default_io_unit,&
                                              cp_logger_type,&
                                              cp_logger_would_log,&
                                              cp_note_level
   USE cp_output_handling,              ONLY: cp_add_iter_level,&
                                              cp_print_key_finished_output,&
                                              cp_print_key_unit_nr,&
                                              cp_rm_iter_level
   USE cp_parser_methods,               ONLY: parser_search_string
   USE cp_parser_types,                 ONLY: cp_parser_type,&
                                              parser_create,&
                                              parser_release
   USE cp_units,                        ONLY: cp_unit_set_create,&
                                              cp_unit_set_release,&
                                              cp_unit_set_type,&
                                              export_units_as_xml
   USE dbm_api,                         ONLY: dbm_library_print_stats
   USE environment,                     ONLY: cp2k_finalize,&
                                              cp2k_init,&
                                              cp2k_read,&
                                              cp2k_setup
   USE f77_interface,                   ONLY: create_force_env,&
                                              destroy_force_env,&
                                              f77_default_para_env => default_para_env,&
                                              f_env_add_defaults,&
                                              f_env_rm_defaults,&
                                              f_env_type
   USE farming_methods,                 ONLY: do_deadlock,&
                                              do_nothing,&
                                              do_wait,&
                                              farming_parse_input,&
                                              get_next_job
   USE farming_types,                   ONLY: deallocate_farming_env,&
                                              farming_env_type,&
                                              init_farming_env,&
                                              job_finished,&
                                              job_running
   USE force_env_methods,               ONLY: force_env_calc_energy_force
   USE force_env_types,                 ONLY: force_env_get,&
                                              force_env_type
   USE geo_opt,                         ONLY: cp_geo_opt
   USE global_types,                    ONLY: global_environment_type,&
                                              globenv_create,&
                                              globenv_release
   USE grid_api,                        ONLY: grid_library_print_stats,&
                                              grid_library_set_config
   USE input_constants,                 ONLY: &
        bsse_run, cell_opt_run, debug_run, do_atom, do_band, do_cp2k, do_embed, do_farming, &
        do_fist, do_ipi, do_mixed, do_nnp, do_opt_basis, do_optimize_input, do_qmmm, do_qs, &
        do_sirius, do_swarm, do_tamc, do_test, do_tree_mc, do_tree_mc_ana, driver_run, ehrenfest, &
        electronic_spectra_run, energy_force_run, energy_run, geo_opt_run, linear_response_run, &
        mol_dyn_run, mon_car_run, negf_run, none_run, pint_run, real_time_propagation, &
        rtp_method_bse, tree_mc_run, vib_anal
   USE input_cp2k,                      ONLY: create_cp2k_root_section
   USE input_cp2k_check,                ONLY: check_cp2k_input
   USE input_cp2k_global,               ONLY: create_global_section
   USE input_cp2k_read,                 ONLY: read_input
   USE input_keyword_types,             ONLY: keyword_release
   USE input_parsing,                   ONLY: section_vals_parse
   USE input_section_types,             ONLY: &
        section_release, section_type, section_vals_create, section_vals_get_subs_vals, &
        section_vals_release, section_vals_retain, section_vals_type, section_vals_val_get, &
        section_vals_write, write_section_xml
   USE ipi_driver,                      ONLY: run_driver
   USE kinds,                           ONLY: default_path_length,&
                                              default_string_length,&
                                              dp,&
                                              int_8
   USE library_tests,                   ONLY: lib_test
   USE machine,                         ONLY: default_output_unit,&
                                              m_chdir,&
                                              m_flush,&
                                              m_getcwd,&
                                              m_memory,&
                                              m_memory_max,&
                                              m_walltime
   USE mc_run,                          ONLY: do_mon_car
   USE md_run,                          ONLY: qs_mol_dyn
   USE message_passing,                 ONLY: mp_any_source,&
                                              mp_comm_type,&
                                              mp_para_env_release,&
                                              mp_para_env_type
   USE mscfg_methods,                   ONLY: do_mol_loop,&
                                              loop_over_molecules
   USE neb_methods,                     ONLY: neb
   USE negf_methods,                    ONLY: do_negf
   USE offload_api,                     ONLY: offload_get_chosen_device,&
                                              offload_get_device_count
   USE optimize_basis,                  ONLY: run_optimize_basis
   USE optimize_input,                  ONLY: run_optimize_input
   USE pint_methods,                    ONLY: do_pint_run
   USE qs_environment_types,            ONLY: get_qs_env
   USE qs_linres_module,                ONLY: linres_calculation
   USE qs_tddfpt_module,                ONLY: tddfpt_calculation
   USE reference_manager,               ONLY: export_references_as_xml
   USE rt_bse,                          ONLY: run_propagation_bse
   USE rt_propagation,                  ONLY: rt_prop_setup
   USE swarm,                           ONLY: run_swarm
   USE tamc_run,                        ONLY: qs_tamc
   USE tmc_setup,                       ONLY: do_analyze_files,&
                                              do_tmc
   USE vibrational_analysis,            ONLY: vb_anal
#include "../base/base_uses.f90"

   IMPLICIT NONE

   PRIVATE

   PUBLIC :: write_xml_file, run_input

   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp2k_runs'

CONTAINS

! **************************************************************************************************
!> \brief performs an instance of a cp2k run
!> \param input_declaration ...
!> \param input_file_name name of the file to be opened for input
!> \param output_unit unit to which output should be written
!> \param mpi_comm ...
!> \param initial_variables key-value list of initial preprocessor variables
!> \author Joost VandeVondele
!> \note
!>      para_env should be a valid communicator
!>      output_unit should be writeable by at least the lowest rank of the mpi group
!>
!>      recursive because a given run_type might need to be able to perform
!>      another cp2k_run as part of its job (e.g. farming, classical equilibration, ...)
!>
!>      the idea is that a cp2k instance should be able to run with just three
!>      arguments, i.e. a given input file, output unit, mpi communicator.
!>      giving these three to cp2k_run should produce a valid run.
!>      the only task of the PROGRAM cp2k is to create valid instances of the
!>      above arguments. Ideally, anything that is called afterwards should be
!>      able to run simultaneously / multithreaded / sequential / parallel / ...
!>      and able to fail safe
! **************************************************************************************************
   RECURSIVE SUBROUTINE cp2k_run(input_declaration, input_file_name, output_unit, mpi_comm, initial_variables)
      TYPE(section_type), POINTER                        :: input_declaration
      CHARACTER(LEN=*), INTENT(IN)                       :: input_file_name
      INTEGER, INTENT(IN)                                :: output_unit

      CLASS(mp_comm_type)                                 :: mpi_comm
      CHARACTER(len=default_path_length), &
         DIMENSION(:, :), INTENT(IN)                     :: initial_variables

      INTEGER                                            :: f_env_handle, grid_backend, ierr, &
                                                            iter_level, method_name_id, &
                                                            new_env_id, prog_name_id, run_type_id
#if defined(__DBCSR_ACC)
      INTEGER, TARGET                                    :: offload_chosen_device
#endif
      INTEGER, POINTER                                   :: active_device_id
      INTEGER(KIND=int_8)                                :: m_memory_max_mpi
      LOGICAL                                            :: echo_input, grid_apply_cutoff, &
                                                            grid_validate, I_was_ionode
      TYPE(cp_logger_type), POINTER                      :: logger, sublogger
      TYPE(mp_para_env_type), POINTER                    :: para_env
      TYPE(dft_control_type), POINTER                    :: dft_control
      TYPE(f_env_type), POINTER                          :: f_env
      TYPE(force_env_type), POINTER                      :: force_env
      TYPE(global_environment_type), POINTER             :: globenv
      TYPE(section_vals_type), POINTER                   :: glob_section, input_file, root_section

      NULLIFY (para_env, f_env, dft_control, active_device_id)
      ALLOCATE (para_env)
      para_env = mpi_comm

#if defined(__DBCSR_ACC)
      IF (offload_get_device_count() > 0) THEN
         offload_chosen_device = offload_get_chosen_device()
         active_device_id => offload_chosen_device
      END IF
#endif
      CALL dbcsr_init_lib(mpi_comm%get_handle(), io_unit=output_unit, &
                          accdrv_active_device_id=active_device_id)

      NULLIFY (globenv, force_env)

      CALL cite_reference(Hutter2014)

      ! parse the input
      input_file => read_input(input_declaration, input_file_name, initial_variables=initial_variables, &
                               para_env=para_env)

      CALL para_env%sync()

      glob_section => section_vals_get_subs_vals(input_file, "GLOBAL")
      CALL section_vals_val_get(glob_section, "ECHO_INPUT", l_val=echo_input)
      logger => cp_get_default_logger()
      IF (echo_input) THEN
         CALL section_vals_write(input_file, &
                                 unit_nr=cp_logger_get_default_io_unit(logger), &
                                 hide_root=.TRUE., hide_defaults=.FALSE.)
      END IF

      CALL check_cp2k_input(input_declaration, input_file, para_env=para_env, output_unit=output_unit)
      root_section => input_file
      CALL section_vals_val_get(input_file, "GLOBAL%PROGRAM_NAME", &
                                i_val=prog_name_id)
      CALL section_vals_val_get(input_file, "GLOBAL%RUN_TYPE", &
                                i_val=run_type_id)
      CALL section_vals_val_get(root_section, "FORCE_EVAL%METHOD", i_val=method_name_id)

      IF (prog_name_id /= do_cp2k) THEN
         ! initial setup (cp2k does in in the creation of the force_env)
         CALL globenv_create(globenv)
         CALL section_vals_retain(input_file)
         CALL cp2k_init(para_env, output_unit, globenv, input_file_name=input_file_name)
         CALL cp2k_read(root_section, para_env, globenv)
         CALL cp2k_setup(root_section, para_env, globenv)
      END IF

      CALL cp_dbcsr_config(root_section)
      IF (output_unit > 0 .AND. &
          cp_logger_would_log(logger, cp_note_level)) THEN
         CALL dbcsr_print_config(unit_nr=output_unit)
         WRITE (UNIT=output_unit, FMT='()')
      END IF

      ! Configure the grid library.
      CALL section_vals_val_get(root_section, "GLOBAL%GRID%BACKEND", i_val=grid_backend)
      CALL section_vals_val_get(root_section, "GLOBAL%GRID%VALIDATE", l_val=grid_validate)
      CALL section_vals_val_get(root_section, "GLOBAL%GRID%APPLY_CUTOFF", l_val=grid_apply_cutoff)

      CALL grid_library_set_config(backend=grid_backend, &
                                   validate=grid_validate, &
                                   apply_cutoff=grid_apply_cutoff)

      SELECT CASE (prog_name_id)
      CASE (do_atom)
         globenv%run_type_id = none_run
         CALL atom_code(root_section)
      CASE (do_optimize_input)
         CALL run_optimize_input(input_declaration, root_section, para_env)
      CASE (do_swarm)
         CALL run_swarm(input_declaration, root_section, para_env, globenv, input_file_name)
      CASE (do_farming) ! TODO: refactor cp2k's startup code
         CALL dbcsr_finalize_lib()
         CALL farming_run(input_declaration, root_section, para_env, initial_variables)
         CALL dbcsr_init_lib(mpi_comm%get_handle(), io_unit=output_unit, &
                             accdrv_active_device_id=active_device_id)
      CASE (do_opt_basis)
         CALL run_optimize_basis(input_declaration, root_section, para_env)
         globenv%run_type_id = none_run
      CASE (do_cp2k)
         CALL create_force_env(new_env_id, &
                               input_declaration=input_declaration, &
                               input_path=input_file_name, &
                               output_path="__STD_OUT__", mpi_comm=para_env, &
                               output_unit=output_unit, &
                               owns_out_unit=.FALSE., &
                               input=input_file, ierr=ierr)
         CPASSERT(ierr == 0)
         CALL f_env_add_defaults(new_env_id, f_env, handle=f_env_handle)
         force_env => f_env%force_env
         CALL force_env_get(force_env, globenv=globenv)
      CASE (do_test)
         CALL lib_test(root_section, para_env, globenv)
      CASE (do_tree_mc) ! TMC entry point
         CALL do_tmc(input_declaration, root_section, para_env, globenv)
      CASE (do_tree_mc_ana)
         CALL do_analyze_files(input_declaration, root_section, para_env)
      CASE default
         CPABORT("")
      END SELECT
      CALL section_vals_release(input_file)

      SELECT CASE (globenv%run_type_id)
      CASE (pint_run)
         CALL do_pint_run(para_env, root_section, input_declaration, globenv)
      CASE (none_run, tree_mc_run)
         ! do nothing
      CASE (driver_run)
         CALL run_driver(force_env, globenv)
      CASE (energy_run, energy_force_run)
         IF (method_name_id /= do_qs .AND. &
             method_name_id /= do_sirius .AND. &
             method_name_id /= do_qmmm .AND. &
             method_name_id /= do_mixed .AND. &
             method_name_id /= do_nnp .AND. &
             method_name_id /= do_embed .AND. &
             method_name_id /= do_fist .AND. &
             method_name_id /= do_ipi) &
            CPABORT("Energy/Force run not available for all methods ")

         sublogger => cp_get_default_logger()
         CALL cp_add_iter_level(sublogger%iter_info, "JUST_ENERGY", &
                                n_rlevel_new=iter_level)

         ! loop over molecules to generate a molecular guess
         ! this procedure is initiated here to avoid passing globenv deep down
         ! the subroutine stack
         IF (do_mol_loop(force_env=force_env)) &
            CALL loop_over_molecules(globenv, force_env)

         SELECT CASE (globenv%run_type_id)
         CASE (energy_run)
            CALL force_env_calc_energy_force(force_env, calc_force=.FALSE.)
         CASE (energy_force_run)
            CALL force_env_calc_energy_force(force_env, calc_force=.TRUE.)
         CASE default
            CPABORT("")
         END SELECT
         CALL cp_rm_iter_level(sublogger%iter_info, level_name="JUST_ENERGY", n_rlevel_att=iter_level)
      CASE (mol_dyn_run)
         CALL qs_mol_dyn(force_env, globenv)
      CASE (geo_opt_run)
         CALL cp_geo_opt(force_env, globenv)
      CASE (cell_opt_run)
         CALL cp_cell_opt(force_env, globenv)
      CASE (mon_car_run)
         CALL do_mon_car(force_env, globenv, input_declaration, input_file_name)
      CASE (do_tamc)
         CALL qs_tamc(force_env, globenv)
      CASE (electronic_spectra_run)
         IF (method_name_id /= do_qs) &
            CPABORT("Electron spectra available only with Quickstep. ")
         CALL force_env_calc_energy_force(force_env, calc_force=.FALSE.)
         CALL tddfpt_calculation(force_env%qs_env)
      CASE (real_time_propagation)
         IF (method_name_id /= do_qs) &
            CPABORT("Real time propagation needs METHOD QS. ")
         CALL get_qs_env(force_env%qs_env, dft_control=dft_control)
         dft_control%rtp_control%fixed_ions = .TRUE.
         SELECT CASE (dft_control%rtp_control%rtp_method)
         CASE (rtp_method_bse)
            ! Run the TD-BSE method
            CALL run_propagation_bse(force_env%qs_env, force_env)
         CASE default
            ! Run the TDDFT method
            CALL rt_prop_setup(force_env)
         END SELECT
      CASE (ehrenfest)
         IF (method_name_id /= do_qs) &
            CPABORT("Ehrenfest dynamics needs METHOD QS ")
         CALL get_qs_env(force_env%qs_env, dft_control=dft_control)
         dft_control%rtp_control%fixed_ions = .FALSE.
         CALL qs_mol_dyn(force_env, globenv)
      CASE (bsse_run)
         CALL do_bsse_calculation(force_env, globenv)
      CASE (linear_response_run)
         IF (method_name_id /= do_qs .AND. &
             method_name_id /= do_qmmm) &
            CPABORT("Property calculations by Linear Response only within the QS or QMMM program ")
         ! The Ground State is needed, it can be read from Restart
         CALL force_env_calc_energy_force(force_env, calc_force=.FALSE., linres=.TRUE.)
         CALL linres_calculation(force_env)
      CASE (debug_run)
         SELECT CASE (method_name_id)
         CASE (do_qs, do_qmmm, do_fist)
            CALL cp2k_debug_energy_and_forces(force_env)
         CASE DEFAULT
            CPABORT("Debug run available only with QS, FIST, and QMMM program ")
         END SELECT
      CASE (vib_anal)
         CALL vb_anal(root_section, input_declaration, para_env, globenv)
      CASE (do_band)
         CALL neb(root_section, input_declaration, para_env, globenv)
      CASE (negf_run)
         CALL do_negf(force_env)
      CASE default
         CPABORT("")
      END SELECT

      !sample peak memory
      CALL m_memory()

      CALL dbcsr_print_statistics()
      CALL dbm_library_print_stats(mpi_comm=mpi_comm, output_unit=output_unit)
      CALL grid_library_print_stats(mpi_comm=mpi_comm, output_unit=output_unit)

      m_memory_max_mpi = m_memory_max
      CALL mpi_comm%max(m_memory_max_mpi)
      IF (output_unit > 0) THEN
         WRITE (output_unit, *)
         WRITE (output_unit, '(T2,"MEMORY| Estimated peak process memory [MiB]",T73,I8)') &
            (m_memory_max_mpi + (1024*1024) - 1)/(1024*1024)
      END IF

      IF (prog_name_id == do_cp2k) THEN
         f_env%force_env => force_env ! for mc
         IF (ASSOCIATED(force_env%globenv)) THEN
         IF (.NOT. ASSOCIATED(force_env%globenv, globenv)) THEN
            CALL globenv_release(force_env%globenv) !mc
         END IF
         END IF
         force_env%globenv => globenv !mc
         CALL f_env_rm_defaults(f_env, ierr=ierr, &
                                handle=f_env_handle)
         CPASSERT(ierr == 0)
         CALL destroy_force_env(new_env_id, ierr=ierr)
         CPASSERT(ierr == 0)
      ELSE
         I_was_ionode = para_env%is_source()
         CALL cp2k_finalize(root_section, para_env, globenv)
         CPASSERT(globenv%ref_count == 1)
         CALL section_vals_release(root_section)
         CALL globenv_release(globenv)
      END IF

      CALL dbcsr_finalize_lib()

      CALL mp_para_env_release(para_env)

   END SUBROUTINE cp2k_run

! **************************************************************************************************
!> \brief performs a farming run that performs several independent cp2k_runs
!> \param input_declaration ...
!> \param root_section ...
!> \param para_env ...
!> \param initial_variables ...
!> \author Joost VandeVondele
!> \note
!>      needs to be part of this module as the cp2k_run -> farming_run -> cp2k_run
!>      calling style creates a hard circular dependency
! **************************************************************************************************
   RECURSIVE SUBROUTINE farming_run(input_declaration, root_section, para_env, initial_variables)
      TYPE(section_type), POINTER              :: input_declaration
      TYPE(section_vals_type), POINTER         :: root_section
      TYPE(mp_para_env_type), POINTER          :: para_env
      CHARACTER(len=default_path_length), DIMENSION(:, :), INTENT(IN) :: initial_variables

      CHARACTER(len=*), PARAMETER :: routineN = 'farming_run'
      INTEGER, PARAMETER                       :: minion_status_done = -3, &
                                                  minion_status_wait = -4

      CHARACTER(len=7)                         :: label
      CHARACTER(LEN=default_path_length)       :: output_file
      CHARACTER(LEN=default_string_length)     :: str
      INTEGER :: dest, handle, i, i_job_to_restart, ierr, ijob, ijob_current, &
                 ijob_end, ijob_start, iunit, n_jobs_to_run, new_output_unit, &
                 new_rank, ngroups, num_minions, output_unit, primus_minion, &
                 minion_rank, source, tag, todo
      INTEGER, DIMENSION(:), POINTER           :: group_distribution, &
                                                  captain_minion_partition, &
                                                  minion_distribution, &
                                                  minion_status
      LOGICAL                                  :: found, captain, minion
      REAL(KIND=dp)                            :: t1, t2
      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: waittime
      TYPE(cp_logger_type), POINTER            :: logger
      TYPE(cp_parser_type), POINTER            :: my_parser
      TYPE(cp_unit_set_type)         :: default_units
      TYPE(farming_env_type), POINTER          :: farming_env
      TYPE(section_type), POINTER              :: g_section
      TYPE(section_vals_type), POINTER         :: g_data
      TYPE(mp_comm_type)                       :: minion_group, new_group

      ! the primus of all minions, talks to the captain on topics concerning all minions
      CALL timeset(routineN, handle)
      NULLIFY (my_parser, g_section, g_data)

      logger => cp_get_default_logger()
      output_unit = cp_print_key_unit_nr(logger, root_section, "FARMING%PROGRAM_RUN_INFO", &
                                         extension=".log")

      IF (output_unit > 0) WRITE (output_unit, FMT="(T2,A)") "FARMING| Hi, welcome on this farm!"

      ALLOCATE (farming_env)
      CALL init_farming_env(farming_env)
      ! remember where we started
      CALL m_getcwd(farming_env%cwd)
      CALL farming_parse_input(farming_env, root_section, para_env)

      ! the full mpi group is first split in a minion group and a captain group, the latter being at most 1 process
      minion = .TRUE.
      captain = .FALSE.
      IF (farming_env%captain_minion) THEN
         IF (output_unit > 0) WRITE (output_unit, FMT="(T2,A)") "FARMING| Using a Captain-Minion setup"

         ALLOCATE (captain_minion_partition(0:1))
         captain_minion_partition = (/1, para_env%num_pe - 1/)
         ALLOCATE (group_distribution(0:para_env%num_pe - 1))

         CALL minion_group%from_split(para_env, ngroups, group_distribution, &
                                      n_subgroups=2, group_partition=captain_minion_partition)
         DEALLOCATE (captain_minion_partition)
         DEALLOCATE (group_distribution)
         num_minions = minion_group%num_pe
         minion_rank = minion_group%mepos

         IF (para_env%mepos == 0) THEN
            minion = .FALSE.
            captain = .TRUE.
            ! on the captain node, num_minions corresponds to the size of the captain group
            CPASSERT(num_minions == 1)
            num_minions = para_env%num_pe - 1
            minion_rank = -1
         END IF
         CPASSERT(num_minions == para_env%num_pe - 1)
      ELSE
         ! all processes are minions
         IF (output_unit > 0) WRITE (output_unit, FMT="(T2,A)") "FARMING| Using a Minion-only setup"
         CALL minion_group%from_dup(para_env)
         num_minions = minion_group%num_pe
         minion_rank = minion_group%mepos
      END IF
      IF (output_unit > 0) WRITE (output_unit, FMT="(T2,A,I0)") "FARMING| Number of Minions ", num_minions

      ! keep track of which para_env rank is which minion/captain
      ALLOCATE (minion_distribution(0:para_env%num_pe - 1))
      minion_distribution = 0
      minion_distribution(para_env%mepos) = minion_rank
      CALL para_env%sum(minion_distribution)
      ! we do have a primus inter pares
      primus_minion = 0
      DO i = 1, para_env%num_pe - 1
         IF (minion_distribution(i) == 0) primus_minion = i
      END DO

      ! split the current communicator for the minions
      ! in a new_group, new_size and new_rank according to the number of groups required according to the input
      ALLOCATE (group_distribution(0:num_minions - 1))
      group_distribution = -1
      IF (minion) THEN
         IF (farming_env%group_size_wish_set) THEN
            farming_env%group_size_wish = MIN(farming_env%group_size_wish, para_env%num_pe)
            CALL new_group%from_split(minion_group, ngroups, group_distribution, &
                                      subgroup_min_size=farming_env%group_size_wish, stride=farming_env%stride)
         ELSE IF (farming_env%ngroup_wish_set) THEN
            IF (ASSOCIATED(farming_env%group_partition)) THEN
               CALL new_group%from_split(minion_group, ngroups, group_distribution, &
                                         n_subgroups=farming_env%ngroup_wish, &
                                         group_partition=farming_env%group_partition, stride=farming_env%stride)
            ELSE
               CALL new_group%from_split(minion_group, ngroups, group_distribution, &
                                         n_subgroups=farming_env%ngroup_wish, stride=farming_env%stride)
            END IF
         ELSE
            CPABORT("must set either group_size_wish or ngroup_wish")
         END IF
         new_rank = new_group%mepos
      END IF

      ! transfer the info about the minion group distribution to the captain
      IF (farming_env%captain_minion) THEN
         IF (para_env%mepos == primus_minion) THEN
            tag = 1
            CALL para_env%send(group_distribution, 0, tag)
            tag = 2
            CALL para_env%send(ngroups, 0, tag)
         END IF
         IF (para_env%mepos == 0) THEN
            tag = 1
            CALL para_env%recv(group_distribution, primus_minion, tag)
            tag = 2
            CALL para_env%recv(ngroups, primus_minion, tag)
         END IF
      END IF

      ! write info on group distribution
      IF (output_unit > 0) THEN
         WRITE (output_unit, FMT="(T2,A,T71,I10)") "FARMING| Number of created MPI (Minion) groups:", ngroups
         WRITE (output_unit, FMT="(T2,A)", ADVANCE="NO") "FARMING| MPI (Minion) process to group correspondence:"
         DO i = 0, num_minions - 1
            IF (MODULO(i, 4) == 0) WRITE (output_unit, *)
            WRITE (output_unit, FMT='(A3,I6,A3,I6,A1)', ADVANCE="NO") &
               "  (", i, " : ", group_distribution(i), ")"
         END DO
         WRITE (output_unit, *)
         CALL m_flush(output_unit)
      END IF

      ! protect about too many jobs being run in single go. Not more jobs are allowed than the number in the input file
      ! and determine the future restart point
      IF (farming_env%cycle) THEN
         n_jobs_to_run = farming_env%max_steps*ngroups
         i_job_to_restart = MODULO(farming_env%restart_n + n_jobs_to_run - 1, farming_env%njobs) + 1
      ELSE
         n_jobs_to_run = MIN(farming_env%njobs, farming_env%max_steps*ngroups)
         n_jobs_to_run = MIN(n_jobs_to_run, farming_env%njobs - farming_env%restart_n + 1)
         i_job_to_restart = n_jobs_to_run + farming_env%restart_n
      END IF

      ! and write the restart now, that's the point where the next job starts, even if this one is running
      iunit = cp_print_key_unit_nr(logger, root_section, "FARMING%RESTART", &
                                   extension=".restart")
      IF (iunit > 0) THEN
         WRITE (iunit, *) i_job_to_restart
      END IF
      CALL cp_print_key_finished_output(iunit, logger, root_section, "FARMING%RESTART")

      ! this is the job range to be executed.
      ijob_start = farming_env%restart_n
      ijob_end = ijob_start + n_jobs_to_run - 1
      IF (output_unit > 0 .AND. ijob_end - ijob_start < 0) THEN
         WRITE (output_unit, FMT="(T2,A)") "FARMING| --- WARNING --- NO JOBS NEED EXECUTION ? "
         WRITE (output_unit, FMT="(T2,A)") "FARMING| is the cycle keyword required ?"
         WRITE (output_unit, FMT="(T2,A)") "FARMING| or is a stray RESTART file present ?"
         WRITE (output_unit, FMT="(T2,A)") "FARMING| or is the group_size requested smaller than the number of CPUs?"
      END IF

      ! actual executions of the jobs in two different modes
      IF (farming_env%captain_minion) THEN
         IF (minion) THEN
            ! keep on doing work until captain has decided otherwise
            todo = do_wait
            DO
               IF (new_rank == 0) THEN
                  ! the head minion tells the captain he's done or ready to start
                  ! the message tells what has been done lately
                  tag = 1
                  dest = 0
                  CALL para_env%send(todo, dest, tag)

                  ! gets the new todo item
                  tag = 2
                  source = 0
                  CALL para_env%recv(todo, source, tag)

                  ! and informs his peer minions
                  CALL new_group%bcast(todo, 0)
               ELSE
                  CALL new_group%bcast(todo, 0)
               END IF

               ! if the todo is do_nothing we are flagged to quit. Otherwise it is the job number
               SELECT CASE (todo)
               CASE (do_wait, do_deadlock)
                  ! go for a next round, but we first wait a bit
                  t1 = m_walltime()
                  DO
                     t2 = m_walltime()
                     IF (t2 - t1 > farming_env%wait_time) EXIT
                  END DO
               CASE (do_nothing)
                  EXIT
               CASE (1:)
                  CALL execute_job(todo)
               END SELECT
            END DO
         ELSE ! captain
            ALLOCATE (minion_status(0:ngroups - 1))
            minion_status = minion_status_wait
            ijob_current = ijob_start - 1

            DO
               IF (ALL(minion_status == minion_status_done)) EXIT

               ! who's the next minion waiting for work
               tag = 1
               source = mp_any_source
               CALL para_env%recv(todo, source, tag) ! updates source
               IF (todo > 0) THEN
                  farming_env%Job(todo)%status = job_finished
                  IF (output_unit > 0) THEN
                     WRITE (output_unit, FMT=*) "Job finished: ", todo
                     CALL m_flush(output_unit)
                  END IF
               END IF

               ! get the next job in line, this could be do_nothing, if we're finished
               CALL get_next_job(farming_env, ijob_start, ijob_end, ijob_current, todo)
               dest = source
               tag = 2
               CALL para_env%send(todo, dest, tag)

               IF (todo > 0) THEN
                  farming_env%Job(todo)%status = job_running
                  IF (output_unit > 0) THEN
                     WRITE (output_unit, FMT=*) "Job: ", todo, " Dir: ", TRIM(farming_env%Job(todo)%cwd), &
                        " assigned to group ", group_distribution(minion_distribution(dest))
                     CALL m_flush(output_unit)
                  END IF
               ELSE
                  IF (todo == do_nothing) THEN
                     minion_status(group_distribution(minion_distribution(dest))) = minion_status_done
                     IF (output_unit > 0) THEN
                        WRITE (output_unit, FMT=*) "group done: ", group_distribution(minion_distribution(dest))
                        CALL m_flush(output_unit)
                     END IF
                  END IF
                  IF (todo == do_deadlock) THEN
                     IF (output_unit > 0) THEN
                        WRITE (output_unit, FMT=*) ""
                        WRITE (output_unit, FMT=*) "FARMING JOB DEADLOCKED ... CIRCULAR DEPENDENCIES"
                        WRITE (output_unit, FMT=*) ""
                        CALL m_flush(output_unit)
                     END IF
                     CPASSERT(todo .NE. do_deadlock)
                  END IF
               END IF

            END DO

            DEALLOCATE (minion_status)

         END IF
      ELSE
         ! this is the non-captain-minion mode way of executing the jobs
         ! the i-th job in the input is always executed by the MODULO(i-1,ngroups)-th group
         ! (needed for cyclic runs, we don't want two groups working on the same job)
         IF (output_unit > 0) THEN
            IF (ijob_end - ijob_start >= 0) THEN
               WRITE (output_unit, FMT="(T2,A)") "FARMING| List of jobs : "
               DO ijob = ijob_start, ijob_end
                  i = MODULO(ijob - 1, farming_env%njobs) + 1
                  WRITE (output_unit, FMT=*) "Job: ", i, " Dir: ", TRIM(farming_env%Job(i)%cwd), " Input: ", &
                     TRIM(farming_env%Job(i)%input), " MPI group:", MODULO(i - 1, ngroups)
               END DO
            END IF
            CALL m_flush(output_unit)
         END IF

         DO ijob = ijob_start, ijob_end
            i = MODULO(ijob - 1, farming_env%njobs) + 1
            ! this farms out the jobs
            IF (MODULO(i - 1, ngroups) == group_distribution(minion_rank)) THEN
               IF (output_unit > 0) THEN
                  WRITE (output_unit, FMT="(T2,A,I5.5,A)", ADVANCE="NO") " Running Job ", i, &
                     " in "//TRIM(farming_env%Job(i)%cwd)//"."
                  CALL m_flush(output_unit)
               END IF
               CALL execute_job(i)
               IF (output_unit > 0) THEN
                  WRITE (output_unit, FMT="(A)") " Done, output in "//TRIM(output_file)
                  CALL m_flush(output_unit)
               END IF
            END IF
         END DO
      END IF

      ! keep information about how long each process has to wait
      ! i.e. the load imbalance
      t1 = m_walltime()
      CALL para_env%sync()
      t2 = m_walltime()
      ALLOCATE (waittime(0:para_env%num_pe - 1))
      waittime = 0.0_dp
      waittime(para_env%mepos) = t2 - t1
      CALL para_env%sum(waittime)
      IF (output_unit > 0) THEN
         WRITE (output_unit, '(T2,A)') "Process idle times [s] at the end of the run"
         DO i = 0, para_env%num_pe - 1
            WRITE (output_unit, FMT='(A2,I6,A3,F8.3,A1)', ADVANCE="NO") &
               " (", i, " : ", waittime(i), ")"
            IF (MOD(i + 1, 4) == 0) WRITE (output_unit, '(A)') ""
         END DO
         CALL m_flush(output_unit)
      END IF
      DEALLOCATE (waittime)

      ! give back the communicators of the split groups
      IF (minion) CALL new_group%free()
      CALL minion_group%free()

      ! and message passing deallocate structures
      DEALLOCATE (group_distribution)
      DEALLOCATE (minion_distribution)

      ! clean the farming env
      CALL deallocate_farming_env(farming_env)

      CALL cp_print_key_finished_output(output_unit, logger, root_section, &
                                        "FARMING%PROGRAM_RUN_INFO")

      CALL timestop(handle)

   CONTAINS
! **************************************************************************************************
!> \brief ...
!> \param i ...
! **************************************************************************************************
      RECURSIVE SUBROUTINE execute_job(i)
      INTEGER                                            :: i

         ! change to the new working directory

         CALL m_chdir(TRIM(farming_env%Job(i)%cwd), ierr)
         IF (ierr .NE. 0) &
            CPABORT("Failed to change dir to: "//TRIM(farming_env%Job(i)%cwd))

         ! generate a fresh call to cp2k_run
         IF (new_rank == 0) THEN

            IF (farming_env%Job(i)%output == "") THEN
               ! generate the output file
               WRITE (output_file, '(A12,I5.5)') "FARMING_OUT_", i
               ALLOCATE (my_parser)
               CALL parser_create(my_parser, file_name=TRIM(farming_env%Job(i)%input))
               label = "&GLOBAL"
               CALL parser_search_string(my_parser, label, ignore_case=.TRUE., found=found)
               IF (found) THEN
                  CALL create_global_section(g_section)
                  CALL section_vals_create(g_data, g_section)
                  CALL cp_unit_set_create(default_units, "OUTPUT")
                  CALL section_vals_parse(g_data, my_parser, default_units)
                  CALL cp_unit_set_release(default_units)
                  CALL section_vals_val_get(g_data, "PROJECT", &
                                            c_val=str)
                  IF (str .NE. "") output_file = TRIM(str)//".out"
                  CALL section_vals_val_get(g_data, "OUTPUT_FILE_NAME", &
                                            c_val=str)
                  IF (str .NE. "") output_file = str
                  CALL section_vals_release(g_data)
                  CALL section_release(g_section)
               END IF
               CALL parser_release(my_parser)
               DEALLOCATE (my_parser)
            ELSE
               output_file = farming_env%Job(i)%output
            END IF

            CALL open_file(file_name=TRIM(output_file), &
                           file_action="WRITE", &
                           file_status="UNKNOWN", &
                           file_position="APPEND", &
                           unit_number=new_output_unit)
         ELSE
            ! this unit should be negative, otherwise all processors that get a default unit
            ! start writing output (to the same file, adding to confusion).
            ! error handling should be careful, asking for a local output unit if required
            new_output_unit = -1
         END IF

         CALL cp2k_run(input_declaration, TRIM(farming_env%Job(i)%input), new_output_unit, new_group, initial_variables)

         IF (new_rank == 0) CALL close_file(unit_number=new_output_unit)

         ! change to the original working directory
         CALL m_chdir(TRIM(farming_env%cwd), ierr)
         CPASSERT(ierr == 0)

      END SUBROUTINE execute_job
   END SUBROUTINE farming_run

! **************************************************************************************************
!> \brief ...
! **************************************************************************************************
   SUBROUTINE write_xml_file()

      INTEGER                                            :: i, unit_number
      TYPE(section_type), POINTER                        :: root_section

      NULLIFY (root_section)
      CALL create_cp2k_root_section(root_section)
      CALL keyword_release(root_section%keywords(0)%keyword)
      CALL open_file(unit_number=unit_number, &
                     file_name="cp2k_input.xml", &
                     file_action="WRITE", &
                     file_status="REPLACE")

      WRITE (UNIT=unit_number, FMT="(A)") '<?xml version="1.0" encoding="utf-8"?>'

      !MK CP2K input structure
      WRITE (UNIT=unit_number, FMT="(A)") &
         "<CP2K_INPUT>", &
         " <CP2K_VERSION>"//TRIM(cp2k_version)//"</CP2K_VERSION>", &
         " <CP2K_YEAR>"//TRIM(cp2k_year)//"</CP2K_YEAR>", &
         " <COMPILE_DATE>"//TRIM(compile_date)//"</COMPILE_DATE>", &
         " <COMPILE_REVISION>"//TRIM(compile_revision)//"</COMPILE_REVISION>"

      CALL export_references_as_xml(unit_number)
      CALL export_units_as_xml(unit_number)

      DO i = 1, root_section%n_subsections
         CALL write_section_xml(root_section%subsections(i)%section, 1, unit_number)
      END DO

      WRITE (UNIT=unit_number, FMT="(A)") "</CP2K_INPUT>"
      CALL close_file(unit_number=unit_number)
      CALL section_release(root_section)

   END SUBROUTINE write_xml_file

! **************************************************************************************************
!> \brief runs the given input
!> \param input_declaration ...
!> \param input_file_path the path of the input file
!> \param output_file_path path of the output file (to which it is appended)
!>        if it is "__STD_OUT__" the default_output_unit is used
!> \param initial_variables key-value list of initial preprocessor variables
!> \param mpi_comm the mpi communicator to be used for this environment
!>        it will not be freed
!> \author fawzi
!> \note
!>      moved here because of circular dependencies
! **************************************************************************************************
   SUBROUTINE run_input(input_declaration, input_file_path, output_file_path, initial_variables, mpi_comm)
      TYPE(section_type), POINTER                        :: input_declaration
      CHARACTER(len=*), INTENT(in)                       :: input_file_path, output_file_path
      CHARACTER(len=default_path_length), &
         DIMENSION(:, :), INTENT(IN)                     :: initial_variables
      TYPE(mp_comm_type), INTENT(in), OPTIONAL           :: mpi_comm

      INTEGER                                            :: unit_nr
      TYPE(mp_para_env_type), POINTER                    :: para_env

      IF (PRESENT(mpi_comm)) THEN
         ALLOCATE (para_env)
         para_env = mpi_comm
      ELSE
         para_env => f77_default_para_env
         CALL para_env%retain()
      END IF
      IF (para_env%is_source()) THEN
         IF (output_file_path == "__STD_OUT__") THEN
            unit_nr = default_output_unit
         ELSE
            INQUIRE (FILE=output_file_path, NUMBER=unit_nr)
         END IF
      ELSE
         unit_nr = -1
      END IF
      CALL cp2k_run(input_declaration, input_file_path, unit_nr, para_env, initial_variables)
      CALL mp_para_env_release(para_env)
   END SUBROUTINE run_input

END MODULE cp2k_runs
