Genesis4 Benchmark on Perlmutter (NERSC)
HPC systems often have special commands to invoke MPI jobs. NERSC uses Slurm, which provides srun in place of mpirun.
In [1]:
Copied!
import os
from genesis.version4 import Genesis4
import os
from genesis.version4 import Genesis4
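The MPI_RUN class attribute holds the command template used to launch MPI jobs, and on NERSC it needs to be replaced with an srun-based command. For convenience, Genesis4 tries to detect NERSC and do this automatically. (The output shown below was generated on a machine outside NERSC, so it displays an mpirun template.)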
In [2]:
Copied!
# Display the command template used to launch MPI runs; the {nproc} and
# {command_mpi} placeholders are filled in when a run script is built.
Genesis4.MPI_RUN
Out[2]:
'"/Users/klauer/miniforge3/envs/lume-genesis/bin/mpirun" -n {nproc} {command_mpi}'
Additionally, the working directory must be on the scratch disk. This is automatically detected:
In [3]:
Copied!
# Display the class-level working directory (on NERSC it must be on the scratch disk).
Genesis4.WORKDIR
Create Genesis4 object:
In [4]:
Copied!
# Build the simulation object from the example input file (path is relative to
# the notebook's working directory).
G = Genesis4("data/basic4/cu_hxr.in")
Set up the benchmark
In [7]:
Copied!
# Set the tracking stop position used for every benchmark run
# (NOTE(review): units presumably meters -- confirm against the Genesis4 manual).
G.input.main.track.zstop = 40
Run with MPI
In [8]:
Copied!
%%time
G.verbose = False
G.nproc = 0
G.run()
%%time
G.verbose = False
G.nproc = 0
G.run()
CPU times: user 75.4 ms, sys: 42.2 ms, total: 118 ms Wall time: 50 s
Out[8]:
Genesis4Output(
run=RunInfo(
error_reason='',
run_script='/Users/klauer/miniforge3/envs/lume-genesis/bin/mpirun -n 0 /Users/klauer/miniforge3/envs/lume-genesis/bin/genesis4 -l hxr.lat genesis4...
output_log='---------------------------------------------\nGENESIS - Version 4.6.6 has started...\nCompile info: Compiled by runner at 2024-01-11 ...
start_time=1588289.1093675,
end_time=1588338.959887,
run_time=49.850519499974325,
),
beam=OutputBeam(
units={
'alphax': pmd_unit('rad', 1, (0, 0, 0, 0, 0, 0, 0)),
'alphay': pmd_unit('rad', 1, (0, 0, 0, 0, 0, 0, 0)),
'betax': pmd_unit('m', 1, (1, 0, 0, 0, 0, 0, 0)),
'betay': pmd_unit('m', 1, (1, 0, 0, 0, 0, 0, 0)),
'bunchingphase': pmd_unit('rad', 1, (0, 0, 0, 0, 0, 0, 0)),
'current': pmd_unit('A', 1, (0, 0, 0, 1, 0, 0, 0)),
'efield': pmd_unit('eV/m', 1.602176634e-19, (1, 1, -2, 0, 0, 0, 0)),
'emax': pmd_unit('m_ec^2', 8.187105776885341e-14, (2, 1, -2, 0, 0, 0, 0)),
'emin': pmd_unit('m_ec^2', 8.187105776885341e-14, (2, 1, -2, 0, 0, 0, 0)),
'emitx': pmd_unit('m', 1, (1, 0, 0, 0, 0, 0, 0)),
'emity': pmd_unit('m', 1, (1, 0, 0, 0, 0, 0, 0)),
'energy': pmd_unit('m_ec^2', 8.187105776885341e-14, (2, 1, -2, 0, 0, 0, 0)),
'energyspread': pmd_unit('m_ec^2', 8.187105776885341e-14, (2, 1, -2, 0, 0, 0, 0)),
'lsc_field': pmd_unit('eV/m', 1.602176634e-19, (1, 1, -2, 0, 0, 0, 0)),
'pxmax': pmd_unit('rad', 1, (0, 0, 0, 0, 0, 0, 0)),
'pxmin': pmd_unit('rad', 1, (0, 0, 0, 0, 0, 0, 0)),
'pxposition': pmd_unit('rad', 1, (0, 0, 0, 0, 0, 0, 0)),
'pymax': pmd_unit('rad', 1, (0, 0, 0, 0, 0, 0, 0)),
'pymin': pmd_unit('rad', 1, (0, 0, 0, 0, 0, 0, 0)),
'pyposition': pmd_unit('rad', 1, (0, 0, 0, 0, 0, 0, 0)),
'ssc_field': pmd_unit('eV/m', 1.602176634e-19, (1, 1, -2, 0, 0, 0, 0)),
'wakefield': pmd_unit('eV/m', 1.602176634e-19, (1, 1, -2, 0, 0, 0, 0)),
'xmax': pmd_unit('m', 1, (1, 0, 0, 0, 0, 0, 0)),
'xmin': pmd_unit('m', 1, (1, 0, 0, 0, 0, 0, 0)),
'xposition': pmd_unit('m', 1, (1, 0, 0, 0, 0, 0, 0)),
'xsize': pmd_unit('m', 1, (1, 0, 0, 0, 0, 0, 0)),
'ymax': pmd_unit('m', 1, (1, 0, 0, 0, 0, 0, 0)),
'ymin': pmd_unit('m', 1, (1, 0, 0, 0, 0, 0, 0)),
'yposition': pmd_unit('m', 1, (1, 0, 0, 0, 0, 0, 0)),
'ysize': pmd_unit('m', 1, (1, 0, 0, 0, 0, 0, 0)),
},
globals=OutputBeamGlobal(
units={},
energy=array([], dtype=float64),
energyspread=array([], dtype=float64),
xposition=array([], dtype=float64),
yposition=array([], dtype=float64),
xsize=array([], dtype=float64),
ysize=array([], dtype=float64),
),
bunching=array(shape=(1084, 552), dtype=float64),
bunchingphase=array(shape=(1084, 552), dtype=float64),
energy=array(shape=(1084, 552), dtype=float64),
energyspread=array(shape=(1084, 552), dtype=float64),
xsize=array(shape=(1084, 552), dtype=float64),
ysize=array(shape=(1084, 552), dtype=float64),
lsc_field=array(shape=(1084, 552), dtype=float64),
ssc_field=array(shape=(1084, 552), dtype=float64),
efield=array(shape=(1084, 552), dtype=float64),
wakefield=array(shape=(1084, 552), dtype=float64),
emin=array(shape=(1084, 552), dtype=float64),
emax=array(shape=(1084, 552), dtype=float64),
pxmin=array(shape=(1084, 552), dtype=float64),
pxmax=array(shape=(1084, 552), dtype=float64),
pymin=array(shape=(1084, 552), dtype=float64),
pymax=array(shape=(1084, 552), dtype=float64),
pxposition=array(shape=(1084, 552), dtype=float64),
pyposition=array(shape=(1084, 552), dtype=float64),
xmin=array(shape=(1084, 552), dtype=float64),
xmax=array(shape=(1084, 552), dtype=float64),
ymin=array(shape=(1084, 552), dtype=float64),
ymax=array(shape=(1084, 552), dtype=float64),
xposition=array(shape=(1084, 552), dtype=float64),
yposition=array(shape=(1084, 552), dtype=float64),
alphax=array(shape=(1, 552), dtype=float64),
alphay=array(shape=(1, 552), dtype=float64),
betax=array(shape=(1, 552), dtype=float64),
betay=array(shape=(1, 552), dtype=float64),
current=array(shape=(1084, 552), dtype=float64),
emitx=array(shape=(1, 552), dtype=float64),
emity=array(shape=(1, 552), dtype=float64),
extra={},
),
field_harmonics={
'1': OutputField(
units={
'intensity_farfield': pmd_unit('W/rad^2', 1, (2, 1, -3, 0, 0, 0, 0)),
'intensity_nearfield': pmd_unit('W/m^2', 1, (0, 1, -3, 0, 0, 0, 0)),
'phase_farfield': pmd_unit('rad', 1, (0, 0, 0, 0, 0, 0, 0)),
'phase_nearfield': pmd_unit('rad', 1, (0, 0, 0, 0, 0, 0, 0)),
'power': pmd_unit('W', 1, (2, 1, -3, 0, 0, 0, 0)),
'xdivergence': pmd_unit('rad', 1, (0, 0, 0, 0, 0, 0, 0)),
'xpointing': pmd_unit('rad', 1, (0, 0, 0, 0, 0, 0, 0)),
'xposition': pmd_unit('m', 1, (1, 0, 0, 0, 0, 0, 0)),
'xsize': pmd_unit('m', 1, (1, 0, 0, 0, 0, 0, 0)),
'ydivergence': pmd_unit('rad', 1, (0, 0, 0, 0, 0, 0, 0)),
'ypointing': pmd_unit('rad', 1, (0, 0, 0, 0, 0, 0, 0)),
'yposition': pmd_unit('m', 1, (1, 0, 0, 0, 0, 0, 0)),
'ysize': pmd_unit('m', 1, (1, 0, 0, 0, 0, 0, 0)),
},
globals=OutputFieldGlobal(
units={},
energy=array([], dtype=float64),
intensity_farfield=array([], dtype=float64),
intensity_nearfield=array([], dtype=float64),
xdivergence=array([], dtype=float64),
ydivergence=array([], dtype=float64),
xpointing=array([], dtype=float64),
ypointing=array([], dtype=float64),
xposition=array([], dtype=float64),
yposition=array([], dtype=float64),
xsize=array([], dtype=float64),
ysize=array([], dtype=float64),
extra={},
),
dgrid=2e-06,
intensity_farfield=array(shape=(1084, 552), dtype=float64),
intensity_nearfield=array(shape=(1084, 552), dtype=float64),
ngrid=101,
phase_farfield=array(shape=(1084, 552), dtype=float64),
phase_nearfield=array(shape=(1084, 552), dtype=float64),
power=array(shape=(1084, 552), dtype=float64),
xdivergence=array(shape=(1084, 552), dtype=float64),
ydivergence=array(shape=(1084, 552), dtype=float64),
xpointing=array(shape=(1084, 552), dtype=float64),
ypointing=array(shape=(1084, 552), dtype=float64),
xposition=array(shape=(1084, 552), dtype=float64),
yposition=array(shape=(1084, 552), dtype=float64),
xsize=array(shape=(1084, 552), dtype=float64),
ysize=array(shape=(1084, 552), dtype=float64),
energy=array(shape=(1084,), dtype=float64),
extra={},
),
},
lattice=OutputLattice(
units={},
aw=array(shape=(4332,), dtype=float64),
ax=array(shape=(4332,), dtype=float64),
ay=array(shape=(4332,), dtype=float64),
chic_angle=array(shape=(4332,), dtype=float64),
chic_lb=array(shape=(4332,), dtype=float64),
chic_ld=array(shape=(4332,), dtype=float64),
chic_lt=array(shape=(4332,), dtype=float64),
cx=array(shape=(4332,), dtype=float64),
cy=array(shape=(4332,), dtype=float64),
dz=array(shape=(4332,), dtype=float64),
gradx=array(shape=(4332,), dtype=float64),
grady=array(shape=(4332,), dtype=float64),
ku=array(shape=(4332,), dtype=float64),
kx=array(shape=(4332,), dtype=float64),
ky=array(shape=(4332,), dtype=float64),
phaseshift=array(shape=(4332,), dtype=float64),
qf=array(shape=(4332,), dtype=float64),
qx=array(shape=(4332,), dtype=float64),
qy=array(shape=(4332,), dtype=float64),
slippage=array(shape=(4332,), dtype=float64),
z=array(shape=(4332,), dtype=float64),
zplot=array(shape=(1084,), dtype=float64),
extra={},
),
globals=OutputGlobal(
units={},
frequency=array(shape=(552,), dtype=float64),
gamma0=19174.0776,
lambdaref=1.3789244869952112e-10,
s=array(shape=(552,), dtype=float64),
sample=200.0,
slen=1.5223326336427133e-05,
time=True,
extra={},
),
meta=OutputMeta(
units={},
beamdumps=OutputMetaDumps(units={}, extra={}),
fielddumps=OutputMetaDumps(units={}, extra={}),
version=OutputMetaVersion(units={}, extra={}),
extra={},
),
version=OutputMetaVersion(
units={},
build_info='Compiled by runner at 2024-01-11 18:10:26 [UTC] from Git Commit ID: ',
major=4.0,
minor=6.0,
revision=6.0,
extra={},
),
extra={},
field={},
particles={},
alias={
'alphax': 'beam.alphax',
'alphay': 'beam.alphay',
'aw': 'lattice.aw',
'ax': 'lattice.ax',
'ay': 'lattice.ay',
'beam_alphax': 'beam.alphax',
'beam_alphay': 'beam.alphay',
'beam_betax': 'beam.betax',
'beam_betay': 'beam.betay',
'beam_bunching': 'beam.bunching',
'beam_bunchingphase': 'beam.bunchingphase',
'beam_current': 'beam.current',
'beam_efield': 'beam.efield',
'beam_emax': 'beam.emax',
'beam_emin': 'beam.emin',
'beam_emitx': 'beam.emitx',
'beam_emity': 'beam.emity',
'beam_energy': 'beam.energy',
'beam_energyspread': 'beam.energyspread',
'beam_globals_energy': 'beam.globals.energy',
'beam_globals_energyspread': 'beam.globals.energyspread',
'beam_globals_xposition': 'beam.globals.xposition',
'beam_globals_xsize': 'beam.globals.xsize',
'beam_globals_yposition': 'beam.globals.yposition',
'beam_globals_ysize': 'beam.globals.ysize',
'beam_lsc_field': 'beam.lsc_field',
'beam_pxmax': 'beam.pxmax',
'beam_pxmin': 'beam.pxmin',
'beam_pxposition': 'beam.pxposition',
'beam_pymax': 'beam.pymax',
'beam_pymin': 'beam.pymin',
'beam_pyposition': 'beam.pyposition',
'beam_sigma_energy': 'beam.stat.sigma_energy',
'beam_sigma_x': 'beam.stat.sigma_x',
'beam_sigma_y': 'beam.stat.sigma_y',
'beam_ssc_field': 'beam.ssc_field',
'beam_stat_alphax': 'beam.stat.alphax',
'beam_stat_alphay': 'beam.stat.alphay',
'beam_stat_betax': 'beam.stat.betax',
'beam_stat_betay': 'beam.stat.betay',
'beam_stat_bunching': 'beam.stat.bunching',
'beam_stat_bunchingphase': 'beam.stat.bunchingphase',
'beam_stat_current': 'beam.stat.current',
'beam_stat_efield': 'beam.stat.efield',
'beam_stat_emitx': 'beam.stat.emitx',
'beam_stat_emity': 'beam.stat.emity',
'beam_stat_energy': 'beam.stat.energy',
'beam_stat_energyspread': 'beam.stat.energyspread',
'beam_stat_lsc_field': 'beam.stat.lsc_field',
'beam_stat_pxmax': 'beam.stat.pxmax',
'beam_stat_pxmin': 'beam.stat.pxmin',
'beam_stat_pxposition': 'beam.stat.pxposition',
'beam_stat_pymax': 'beam.stat.pymax',
'beam_stat_pymin': 'beam.stat.pymin',
'beam_stat_pyposition': 'beam.stat.pyposition',
'beam_stat_sigma_energy': 'beam.stat.sigma_energy',
'beam_stat_sigma_x': 'beam.stat.sigma_x',
'beam_stat_sigma_y': 'beam.stat.sigma_y',
'beam_stat_ssc_field': 'beam.stat.ssc_field',
'beam_stat_wakefield': 'beam.stat.wakefield',
'beam_stat_xmax': 'beam.stat.xmax',
'beam_stat_xmin': 'beam.stat.xmin',
'beam_stat_xposition': 'beam.stat.xposition',
'beam_stat_ymax': 'beam.stat.ymax',
'beam_stat_ymin': 'beam.stat.ymin',
'beam_stat_yposition': 'beam.stat.yposition',
'beam_wakefield': 'beam.wakefield',
'beam_xmax': 'beam.xmax',
'beam_xmin': 'beam.xmin',
'beam_xposition': 'beam.xposition',
'beam_xsize': 'beam.xsize',
'beam_ymax': 'beam.ymax',
'beam_ymin': 'beam.ymin',
'beam_yposition': 'beam.yposition',
'beam_ysize': 'beam.ysize',
'betax': 'beam.betax',
'betay': 'beam.betay',
'bunching': 'beam.bunching',
'bunchingphase': 'beam.bunchingphase',
'chic_angle': 'lattice.chic_angle',
'chic_lb': 'lattice.chic_lb',
'chic_ld': 'lattice.chic_ld',
'chic_lt': 'lattice.chic_lt',
'current': 'beam.current',
'cx': 'lattice.cx',
'cy': 'lattice.cy',
'dz': 'lattice.dz',
'efield': 'beam.efield',
'emax': 'beam.emax',
'emin': 'beam.emin',
'emitx': 'beam.emitx',
'emity': 'beam.emity',
'field_energy': 'field.energy',
'field_globals_energy': 'field.globals.energy',
'field_globals_intensity_farfield': 'field.globals.intensity_farfield',
'field_globals_intensity_nearfield': 'field.globals.intensity_nearfield',
'field_globals_xdivergence': 'field.globals.xdivergence',
'field_globals_xpointing': 'field.globals.xpointing',
'field_globals_xposition': 'field.globals.xposition',
'field_globals_xsize': 'field.globals.xsize',
'field_globals_ydivergence': 'field.globals.ydivergence',
'field_globals_ypointing': 'field.globals.ypointing',
'field_globals_yposition': 'field.globals.yposition',
'field_globals_ysize': 'field.globals.ysize',
'field_intensity_farfield': 'field.intensity_farfield',
'field_intensity_nearfield': 'field.intensity_nearfield',
'field_peak_power': 'field.peak_power',
'field_phase_farfield': 'field.phase_farfield',
'field_phase_nearfield': 'field.phase_nearfield',
'field_power': 'field.power',
'field_stat_xposition': 'field.stat.xposition',
'field_stat_xsize': 'field.stat.xsize',
'field_stat_yposition': 'field.stat.yposition',
'field_stat_ysize': 'field.stat.ysize',
'field_xdivergence': 'field.xdivergence',
'field_xpointing': 'field.xpointing',
'field_xposition': 'field.xposition',
'field_xsize': 'field.xsize',
'field_ydivergence': 'field.ydivergence',
'field_ypointing': 'field.ypointing',
'field_yposition': 'field.yposition',
'field_ysize': 'field.ysize',
'frequency': 'globals.frequency',
'globals_frequency': 'globals.frequency',
'globals_s': 'globals.s',
'gradx': 'lattice.gradx',
'grady': 'lattice.grady',
'ku': 'lattice.ku',
'kx': 'lattice.kx',
'ky': 'lattice.ky',
'lattice_aw': 'lattice.aw',
'lattice_ax': 'lattice.ax',
'lattice_ay': 'lattice.ay',
'lattice_chic_angle': 'lattice.chic_angle',
'lattice_chic_lb': 'lattice.chic_lb',
'lattice_chic_ld': 'lattice.chic_ld',
'lattice_chic_lt': 'lattice.chic_lt',
'lattice_cx': 'lattice.cx',
'lattice_cy': 'lattice.cy',
'lattice_dz': 'lattice.dz',
'lattice_gradx': 'lattice.gradx',
'lattice_grady': 'lattice.grady',
'lattice_ku': 'lattice.ku',
'lattice_kx': 'lattice.kx',
'lattice_ky': 'lattice.ky',
'lattice_phaseshift': 'lattice.phaseshift',
'lattice_qf': 'lattice.qf',
'lattice_qx': 'lattice.qx',
'lattice_qy': 'lattice.qy',
'lattice_slippage': 'lattice.slippage',
'lattice_z': 'lattice.z',
'lattice_zplot': 'lattice.zplot',
'lsc_field': 'beam.lsc_field',
'peak_power': 'field.peak_power',
'phase_farfield': 'field.phase_farfield',
'phase_nearfield': 'field.phase_nearfield',
'phaseshift': 'lattice.phaseshift',
'power': 'field.power',
'pxmax': 'beam.pxmax',
'pxmin': 'beam.pxmin',
'pxposition': 'beam.pxposition',
'pymax': 'beam.pymax',
'pymin': 'beam.pymin',
'pyposition': 'beam.pyposition',
'qf': 'lattice.qf',
'qx': 'lattice.qx',
'qy': 'lattice.qy',
's': 'globals.s',
'slippage': 'lattice.slippage',
'ssc_field': 'beam.ssc_field',
'wakefield': 'beam.wakefield',
'xmax': 'beam.xmax',
'xmin': 'beam.xmin',
'ymax': 'beam.ymax',
'ymin': 'beam.ymin',
'z': 'lattice.z',
'zplot': 'lattice.zplot',
},
field_files={},
particle_files={},
)
Timing
In [ ]:
Copied!
# Number of logical CPUs reported for this machine. os.cpu_count() returns
# None when the count cannot be determined, so fall back to 1 instead of
# letting the integer division below raise TypeError.
MAX_CPUS = os.cpu_count() or 1
# CPUs per node, used by time1 to derive how many nodes a run needs
# (NOTE(review): presumably the physical core count of a Perlmutter CPU node -- confirm).
CPUS_PER_NODE = 128
# Smallest core count included in the scaling scan: one eighth of the maximum.
MIN_CPUS = MAX_CPUS // 8
MAX_CPUS, MIN_CPUS
In [ ]:
Copied!
def time1(nproc):
    """Run the benchmark once on ``nproc`` MPI processes and return its run time.

    Reconfigures the shared ``G`` object in place (``verbose``, ``nproc``,
    ``nnode``) and then executes it.

    Parameters
    ----------
    nproc : int
        Number of MPI processes to request.

    Returns
    -------
    float
        Run time in seconds, as recorded in ``G.output.run.run_time``.
    """
    G.verbose = False
    G.nproc = nproc
    # Ceiling division: the fewest nodes that can hold nproc processes.
    G.nnode = (nproc - 1) // CPUS_PER_NODE + 1
    G.run()
    return G.output.run.run_time


# Time one run on every available CPU.
time1(MAX_CPUS)
In [ ]:
Copied!
%%time
nlist = []
tlist = []
n_cpu = MAX_CPUS
while n_cpu > 0:
n = n_cpu
if n < MIN_CPUS:
break
nlist.append(n)
dt = time1(n)
tlist.append(dt)
print(f"{n} cores, {dt:0.1f} s")
n_cpu = n_cpu // 2
%%time
nlist = []
tlist = []
n_cpu = MAX_CPUS
while n_cpu > 0:
n = n_cpu
if n < MIN_CPUS:
break
nlist.append(n)
dt = time1(n)
tlist.append(dt)
print(f"{n} cores, {dt:0.1f} s")
n_cpu = n_cpu // 2
In [ ]:
Copied!
# Show the raw scan results: core counts and their run times in seconds.
nlist, tlist
Some saved stats
In [ ]:
Copied!
import matplotlib.pyplot as plt
import numpy as np
%config InlineBackend.figure_format = 'retina'
# Perlmutter
nlist, tlist = (
[256, 128, 64, 32, 16, 8, 4, 2, 1],
[
12.407618761062622,
9.337806940078735,
15.55810832977295,
27.734387397766113,
49.104663372039795,
113.33639931678772,
184.2123155593872,
367.3742334842682,
555.4603695869446,
],
)
nlist = np.array(nlist)[::-1]
tlist = np.array(tlist)[::-1]
tref = tlist[0]
# M1 Max
nlistmac, tlistmac = (
[8, 4, 2, 1],
[38.151074171066284, 75.41113114356995, 146.3512842655182, 285.11587405204773],
)
nlistmac = np.array(nlistmac)
tlistmac = np.array(tlistmac)
plt.plot(nlist, tlist / tref, marker=".", label="Genesis4 Perlmutter")
plt.plot(nlistmac, tlistmac / tref, marker=".", label="Genesis4 M1 Max")
plt.plot(nlist, 1 / (nlist / nlist[0]), "--", label="ideal")
plt.xscale("log")
plt.yscale("log")
plt.xlabel("n cores")
plt.ylabel("run time (normalized)")
plt.legend()
import matplotlib.pyplot as plt
import numpy as np
%config InlineBackend.figure_format = 'retina'
# Perlmutter
nlist, tlist = (
[256, 128, 64, 32, 16, 8, 4, 2, 1],
[
12.407618761062622,
9.337806940078735,
15.55810832977295,
27.734387397766113,
49.104663372039795,
113.33639931678772,
184.2123155593872,
367.3742334842682,
555.4603695869446,
],
)
nlist = np.array(nlist)[::-1]
tlist = np.array(tlist)[::-1]
tref = tlist[0]
# M1 Max
nlistmac, tlistmac = (
[8, 4, 2, 1],
[38.151074171066284, 75.41113114356995, 146.3512842655182, 285.11587405204773],
)
nlistmac = np.array(nlistmac)
tlistmac = np.array(tlistmac)
plt.plot(nlist, tlist / tref, marker=".", label="Genesis4 Perlmutter")
plt.plot(nlistmac, tlistmac / tref, marker=".", label="Genesis4 M1 Max")
plt.plot(nlist, 1 / (nlist / nlist[0]), "--", label="ideal")
plt.xscale("log")
plt.yscale("log")
plt.xlabel("n cores")
plt.ylabel("run time (normalized)")
plt.legend()