Octopus
dm_propagation.F90
Go to the documentation of this file.
1!! Copyright (C) 2024 - 2025 S. Pal, Z. Nie, U. De Giovannini
2!! This program is free software; you can redistribute it and/or modify
3!! it under the terms of the GNU General Public License as published by
4!! the Free Software Foundation; either version 2, or (at your option)
5!! any later version.
6!!
7!! This program is distributed in the hope that it will be useful,
8!! but WITHOUT ANY WARRANTY; without even the implied warranty of
9!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10!! GNU General Public License for more details.
11!!
12!! You should have received a copy of the GNU General Public License
13!! along with this program; if not, write to the Free Software
14!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15!! 02110-1301, USA.
16!!
17
18#include "global.h"
19
22 use blas_oct_m
23 use debug_oct_m
28 use global_oct_m
29 use grid_oct_m
32 use ions_oct_m
33 use, intrinsic :: iso_fortran_env
42 use mpi_oct_m
45 use parser_oct_m
48 use space_oct_m
52 use unit_oct_m
54 use v_ks_oct_m
56
57 implicit none
58
59 private
60 public :: &
61 dmp_t, &
65
66 type dmp_t
67 integer :: calculation_mode
68 integer :: basis
69 logical :: unitary_transform
70 type(states_elec_t) :: adiabatic_st
71 integer :: strategy
72 logical :: othn
73 type(restart_t) :: restart_dump
74 ! 2-Times Model
75 real(real64) :: tmodel(2)
76 real(real64), allocatable :: occ_gs(:, :)
77 ! Uniform Decay
78 real(real64) :: uniform(2)
79 ! EPW Data Parsing and Shared Memory Configuration
80 character(len=256) :: epw_file
81 integer :: iunit
82 integer :: istart, iend, wnst
83 integer :: ia
84 integer :: na(3)
85 real(real64) :: astep(3)
86 integer(int64) :: num
87 integer, allocatable :: kmap(:)
88 type(mpi_grp_t) :: intranode_grp, internode_grp
89#ifdef HAVE_MPI
90 type(MPI_Win) :: window_trans_rate
91#endif
92 contains
93 procedure :: init => dmp_init
94 procedure :: update_trans_rate => dm_propagation_update_trans_rate
95 end type dmp_t
96
97 ! Defined separately because the VOLATILE attribute is not allowed for derived-type components.
98 real(real32), pointer, volatile :: ave_trans(:)
99
100contains
101
103 subroutine dmp_init(this, namespace, st, space, hm)
104 class(dmp_t), intent(inout) :: this
105 type(namespace_t), intent(in) :: namespace
106 type(states_elec_t), intent(in) :: st
107 class(space_t), intent(in) :: space
108 type(hamiltonian_elec_t), intent(in) :: hm
109
110 type(block_t) :: blk
111 integer :: ncols, nempty
112 real(real64) :: nempty_percent
113
114 push_sub(dmp_init)
116 !%Variable TDDMPropagationBasis
117 !%Type integer
118 !%Default Adiabatic
119 !%Section Time-Dependent
120 !%Description
121 !% Decides the basis set for the density matrix propagation.
122 !%Option Adiabatic 01
123 !% Instantaneous eigenstates of the Hamiltonian.
124 !%Option Groundstate 02
125 !% Eigenstates of the Hamiltonian at t=0.
126 !%End
127 call parse_variable(namespace, 'TDDMPropagationBasis', option__tddmpropagationbasis__adiabatic, this%basis)
128 if (.not. varinfo_valid_option('TDDMPropagationBasis', this%basis)) then
129 call messages_input_error(namespace, 'TDDMPropagationBasis')
130 endif
131 call messages_print_var_option('TDDMPropagationBasis', this%basis, namespace=namespace)
132
133 !%Variable TDDMOrthogonal
134 !%Type logical
135 !%Default no
136 !%Section Time-Dependent
137 !%Description
138 !% Use a fully orthonormalized basis when constructing and
139 !% damping the density matrix.
140 !%End
141 call parse_variable(namespace, 'TDDMOrthogonal', .false., this%othn)
142 call messages_print_var_value('TDDMOrthogonal', this%othn, namespace=namespace)
143
144 !%Variable TDDMUnitaryTransformFix
145 !%Type logical
146 !%Default yes
147 !%Section Time-Dependent
148 !%Description
149 !% Applies an additional unitary transformation to the damped wavefunctions
150 !% to maximize their overlap with the reference wavefunctions after damping.
151 !%End
152 call parse_variable(namespace, 'TDDMUnitaryTransformFix', .true., this%unitary_transform)
153 call messages_print_var_value('TDDMUnitaryTransformFix', this%unitary_transform, namespace=namespace)
154
155 this%strategy = -1
156
157 !%Variable TDDMPropagation_uniform_decay
158 !%Type block
159 !%Section Time-Dependent
160 !%Description
161 !% The intra-k transition rates between any pair of states are taken to be constant.
162 !% The population dynamics are therefore determined solely by the state occupations
163 !% and the bath temperature, following Fermi’s golden rule.
164 !% The first column of the block specifies the characteristic lifetime, and the
165 !% second column gives the bath temperature (in Kelvin).
166 !% <tt>%TDDMPropagation_uniform_decay
167 !% <br>&nbsp;&nbsp; Time | Temp
168 !% <br>%</tt>
169 !%End
170 if (parse_block(namespace, 'TDDMPropagation_uniform_decay', blk) == 0) then
171 if (this%strategy /= -1) then
172 message(1) = "Multiple dissipation strategies are not allowed."
173 call messages_fatal(1, namespace=namespace)
174 end if
175 this%strategy = 1
176 ncols = parse_block_cols(blk, 0)
177 if (ncols == 2) then
178 call parse_block_float(blk, 0, 0, this%uniform(1), units_inp%time)
179 call parse_block_float(blk, 0, 1, this%uniform(2))
182 message(1) = "Info: TDDMPropagation uniform decay approximation:"
183 write(message(2),'(a, f0.2, 1x, 2a, F0.2, 1x, 2a)') ' [lifetime, temperature] = [', &
184 this%uniform(1), trim(units_abbrev(units_out%time)), ', ', this%uniform(2), trim(units_abbrev(unit_kelvin)), ']'
185 call messages_info(2, namespace=namespace)
186 else
187 message(1) = "Input: TDDMPropagation_uniform_decay block must have 2 columns."
188 call messages_fatal(1, namespace=namespace)
189 end if
190 end if
191
192 !%Variable TDDMPropagation_2Times
193 !%Type block
194 !%Section Time-Dependent
195 !%Description
196 !% Two times approximation for the jump operator in the master equation.
197 !% S. A. Sato <i>et al.</i>, <i>Phys. Rev. B</i> 99, 214302 (2019).
198 !%
199 !% <tt>%TDDMPropagation_2Times
200 !% <br>&nbsp;&nbsp; t1 | t2
201 !% <br>%</tt>
202 !%End
203 if (parse_block(namespace, 'TDDMPropagation_2Times', blk) == 0) then
204 if (this%strategy /= -1) then
205 message(1) = "Multiple dissipation strategies are not allowed."
206 call messages_fatal(1, namespace=namespace)
207 end if
208 this%strategy = 2
209 ncols = parse_block_cols(blk, 0)
210 if (ncols == 2) then
211 call parse_block_float(blk, 0, 0, this%tmodel(1), units_inp%time)
212 call parse_block_float(blk, 0, 1, this%tmodel(2), units_inp%time)
213 call parse_block_end(blk)
214
215 message(1) = "Info: TDDMPropagation 2-times approximation:"
216 write(message(2),'(a, f12.6, a, f12.6, a,a)') &
217 ' [t1, t2] = [', this%tmodel(1) , ', ', this%tmodel(2), '] ', &
218 trim(units_abbrev(units_out%time))
219 call messages_info(2, namespace=namespace)
220 else
221 message(1) = "Input: TDDMPropagation_2Times block must have 2 columns."
222 call messages_fatal(1, namespace=namespace)
223 end if
224 ! for two times model, only orthonormalized basis is allowed
225 if (.not. this%othn) then
226 this%othn = .true.
227 message(1) = "Overriding input: TDDMOrthogonal set to yes for TDDMPropagation_2Times."
228 call messages_warning(1, namespace=namespace)
229 end if
230 end if
231
232 !%Variable TDDMPropagation_from_epw
233 !%Type string
234 !%Default "-"
235 !%Section Time-Dependent
236 !%Description
237 !% Specifies the transition-rate file generated by EPW. Once loaded, the
238 !% simulation includes all intra-k and inter-k scattering processes, with
239 !% the electron dynamics governed by Fermi’s Golden Rule. This option
240 !% requires EPWBandLowest to be set.
241 !%End
242 call parse_variable(namespace, 'TDDMPropagation_from_epw', '-', this%epw_file)
243 if (trim(this%epw_file) /= '-') then
244 if (bitand(hm%kpoints%method, kpoints_monkh_pack) == 0) then
245 write(message(1),'(a)') 'Only Monkhorst-Pack k-point meshes are supported in the EPW-DMPropagation strategy.'
246 call messages_fatal(1, namespace = namespace)
247 end if
248 if (hm%kpoints%reduced%nshifts /= 1) then
249 write(message(1),'(a)') 'Multiple Monkhorst-Pack shifts are not supported in the EPW-DMPropagation strategy.'
250 call messages_fatal(1, namespace = namespace)
251 end if
252 if (space%dim /= 3) then
253 write(message(1),'(a)') 'Only 3D systems are supported in the EPW-DMPropagation strategy.'
254 call messages_fatal(1, namespace = namespace)
255 end if
256
257 if (this%strategy /= -1) then
258 message(1) = "Multiple dissipation strategies are not allowed."
259 call messages_fatal(1, namespace=namespace)
260 end if
261 this%strategy = 3
262 write(message(1),'(a, a)') "Info: TDDMPropagation transition rates from file: ", trim(this%epw_file)
263 call messages_info(1, namespace=namespace)
264 end if
265
266 !%Variable EPWBandLowest
267 !%Type integer
268 !%Default -1
269 !%Section Time-Dependent
270 !%Description
271 !% The starting DFT band index used for damping.
272 !%
273 !% Since EPW transition rates are calculated for a subset of DFT bands,
274 !% this variable tells the code which global DFT band corresponds
275 !% to damping band #1.
276 !%
277 !% Example: If you damp DFT bands 10 through 15, set EPWBandLowest = 10.
278 !%End
279 if (this%strategy == 3) then
280 call parse_variable(namespace, 'EPWBandLowest', 0, this%istart)
281 if (.NOT. (this%istart > 0)) then
282 write(message(1), '(a)') 'EPWBandLowest must be specified when TDDMPropagation_from_epw is enabled.'
283 call messages_fatal(1, namespace=namespace)
284 end if
285 call messages_print_var_value('EPWBandLowest', this%istart, namespace=namespace)
286 end if
287
288 ! Sanity checks
289 if (this%calculation_mode == option__tddmpropagation__collision_integral .and. this%strategy /= 3) then
290 message(1) = "Warning: TDDMPropagation_from_epw is required for collision integral."
291 message(2) = " Add TDDMPropagation_from_epw and rerun."
292 call messages_fatal(2, namespace=namespace)
293 end if
294
295 call parse_variable(namespace, 'ExtraStates', 0, nempty)
296 call parse_variable(namespace, 'ExtraStatesInPercent', m_zero, nempty_percent)
297 if (nempty == 0 .and. nempty_percent < m_epsilon) then
298 message(1) = "Warning: TDDMPropagation requires a number of empty states."
299 message(2) = " Add ExtraStates and rerun."
300 call messages_fatal(2, namespace=namespace)
301 end if
302
303 if (st%parallel_in_states) then
304 message(1) = "Warning: TDDMPropagation does not support parallel states."
305 message(2) = " Remove ParallelStates and rerun."
306 call messages_fatal(2, namespace=namespace)
307 end if
308
309 if (st%d%ispin == spin_polarized .and. this%strategy == 3) then
310 call messages_not_implemented('Spin-polarized TDDMPropagation with EPW', namespace=namespace)
311 end if
312
313 pop_sub(dmp_init)
314
315 end subroutine dmp_init
316
318 subroutine dm_propagation_init_run(dmp, namespace, space, gr, ions, st, hm, mc, from_scratch)
319 type(dmp_t), intent(inout) :: dmp
320 type(namespace_t), intent(in) :: namespace
321 type(electron_space_t), intent(in) :: space
322 type(grid_t), intent(in) :: gr
323 type(ions_t), intent(in) :: ions
324 type(states_elec_t), intent(in) :: st
325 type(hamiltonian_elec_t), intent(in) :: hm
326 type(multicomm_t), intent(in) :: mc
327 logical, intent(in) :: from_scratch
328
329 integer :: ierr
330 type(restart_t) :: restart_load
331
333
334 ! By default, wavefunctions are copied. In principle, exclude_eigenval should be true.
335 ! But we need its allocation
336 call states_elec_copy(dmp%adiabatic_st, st, special=.true.)
337
338 if (from_scratch .or. dmp%basis == option__tddmpropagationbasis__groundstate) then
339 call restart_load%init(namespace, restart_gs, restart_type_load, mc, ierr, mesh=gr, exact=.true.)
340 else
341 call restart_load%init(namespace, restart_dm, restart_type_load, mc, ierr, mesh=gr, exact=.true.)
342 end if
343
344 if (ierr == 0) then
345 call states_elec_load(restart_load, namespace, space, dmp%adiabatic_st, gr, hm%kpoints, ierr, label = ": DM-basis")
346 else
347 message(1) = 'Unable to read DM-basis wavefunctions.'
348 call messages_fatal(1, namespace=namespace)
349 end if
350
351 call restart_load%end()
352
353 ! TDDMPropagation_2Times
354 if (dmp%strategy == 2) then
355 safe_allocate(dmp%occ_gs(1:st%nst, 1:st%nik))
356 dmp%occ_gs = dmp%adiabatic_st%occ
357 end if
358
359 ! dmp%ave_trans: replicated on each node, shared within the node
360 if (dmp%strategy == 3) call iopar_open_trans_rate(namespace, ions, hm, st%system_grp, dmp)
361
362 ! record only adiabatic states
363 if (dmp%basis == option__tddmpropagationbasis__adiabatic) then
364 call dmp%restart_dump%init(namespace, restart_dm, restart_type_dump, mc, ierr, mesh=gr)
365 end if
366
368 end subroutine dm_propagation_init_run
369
370 subroutine dm_end_run(system_grp, dmp)
371 type(mpi_grp_t), intent(in) :: system_grp
372 type(dmp_t), intent(inout) :: dmp
373
374 push_sub(dm_end_run)
375
376 if (dmp%basis == option__tddmpropagationbasis__adiabatic) then
377 call dmp%restart_dump%end()
378 end if
379
380 call states_elec_end(dmp%adiabatic_st)
381
382 if (dmp%strategy == 3) call iopar_close_trans_rate(system_grp, dmp)
383
384 safe_deallocate_a(dmp%occ_gs)
385
386 pop_sub(dm_end_run)
387 end subroutine dm_end_run
388
390 subroutine dm_propagation_run(dmp, namespace, space, gr, ions, st, mc, hm, ks, iter, dt, ext_partners, update_energy)
391 type(dmp_t), intent(inout) :: dmp
392 type(namespace_t), intent(in) :: namespace
393 type(electron_space_t), intent(in) :: space
394 type(grid_t), intent(in) :: gr
395 type(ions_t), intent(in) :: ions
396 type(states_elec_t), intent(inout) :: st
397 type(multicomm_t), intent(in) :: mc
398 type(hamiltonian_elec_t), intent(inout) :: hm
399 type(v_ks_t), intent(inout) :: ks
400 integer, intent(in) :: iter
401 real(real64), intent(in) :: dt
402 type(partner_list_t), intent(in) :: ext_partners
403 logical, optional,intent(in) :: update_energy
404
405 real(real64), parameter :: ZERO = 1.0e-15_real64
406 type(eigensolver_t) :: eigens
407 real(real64) :: nrm2_tdks(st%nst, st%nik), population(3), leak
408 integer :: ik
409 logical :: update_energy_
410 complex(real64), allocatable :: rho_mat_k(:, :, :)
411 type(states_elec_t) :: resd_st
412 complex(real64), allocatable :: overlap_ad_ks(:, :, :), overlap_resd_ks(:, :, :)
413 integer, allocatable :: nresd_k(:)
414 !*
415 push_sub_with_profile(dm_propagation_run)
416
417 ! Update the hamiltonian. hm is updated after each td propagation.
418 ! But ions may move afterthen
419 call hm%update(gr, namespace, space, ext_partners, time=iter * dt)
420 if (dmp%basis == option__tddmpropagationbasis__adiabatic) then
421 call eigensolver_init(eigens, namespace, gr, dmp%adiabatic_st, hm, mc, space)
422 eigens%converged = 0
423 call eigens%run(namespace, gr, dmp%adiabatic_st, hm, space, ext_partners, iter)
424 end if
425
426 safe_allocate(overlap_ad_ks(1:st%nst, 1:st%nst, st%d%kpt%start:st%d%kpt%end))
427 safe_allocate(overlap_resd_ks(1:st%nst, 1:st%nst, st%d%kpt%start:st%d%kpt%end))
428 safe_allocate(rho_mat_k(2*st%nst, 2*st%nst, st%d%kpt%start:st%d%kpt%end))
429 safe_allocate(nresd_k(st%d%kpt%start:st%d%kpt%end))
430
431 population = 0.0_real64
432
433 ! Create a copy of the TDKS states to initialise the residuals
434 call states_elec_copy(resd_st, st, exclude_eigenval = .true.)
435
436 ! Store all density matrices for potential future communication
437 do ik = st%d%kpt%start, st%d%kpt%end
438 ! pop = \sum_{i,k} f_{i,k} <\psi_{i,k} | \psi_{i,k}>
439 call total_population(ik, st, gr, nrm2_tdks(:, ik), population(1))
440 ! S^{\phi,\psi}_{ij} = <\phi_i | \psi_j>. one more conjugation is required for this function
441 call zstates_elec_calc_projections(st, dmp%adiabatic_st, namespace, gr, ik, overlap_ad_ks(:, :, ik))
442 overlap_ad_ks(:, :, ik) = conjg(overlap_ad_ks(:, :, ik))
443 ! population before damping, set occupation as well
444 call population_in_adiabatic(ik, dmp%adiabatic_st, st, overlap_ad_ks(:, :, ik), population(2))
445
446 ! residual |d_j> = |\psi_j> - \sum_i S^{\phi,\psi}_{ij} |\phi_i>
447 ! S^{d,\psi}_{ij} = <d_i | \psi_j>
448 call construct_residuals(gr, namespace, dmp%adiabatic_st, st, ik, dmp%othn, overlap_ad_ks(:, :, ik), &
449 nrm2_tdks(:, ik), nresd_k(ik), overlap_resd_ks(:, :, ik), resd_st)
450
451 ! Active block: (nresd+nst, nresd+nst, :), allocated shape: (2*nst, 2*nst, :)
452 call construct_density_matrix(nresd_k(ik), ik, st, overlap_ad_ks(:, :, ik), overlap_resd_ks(:, :, ik), rho_mat_k(:, :, ik))
453 end do
454
455 ! broadcast the instanteous occupation for Pauli blocking
456 call broadcast_occupation(dmp%adiabatic_st%occ, dmp%adiabatic_st%d%kpt, dmp%adiabatic_st%nst, st%parallel_in_states)
457
458 ! dissipate the density matrix
459 call dissipation(hm, st, namespace, nresd_k, dt, dmp, rho_mat_k)
460
461 do ik = st%d%kpt%start, st%d%kpt%end
462 ! reconstruct wavefunctions. Active block: (nresd+nst, nresd+nst, :), allocated shape: (2*nst, 2*nst, :)
463 call update_st(dmp, ik, gr, namespace, nresd_k(ik), overlap_ad_ks(:, :, ik), overlap_resd_ks(:, :, ik), nrm2_tdks(:, ik), &
464 resd_st, st, rho_mat_k(:, :, ik), population(3))
465 end do
466
467 call states_elec_end(resd_st)
468
469 safe_deallocate_a(overlap_ad_ks)
470 safe_deallocate_a(overlap_resd_ks)
471
472 call broadcast_occupation(st%occ, st%d%kpt, st%nst, st%parallel_in_states)
473
474 safe_deallocate_a(rho_mat_k)
475 safe_deallocate_a(nresd_k)
476
477 call st%d%kpt%mpi_grp%allreduce_inplace(population, 3, mpi_double_precision, mpi_sum)
478
479 write(message(1), '(a,E21.14,a,E21.14,a,E21.14,a)') 'DMPopulation: ', population(1), &
480 ' (', population(2), ' in basis ) before damping; ', population(3), ' after damping'
481 call messages_info(1)
482
483 leak = population(1) - population(3)
484 if (abs(leak) > zero) then
485 write(message(1), '(a,E15.8,a,I8)') 'Leakage: ', leak, '(after damping) at time step', iter
486 call messages_info(1)
487 end if
488 dmp%adiabatic_st%qtot = population(1)
489
490 ! renew hamiltonian
491 call density_calc(st, gr, st%rho)
492 update_energy_ = optional_default(update_energy, .true.)
493 call v_ks_calc(ks, namespace, space, hm, st, ions, ext_partners, calc_eigenval = update_energy_, &
494 time = abs(iter * dt), calc_energy = update_energy_)
495
496 if (dmp%basis == option__tddmpropagationbasis__adiabatic) then
497 call eigensolver_end(eigens)
498 end if
499
500 pop_sub_with_profile(dm_propagation_run)
501
502 end subroutine dm_propagation_run
503
506 subroutine total_population(ik, st, gr, nrm2, pop)
507 integer, intent(in) :: ik
508 type(states_elec_t), intent(in) :: st
509 type(grid_t), intent(in) :: gr
510 real(real64), intent(out) :: nrm2(:)
511 real(real64), intent(inout) :: pop
512
513 integer :: ib, i_minst, i_maxst
514
515 push_sub(total_population)
516
517 assert(pop >= 0.0_real64)
518 assert(size(nrm2) == st%nst)
519
520 do ib = st%group%block_start, st%group%block_end
521 i_minst = states_elec_block_min(st, ib)
522 i_maxst = states_elec_block_max(st, ib)
523 call mesh_batch_nrm2(gr, st%group%psib(ib, ik), nrm2(i_minst:i_maxst), reduce = .false.)
524 end do
525 nrm2(1:st%nst) = nrm2(1:st%nst)**2
526 if (gr%parallel_in_domains) then
527 call gr%allreduce(nrm2, dim = st%nst)
528 end if
529
530 pop = pop + sum(st%occ(1:st%nst, ik)* nrm2(1:st%nst)) * st%kweights(ik)
531
532 pop_sub(total_population)
533
534 end subroutine total_population
535
538 subroutine population_in_adiabatic(ik, ad_st, st, overlap, pop)
539 integer, intent(in) :: ik
540 type(states_elec_t), intent(inout) :: ad_st
541 type(states_elec_t), intent(in) :: st
542 complex(real64), intent(in) :: overlap(:, :)
543 real(real64), intent(inout) :: pop
544
545 integer :: ist, jst
546
548
549 assert(ubound(overlap, dim = 1) == ad_st%nst)
550 assert(ubound(overlap, dim = 2) == st%nst)
551
552 ! Assign ad_st occupation and store in dm_proj_basis (also used for hopping rate calculation)
553 ad_st%occ(1:ad_st%nst, ik) = 0.0_real64
554 do jst = 1, st%nst
555 do ist = 1, ad_st%nst
556 ad_st%occ(ist, ik) = ad_st%occ(ist, ik) + &
557 (real(overlap(ist, jst))**2 + aimag(overlap(ist, jst))**2) * st%occ(jst, ik)
558 end do
559 end do
560
561 pop = pop + sum(ad_st%occ(:, ik)) * ad_st%kweights(ik)
562
564
565 end subroutine population_in_adiabatic
566
582 subroutine construct_residuals(gr, namespace, ad_st, st, ik, othn, overlap_ad_ks, nrm2_tdks, nresd, overlap_resd_ks, resd)
583 type(grid_t), intent(in) :: gr
584 type(namespace_t), intent(in) :: namespace
585 type(states_elec_t), intent(in) :: ad_st
586 type(states_elec_t), intent(in) :: st
587 integer, intent(in) :: ik
588 logical, intent(in) :: othn
589 complex(real64), intent(in) :: overlap_ad_ks(:, :)
590 real(real64), intent(in) :: nrm2_tdks(:)
591 integer, intent(out) :: nresd
592 complex(real64), intent(out) :: overlap_resd_ks(:, :)
593 type(states_elec_t), intent(inout) :: resd
594
595 integer :: ib, j, i_minst, i_maxst
596 complex(real64), allocatable :: d_j(:, :), ss(:)
597 real(real64), parameter :: small_rho = 1.0e-14_real64
598 real(real64), parameter :: small_resd = 1.0e-7_real64
599 real(real64) :: nrm_dj, nrm2_dj
600
601 push_sub_with_profile(construct_residuals)
602
603 assert(ubound(overlap_ad_ks, dim = 1) == ad_st%nst)
604 assert(ubound(overlap_ad_ks, dim = 2) == st%nst)
605 assert(ubound(overlap_resd_ks, dim = 1) == resd%nst)
606 assert(ubound(overlap_resd_ks, dim = 2) == st%nst)
607
608 safe_allocate(d_j(1:gr%np, st%d%dim))
609
610 if (.not. othn) then
611
612 ! Iterate over TDKS batches, construct the residuals
613 do j = 1, st%nst
614 call states_elec_get_state(st, gr, j, ik, d_j)
615 ! Compute |\psi_j > - sum_i S^{\phi,\psi}_{ij} |\phi_i >
616 do ib = ad_st%group%block_start, ad_st%group%block_end
617 i_minst = states_elec_block_min(ad_st, ib)
618 i_maxst = states_elec_block_max(ad_st, ib)
619 call zbatch_axpy_function(gr%np, -overlap_ad_ks(i_minst:i_maxst, j), &
620 ad_st%group%psib(ib, ik), d_j)
621 enddo
622 ! reset the residuals
623 call states_elec_set_state(resd, gr, j, ik, d_j)
624 enddo
625
626 ! Overlap S^{d,\psi} = < d | \psi>
627 nresd = st%nst
628 call zstates_elec_calc_projections(st, resd, namespace, gr, ik, overlap_resd_ks)
629 overlap_resd_ks = conjg(overlap_resd_ks)
630
631 else
632
633 ! Construct the orthonormalized residuals
634 safe_allocate(ss(1:st%nst))
635
636 nresd = 0
637 ! Iterate over TDKS states, construct the linearly-independent terms as residuals
638 do j = 1, resd%nst
639 ! nrm2_dj = nrm2_tdks - \sum_i |S^{\phi,\psi}_{ij}|^2
640 nrm2_dj = nrm2_tdks(j) - sum(real(overlap_ad_ks(:, j))**2) - sum(aimag(overlap_ad_ks(:, j))**2)
641 if (nrm2_dj > small_rho) then
642 ! scale the TDKS wavefunction to make the residual terms approximately normalized
643 call states_elec_get_state(st, gr, j, ik, d_j)
644 nrm_dj = sqrt(nrm2_dj)
645 call lalg_scal(gr%np, resd%d%dim, m_one / nrm_dj, d_j)
646
647 ! substract the overlap with ad states
648 ! Compute 1/|d_j| |\psi_j > - 1/|d_j| sum_i S^{\phi,\psi}_{ij} |\phi_i >
649 do ib = ad_st%group%block_start, ad_st%group%block_end
650 i_minst = states_elec_block_min(ad_st, ib)
651 i_maxst = states_elec_block_max(ad_st, ib)
652 call zbatch_axpy_function(gr%np, -overlap_ad_ks(i_minst:i_maxst, j) / nrm_dj, &
653 ad_st%group%psib(ib, ik), d_j)
654 enddo
655 call zstates_elec_orthogonalize_single_batch(ad_st, gr, ad_st%nst, ik, d_j)
656 ! substract the overlap with previous residuals
657 if (nresd > 0) then
658 call zstates_elec_orthogonalize_single_batch(resd, gr, nresd, ik, d_j)
659 end if
660 ! screen the small terms
661 nrm_dj = zmf_nrm2(gr, resd%d%dim, d_j, reduce=.true.)
662 if (nrm_dj > small_resd) then
663 call lalg_scal(gr%np, resd%d%dim, m_one / nrm_dj, d_j)
664 nresd = nresd + 1
665 call states_elec_set_state(resd, gr, nresd, ik, d_j)
666 else
667 cycle
668 end if
669 ! Overlap matrix S^{d,\psi}_{ji} = < d_j | \psi_i >
670 do ib = st%group%block_start, st%group%block_end
671 i_minst = states_elec_block_min(st, ib)
672 i_maxst = states_elec_block_max(st, ib)
673 call zmesh_batch_mf_dotp(gr, st%group%psib(ib, ik), d_j, ss(i_minst:i_maxst), reduce = .false., nst = i_maxst-i_minst+1)
674 end do
675
676 overlap_resd_ks(nresd, 1:st%nst) = conjg(ss(1:st%nst))
677 end if
678 enddo
679
680 overlap_resd_ks(nresd+1:resd%nst, :) = m_z0
681 if (gr%parallel_in_domains) then
682 call gr%allreduce(overlap_resd_ks)
683 end if
684
685 safe_deallocate_a(ss)
686
687 end if
688
689 safe_deallocate_a(d_j)
690
691 pop_sub_with_profile(construct_residuals)
692
693 end subroutine construct_residuals
694
710 subroutine construct_density_matrix(nresd, ik, st, overlap_ad_ks, overlap_resd_ks, rho_mat)
711 integer, intent(in) :: nresd
712 integer, intent(in) :: ik
713 type(states_elec_t), intent(in) :: st
714 complex(real64), intent(in) :: overlap_ad_ks(:, :)
715 complex(real64), intent(in) :: overlap_resd_ks(:, :)
716 complex(real64), intent(out) :: rho_mat(:, :)
717
718 integer :: ist, jst, lst
719 real(real64) :: sqrt_f
720 complex(real64), allocatable :: s_ad_scaled(:, :), s_resd_scaled(:, :)
721
723
724 assert(ubound(overlap_ad_ks, dim = 1) == st%nst)
725 assert(ubound(overlap_ad_ks, dim = 2) == st%nst)
726 assert(ubound(overlap_resd_ks, dim = 1) == st%nst)
727 assert(ubound(overlap_resd_ks, dim = 2) == st%nst)
728 assert(ubound(rho_mat, dim = 1) == 2*st%nst)
729 assert(ubound(rho_mat, dim = 2) == 2*st%nst)
730
731 safe_allocate(s_ad_scaled(st%nst, st%nst))
732 if (nresd > 0) then
733 safe_allocate(s_resd_scaled(nresd, st%nst))
734 end if
735
736 ! set those beyond (nst+nresd) elements to zero
737 rho_mat = 0.0_real64
738
739 ! scale S matrix
740 do lst = 1, st%nst
741 sqrt_f = sqrt(st%occ(lst, ik))
742 s_ad_scaled(1:st%nst, lst) = overlap_ad_ks(1:st%nst, lst) * sqrt_f
743 if (nresd > 0) then
744 s_resd_scaled(1:nresd, lst) = overlap_resd_ks(1:nresd, lst) * sqrt_f
745 end if
746 end do
747
748 ! ad-ad
749 call blas_herk('U', 'N', st%nst, st%nst, 1.0_real64, s_ad_scaled(1,1), st%nst, 0.0_real64, rho_mat(1, 1), 2*st%nst)
750 if (nresd > 0) then
751 ! resd-resd
752 call blas_herk('U', 'N', nresd, st%nst, 1.0_real64, s_resd_scaled(1,1), nresd, 0.0_real64, &
753 rho_mat(st%nst + 1, st%nst + 1), 2*st%nst)
754 ! ad-resd
755 call blas_gemm('N', 'C', st%nst, nresd, st%nst, m_z1, s_ad_scaled(1,1), st%nst, s_resd_scaled(1,1), &
756 nresd, m_z0, rho_mat(1, st%nst + 1), 2*st%nst)
757 end if
758
759 !$omp parallel do private(jst, ist)
760 do jst = 1, st%nst
761 ! mirror ad-ad lower triangle
762 do ist = 1, jst - 1
763 rho_mat(jst, ist) = conjg(rho_mat(ist, jst))
764 end do
765
766 ! mirror ad-resd to resd-ad (bottom-left block)
767 do ist = 1, nresd
768 rho_mat(ist + st%nst, jst) = conjg(rho_mat(jst, ist + st%nst))
769 end do
770 end do
771 !$omp end parallel do
772
773 !$omp parallel do private(jst, ist)
774 do jst = 1, nresd
775 ! mirror resd-resd lower triangle
776 do ist = 1, jst - 1
777 rho_mat(jst + st%nst, ist + st%nst) = conjg(rho_mat(ist + st%nst, jst + st%nst))
778 end do
779 end do
780 !$omp end parallel do
781
782 safe_deallocate_a(s_ad_scaled)
783 if (nresd > 0) then
784 safe_deallocate_a(s_resd_scaled)
785 end if
786
788 end subroutine construct_density_matrix
789
790 subroutine broadcast_occupation(occ, kpt, nst, parstate)
791 real(real64), intent(inout) :: occ(:, :)
792 type(distributed_t), intent(in) :: kpt
793 integer, intent(in) :: nst
794 logical, intent(in) :: parstate
795
796 integer :: incount
797 integer, allocatable :: rdispls(:), recvcnts(:)
798 real(real64), allocatable :: sendbuffer(:, :)
799
800 push_sub_with_profile(broadcast_occupation)
801
802 assert(ubound(occ, dim = 1) == nst)
803 assert(ubound(occ, dim = 2) == kpt%nglobal)
804 assert(.not. parstate)
806 if (kpt%parallel) then
807 safe_allocate(recvcnts(1:kpt%mpi_grp%size))
808 safe_allocate(rdispls(1:kpt%mpi_grp%size))
809 safe_allocate(sendbuffer(1:nst, kpt%nlocal))
810
811 incount = nst * kpt%nlocal
812 recvcnts(:) = nst * kpt%num(:)
813 sendbuffer(1:nst, 1:kpt%nlocal) = occ(:, kpt%start:kpt%end)
814
815 call mpi_displacements(recvcnts, rdispls)
816 ! send buffer and receive buffer can not be the same address
817 call kpt%mpi_grp%allgatherv(sendbuffer, incount, mpi_double_precision, &
818 occ, recvcnts, rdispls, mpi_double_precision)
819
820 safe_deallocate_a(sendbuffer)
821 safe_deallocate_a(recvcnts)
822 safe_deallocate_a(rdispls)
823 end if
824
825 pop_sub_with_profile(broadcast_occupation)
826
827 end subroutine broadcast_occupation
828
844 subroutine dissipation(hm, st, namespace, nresd_k, dt, dmp, rho_mat_k)
845 type(hamiltonian_elec_t), intent(in) :: hm
846 type(states_elec_t), intent(in) :: st
847 type(namespace_t), intent(in) :: namespace
848 integer, intent(in) :: nresd_k(:)
849 real(real64), intent(in) :: dt
850 type(dmp_t), intent(inout) :: dmp
851 complex(real64), intent(inout) :: rho_mat_k(:, :, :)
852
853 push_sub_with_profile(dissipation)
854
855 assert(ubound(nresd_k, dim = 1) == dmp%adiabatic_st%d%kpt%nlocal)
856 assert(ubound(rho_mat_k, dim = 1) == 2*dmp%adiabatic_st%nst)
857 assert(ubound(rho_mat_k, dim = 2) == 2*dmp%adiabatic_st%nst)
858 assert(ubound(rho_mat_k, dim = 3) == dmp%adiabatic_st%d%kpt%nlocal)
859
860 if (dmp%calculation_mode == option__tddmpropagation__master_equation) then
861 select case (dmp%strategy)
862 case (1)
863 call lindblad_uniform(dmp, st%d%kpt, nresd_k, dt, rho_mat_k)
864 case (2)
865 call lindblad_2times(dmp, st%d%kpt, nresd_k, dt, rho_mat_k)
866 case (3)
867 call lindblad_from_epw(dmp, hm, st%d%kpt, st%system_grp, namespace, nresd_k, dt, rho_mat_k)
868 end select
869 else if (dmp%calculation_mode == option__tddmpropagation__collision_integral) then
870 call collision_from_epw(dmp, hm, st%d%kpt, st%system_grp, namespace, nresd_k, dt, rho_mat_k)
871 end if
872
873 pop_sub_with_profile(dissipation)
874
875 end subroutine dissipation
876
881 subroutine lindblad_uniform(dmp, kpt, nresd_k, dt, rho_mat_k)
882 type(dmp_t), intent(in) :: dmp
883 type(distributed_t), intent(in) :: kpt
884 integer, intent(in) :: nresd_k(:)
885 real(real64), intent(in) :: dt
886 complex(real64), intent(inout) :: rho_mat_k(:, :, :)
887
888 real(real64) :: coeff
889 real(real64), allocatable :: rtrans(:, :)
890 complex(real64), allocatable :: rho_in(:, :), rho_out(:, :), rho_res(:, :), rho_tmp(:, :)
891 integer :: iorder, nst, ik, ik_, nresd
892 integer, parameter :: norder = 4
893
894 push_sub_with_profile(lindblad_uniform)
895
896 nst = dmp%adiabatic_st%nst
897
898 ! lindblad operate on the density matrix
899 safe_allocate(rtrans(1:nst, 1:nst))
900 safe_allocate(rho_in(1:2*nst, 1:2*nst))
901 safe_allocate(rho_out(1:2*nst, 1:2*nst))
902 safe_allocate(rho_res(1:2*nst, 1:2*nst))
903
904 do ik = kpt%start, kpt%end
905 ik_ = ik - kpt%start + 1
906 nresd = nresd_k(ik_)
907
908 call transition_rate_uniform(dmp%uniform, dmp%adiabatic_st, ik, rtrans)
909
910 rho_res = 0.0_real64
911 rho_in(1:2*nst, 1:2*nst) = rho_mat_k(1:2*nst, 1:2*nst, ik_)
912
913 coeff = m_one
914 do iorder = 1, norder-1
915
916 call lindblad_operator_uniform(nst, nresd, rtrans, rho_in, rho_out)
917
918 coeff = coeff * dt / iorder
919 rho_res = rho_res + coeff * rho_out
920 ! swap the pointer between rho_in and rho_out, so new rho_in is current rho_out,
921 ! and new rho_out is the prior input that will get overwritten
922 call move_alloc(rho_in, rho_tmp)
923 call move_alloc(rho_out, rho_in)
924 call move_alloc(rho_tmp, rho_out)
925 end do
926 ! iorder == norder
927 call lindblad_operator_uniform(nst, nresd, rtrans, rho_in, rho_out)
928 coeff = coeff * dt / norder
929 rho_res = rho_res + coeff * rho_out
930
931 rho_mat_k(1:2*nst, 1:2*nst, ik_) = rho_mat_k(1:2*nst, 1:2*nst, ik_) + rho_res
932 end do
933
934 safe_deallocate_a(rho_in)
935 safe_deallocate_a(rho_out)
936 safe_deallocate_a(rtrans)
937 safe_deallocate_a(rho_res)
938
939 pop_sub_with_profile(lindblad_uniform)
940
941 end subroutine lindblad_uniform
942
947 subroutine transition_rate_uniform(uniform, ad_st, ik, rtrans)
948 real(real64), intent(in) :: uniform(:)
949 type(states_elec_t), intent(in) :: ad_st
950 integer, intent(in) :: ik
951 real(real64), intent(out) :: rtrans(:, :)
952
953 integer :: ist, jst, nst
954 real(real64) :: rate_character, omega, inv_omega, nph, delta_e
955 real(real64), parameter :: small_e = 0.002_real64/(m_two*p_ry), large_e = 0.5_real64/(m_two*p_ry)
956 real(real64) :: unocc_ist, unocc_jst
957
959
960 nst = ad_st%nst
961 assert(ubound(rtrans, dim = 1) == nst)
962 assert(ubound(rtrans, dim = 2) == nst)
963
964 rate_character = 1 / uniform(1)
965 omega = units_to_atomic(unit_kelvin, uniform(2))
966 inv_omega = 1.0_real64 / max(omega, 1.0e-12_real64)
967
968 rtrans = m_zero
969 ! rtrans(ist, jst) is the transition rate from jst to ist
970 do ist = 1, nst
971 unocc_ist = 1.0_real64 - ad_st%occ(ist, ik) / ad_st%smear%el_per_state
972
973 do jst = ist + 1, nst
974 delta_e = ad_st%eigenval(jst, ik) - ad_st%eigenval(ist, ik)
975 if (delta_e < small_e) cycle
977 if (delta_e < large_e) then
978 nph = 1.0_real64/(exp(delta_e*inv_omega) - 1.0_real64)
979 else
980 nph = m_zero
981 end if
982 unocc_jst = 1.0_real64 - ad_st%occ(jst, ik) / ad_st%smear%el_per_state
983
984 rtrans(ist, jst) = merge(rate_character * unocc_ist * (nph + 1), m_zero, unocc_ist > m_zero)
985 rtrans(jst, ist) = merge(rate_character * unocc_jst * nph, m_zero, unocc_jst > m_zero)
986 end do
987 end do
988
990
991 end subroutine transition_rate_uniform
992
998 subroutine lindblad_operator_uniform(nst, nresd, rtrans, den_mat, l_mat)
999 integer, intent(in) :: nst
1000 integer, intent(in) :: nresd
1001 real(real64), intent(in) :: rtrans(:, :)
1002 complex(real64), intent(in) :: den_mat(:, :)
1003 complex(real64), intent(out) :: l_mat(:, :)
1004
1005 integer :: ist, jst, lst
1006
1007 push_sub_with_profile(lindblad_operator_uniform)
1008
1009 assert(ubound(rtrans, dim = 1) == nst)
1010 assert(ubound(rtrans, dim = 2) == nst)
1011 assert(ubound(den_mat, dim = 1) == 2*nst)
1012 assert(ubound(den_mat, dim = 2) == 2*nst)
1013 assert(ubound(l_mat, dim = 1) == 2*nst)
1014 assert(ubound(l_mat, dim = 2) == 2*nst)
1015
1016 l_mat = 0.0_real64
1017
1018 do ist = 1, nst
1019 do jst = 1, nst
1020 if (ist == jst) cycle
1021 do lst = 1, nst + nresd
1022 l_mat(ist, lst) = l_mat(ist, lst) - m_half * rtrans(jst, ist) * den_mat(ist, lst)
1023 l_mat(lst, ist) = l_mat(lst, ist) - m_half * rtrans(jst, ist) * den_mat(lst, ist)
1024 end do
1025 l_mat(jst, jst) = l_mat(jst, jst) + rtrans(jst, ist) * den_mat(ist, ist)
1026 end do
1027 end do
1028
1029 pop_sub_with_profile(lindblad_operator_uniform)
1030
1031 end subroutine lindblad_operator_uniform
1032
1040 subroutine lindblad_2times(dmp, kpt, nresd_k, dt, rho_mat_k)
1041 type(dmp_t), intent(in) :: dmp
1042 type(distributed_t), intent(in) :: kpt
1043 integer, intent(in) :: nresd_k(:)
1044 real(real64), intent(in) :: dt
1045 complex(real64), intent(inout) :: rho_mat_k(:, :, :)
1046
1047 real(real64) :: decay_T1, decay_T2
1048 integer :: ist, jst, nst, ik, ik_, nresd
1049
1050 push_sub_with_profile(lindblad_2times)
1051
1052 nst = dmp%adiabatic_st%nst
1053
1054 decay_t1 = exp(-dt / dmp%tmodel(1))
1055 decay_t2 = exp(-dt / dmp%tmodel(2))
1056
1057 do ik = kpt%start, kpt%end
1058 ik_ = ik - kpt%start + 1
1059 nresd = nresd_k(ik_)
1060
1061 do ist = 1, nst
1062 rho_mat_k(ist, ist, ik_) = dmp%occ_gs(ist, ik_) + (rho_mat_k(ist, ist, ik_) - dmp%occ_gs(ist, ik_)) * decay_t1
1063 end do
1064 do ist = nst + 1, nst + nresd
1065 rho_mat_k(ist, ist, ik_) = rho_mat_k(ist, ist, ik_) * decay_t1
1066 end do
1067 do ist = 1, nst + nresd
1068 do jst = ist + 1, nst + nresd
1069 rho_mat_k(jst, ist, ik_) = rho_mat_k(jst, ist, ik_) * decay_t2
1070 rho_mat_k(ist, jst, ik_) = conjg(rho_mat_k(jst, ist, ik_))
1071 end do
1072 end do
1073 end do
1074
1075 pop_sub_with_profile(lindblad_2times)
1076 end subroutine lindblad_2times
1077
1081 subroutine lindblad_from_epw(dmp, hm, kpt, system_grp, namespace, nresd_k, dt, rho_mat_k)
1082 type(dmp_t), intent(inout) :: dmp
1083 type(hamiltonian_elec_t), intent(in) :: hm
1084 type(distributed_t), intent(in) :: kpt
1085 type(mpi_grp_t), intent(in) :: system_grp
1086 type(namespace_t), intent(in) :: namespace
1087 integer, intent(in) :: nresd_k(:)
1088 real(real64), intent(in) :: dt
1089 complex(real64), intent(inout) :: rho_mat_k(:, :, :)
1090
1091 real(real64) :: coeff
1092 real(real64), allocatable :: rho_diag(:, :)
1093 complex(real64), allocatable :: rho_in(:, :, :), rho_out(:, :, :), rho_tmp(:, :, :)
1094 integer, parameter :: norder = 4
1095 integer :: ist, iorder, ik, nst, ik_, nik
1096
1097 push_sub_with_profile(lindblad_from_epw)
1098
1099 nst = dmp%adiabatic_st%nst
1100 nik = dmp%adiabatic_st%nik
1101
1102 call dmp%update_trans_rate(hm, system_grp, namespace)
1103
1104 ! the diagonal elements of L\rho is initialized to occupation
1105 safe_allocate_source_a(rho_diag, dmp%adiabatic_st%occ)
1106
1107 ! L_D[rho_in] = rho_out
1108 safe_allocate_source(rho_in, rho_mat_k)
1109 safe_allocate(rho_out(1:2*nst, 1:2*nst, 1:kpt%nlocal))
1110
1111 ! dimensition of rho_out should be modified later
1112 coeff = 1.0_real64
1113 do iorder = 1, norder - 1
1114
1115 coeff = coeff * dt / iorder
1116
1117 do ik = kpt%start, kpt%end
1118 ik_ = ik - kpt%start + 1
1119
1120 call lindblad_operator_epw(dmp, ik, hm%kpoints%nik_skip, nresd_k(ik_), rho_diag, rho_in(:, :, ik_), rho_out(:, :, ik_))
1121
1122 call lalg_axpy(2*nst, 2*nst, coeff, rho_out(:, :, ik_), rho_mat_k(:, :, ik_))
1123 end do
1124
1125 ! update rho_diag independently
1126 do ik = kpt%start, kpt%end
1127 ik_ = ik - kpt%start + 1
1128 do ist = 1, nst
1129 rho_diag(ist, ik) = real(rho_out(ist, ist, ik_))
1130 end do
1131 end do
1132 ! broadcast occupation for all processes
1133 call broadcast_occupation(rho_diag, kpt, nst, dmp%adiabatic_st%parallel_in_states)
1134
1135 ! swap the pointer between rho_in and rho_out, so new rho_in is current rho_out,
1136 ! and new rho_out is the prior input that will get overwritten
1137 call move_alloc(rho_in, rho_tmp)
1138 call move_alloc(rho_out, rho_in)
1139 call move_alloc(rho_tmp, rho_out)
1140 end do
1141
1142 ! iorder == norder
1143 coeff = coeff * dt / norder
1144
1145 do ik = kpt%start, kpt%end
1146 ik_ = ik - kpt%start + 1
1147
1148 call lindblad_operator_epw(dmp, ik, hm%kpoints%nik_skip, nresd_k(ik_), rho_diag, rho_in(:, :, ik_), rho_out(:, :, ik_))
1149
1150 call lalg_axpy(2*nst, 2*nst, coeff, rho_out(:, :, ik_), rho_mat_k(:, :, ik_))
1151 end do
1152
1153
1154 safe_deallocate_a(rho_in)
1155 safe_deallocate_a(rho_out)
1156 safe_deallocate_a(rho_diag)
1157
1158 pop_sub_with_profile(lindblad_from_epw)
1159 end subroutine lindblad_from_epw
1160
1170 subroutine collision_from_epw(dmp, hm, kpt, system_grp, namespace, nresd_k, dt, rho_mat_k)
1171 type(dmp_t), intent(inout) :: dmp
1172 type(hamiltonian_elec_t), intent(in) :: hm
1173 type(distributed_t), intent(in) :: kpt
1174 type(mpi_grp_t), intent(in) :: system_grp
1175 type(namespace_t), intent(in) :: namespace
1176 integer, intent(in) :: nresd_k(:)
1177 real(real64), intent(in) :: dt
1178 complex(real64), intent(inout) :: rho_mat_k(:, :, :)
1179
1180 real(real64) :: gam, gam_in, gam_out
1181 real(real64), allocatable :: gam_bnd(:)
1182 real(real64), parameter :: gthresh = 1.0e-8_real64
1183 integer :: ist, jst, ik, nst, nresd, ik_, nik_skip
1184
1185 push_sub_with_profile(collision_from_epw)
1186
1187 nst = dmp%adiabatic_st%nst
1188 nik_skip = hm%kpoints%nik_skip
1189
1190 safe_allocate(gam_bnd(2*nst))
1191
1192 call dmp%update_trans_rate(hm, system_grp, namespace)
1193
1194 ! off-diagonals
1195 do ik = kpt%start, kpt%end
1196 ik_ = ik - kpt%start + 1
1197 nresd = nresd_k(ik_)
1198
1199 ! pre-calculate gamma
1200 gam_bnd = 0.0_real64
1201 do ist = dmp%istart, dmp%iend
1202 call lifetime(dmp, ik, ist, nik_skip, gam_bnd(ist))
1203 end do
1204
1205 do ist = 1, nst + nresd
1206 do jst = ist + 1, nst + nresd
1207 gam = -(gam_bnd(ist) + gam_bnd(jst)) / 2.0_real64
1208
1209 ! damping
1210 rho_mat_k(jst, ist, ik_) = rho_mat_k(jst, ist, ik_) * exp(gam * dt)
1211 rho_mat_k(ist, jst, ik_) = conjg(rho_mat_k(jst, ist, ik_))
1212 end do
1213 end do
1214 end do
1215
1216 ! diagonals, \dot{f} = -gam_out * f + gam_in, gives analytical solution
1217 do ik = kpt%start, kpt%end
1218 ik_ = ik - kpt%start + 1
1219 do ist = dmp%istart, dmp%iend
1220 call get_gamma(dmp, ik, ist, nik_skip, gam_in, gam_out)
1221 if (gam_out * dt > gthresh) then
1222 rho_mat_k(ist, ist, ik_) = rho_mat_k(ist, ist, ik_) * exp(-gam_out * dt) &
1223 + (gam_in / gam_out) * (1.0_real64 - exp(-gam_out * dt))
1224 else
1225 ! when denominator divergent
1226 rho_mat_k(ist, ist, ik_) = rho_mat_k(ist, ist, ik_) * (1.0_real64 - gam_out * dt) + gam_in * dt
1227 end if
1228 end do
1229 end do
1230
1231 safe_deallocate_a(gam_bnd)
1232
1233 pop_sub_with_profile(collision_from_epw)
1234 end subroutine collision_from_epw
1235
1237 subroutine dm_propagation_update_trans_rate(this, hm, system_grp, namespace)
1238 class(dmp_t), intent(inout) :: this
1239 type(hamiltonian_elec_t), intent(in) :: hm
1240 type(mpi_grp_t), intent(in) :: system_grp
1241 type(namespace_t), intent(in) :: namespace
1242
1243 integer :: ia
1244
1245 push_sub_with_profile(dmp_propagation_update_trans_rate)
1246
1247 ia = get_vector_field_index(this, hm, namespace)
1248
1249 if (ia /= this%ia) then
1250 call iopar_read_trans_rate(ia, system_grp, namespace, this)
1251 this%ia = ia
1252 end if
1253
1254 pop_sub_with_profile(dmp_propagation_update_trans_rate)
1255
1257
1259 subroutine iopar_open_trans_rate(namespace, ions, hm, system_grp, dmp)
1260 type(namespace_t), intent(in) :: namespace
1261 type(ions_t), intent(in) :: ions
1262 type(hamiltonian_elec_t), intent(in) :: hm
1263 type(mpi_grp_t), intent(in) :: system_grp
1264 type(dmp_t), intent(inout) :: dmp
1266 integer :: ierr, iostat, idim, idir, iqq, totq
1267 integer :: epw_nk(3), oct_nk(3)
1268 real(real64) :: oct_s(3), at(3,3)
1269
1270 push_sub(iopar_open_trans_rate)
1271
1272 ! metadata
1273 if (system_grp%is_root()) then
1274 open(newunit=dmp%iunit, file=trim(dmp%epw_file), form='unformatted', access='stream', status='old', &
1275 action='read', iostat=iostat)
1276 if (iostat /= 0) then
1277 dmp%iunit = -1
1278 write(message(1), '(a,a)') 'Error opening file: ', trim(dmp%epw_file)
1279 call messages_fatal(1, namespace=namespace)
1280 end if
1281 !
1282 read(dmp%iunit, iostat=ierr) iqq, totq, dmp%wnst, epw_nk(1:3), at, oct_nk(1:3), oct_s(1:3), dmp%astep(1:3), dmp%na(1:3)
1283 if (ierr /= 0) then
1284 write(message(1), '(a,a)') 'Error reading header from: ', trim(dmp%epw_file)
1285 call messages_fatal(1, namespace=namespace)
1286 end if
1287 !
1288 end if
1289
1290 call system_grp%bcast(dmp%wnst, 1, mpi_integer, 0)
1291 call system_grp%bcast(at, 9, mpi_double_precision, 0)
1292 call system_grp%bcast(oct_s, 3, mpi_double_precision, 0)
1293 call system_grp%bcast(dmp%astep, 3, mpi_double_precision, 0)
1294 call system_grp%bcast(dmp%na, 3, mpi_integer, 0)
1295 call system_grp%bcast(epw_nk, 3, mpi_integer, 0)
1296 call system_grp%bcast(oct_nk, 3, mpi_integer, 0)
1297
1298 dmp%iend = dmp%istart + dmp%wnst - 1
1299
1300 if (any(oct_nk /= hm%kpoints%nik_axis)) then
1301 write(message(1), '(a, a)') 'Inconsistent k-point mesh in KPointsGrid and ', trim(dmp%epw_file)
1302 call messages_fatal(1, namespace=namespace)
1303 end if
1304 if (any(abs(oct_s - hm%kpoints%full%shifts(:, 1)) > m_epsilon)) then
1305 write(message(1), '(a, a)') 'Inconsistent k-point mesh shifts in KPointsGrid and ', trim(dmp%epw_file)
1306 call messages_fatal(1, namespace=namespace)
1307 end if
1308 !
1309 write(message(1), '(3(a,i0), a)') 'Info: Averaged transition rates obtained from a ', epw_nk(1), ' x ', &
1310 epw_nk(2), ' x ', epw_nk(3), ' EPW k-point mesh'
1311 call messages_info(1, namespace=namespace)
1312 write(message(1), '(a, i0, a, i0)') 'Info: Damping band ', dmp%istart, ' - ', dmp%iend
1313 call messages_info(1, namespace=namespace)
1314 write(message(1),'(a)') ' EPW Lattice Vectors [1/alat]'
1315 do idim = 1, 3
1316 write(message(1+idim),'(3f12.6)') (at(idir, idim), idir = 1, 3)
1317 end do
1318 call messages_info(4, namespace=namespace)
1319 if (any(abs(at - ions%latt%rlattice_primitive) > m_epsilon)) then
1320 write(message(1),'(a)') 'Lattice settings are not fully consistent with those in EPW'
1321 call messages_warning(1, namespace=namespace)
1322 end if
1323
1324 call build_epw_kmap(namespace, hm%kpoints, dmp)
1325
1326 ! initialize vector field grid index
1327 dmp%ia = -1
1328
1329 ! shared memory for transition rates
1330 dmp%num = int(product(oct_nk), kind=int64)**2 * int(dmp%wnst, kind=int64)**2
1331#ifdef HAVE_MPI
1332 ! create shared memory window and fill it only on root
1333 call create_intranode_communicator(system_grp, dmp%intranode_grp, dmp%internode_grp)
1334 ! We inline the logic of lmpi_create_shared_memory_window because single precision is not supported.
1335 call slmpi_create_shared_memory_window(dmp%num, dmp%intranode_grp, dmp%window_trans_rate, ave_trans)
1336#else
1337 safe_allocate(ave_trans(1:dmp%num))
1338#endif
1339
1340 pop_sub(iopar_open_trans_rate)
1341 end subroutine iopar_open_trans_rate
1342
1344 subroutine iopar_read_trans_rate(ia, system_grp, namespace, dmp)
1345 integer, intent(in) :: ia
1346 type(mpi_grp_t), intent(in) :: system_grp
1347 type(namespace_t), intent(in) :: namespace
1348 type(dmp_t), intent(inout) :: dmp
1349
1350 integer(int64), parameter :: header_bytes = 168 ! head info in binary file
1351 integer(int64), parameter :: epw_bytes = 4 ! single precision
1352 integer(int64) :: offset
1353 integer :: ierr
1355 push_sub(iopar_read_trans_rate)
1356
1357 if (system_grp%is_root()) then
1358 ! single precision
1359 offset = header_bytes + int(ia - 1, kind=int64) * dmp%num * epw_bytes + 1_int64
1360 read(dmp%iunit, pos=offset, iostat=ierr) ave_trans
1361 if (ierr /= 0) then
1362 write(message(1), '(a,a)') 'Error reading transition rates from: ', trim(dmp%epw_file)
1363 call messages_fatal(1, namespace=namespace)
1364 end if
1365 end if
1366
1367#ifdef HAVE_MPI
1368 ! now broadcast the global arrays to local rank 0 on each node
1369 if (dmp%intranode_grp%rank == 0) then
1370 call smpi_grp_bcast(dmp%internode_grp, ave_trans(1), dmp%num, mpi_real, 0)
1371 end if
1372 call lmpi_sync_shared_memory_window(dmp%window_trans_rate, dmp%intranode_grp)
1373#endif
1374
1375 pop_sub(iopar_read_trans_rate)
1376 end subroutine iopar_read_trans_rate
1377
1381 subroutine iopar_close_trans_rate(system_grp, dmp)
1382 type(mpi_grp_t), intent(in) :: system_grp
1383 type(dmp_t), intent(inout) :: dmp
1384
1385 push_sub(iopar_close_trans_rate)
1386
1387 safe_deallocate_a(dmp%kmap)
1388
1389 if (system_grp%is_root()) then
1390 close(dmp%iunit)
1391 end if
1392
1393#ifdef HAVE_MPI
1394 call lmpi_destroy_shared_memory_window(dmp%window_trans_rate)
1395 nullify(ave_trans)
1396#else
1397 safe_deallocate_p(ave_trans)
1398#endif
1399
1400 pop_sub(iopar_close_trans_rate)
1401
1402 end subroutine iopar_close_trans_rate
1403
1405 subroutine build_epw_kmap(namespace, kpoints, dmp)
1406 type(namespace_t), intent(in) :: namespace
1407 type(kpoints_t), intent(in) :: kpoints
1408 type(dmp_t), intent(inout) :: dmp
1409
1410 integer :: ik, idx, nik, nik_mp
1411 integer :: kidx_(3)
1412 real(real64) :: kred(3), kidx(3)
1413 real(real64), parameter :: tol = 1.0d-10
1414
1415 push_sub(build_epw_kmap)
1416
1417 nik = dmp%adiabatic_st%nik
1418 nik_mp = nik - kpoints%nik_skip
1419 safe_allocate(dmp%kmap(nik))
1420
1421 ! mp k-points
1422 dmp%kmap = nik_mp + 1
1423
1424 do ik = 1, nik
1425 ! Map reduced coordinate into an integer MP-grid coordinate
1426 kred = kpoints%get_point(ik, .false.)
1427 kidx = (kred + 0.5_real64) * real(kpoints%nik_axis, kind=real64) - kpoints%full%shifts(:, 1)
1428
1429 ! for MP k-points
1430 if (ik <= nik_mp) then
1431 if (any(abs(kidx - nint(kidx)) > tol)) then
1432 write(message(1), '(a)') 'K-point mesh is not compatible with EPW input'
1433 call messages_fatal(1, namespace=namespace)
1434 end if
1435 end if
1436
1437 ! Fold into central cell in EPW
1438 kidx_ = modulo(nint(kidx), kpoints%nik_axis)
1439 ! Flatten into a single, contiguous index
1440 idx = kidx_(1) * kpoints%nik_axis(2) * kpoints%nik_axis(3) + kidx_(2) * kpoints%nik_axis(3) + &
1441 kidx_(3) + 1
1442
1443 dmp%kmap(ik) = idx
1444 end do
1445
1446 ! sanity check
1447 assert(all(dmp%kmap <= nik_mp))
1448
1449 pop_sub(build_epw_kmap)
1450 end subroutine build_epw_kmap
1451
1453 function get_vector_field_index(dmp, hm, namespace) result(aidx)
1454 type(dmp_t), intent(in) :: dmp
1455 type(hamiltonian_elec_t), intent(in) :: hm
1456 type(namespace_t), intent(in) :: namespace
1457
1458 real(real64) :: ared(3), approx(3)
1459 integer :: apoint_idx(3)
1460 integer :: aidx, idim
1461
1462 push_sub(get_vector_field_index)
1463
1464 ! Find the nearest discrete vector field grid point on the EPW vector field mesh
1465 if (allocated(hm%hm_base%uniform_vector_potential)) then
1466 call kpoints_to_reduced(hm%ions%latt, hm%hm_base%uniform_vector_potential, ared)
1467 else
1468 ared = 0.0_real64
1469 end if
1470 do idim = 1, 3
1471 if (dmp%astep(idim) > m_zero) then
1472 apoint_idx(idim) = nint(ared(idim) / dmp%astep(idim))
1473 approx(idim) = apoint_idx(idim) * dmp%astep(idim)
1474 else
1475 apoint_idx(idim) = 0
1476 approx(idim) = 0.0_real64
1477 end if
1478 end do
1479 if (any(abs(apoint_idx) > dmp%na)) then
1480 write(message(1), '(a, 3F8.3)') 'Vector potential exceeds mesh range: ', ared
1481 call messages_warning(1, namespace=namespace)
1482 where (apoint_idx < -dmp%na)
1483 apoint_idx = -dmp%na
1484 end where
1485 where (apoint_idx > dmp%na)
1486 apoint_idx = dmp%na
1487 end where
1488 end if
1489 ! Get the flattened 1D index
1490 ! Step 1: Find the relative offset by subtracting the minimum index (-na).
1491 ! This shifts the range [-na, na] to an offset [0, 2*na].
1492 ! e.g., if range is [-2, 2], the offset for 1 is 1 - (-2) = 3.
1493 ! Step 2: Flatten the 3D offsets into 1D, and finally add 1 for Fortran 1-based indexing.
1494 aidx = (apoint_idx(1) + dmp%na(1)) * (2 * dmp%na(2) + 1) * (2 * dmp%na(3) + 1) + &
1495 (apoint_idx(2) + dmp%na(2)) * (2 * dmp%na(3) + 1) + &
1496 (apoint_idx(3) + dmp%na(3)) + 1
1497
1498 write(message(1), '(a, x, 3F7.3)') 'Approximated vector field damping grid:', approx
1499 call messages_info(1, namespace=namespace)
1500
1501 pop_sub(get_vector_field_index)
1502 end function get_vector_field_index
1503
1508 function get_trans_rate(dmp, nik_mp, jbnd, ibnd, k, kq, p_block) result(res)
1509 type(dmp_t), intent(in) :: dmp
1510 integer, intent(in) :: nik_mp
1511 integer, intent(in) :: jbnd
1512 integer, intent(in) :: ibnd
1513 integer, intent(in) :: k
1514 integer, intent(in) :: kq
1515 logical, optional, intent(in) :: p_block
1516
1517 real(real64) :: unocc, res
1518 integer :: k_, kq_
1519 integer(int64) :: idx
1520
1521 push_sub(get_trans_rate)
1522
1523 k_ = dmp%kmap(k)
1524 kq_ = dmp%kmap(kq)
1525 ! Flatten 4D transition rate index to 1D: (kq, k, ibnd, jbnd) -> idx
1526 ! Matrix dimensions are [nik_mp, nik_mp, wnst, wnst]
1527 ! The layout follows the hierarchy: kq is the slowest index, jbnd is the fastest.
1528 idx = int(kq_-1, kind=int64) * int(nik_mp, kind=int64) * int(dmp%wnst, kind=int64)**2 + &
1529 int((k_-1) * dmp%wnst**2, kind=int64) + &
1530 int((ibnd-dmp%istart)*dmp%wnst + (jbnd-dmp%istart) + 1, kind=int64)
1531
1532 assert(idx >= 1 .and. idx <= dmp%num)
1533
1534 res = real(ave_trans(idx), kind=real64)
1535 ! Pauli blocking
1536 if (optional_default(p_block, .true.)) then
1537 unocc = 1.0_real64 - dmp%adiabatic_st%occ(jbnd, kq) / dmp%adiabatic_st%smear%el_per_state
1538 res = res * max(unocc, 0.0_real64)
1539 end if
1541 pop_sub(get_trans_rate)
1542 end function get_trans_rate
1543
1551 subroutine lindblad_operator_epw(dmp, ik, nik_skip, nresd, rho_diag, den_mat, l_mat)
1552 type(dmp_t), intent(in) :: dmp
1553 integer, intent(in) :: ik
1554 integer, intent(in) :: nik_skip
1555 integer, intent(in) :: nresd
1556 real(real64), intent(in) :: rho_diag(:, :)
1557 complex(real64), intent(in) :: den_mat(:, :)
1558 complex(real64), intent(out) :: l_mat(:, :)
1559
1560 integer :: lst, nst, nik_mp, ikq, ibnd, jbnd
1561 real(real64) :: tr
1562 push_sub(lindblad_operator_epw)
1563
1564 nst = dmp%adiabatic_st%nst
1565 nik_mp = dmp%adiabatic_st%nik - nik_skip
1566
1567 assert(ubound(rho_diag, dim = 1) == nst)
1568 assert(ubound(rho_diag, dim = 2) == dmp%adiabatic_st%nik)
1569 assert(ubound(den_mat, dim = 1) == 2*nst)
1570 assert(ubound(den_mat, dim = 2) == 2*nst)
1571 assert(ubound(l_mat, dim = 1) == 2*nst)
1572 assert(ubound(l_mat, dim = 2) == 2*nst)
1573
1574 l_mat = 0.0_real64
1575
1576 ! there are two types of damping: 1. mp-mp k-grid scattering; 2. highsympath-mp k-grid scattering
1577 ! in both case, ikq is the mp grid.
1578 do ikq = 1, nik_mp
1579 do jbnd = dmp%istart, dmp%iend
1580 do ibnd = dmp%istart, dmp%iend
1581
1582 tr = get_trans_rate(dmp, nik_mp, jbnd, ibnd, ik, ikq, p_block=.true.)
1583 do lst = 1, nst + nresd
1584 l_mat(ibnd, lst) = l_mat(ibnd, lst) - m_half * tr * den_mat(ibnd, lst)
1585 l_mat(lst, ibnd) = l_mat(lst, ibnd) - m_half * tr * den_mat(lst, ibnd)
1586 end do
1587 l_mat(ibnd, ibnd) = l_mat(ibnd, ibnd) + get_trans_rate(dmp, nik_mp, ibnd, jbnd, ikq, ik, p_block=.true.) &
1588 * rho_diag(jbnd, ikq)
1589 end do
1590 end do
1591 end do
1592
1593 pop_sub(lindblad_operator_epw)
1594
1596
1601 subroutine lifetime(dmp, ik, ibnd, nik_skip, gam)
1602 type(dmp_t), intent(in) :: dmp
1603 integer, intent(in) :: ik
1604 integer, intent(in) :: ibnd
1605 integer, intent(in) :: nik_skip
1606 real(real64), intent(out) :: gam
1607
1608 integer :: nik_mp, ikq, jbnd
1609 real(real64) :: tr
1610 push_sub(lifetime)
1611
1612 assert(ibnd >= dmp%istart .and. ibnd <= dmp%iend)
1613 nik_mp = dmp%adiabatic_st%nik - nik_skip
1614
1615 ! there are two types of damping: 1. mp-mp k-grid scattering; 2. highsympath-mp k-grid scattering
1616 ! in both case, ikq is the mp grid.
1617 gam = 0.0_real64
1618 do ikq = 1, nik_mp
1619 do jbnd = dmp%istart, dmp%iend
1620 tr = get_trans_rate(dmp, nik_mp, jbnd, ibnd, ik, ikq, p_block=.true.)
1621 gam = gam + tr
1622 tr = get_trans_rate(dmp, nik_mp, ibnd, jbnd, ikq, ik, p_block=.false.)
1623 gam = gam + tr * dmp%adiabatic_st%occ(jbnd, ikq) / dmp%adiabatic_st%smear%el_per_state
1624 end do
1625 end do
1626
1627 pop_sub(lifetime)
1628
1629 end subroutine lifetime
1630
1641 subroutine get_gamma(dmp, ik, ibnd, nik_skip, gam_in, gam_out)
1642 type(dmp_t), intent(in) :: dmp
1643 integer, intent(in) :: ik
1644 integer, intent(in) :: ibnd
1645 integer, intent(in) :: nik_skip
1646 real(real64), intent(out) :: gam_in
1647 real(real64), intent(out) :: gam_out
1648
1649 integer :: nik_mp, ikq, jbnd
1650 real(real64) :: tr
1651 push_sub(get_gamma)
1652
1653 assert(ibnd >= dmp%istart .and. ibnd <= dmp%iend)
1654 nik_mp = dmp%adiabatic_st%nik - nik_skip
1655
1656 ! there are two types of damping: 1. mp-mp k-grid scattering; 2. highsympath-mp k-grid scattering
1657 ! in both case, ikq is the mp grid.
1658 gam_in = 0.0_real64
1659 gam_out = 0.0_real64
1660 do ikq = 1, nik_mp
1661 do jbnd = dmp%istart, dmp%iend
1662 tr = get_trans_rate(dmp, nik_mp, jbnd, ibnd, ik, ikq, p_block=.true.)
1663 gam_out = gam_out + tr
1664 tr = get_trans_rate(dmp, nik_mp, ibnd, jbnd, ikq, ik, p_block=.true.)
1665 gam_in = gam_in + tr * dmp%adiabatic_st%occ(jbnd, ikq)
1666 end do
1667 end do
1668
1669 pop_sub(get_gamma)
1670
1671 end subroutine get_gamma
1672
1688 subroutine update_st(dmp, ik, gr, namespace, nresd, overlap_ad_ks, overlap_resd_ks, nrm2_tdks, resd, st, rho_mat, pop)
1689 type(dmp_t), intent(inout) :: dmp
1690 integer, intent(in) :: ik
1691 type(grid_t), intent(in) :: gr
1692 type(namespace_t), intent(in) :: namespace
1693 integer, intent(in) :: nresd
1694 complex(real64), intent(in) :: overlap_ad_ks(:, :)
1695 complex(real64), intent(in) :: overlap_resd_ks(:, :)
1696 real(real64), intent(in) :: nrm2_tdks(:)
1697 type(states_elec_t), intent(inout) :: resd
1698 type(states_elec_t), intent(inout) :: st
1699 complex(real64), intent(inout) :: rho_mat(:, :)
1700 real(real64), intent(inout) :: pop
1701
1702 real(real64), allocatable :: occ(:)
1703 complex(real64), allocatable :: overlap(:, :), overlap_ad_resd(:, :)
1704 integer :: ist, jst, nst
1705
1706 push_sub_with_profile(update_st)
1707
1708 nst = dmp%adiabatic_st%nst
1709
1710 assert(ubound(overlap_ad_ks, dim = 1) == nst)
1711 assert(ubound(overlap_ad_ks, dim = 2) == nst)
1712 assert(ubound(overlap_resd_ks, dim = 1) == nst)
1713 assert(ubound(overlap_resd_ks, dim = 2) == nst)
1714 assert(ubound(rho_mat, dim = 1) == 2*nst)
1715 assert(ubound(rho_mat, dim = 2) == 2*nst)
1716 assert(is_hermitian(2*nst, rho_mat))
1717
1718 safe_allocate(occ(1:nst+nresd))
1719
1720 if (dmp%othn) then
1721 call lalg_eigensolve(nst+nresd, rho_mat, occ)
1722 else
1723 assert(nresd == nst)
1724
1725 ! generalized eigenvalue problem
1726 safe_allocate(overlap(1:2*nst, 1:2*nst))
1727 safe_allocate(overlap_ad_resd(1:nst, 1:nst))
1729 ! ad-ad block
1730 overlap = m_z0
1731 do ist = 1, nst
1732 overlap(ist, ist) = m_one
1733 end do
1734
1735 ! adiabatic - resd, zstates_elec_calc_projections returns the conjugate
1736 call zstates_elec_calc_projections(resd, dmp%adiabatic_st, namespace, gr, ik, overlap_ad_resd)
1737 do jst = 1, nresd
1738 do ist = 1, nst
1739 overlap(ist, jst + nst) = conjg(overlap_ad_resd(ist, jst))
1740 enddo
1741 enddo
1742
1743 ! resd - resd
1744 ! _i<d | d>_j = S^{d,\psi}_{ij} - sum_k S^{\phi,\psi}_{kj} S^{d,\phi}_{ik}
1745 overlap(nst+1:nst+nresd, nst+1:nst+nresd) = overlap_resd_ks(1:nresd, 1:nresd)
1746 call blas_gemm('T', 'N', nresd, nresd, nst, -m_z1, overlap_ad_resd(1, 1), nst, overlap_ad_ks(1, 1), nst, &
1747 m_z1, overlap(nst+1, nst+1), 2*nst)
1748
1749 ! only the upper triangle of overlap is needed
1750 call lalg_geneigensolve(nst+nresd, rho_mat, overlap, occ, preserve_mat=.false.)
1751
1752 safe_deallocate_a(overlap_ad_resd)
1753 safe_deallocate_a(overlap)
1754
1755 end if
1756
1757 if (dmp%unitary_transform) then
1758 call update_wfc_occ_procrustes(ik, dmp%adiabatic_st, resd, gr, nresd, overlap_ad_ks, overlap_resd_ks, &
1759 nrm2_tdks, occ, rho_mat, st, pop)
1760 else
1761 call update_wfc_occ(ik, dmp%adiabatic_st, resd, gr, nresd, occ, rho_mat, st, pop)
1762 end if
1763
1764 safe_deallocate_a(occ)
1765
1766 pop_sub_with_profile(update_st)
1767
1768 end subroutine update_st
1769
1770
1776 subroutine update_wfc_occ(ik, ad_st, resd, gr, nresd, occ, v_mat, st, pop)
1777 integer, intent(in) :: ik
1778 type(states_elec_t), intent(in) :: ad_st
1779 type(states_elec_t), intent(in) :: resd
1780 type(grid_t), intent(in) :: gr
1781 integer, intent(in) :: nresd
1782 real(real64), intent(in) :: occ(:)
1783 complex(real64), intent(in) :: v_mat(:, :)
1784 type(states_elec_t), intent(inout) :: st
1785 real(real64), intent(inout) :: pop
1786
1787 complex(real64), allocatable :: psi_j(:, :)
1788 integer :: j, ib, i_minst, i_maxst, nst
1789
1790 push_sub_with_profile(update_wfc_occ)
1791
1792 nst = ad_st%nst
1793 assert(ubound(v_mat, dim = 1) == 2*nst)
1794 assert(ubound(v_mat, dim = 2) == 2*nst)
1795
1796 safe_allocate(psi_j(1:gr%np, st%d%dim))
1797
1798 do j = nst+nresd, nst+1, -1
1799 psi_j = (0.0_real64, 0.0_real64)
1800 do ib = ad_st%group%block_start, ad_st%group%block_end
1801 i_minst = states_elec_block_min(ad_st, ib)
1802 i_maxst = states_elec_block_max(ad_st, ib)
1803 call zbatch_axpy_function(gr%np, v_mat(i_minst:i_maxst, j), &
1804 ad_st%group%psib(ib, ik), psi_j)
1805 enddo
1806 do ib = resd%group%block_start, resd%group%block_end
1807 i_minst = states_elec_block_min(resd, ib)
1808 i_maxst = min(states_elec_block_max(resd, ib), nresd)
1809 if (i_minst > nresd) cycle
1810 call zbatch_axpy_function(gr%np, v_mat(i_minst+nst:i_maxst+nst, j), &
1811 resd%group%psib(ib, ik), psi_j, nst=i_maxst-i_minst+1)
1812 enddo
1813 ! reset TDKS state
1814 call states_elec_set_state(st, gr, nst+nresd+1-j, ik, psi_j)
1815 end do
1816
1817 ! eigenvalues in ascending order, so inverse
1818 st%occ(1:nst, ik) = occ(nst+nresd:nresd+1:-1)
1819 pop = pop + sum(st%occ(1:nst, ik)) * st%kweights(ik)
1820
1821 safe_deallocate_a(psi_j)
1822 pop_sub_with_profile(update_wfc_occ)
1823 end subroutine update_wfc_occ
1824
1825
1840 subroutine update_wfc_occ_procrustes(ik, ad_st, resd, gr, nresd, overlap_ad_ks, overlap_resd_ks, &
1841 nrm2_tdks, occ_tilde, v_mat, st, pop)
1842 integer, intent(in) :: ik
1843 type(states_elec_t), intent(in) :: ad_st
1844 type(states_elec_t), intent(in) :: resd
1845 type(grid_t), intent(in) :: gr
1846 integer, intent(in) :: nresd
1847 complex(real64), intent(in) :: overlap_ad_ks(:, :)
1848 complex(real64), intent(in) :: overlap_resd_ks(:, :)
1849 real(real64), intent(in) :: nrm2_tdks(:)
1850 real(real64), intent(in) :: occ_tilde(:)
1851 complex(real64), intent(inout) :: v_mat(:, :)
1852 type(states_elec_t), intent(inout) :: st
1853 real(real64), intent(inout) :: pop
1854
1855 integer :: ist, jst, ib, i_minst, i_maxst, nst
1856 complex(real64), allocatable :: overlap_procrus(:, :), uproj(:, :), utrans(:, :)
1857 complex(real64), allocatable :: uu(:, :), vt(:, :)
1858 complex(real64), allocatable :: psi(:, :)
1859 real(real64), parameter :: small_occ = 5.0e-15_real64 ! for those zero occupations, assume them a small quantity
1860 real(real64) :: sg_values(1:st%nst), rocc_tilde(1:st%nst+nresd), rocc(1:st%nst)
1861 real(real64) :: qtot_transform, nrm2, occ
1862
1863 push_sub_with_profile(update_wfc_occ_procrustes)
1864
1865 nst = ad_st%nst
1866 assert(ubound(overlap_ad_ks, dim = 1) == ad_st%nst)
1867 assert(ubound(overlap_ad_ks, dim = 2) == nst)
1868 assert(ubound(overlap_resd_ks, dim = 1) == resd%nst)
1869 assert(ubound(overlap_resd_ks, dim = 2) == nst)
1870 assert(ubound(v_mat, dim = 1) == 2*nst)
1871 assert(ubound(v_mat, dim = 2) == 2*nst)
1872
1873 ! set rocc for both TDKS and new wfcs. For those zero occupations, set them a small quantity
1874 rocc_tilde(1:nst+nresd) = sqrt(max(occ_tilde(1:nst+nresd), small_occ))
1875 rocc(1:nst) = sqrt(max(st%occ(1:nst, ik), small_occ))
1876
1877 safe_allocate(overlap_procrus(1:nst+nresd, 1:nst))
1878
1879 ! Procrustes Overlap: S \propto (C_{top})^\dagger S_{ad\_ks} + (C_{bot})^\dagger S_{resd\_ks}
1880 ! Computed via in-place BLAS GEMM accumulation ('C' for conjugate transpose),
1881 ! avoiding the temporary memory allocation and copy overhead of native matmul.
1882 call blas_gemm('C', 'N', nst+nresd, nst, nst, m_z1, v_mat(1, 1), 2*nst, overlap_ad_ks(1, 1), nst, &
1883 m_z0, overlap_procrus(1, 1), nst+nresd)
1884 call blas_gemm('C', 'N', nst+nresd, nst, nresd, m_z1, v_mat(1+nst, 1), 2*nst, overlap_resd_ks(1, 1), nst, &
1885 m_z1, overlap_procrus(1, 1), nst+nresd)
1886 do jst = 1, nst
1887 do ist = 1, nst+nresd
1888 overlap_procrus(ist, jst) = overlap_procrus(ist, jst) * rocc_tilde(ist) * rocc(jst)
1889 end do
1890 end do
1891
1892 safe_allocate(uproj(1:nst+nresd, 1:nst))
1893 safe_allocate(uu(1:nst+nresd, 1:nst+nresd))
1894 safe_allocate(vt(1:nst, 1:nst))
1895
1896 call lalg_singular_value_decomp(nst+nresd, nst, overlap_procrus, uu, vt, sg_values)
1897 uproj = matmul(uu(:,1:nst), vt)
1898
1899 safe_deallocate_a(overlap_procrus)
1900 safe_deallocate_a(vt)
1901 safe_deallocate_a(uu)
1902
1903 safe_allocate(utrans(1:nst+nresd, 1:nst))
1904
1905 ! update transformation matrix
1906 do ist = 1, nst+nresd
1907 call blas_scal(nst+nresd, cmplx(rocc_tilde(ist), 0.0, real64), v_mat(1, ist), 1)
1908 end do
1909 ! utrans = matmul(v_mat(1:nst+nresd,1:nst+nresd), uproj)
1910 call blas_gemm('N', 'N', nst+nresd, nst, nst+nresd, m_z1, v_mat(1, 1), 2*nst, uproj(1, 1), nst+nresd, &
1911 m_z0, utrans(1, 1), nst+nresd)
1912
1913 safe_deallocate_a(uproj)
1914 safe_allocate(psi(1:gr%np, st%d%dim))
1915
1916 ! update state and occupations
1917 qtot_transform = 0.0_real64
1918 do jst = 1, nst
1919 psi = (0.0_real64, 0.0_real64)
1920 do ib = ad_st%group%block_start, ad_st%group%block_end
1921 i_minst = states_elec_block_min(ad_st, ib)
1922 i_maxst = states_elec_block_max(ad_st, ib)
1923 call zbatch_axpy_function(gr%np, utrans(i_minst:i_maxst, jst), &
1924 ad_st%group%psib(ib, ik), psi)
1925 end do
1926 do ib = resd%group%block_start, resd%group%block_end
1927 i_minst = states_elec_block_min(resd, ib)
1928 i_maxst = min(states_elec_block_max(resd, ib), nresd)
1929 if (i_minst > nresd) cycle
1930 call zbatch_axpy_function(gr%np, utrans(i_minst+nst:i_maxst+nst, jst), &
1931 resd%group%psib(ib, ik), psi, nst=i_maxst-i_minst+1)
1932 end do
1933 nrm2 = real(zmf_dotp(gr, st%d%dim, psi, psi, reduce = .true.), real64)
1934 qtot_transform = qtot_transform + nrm2
1935 occ = nrm2 / nrm2_tdks(jst)
1936 st%occ(jst, ik) = occ
1937 call lalg_scal(gr%np, st%d%dim, 1.0_real64/sqrt(occ), psi)
1938 call states_elec_set_state(st, gr, jst, ik, psi)
1939 end do
1940
1941 pop = pop + qtot_transform * st%kweights(ik)
1942
1943 safe_deallocate_a(utrans)
1944 safe_deallocate_a(psi)
1945
1946 pop_sub_with_profile(update_wfc_occ_procrustes)
1947
1948 end subroutine update_wfc_occ_procrustes
1949
1951 logical function is_hermitian(n, mat)
1952 integer, intent(in) :: n
1953 complex(real64), intent(in) :: mat(:, :)
1954 real(real64), parameter :: tol = 1.0e-14_real64
1955
1956 assert(ubound(mat, dim=1) == n)
1957 assert(ubound(mat, dim=2) == n)
1958
1959 is_hermitian = maxval(abs(mat - transpose(conjg(mat)))) <= tol
1960 end function is_hermitian
1961
1962#ifdef HAVE_MPI
1963 subroutine slmpi_create_shared_memory_window(number_of_elements, intranode_grp, window, array)
1964 use iso_c_binding
1965 integer(int64), intent(in) :: number_of_elements
1966 type(mpi_grp_t), intent(in) :: intranode_grp
1967 type(mpi_win), intent(out) :: window
1968 real(real32), pointer, intent(out) :: array(:)
1969
1970 type(c_ptr) :: ptr
1971 integer(kind=MPI_ADDRESS_KIND) :: window_size
1972 integer :: disp_unit
1973
1974 assert(not_in_openmp())
1975
1976 ! allocate only on rank 0 of each node
1977 if (intranode_grp%rank == 0) then
1978 window_size = number_of_elements * 4_mpi_address_kind
1979 else
1980 window_size = 0_mpi_address_kind
1981 end if
1982 assert(number_of_elements * 4 < huge(0_mpi_address_kind))
1983 assert(window_size >= 0)
1984 disp_unit = 4
1985 call mpi_win_allocate_shared(window_size, disp_unit, mpi_info_null, &
1986 intranode_grp%comm, ptr, window)
1987 ! get pointer on all ranks
1988 if (intranode_grp%rank /= 0) then
1989 call mpi_win_shared_query(window, 0, window_size, disp_unit, ptr)
1990 end if
1991 ! get fortran pointer
1992 call c_f_pointer(ptr, array, [number_of_elements])
1993
1994 ! start access epoch
1995 call mpi_win_lock_all(mpi_mode_nocheck, window)
1996
1997 end subroutine slmpi_create_shared_memory_window
1998
1999 subroutine smpi_grp_bcast(mpi_grp, buf, cnt, sendtype, root)
2000 use iso_c_binding
2001 class(mpi_grp_t), intent(in) :: mpi_grp
2002 real(real32), target, intent(inout) :: buf
2003 integer(int64), intent(in) :: cnt
2004 type(mpi_datatype), intent(in) :: sendtype
2005 integer, intent(in) :: root
2006
2007 integer :: rounds, iround, size
2008 integer(int64) :: offset
2009 real(real32), pointer :: bufptr(:)
2010
2011 assert(not_in_openmp())
2012
2013 call mpi_debug_in(mpi_grp%comm, c_mpi_bcast)
2014 if (mpi_grp%comm /= mpi_comm_undefined) then
2015 ! need to do the broadcast in rounds that fit into int32 integers
2016 call c_f_pointer(c_loc(buf), bufptr, [cnt])
2017 rounds = int(cnt/huge(0_int32), int32)
2018 do iround = 1, rounds
2019 offset = int(huge(0_int32), int64) * (iround - 1) + 1
2020 call mpi_bcast(bufptr(offset), huge(0_int32), sendtype, root, mpi_grp%comm)
2021 end do
2022 ! broadcast the remainder
2023 offset = int(huge(0_int32), int64) * rounds + 1
2024 size = int(mod(cnt, int(huge(0_int32),int64)), int32)
2025 call mpi_bcast(bufptr(offset), size, sendtype, root, mpi_grp%comm)
2026 end if
2027 call mpi_debug_out(mpi_grp%comm, c_mpi_bcast)
2028
2029 end subroutine smpi_grp_bcast
2030#endif
2031
2032end module dm_propagation_oct_m
int access(const char *__name, int __type) __attribute__((__nothrow__
--------------— gemm ---------------— performs one of the matrix-matrix operations
Definition: blas.F90:370
--------------— syrk, herk ---------------— performs one of the symmetric rank k operations
Definition: blas.F90:490
--------------— scal ---------------— Scales a vector by a constant.
Definition: blas.F90:150
constant times a vector plus a vector
Definition: lalg_basic.F90:173
scales a vector by a constant
Definition: lalg_basic.F90:159
Prints out to iunit a message in the form: ["InputVariable" = value] where "InputVariable" is given b...
Definition: messages.F90:182
double exp(double __x) __attribute__((__nothrow__
This module implements common operations on batches of mesh functions.
Definition: batch_ops.F90:118
subroutine, public zbatch_axpy_function(np, aa, xx, psi, nst)
This routine performs a set of axpy operations for each function x of a batch (xx),...
Definition: batch_ops.F90:2915
This module contains interfaces for BLAS routines You should not use these routines directly....
Definition: blas.F90:120
This module implements a calculator for the density and defines related functions.
Definition: density.F90:122
subroutine, public density_calc(st, gr, density, istin)
Computes the density from the orbitals in st.
Definition: density.F90:619
subroutine lifetime(dmp, ik, ibnd, nik_skip, gam)
Calculate the total scattering rate (inverse lifetime) for a given state.
subroutine iopar_close_trans_rate(system_grp, dmp)
Finalize transition rate resources.
subroutine lindblad_from_epw(dmp, hm, kpt, system_grp, namespace, nresd_k, dt, rho_mat_k)
Evolve the density matrix under EPW-derived Lindblad dissipation.
subroutine lindblad_uniform(dmp, kpt, nresd_k, dt, rho_mat_k)
Evolve the density matrix in time under uniform dissipation.
subroutine, public dm_propagation_init_run(dmp, namespace, space, gr, ions, st, hm, mc, from_scratch)
Initialise the adiabatic states prior to running TD propagation.
subroutine lindblad_operator_uniform(nst, nresd, rtrans, den_mat, l_mat)
Calculate the Lindblad dissipator matrix for uniform decay.
integer function get_vector_field_index(dmp, hm, namespace)
Get the flattened 1D index of the current vector potential on the discrete EPW vector field grid.
real(real64) function get_trans_rate(dmp, nik_mp, jbnd, ibnd, k, kq, p_block)
Get transition rate from state (k, ibnd) to (kq, jbnd).
subroutine lindblad_operator_epw(dmp, ik, nik_skip, nresd, rho_diag, den_mat, l_mat)
Calculate the Lindblad dissipator matrix using EPW electron-phonon scattering rates.
subroutine, public dm_end_run(system_grp, dmp)
logical function is_hermitian(n, mat)
Check if a matrix is Hermitian.
subroutine iopar_read_trans_rate(ia, system_grp, namespace, dmp)
Read in transition rates to the shared memory window and then broadcast via internode communicator.
subroutine dmp_init(this, namespace, st, space, hm)
Initialise an instance of density matrix dissipation.
subroutine build_epw_kmap(namespace, kpoints, dmp)
Map internal k-point indices to the 1D EPW Monkhorst-Pack grid and verify mesh compatibility.
subroutine collision_from_epw(dmp, hm, kpt, system_grp, namespace, nresd_k, dt, rho_mat_k)
Evolve the density matrix subject to the electron-phonon collision integral.
subroutine broadcast_occupation(occ, kpt, nst, parstate)
subroutine total_population(ik, st, gr, nrm2, pop)
Calculate total population.
subroutine update_wfc_occ_procrustes(ik, ad_st, resd, gr, nresd, overlap_ad_ks, overlap_resd_ks, nrm2_tdks, occ_tilde, v_mat, st, pop)
Update states using Procrustes transformation to ensure time continuity.
subroutine get_gamma(dmp, ik, ibnd, nik_skip, gam_in, gam_out)
Calculate in/out scattering rates (Gamma) for a specific state (ik, ibnd).
subroutine update_wfc_occ(ik, ad_st, resd, gr, nresd, occ, v_mat, st, pop)
Update states directly from diagonalization (no Procrustes).
subroutine construct_residuals(gr, namespace, ad_st, st, ik, othn, overlap_ad_ks, nrm2_tdks, nresd, overlap_resd_ks, resd)
Construct the residual basis and its overlap with TDKS wavefunctions.
subroutine, public dm_propagation_run(dmp, namespace, space, gr, ions, st, mc, hm, ks, iter, dt, ext_partners, update_energy)
Density matrix propagation.
subroutine dm_propagation_update_trans_rate(this, hm, system_grp, namespace)
Read and update the EPW transition matrix only when the vector field index changes.
subroutine transition_rate_uniform(uniform, ad_st, ik, rtrans)
Calculate state transition rates assuming uniform electron-phonon coupling.
subroutine construct_density_matrix(nresd, ik, st, overlap_ad_ks, overlap_resd_ks, rho_mat)
Construct the full density matrix in the adiabatic and residual basis.
subroutine lindblad_2times(dmp, kpt, nresd_k, dt, rho_mat_k)
Evolve the density matrix using the phenomenological two-time (T1/T2) relaxation model.
subroutine iopar_open_trans_rate(namespace, ions, hm, system_grp, dmp)
Read in metadata of transition rates, build intra/inter communicators and shared memory window.
subroutine dissipation(hm, st, namespace, nresd_k, dt, dmp, rho_mat_k)
Evolve the density matrix in time under dissipation.
subroutine update_st(dmp, ik, gr, namespace, nresd, overlap_ad_ks, overlap_resd_ks, nrm2_tdks, resd, st, rho_mat, pop)
Diagonalize the density matrix to update occupations and wavefunctions.
subroutine population_in_adiabatic(ik, ad_st, st, overlap, pop)
Calculate number of electrons in the adiabatic basis.
subroutine, public eigensolver_init(eigens, namespace, gr, st, hm, mc, space, deactivate_oracle)
subroutine, public eigensolver_end(eigens)
integer, parameter, public spin_polarized
real(real64), parameter, public m_two
Definition: global.F90:202
real(real64), parameter, public m_zero
Definition: global.F90:200
real(real64), parameter, public p_ry
Definition: global.F90:239
logical pure function, public not_in_openmp()
Definition: global.F90:566
complex(real64), parameter, public m_z0
Definition: global.F90:210
real(real64), parameter, public m_epsilon
Definition: global.F90:216
complex(real64), parameter, public m_z1
Definition: global.F90:211
real(real64), parameter, public m_half
Definition: global.F90:206
real(real64), parameter, public m_one
Definition: global.F90:201
This module implements the underlying real-space grid.
Definition: grid.F90:119
This module defines classes and functions for interaction partners.
integer, parameter, public kpoints_monkh_pack
Definition: kpoints.F90:223
subroutine, public kpoints_to_reduced(latt, kin, kout)
Definition: kpoints.F90:1150
This module defines functions over batches of mesh functions.
Definition: mesh_batch.F90:118
subroutine, public mesh_batch_nrm2(mesh, aa, nrm2, reduce)
Calculate the norms (norm2, not the square!) of a batch of mesh functions.
Definition: mesh_batch.F90:179
subroutine, public zmesh_batch_mf_dotp(mesh, aa, psi, dot, reduce, nst)
calculate the dot products between a batch and a vector of mesh functions
This module defines various routines, operating on mesh functions.
subroutine, public messages_not_implemented(feature, namespace)
Definition: messages.F90:1068
subroutine, public messages_warning(no_lines, all_nodes, namespace)
Definition: messages.F90:525
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
Definition: messages.F90:162
subroutine, public messages_fatal(no_lines, only_root_writes, namespace)
Definition: messages.F90:410
subroutine, public messages_input_error(namespace, var, details, row, column)
Definition: messages.F90:691
subroutine, public messages_info(no_lines, iunit, debug_only, stress, all_nodes, namespace)
Definition: messages.F90:594
This module contains some common usage patterns of MPI routines.
Definition: mpi_lib.F90:117
type(mpi_comm), parameter, public mpi_comm_undefined
used to indicate a communicator has not been initialized
Definition: mpi.F90:138
This module handles the communicators for the various parallelization strategies.
Definition: multicomm.F90:147
integer function, public parse_block(namespace, name, blk, check_varinfo_)
Definition: parser.F90:615
integer, parameter, public restart_dm
Definition: restart.F90:156
integer, parameter, public restart_gs
Definition: restart.F90:156
integer, parameter, public restart_type_dump
Definition: restart.F90:184
integer, parameter, public restart_type_load
Definition: restart.F90:184
subroutine, public zstates_elec_calc_projections(st, gs_st, namespace, mesh, ik, proj, gs_nst)
This routine computes the projection between two set of states.
subroutine, public zstates_elec_orthogonalize_single_batch(st, mesh, nst, iqn, phi, normalize, mask, overlap, norm, Theta_fi, beta_ij, against_all)
orthogonalize a single wave function against a set of states
integer pure function, public states_elec_block_max(st, ib)
return index of last state in block ib
subroutine, public states_elec_end(st)
finalize the states_elec_t object
subroutine, public states_elec_copy(stout, stin, exclude_wfns, exclude_eigenval, special)
make a (selective) copy of a states_elec_t object
integer pure function, public states_elec_block_min(st, ib)
return index of first state in block ib
This module handles reading and writing restart information for the states_elec_t.
subroutine, public states_elec_load(restart, namespace, space, st, mesh, kpoints, ierr, iter, lr, lowest_missing, label, verbose, skip)
returns in ierr: <0 => Fatal error, or nothing read =0 => read all wavefunctions >0 => could only rea...
brief This module defines the class unit_t which is used by the unit_systems_oct_m module.
Definition: unit.F90:134
character(len=20) pure function, public units_abbrev(this)
Definition: unit.F90:225
This module defines the unit system, used for input and output.
type(unit_system_t), public units_out
type(unit_t), public unit_kelvin
For converting energies into temperatures.
type(unit_system_t), public units_inp
the units systems for reading and writing
subroutine, public v_ks_calc(ks, namespace, space, hm, st, ions, ext_partners, calc_eigenval, time, calc_energy, calc_current, force_semilocal)
Definition: v_ks.F90:754
Distribution of N instances over mpi_grpsize processes, for the local rank mpi_grprank....
Extension of space that contains the knowledge of the spin dimension.
Description of the grid, containing information on derivatives, stencil, and symmetries.
Definition: grid.F90:171
This is defined even when running serial.
Definition: mpi.F90:144
Stores all communicators and groups.
Definition: multicomm.F90:208
The states_elec_t class contains all electronic wave functions.
int true(void)