Octopus
phase.F90
Go to the documentation of this file.
1!! Copyright (C) 2009 X. Andrade
2!! Copyright (C) 2024 N. Tancogne-Dejean
3!!
4!! This program is free software; you can redistribute it and/or modify
5!! it under the terms of the GNU General Public License as published by
6!! the Free Software Foundation; either version 2, or (at your option)
7!! any later version.
8!!
9!! This program is distributed in the hope that it will be useful,
10!! but WITHOUT ANY WARRANTY; without even the implied warranty of
11!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12!! GNU General Public License for more details.
13!!
14!! You should have received a copy of the GNU General Public License
15!! along with this program; if not, write to the Free Software
16!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17!! 02110-1301, USA.
18!!
19
20#include "global.h"
21
22module phase_oct_m
23 use accel_oct_m
24 use batch_oct_m
27 use comm_oct_m
28 use debug_oct_m
32 use global_oct_m
33 use grid_oct_m
36 use math_oct_m
37 use mesh_oct_m
39 use mpi_oct_m
41 use space_oct_m
45 use types_oct_m
47
48 implicit none
49
50 private
51
52 public :: &
53 phase_t, &
55
  !> A container for the Bloch phase.
  !!
  !! phase(ip, ik) holds exp(-i k.x) on every mesh point (including boundary
  !! points) for each locally stored k-point; phase_corr holds the extra factor
  !! relating a boundary point to its periodic image (see phase_init_phases).
  type phase_t
    private
    !> Bloch phase exp(-i k.x), indexed (1:np_part, kpt%start:kpt%end)
    complex(real64), allocatable :: phase(:, :)
    !> phase correction on boundary points only, indexed (np+1:np_part, kpt%start:kpt%end)
    complex(real64), public, allocatable :: phase_corr(:,:)
    !> spiral-boundary phase for the two spin channels, indexed (1:np_part-sp, 1:2)
    complex(real64), allocatable :: phase_spiral(:,:)
    type(accel_mem_t) :: buff_phase                 !< device mirror of phase
    type(accel_mem_t) :: buff_phase_spiral          !< device mirror of phase_spiral
    type(accel_mem_t), public :: buff_phase_corr    !< device mirror of phase_corr
    integer :: buff_phase_qn_start                  !< first local k-point index stored in buff_phase
    !> spin expectation values per state and k-point; set externally, read by apply_phase_spiral
    real(real64), public, pointer :: spin(:,:,:) => null()
  contains
    procedure :: init => phase_init_phases                  !< build the phase arrays (and GPU buffers)
    procedure :: update => phase_update_phases              !< recompute phases for a uniform vector potential
    procedure :: end => phase_end                           !< release host arrays and device buffers
    procedure :: set_phase_corr => phase_set_phase_corr     !< multiply a batch by the phase (inner points)
    procedure :: unset_phase_corr => phase_unset_phase_corr !< remove the phase from a batch (inner points)
    procedure :: apply_to => phase_apply_batch              !< apply/remove the phase on a batch
    procedure :: apply_to_single => phase_apply_mf          !< apply/remove the phase on a single function
    procedure :: apply_phase_spiral => phase_phase_spiral   !< apply the spin-spiral boundary phase
    procedure :: is_allocated => phase_is_allocated         !< whether the phase array exists
    procedure :: copy_and_set_phase => phase_copy_and_set_phase !< copy a batch, applying phase + boundaries
  end type phase_t
123
124contains
125
126 ! ---------------------------------------------------------
  ! ---------------------------------------------------------
  !> Initialize the phase arrays and copy the data to the GPU.
  !!
  !! For every local k-point, phase(1:np_part, ik) = exp(-i k.x) is built on all
  !! mesh points, and phase_corr(np+1:np_part, ik) is the correction factor that
  !! maps a boundary point to its periodic image inside the simulation box.
  !! With spiral boundary conditions an additional two-channel phase_spiral
  !! array is built. Nothing is done for Gamma-point-only calculations.
  subroutine phase_init_phases(phase, gr, kpt, kpoints, d, space)
    class(phase_t), intent(inout) :: phase     !< phase container to fill
    class(mesh_t), intent(in) :: gr            !< mesh; spiral phases need the dynamic type to be grid_t
    type(distributed_t), intent(in) :: kpt     !< k-point distribution (local range kpt%start:kpt%end)
    type(kpoints_t), intent(in) :: kpoints     !< the k-point set
    type(states_elec_dim_t), intent(in) :: d   !< maps local ik to the global k-point index
    type(space_t), intent(in) :: space         !< spatial dimension / periodicity

    integer :: ip, ik, sp
    integer(int64) :: ip_inner_global
    real(real64) :: kpoint(space%dim), x_global(space%dim)

    push_sub(phase_init_phases)

    ! no e^ik phase needed for Gamma-point-only periodic calculations
    ! unless for velocity-gauge for lasers
    if (accel_is_enabled()) then
      phase%buff_phase_qn_start = kpt%start
    end if
    if(kpoints%gamma_only()) then
      pop_sub(phase_init_phases)
      return
    end if

    safe_allocate(phase%phase(1:gr%np_part, kpt%start:kpt%end))
    safe_allocate(phase%phase_corr(gr%np+1:gr%np_part, kpt%start:kpt%end))
    ! Default the boundary correction to one; the true values are set below.
    ! (ik is a sequential-loop index inside the parallel region and is
    ! predetermined private per the OpenMP rules.)
    !$omp parallel private(ip)
    do ik = kpt%start, kpt%end
      !$omp do
      do ip = gr%np + 1, gr%np_part
        phase%phase_corr(ip, ik) = m_one
      end do
      !$omp end do nowait
    end do
    !$omp end parallel

    ! Only when gr is a grid_t type, we can access gr%der
    select type(gr)
    class is(grid_t)
      if (gr%der%boundaries%spiralBC) then
        ! sp is the index of the last non-boundary point (ghosts included when
        ! domain-parallel); boundary points are sp+1:np_part
        sp = gr%np
        if (gr%parallel_in_domains) sp = gr%np + gr%pv%np_ghost

        ! We decided to allocate the array from 1:np_part-sp as this is less error prone when passing
        ! the array to other routines, or in particular creating a C-style pointer from phase_spiral(1,1).
        ! We will also update phase_corr and possible other similar arrays.

        safe_allocate(phase%phase_spiral(1:gr%np_part-sp, 1:2))

        ! loop over boundary points
        do ip = sp + 1, gr%np_part
          ! get corresponding inner point
          ip_inner_global = mesh_periodic_point(gr, space, ip)
          x_global = mesh_x_global(gr, ip_inner_global)
          ! spin-up and spin-down channels carry opposite spiral phases
          phase%phase_spiral(ip-sp, 1) = &
            exp(m_zi * sum((gr%x(1:space%dim, ip)-x_global(1:space%dim)) * gr%der%boundaries%spiral_q(1:space%dim)))
          phase%phase_spiral(ip-sp, 2) = &
            exp(-m_zi * sum((gr%x(1:space%dim, ip)-x_global(1:space%dim)) * gr%der%boundaries%spiral_q(1:space%dim)))
        end do

        if (accel_is_enabled()) then
          call accel_create_buffer(phase%buff_phase_spiral, accel_mem_read_only, type_cmplx, (gr%np_part-sp)*2)
          call accel_write_buffer(phase%buff_phase_spiral, gr%np_part-sp, 2, phase%phase_spiral)
        end if
      end if
    class default
      ! Do nothing
    end select

    kpoint(1:space%dim) = m_zero

    sp = gr%np
    if (gr%parallel_in_domains) sp = gr%np + gr%pv%np_ghost
    !$omp parallel private(ip, ip_inner_global, x_global, kpoint)
    do ik = kpt%start, kpt%end
      kpoint(1:space%dim) = kpoints%get_point(d%get_kpoint_index(ik))
      !$omp do
      do ip = 1, gr%np_part
        phase%phase(ip, ik) = exp(-m_zi * sum(gr%x(1:space%dim, ip) * kpoint(1:space%dim)))
      end do
      !$omp end do

      ! loop over boundary points
      !$omp do
      do ip = sp + 1, gr%np_part
        ! get corresponding inner point
        ip_inner_global = mesh_periodic_point(gr, space, ip)

        ! compute phase correction from global coordinate (opposite sign!)
        x_global = mesh_x_global(gr, ip_inner_global)
        phase%phase_corr(ip, ik) = phase%phase(ip, ik)* &
          exp(m_zi * sum(x_global(1:space%dim) * kpoint(1:space%dim)))
      end do
      !$omp end do nowait
    end do
    !$omp end parallel

    ! Mirror the freshly computed arrays on the device.
    if (accel_is_enabled()) then
      call accel_create_buffer(phase%buff_phase, accel_mem_read_write, type_cmplx, gr%np_part*kpt%nlocal)
      call accel_write_buffer(phase%buff_phase, gr%np_part, kpt%nlocal, phase%phase)
      call accel_create_buffer(phase%buff_phase_corr, accel_mem_read_write, type_cmplx, (gr%np_part - gr%np)*kpt%nlocal)
      call accel_write_buffer(phase%buff_phase_corr, gr%np_part - gr%np, kpt%nlocal, phase%phase_corr)
    end if

    pop_sub(phase_init_phases)
  end subroutine phase_init_phases
236
237 ! ----------------------------------------------------------------------------------
  ! ----------------------------------------------------------------------------------
  !> Update the phases for a uniform vector potential.
  !!
  !! Recomputes phase and phase_corr with the shifted wave vector
  !! k + A_uniform. Returns immediately when no uniform vector potential is
  !! allocated. On GPU runs the recomputation happens in the 'update_phases'
  !! kernel and the results are read back to the host arrays.
  subroutine phase_update_phases(phase, mesh, kpt, kpoints, d, space, uniform_vector_potential)
    class(phase_t), intent(inout) :: phase
    class(mesh_t), intent(in) :: mesh
    type(distributed_t), intent(in) :: kpt
    type(kpoints_t), intent(in) :: kpoints
    type(states_elec_dim_t), intent(in) :: d
    type(space_t), intent(in) :: space
    real(real64), allocatable, intent(in) :: uniform_vector_potential(:)  !< absent -> nothing to do

    integer :: ik, ip, sp
    integer(int64), dimension(2) :: np, gsize, bsize
    integer(int64) :: ip_inner_global
    real(real64) :: kpoint(space%dim)
    real(real64), allocatable :: x_global(:,:), kpt_vec_pot(:,:)
    type(accel_mem_t) :: buff_vec_pot, buff_x_global, buff_x
    type(accel_kernel_t), save :: kernel
    real(real64) :: tmp_sum

    if (.not. allocated(uniform_vector_potential)) return

    push_sub_with_profile(phase_update_phases)


    ! Lazily create the host arrays and device buffers, e.g. when the run
    ! started Gamma-only and phase_init_phases skipped the allocation.
    if (.not. allocated(phase%phase)) then
      safe_allocate(phase%phase(1:mesh%np_part, kpt%start:kpt%end))
      if (accel_is_enabled()) then
        call accel_create_buffer(phase%buff_phase, accel_mem_read_write, type_cmplx, &
          mesh%np_part*kpt%nlocal)
      end if
    end if

    if (.not. allocated(phase%phase_corr)) then
      safe_allocate(phase%phase_corr(mesh%np+1:mesh%np_part, kpt%start:kpt%end))
      if (accel_is_enabled()) then
        call accel_create_buffer(phase%buff_phase_corr, accel_mem_read_write, type_cmplx, &
          (mesh%np_part - mesh%np)*kpt%nlocal)
      end if
    end if

    ! TODO: We should not recompute this every time-step. We should store it.

    ! loop over boundary points
    sp = mesh%np
    ! skip ghost points
    if (mesh%parallel_in_domains) sp = mesh%np + mesh%pv%np_ghost

    safe_allocate(x_global(1:space%dim,(sp + 1):mesh%np_part))

    !$omp parallel do schedule(static) private(ip_inner_global)
    do ip = sp + 1, mesh%np_part
      ! get corresponding inner point
      ip_inner_global = mesh_periodic_point(mesh, space, ip)
      ! compute the difference between the global coordinate and the local coordinate
      x_global(:,ip) = mesh_x_global(mesh, ip_inner_global) - mesh%x(1:space%dim, ip)
    end do


    if (.not. accel_is_enabled()) then
      !$omp parallel private(ik, ip, kpoint, tmp_sum)
      do ik = kpt%start, kpt%end
        kpoint(1:space%dim) = kpoints%get_point(d%get_kpoint_index(ik))
        !We add the vector potential
        kpoint(1:space%dim) = kpoint(1:space%dim) + uniform_vector_potential(1:space%dim)

        ! phase = exp(-i (k+A).x), written via cos/sin of the real argument
        !$omp do schedule(static)
        do ip = 1, mesh%np_part
          tmp_sum = sum(mesh%x(1:space%dim, ip)*kpoint(1:space%dim))
          phase%phase(ip, ik) = cmplx(cos(tmp_sum), -sin(tmp_sum), real64)
        end do
        !$omp end do

        ! boundary correction = exp(+i (k+A).(x_global - x))
        !$omp do schedule(static)
        do ip = sp + 1, mesh%np_part
          tmp_sum = sum(x_global(1:space%dim, ip)*kpoint(1:space%dim))
          phase%phase_corr(ip, ik) = cmplx(cos(tmp_sum), sin(tmp_sum), real64)
        end do
        !$omp end do nowait
      end do
      !$omp end parallel

    else !accel_is enabled

      ! Upload k+A per local k-point, the mesh coordinates, and the boundary
      ! coordinate differences; the writes are asynchronous and are ordered
      ! before the kernel launch on the same queue.
      call accel_create_buffer(buff_vec_pot, accel_mem_read_only, type_float, space%dim*kpt%nlocal)
      safe_allocate(kpt_vec_pot(1:space%dim,kpt%start:kpt%end))
      do ik = kpt%start, kpt%end
        kpoint(1:space%dim) = kpoints%get_point(d%get_kpoint_index(ik))
        kpt_vec_pot(1:space%dim, ik) = kpoint(1:space%dim) + uniform_vector_potential(1:space%dim)
      end do
      call accel_write_buffer(buff_vec_pot, space%dim, kpt%nlocal, kpt_vec_pot, async=.true.)

      ! Note: this should be globally stored
      call accel_create_buffer(buff_x, accel_mem_read_only, type_float, space%dim*(mesh%np_part))
      call accel_write_buffer(buff_x, space%dim, mesh%np_part, mesh%x, async=.true.)

      call accel_create_buffer(buff_x_global, accel_mem_read_only, type_float, space%dim*(mesh%np_part-sp))
      call accel_write_buffer(buff_x_global, space%dim, mesh%np_part-sp, x_global(1:space%dim,(sp + 1):mesh%np_part), async=.true.)

      call accel_kernel_start_call(kernel, 'phase.cu', 'update_phases')

      call accel_set_kernel_arg(kernel, 0, space%dim)
      call accel_set_kernel_arg(kernel, 1, mesh%np)
      call accel_set_kernel_arg(kernel, 2, mesh%np_part)
      call accel_set_kernel_arg(kernel, 3, kpt%start)
      call accel_set_kernel_arg(kernel, 4, kpt%end)
      call accel_set_kernel_arg(kernel, 5, sp)
      call accel_set_kernel_arg(kernel, 6, buff_vec_pot)
      call accel_set_kernel_arg(kernel, 7, buff_x)
      call accel_set_kernel_arg(kernel, 8, buff_x_global)
      call accel_set_kernel_arg(kernel, 9, phase%buff_phase)
      call accel_set_kernel_arg(kernel, 10, phase%buff_phase_corr)

      ! Compute the grid size
      np = (/mesh%np_part, kpt%nlocal/)
      bsize = (/accel_kernel_block_size(kernel), 1/)
      call accel_grid_size(np, bsize, gsize)

      call accel_kernel_run(kernel, gsize, bsize)

      ! Read results back to the host copies; the second (synchronous) read
      ! also serves as the barrier for the first.
      call accel_read_buffer(phase%buff_phase, mesh%np_part, kpt%nlocal, phase%phase, async=.true.)
      call accel_read_buffer(phase%buff_phase_corr, mesh%np_part - mesh%np, kpt%nlocal, phase%phase_corr)

      call accel_free_buffer(buff_vec_pot)
      call accel_free_buffer(buff_x)
      call accel_free_buffer(buff_x_global)
      safe_deallocate_a(kpt_vec_pot)
    end if

    safe_deallocate_a(x_global)

    pop_sub_with_profile(phase_update_phases)
  end subroutine phase_update_phases
370
371 ! ----------------------------------------------------------------------------------
373 subroutine phase_end(phase)
374 class(phase_t), intent(inout) :: phase
375
376 push_sub(phase_end)
377
378 if (phase%is_allocated() .and. accel_is_enabled()) then
379 call accel_free_buffer(phase%buff_phase)
380 call accel_free_buffer(phase%buff_phase_corr)
381 end if
382
383 if (allocated(phase%phase_spiral) .and. accel_is_enabled()) then
384 call accel_free_buffer(phase%buff_phase_spiral)
385 end if
386
387 safe_deallocate_a(phase%phase)
388 safe_deallocate_a(phase%phase_corr)
389 safe_deallocate_a(phase%phase_spiral)
390
391 pop_sub(phase_end)
392 end subroutine phase_end
393
394 ! ----------------------------------------------------------------------------------
396 subroutine phase_accel_rebuild(phase, mesh, kpt)
397 class(phase_t), intent(inout) :: phase
398 class(mesh_t), intent(in) :: mesh
399 type(distributed_t), intent(in) :: kpt
400
401 integer :: nlocal
402
403 push_sub(phase_accel_rebuild)
404
405 if (.not. accel_is_enabled()) then
406 pop_sub(phase_accel_rebuild)
407 return
408 end if
409
410 phase%buff_phase_qn_start = kpt%start
411
412 call accel_detach_buffer(phase%buff_phase)
413 call accel_detach_buffer(phase%buff_phase_corr)
414 call accel_detach_buffer(phase%buff_phase_spiral)
415
416 if (allocated(phase%phase)) then
417 assert(size(phase%phase, 1) == mesh%np_part)
418 nlocal = ubound(phase%phase, dim=2) - lbound(phase%phase, dim=2) + 1
419 call accel_create_buffer(phase%buff_phase, accel_mem_read_write, type_cmplx, size(phase%phase, 1)*nlocal)
420 call accel_write_buffer(phase%buff_phase, size(phase%phase, 1), nlocal, phase%phase)
421 end if
422
423 if (allocated(phase%phase_corr)) then
424 assert(size(phase%phase_corr, 1) == mesh%np_part - mesh%np)
425 nlocal = ubound(phase%phase_corr, dim=2) - lbound(phase%phase_corr, dim=2) + 1
426 call accel_create_buffer(phase%buff_phase_corr, accel_mem_read_write, type_cmplx, &
427 size(phase%phase_corr, 1)*nlocal)
428 call accel_write_buffer(phase%buff_phase_corr, size(phase%phase_corr, 1), nlocal, phase%phase_corr)
429 end if
430
431 if (allocated(phase%phase_spiral)) then
432 call accel_create_buffer(phase%buff_phase_spiral, accel_mem_read_only, type_cmplx, &
433 size(phase%phase_spiral, 1)*size(phase%phase_spiral, 2))
434 call accel_write_buffer(phase%buff_phase_spiral, size(phase%phase_spiral, 1), &
435 size(phase%phase_spiral, 2), phase%phase_spiral)
436 end if
437
438 pop_sub(phase_accel_rebuild)
439 end subroutine phase_accel_rebuild
440
441 ! ----------------------------------------------------------------------------------
443 !
444 subroutine phase_set_phase_corr(phase, mesh, psib, async)
445 class(phase_t), intent(in) :: phase
446 class(mesh_t), intent(in) :: mesh
447 type(wfs_elec_t), intent(inout) :: psib
448 logical, optional, intent(in) :: async
449
450
451 logical :: phase_correction
452
453 push_sub(phase_set_phase_corr)
454
455 ! check if we only want a phase correction for the boundary points
456 phase_correction = phase%is_allocated()
457
458 !We apply the phase only to np points, and the phase for the np+1 to np_part points
459 !will be treated as a phase correction in the Hamiltonian
460 if (phase_correction) then
461 call phase%apply_to(mesh, mesh%np, .false., psib, async=async)
462 end if
463
464 pop_sub(phase_set_phase_corr)
465 end subroutine phase_set_phase_corr
466
467 ! ----------------------------------------------------------------------------------
469 !
470 subroutine phase_unset_phase_corr(phase, mesh, psib, async)
471 class(phase_t), intent(in) :: phase
472 class(mesh_t), intent(in) :: mesh
473 type(wfs_elec_t), intent(inout) :: psib
474 logical, optional, intent(in) :: async
475
476 logical :: phase_correction
477
478 push_sub(phase_unset_phase_corr)
479
480 ! check if we only want a phase correction for the boundary points
481 phase_correction = phase%is_allocated()
482
483 !We apply the phase only to np points, and the phase for the np+1 to np_part points
484 !will be treated as a phase correction in the Hamiltonian
485 if (phase_correction) then
486 call phase%apply_to(mesh, mesh%np, .true., psib, async=async)
487 end if
488
490 end subroutine phase_unset_phase_corr
492 ! ---------------------------------------------------------------------------------------
494 !
495 subroutine phase_apply_batch(this, mesh, np, conjugate, psib, src, async)
496 class(phase_t), intent(in) :: this
497 class(mesh_t), intent(in) :: mesh
498 integer, intent(in) :: np
499 logical, intent(in) :: conjugate
500 type(wfs_elec_t), target, intent(inout) :: psib
501 type(wfs_elec_t), optional, target, intent(in) :: src
502 logical, optional, intent(in) :: async
503
504 integer :: ip, ii, sp
505 type(wfs_elec_t), pointer :: src_
506 complex(real64) :: phase
507 integer(int64), dimension(3) :: gsizes, bsizes
508 type(accel_kernel_t), save :: ker_phase
509
510 push_sub(phase_apply_batch)
511 call profiling_in("PHASE_APPLY_BATCH")
512
513 call profiling_count_operations(6*np*psib%nst_linear)
514
515 assert(np <= mesh%np_part)
516 assert(psib%type() == type_cmplx)
517 assert(psib%ik >= lbound(this%phase, dim=2))
518 assert(psib%ik <= ubound(this%phase, dim=2))
519
520 src_ => psib
521 if (present(src)) src_ => src
522
523 assert(src_%has_phase .eqv. conjugate)
524 assert(src_%ik == psib%ik)
525 assert(src_%type() == type_cmplx)
526
527 ! We want to skip the ghost points for setting the phase
528 sp = min(np, mesh%np)
529 if (np > mesh%np .and. mesh%parallel_in_domains) sp = mesh%np + mesh%pv%np_ghost
530
531 select case (psib%status())
532 case (batch_packed)
533
534 if (conjugate) then
535
536 !$omp parallel private(ii, phase)
537 !$omp do
538 do ip = 1, min(mesh%np, np)
539 phase = conjg(this%phase(ip, psib%ik))
540 !$omp simd
541 do ii = 1, psib%nst_linear
542 psib%zff_pack(ii, ip) = phase*src_%zff_pack(ii, ip)
543 end do
544 end do
545 !$omp end do nowait
546
547 ! Boundary points, if requested
548 !$omp do
549 do ip = sp+1, np
550 phase = conjg(this%phase(ip, psib%ik))
551 !$omp simd
552 do ii = 1, psib%nst_linear
553 psib%zff_pack(ii, ip) = phase*src_%zff_pack(ii, ip)
554 end do
555 end do
556 !$omp end parallel
557
558 else
559
560 !$omp parallel private(ii, phase)
561 !$omp do
562 do ip = 1, min(mesh%np, np)
563 phase = this%phase(ip, psib%ik)
564 !$omp simd
565 do ii = 1, psib%nst_linear
566 psib%zff_pack(ii, ip) = phase*src_%zff_pack(ii, ip)
567 end do
568 end do
569 !$omp end do nowait
570
571 ! Boundary points, if requested
572 !$omp do
573 do ip = sp+1, np
574 phase = this%phase(ip, psib%ik)
575 !$omp simd
576 do ii = 1, psib%nst_linear
577 psib%zff_pack(ii, ip) = phase*src_%zff_pack(ii, ip)
578 end do
579 end do
580 !$omp end parallel
581
582 end if
583
584 case (batch_not_packed)
585
586 if (conjugate) then
587
588 !$omp parallel private(ii, ip)
589 do ii = 1, psib%nst_linear
590 !$omp do simd
591 do ip = 1, min(mesh%np, np)
592 psib%zff_linear(ip, ii) = conjg(this%phase(ip, psib%ik))*src_%zff_linear(ip, ii)
593 end do
594 !$omp end do simd nowait
595
596 ! Boundary points, if requested
597 !$omp do simd
598 do ip = sp+1, np
599 psib%zff_linear(ip, ii) = conjg(this%phase(ip, psib%ik))*src_%zff_linear(ip, ii)
600 end do
601 !$omp end do simd nowait
602 end do
603 !$omp end parallel
604
605 else
606 !$omp parallel private(ii, ip)
607 do ii = 1, psib%nst_linear
608 !$omp do simd
609 do ip = 1, min(mesh%np, np)
610 psib%zff_linear(ip, ii) = this%phase(ip, psib%ik)*src_%zff_linear(ip, ii)
611 end do
612 !$omp end do simd nowait
613
614 ! Boundary points, if requested
615 !$omp do simd
616 do ip = sp+1, np
617 psib%zff_linear(ip, ii) = this%phase(ip, psib%ik)*src_%zff_linear(ip, ii)
618 end do
619 !$omp end do simd nowait
620
621 end do
622 !$omp end parallel
623
624 end if
625
627 call accel_kernel_start_call(ker_phase, 'phase.cu', 'phase_hamiltonian')
628
629 if (conjugate) then
630 call accel_set_kernel_arg(ker_phase, 0, 1_4)
631 else
632 call accel_set_kernel_arg(ker_phase, 0, 0_4)
633 end if
634
635 call accel_set_kernel_arg(ker_phase, 1, (psib%ik - this%buff_phase_qn_start)*mesh%np_part)
636 call accel_set_kernel_arg(ker_phase, 2, np)
637 call accel_set_kernel_arg(ker_phase, 3, this%buff_phase)
638 call accel_set_kernel_arg(ker_phase, 4, src_%ff_device)
639 call accel_set_kernel_arg(ker_phase, 5, log2(int(src_%pack_size(1), int32)))
640 call accel_set_kernel_arg(ker_phase, 6, psib%ff_device)
641 call accel_set_kernel_arg(ker_phase, 7, log2(int(psib%pack_size(1), int32)))
642
643 ! Compute the grid (extend to another dimensions if the size of the problem is too big)
644 call accel_grid_size_extend_dim(int(np, int64), psib%pack_size(1), gsizes, bsizes, ker_phase)
645
646 call accel_kernel_run(ker_phase, gsizes, bsizes)
647
648 if(.not. optional_default(async, .false.)) call accel_finish()
649 end select
650
651 psib%has_phase = .not. conjugate
652
653 call profiling_out("PHASE_APPLY_BATCH")
654 pop_sub(phase_apply_batch)
655 end subroutine phase_apply_batch
656
662 !
663 subroutine phase_apply_mf(this, psi, np, dim, ik, conjugate)
664 class(phase_t), intent(in) :: this
665 complex(real64), intent(inout) :: psi(:, :)
666 integer, intent(in) :: np
667 integer, intent(in) :: dim
668 integer, intent(in) :: ik
669 logical, intent(in) :: conjugate
670
671 integer :: idim, ip
672
673 push_sub(phase_apply_mf)
674
675 assert(ik >= lbound(this%phase, dim=2))
676 assert(ik <= ubound(this%phase, dim=2))
677
678 call profiling_in("PHASE_APPLY_SINGLE")
679
680 if (conjugate) then
681 ! Apply the phase that contains both the k-point and vector-potential terms.
682 do idim = 1, dim
683 !$omp parallel do
684 do ip = 1, np
685 psi(ip, idim) = conjg(this%phase(ip, ik))*psi(ip, idim)
686 end do
687 !$omp end parallel do
688 end do
689 else
690 ! Apply the conjugate of (i.e. remove) the phase that contains both the k-point and vector-potential terms.
691 do idim = 1, dim
692 !$omp parallel do
693 do ip = 1, np
694 psi(ip, idim) = this%phase(ip, ik)*psi(ip, idim)
695 end do
696 !$omp end parallel do
697 end do
698 end if
699
700 call profiling_out("PHASE_APPLY_SINGLE")
701
702 pop_sub(phase_apply_mf)
703 end subroutine phase_apply_mf
704
705
706 ! ---------------------------------------------------------------------------------------
708 !
  ! ---------------------------------------------------------------------------------------
  !> Apply the spin-spiral boundary phase to a batch of spinor wave functions.
  !!
  !! For each pair of spinor components, the component selected by the sign of
  !! the stored spin expectation value this%spin(3, ist, ik) is multiplied by
  !! the corresponding channel of phase_spiral on the boundary points.
  subroutine phase_phase_spiral(this, der, psib)
    class(phase_t), intent(in) :: this
    type(derivatives_t), intent(in) :: der
    class(wfs_elec_t), intent(inout) :: psib

    integer :: ip, ii, sp
    integer, allocatable :: spin_label(:)
    type(accel_mem_t) :: spin_label_buffer
    integer(int64) :: bsize
    integer(int64), dimension(2) :: np, gsizes, bsizes

    push_sub(phase_phase_spiral)
    call profiling_in("PBC_PHASE_SPIRAL")

    call profiling_count_operations(6*(der%mesh%np_part-der%mesh%np)*psib%nst_linear)

    assert(der%boundaries%spiral)
    assert(psib%type() == type_cmplx)

    ! first boundary point follows sp; skip ghost points when domain-parallel
    sp = der%mesh%np
    if (der%mesh%parallel_in_domains) sp = der%mesh%np + der%mesh%pv%np_ghost


    select case (psib%status())
    case (batch_packed)

      !$omp parallel do private(ip, ii)
      do ip = sp + 1, der%mesh%np_part
        do ii = 1, psib%nst_linear, 2
          ! spin-up states get channel 1 applied to the second spinor
          ! component, spin-down states channel 2 on the first component
          if (this%spin(3,psib%linear_to_ist(ii), psib%ik)>0) then
            psib%zff_pack(ii+1, ip) = psib%zff_pack(ii+1, ip)*this%phase_spiral(ip-sp, 1)
          else
            psib%zff_pack(ii, ip) = psib%zff_pack(ii, ip)*this%phase_spiral(ip-sp, 2)
          end if
        end do
      end do
      !$omp end parallel do

    case (batch_not_packed)

      !$omp parallel private(ii, ip)
      do ii = 1, psib%nst_linear, 2
        if (this%spin(3,psib%linear_to_ist(ii), psib%ik)>0) then
          !$omp do
          do ip = sp + 1, der%mesh%np_part
            psib%zff_linear(ip, ii+1) = psib%zff_linear(ip, ii+1)*this%phase_spiral(ip-sp, 1)
          end do
          !$omp end do nowait
        else
          !$omp do
          do ip = sp + 1, der%mesh%np_part
            psib%zff_linear(ip, ii) = psib%zff_linear(ip, ii)*this%phase_spiral(ip-sp, 2)
          end do
          !$omp end do nowait
        end if
      end do
      !$omp end parallel

      ! NOTE(review): a "case (batch_device_packed)" header appears to be
      ! missing before the GPU path below (compare phase_apply_batch) -- as
      ! written, the kernel launch runs at the end of the batch_not_packed
      ! branch. Confirm against the repository version.

      assert(accel_is_enabled())

      ! generate array of offsets for access of psib and phase_spiral:
      ! TODO: Move this to the routine where spin(:,:,:) is generated
      !       and also move the buffer to the GPU at this point to
      !       avoid unnecessary latency here!

      safe_allocate(spin_label(1:psib%nst_linear))
      spin_label = 0
      do ii = 1, psib%nst_linear, 2
        if (this%spin(3, psib%linear_to_ist(ii), psib%ik) > 0) spin_label(ii)=1
      end do

      call accel_create_buffer(spin_label_buffer, accel_mem_read_only, type_integer, psib%nst_linear)
      call accel_write_buffer(spin_label_buffer, psib%nst_linear, spin_label)

      call accel_kernel_start_call(kernel_phase_spiral, 'phase_spiral.cu', 'phase_spiral_apply')

      ! NOTE(review): kernel arguments 0 and 1 are not visible in this view of
      ! the file (fold-collapsed lines); presumably they set the point range --
      ! verify against the repository before editing this call sequence.
      call accel_set_kernel_arg(kernel_phase_spiral, 2, der%mesh%np_part)
      call accel_set_kernel_arg(kernel_phase_spiral, 3, psib%ff_device)
      call accel_set_kernel_arg(kernel_phase_spiral, 4, log2(psib%pack_size(1)))
      call accel_set_kernel_arg(kernel_phase_spiral, 5, this%buff_phase_spiral)
      call accel_set_kernel_arg(kernel_phase_spiral, 6, spin_label_buffer)

      ! Compute the grid size
      bsize = accel_kernel_block_size(kernel_phase_spiral)/psib%pack_size(1)
      np = (/psib%pack_size(1)/2_int64, int(der%mesh%np_part - sp, int64)/)
      bsizes = (/psib%pack_size(1)/2, 2*bsize/)
      call accel_grid_size(np, bsizes, gsizes)

      ! NOTE(review): argument order (bsizes, gsizes) is reversed relative to
      ! every other accel_kernel_run call in this file, which passes
      ! (global size, local size) -- confirm whether this is intentional.
      call accel_kernel_run(kernel_phase_spiral, bsizes, gsizes)

      call accel_finish()
      call accel_free_buffer(spin_label_buffer)

      safe_deallocate_a(spin_label)

    end select

    call profiling_out("PBC_PHASE_SPIRAL")
    pop_sub(phase_phase_spiral)
  end subroutine phase_phase_spiral
814
815
816 ! ---------------------------------------------------------------------------------------
  !> Returns .true. once the k-point phase array has been built (i.e. after a
  !! successful phase_init_phases or phase_update_phases on a non-Gamma-only run).
  logical pure function phase_is_allocated(this)
    class(phase_t), intent(in) :: this

    phase_is_allocated = allocated(this%phase)
  end function phase_is_allocated
822
823 !----------------------------------------------------------
830 subroutine phase_copy_and_set_phase(phase, gr, kpt, psib, psib_with_phase)
831 class(phase_t), intent(in) :: phase
832 type(grid_t), intent(in) :: gr
833 type(distributed_t), intent(in) :: kpt
834 type(wfs_elec_t), intent(in) :: psib
835 type(wfs_elec_t), intent(out) :: psib_with_phase
836
837 integer :: k_offset, n_boundary_points
838
840
841 call psib%copy_to(psib_with_phase)
842 if (phase%is_allocated()) then
843 call phase%apply_to(gr, gr%np, conjugate = .false., psib = psib_with_phase, src = psib, async=.true.)
844 ! apply phase correction while setting boundary -> memory needs to be
845 ! accessed only once
846 k_offset = psib%ik - kpt%start
847 n_boundary_points = int(gr%np_part - gr%np)
848 call boundaries_set(gr%der%boundaries, gr, psib_with_phase, phase_correction = phase%phase_corr(:, psib%ik), &
849 buff_phase_corr = phase%buff_phase_corr, offset=k_offset*n_boundary_points, async=.true.)
850 else
851 call psib%copy_data_to(gr%np, psib_with_phase)
852 call boundaries_set(gr%der%boundaries, gr, psib_with_phase)
853 end if
854
855 call psib_with_phase%do_pack(copy = .true.)
856
858 end subroutine phase_copy_and_set_phase
859
860
861end module phase_oct_m
862
863!! Local Variables:
864!! mode: f90
865!! coding: utf-8
866!! End:
double exp(double __x) __attribute__((__nothrow__
double sin(double __x) __attribute__((__nothrow__
double cos(double __x) __attribute__((__nothrow__
integer function, public accel_kernel_block_size(kernel)
Definition: accel.F90:1188
subroutine, public accel_free_buffer(this, async)
Definition: accel.F90:1005
subroutine, public accel_kernel_start_call(this, file_name, kernel_name, flags)
Definition: accel.F90:1413
subroutine, public accel_finish()
Definition: accel.F90:1098
subroutine, public accel_detach_buffer(this)
Clear a buffer handle without freeing device memory.
Definition: accel.F90:1049
integer, parameter, public accel_mem_read_write
Definition: accel.F90:185
type(accel_kernel_t), target, save, public kernel_phase_spiral
Definition: accel.F90:275
pure logical function, public accel_is_enabled()
Definition: accel.F90:402
integer, parameter, public accel_mem_read_only
Definition: accel.F90:185
This module implements batches of mesh functions.
Definition: batch.F90:135
integer, parameter, public batch_not_packed
functions are stored in CPU memory, unpacked order
Definition: batch.F90:286
integer, parameter, public batch_device_packed
functions are stored in device memory in packed order
Definition: batch.F90:286
integer, parameter, public batch_packed
functions are stored in CPU memory, in transposed (packed) order
Definition: batch.F90:286
This module implements common operations on batches of mesh functions.
Definition: batch_ops.F90:118
Module implementing boundary conditions in Octopus.
Definition: boundaries.F90:124
This module calculates the derivatives (gradients, Laplacians, etc.) of a function.
real(real64), parameter, public m_zero
Definition: global.F90:191
complex(real64), parameter, public m_zi
Definition: global.F90:205
real(real64), parameter, public m_one
Definition: global.F90:192
This module implements the underlying real-space grid.
Definition: grid.F90:119
This module is intended to contain "only mathematical" functions and procedures.
Definition: math.F90:117
This module defines the meshes, which are used in Octopus.
Definition: mesh.F90:120
integer(int64) function, public mesh_periodic_point(mesh, space, ip)
This function returns the point inside the grid corresponding to a boundary point when PBCs are used....
Definition: mesh.F90:724
real(real64) function, dimension(1:mesh%box%dim), public mesh_x_global(mesh, ipg)
Given a global point index, this function returns the coordinates of the point.
Definition: mesh.F90:817
subroutine phase_phase_spiral(this, der, psib)
apply spiral phase
Definition: phase.F90:805
subroutine phase_unset_phase_corr(phase, mesh, psib, async)
unset the phase correction (if necessary)
Definition: phase.F90:566
subroutine, public phase_accel_rebuild(phase, mesh, kpt)
Rebuild phase accelerator buffers after an intrinsic copy.
Definition: phase.F90:492
subroutine phase_copy_and_set_phase(phase, gr, kpt, psib, psib_with_phase)
Copy a batch to another batch and apply the Bloch phase to it.
Definition: phase.F90:926
subroutine phase_init_phases(phase, gr, kpt, kpoints, d, space)
Initiliaze the phase arrays and copy to GPU the data.
Definition: phase.F90:224
subroutine phase_end(phase)
Releases the memory of the phase object.
Definition: phase.F90:469
subroutine phase_update_phases(phase, mesh, kpt, kpoints, d, space, uniform_vector_potential)
Update the phases.
Definition: phase.F90:335
logical pure function phase_is_allocated(this)
Definition: phase.F90:913
subroutine phase_apply_batch(this, mesh, np, conjugate, psib, src, async)
apply (remove) the phase to the wave functions before (after) applying the Hamiltonian
Definition: phase.F90:591
subroutine phase_set_phase_corr(phase, mesh, psib, async)
set the phase correction (if necessary)
Definition: phase.F90:540
subroutine phase_apply_mf(this, psi, np, dim, ik, conjugate)
apply (or remove) the phase to a wave function psi
Definition: phase.F90:759
subroutine, public profiling_out(label)
Increment out counter and sum up difference between entry and exit time.
Definition: profiling.F90:631
subroutine, public profiling_in(label, exclude)
Increment in counter and save entry time.
Definition: profiling.F90:554
This module handles spin dimensions of the states and the k-point distribution.
type(type_t), public type_float
Definition: types.F90:135
type(type_t), public type_cmplx
Definition: types.F90:136
type(type_t), public type_integer
Definition: types.F90:137
class representing derivatives
Distribution of N instances over mpi_grpsize processes, for the local rank mpi_grprank....
Description of the grid, containing information on derivatives, stencil, and symmetries.
Definition: grid.F90:171
Describes mesh distribution to nodes.
Definition: mesh.F90:187
A container for the phase.
Definition: phase.F90:181
class for organizing spins and k-points
batches of electronic states
Definition: wfs_elec.F90:141
int true(void)