Octopus
phase.F90
Go to the documentation of this file.
1!! Copyright (C) 2009 X. Andrade
2!! Copyright (C) 2024 N. Tancogne-Dejean
3!!
4!! This program is free software; you can redistribute it and/or modify
5!! it under the terms of the GNU General Public License as published by
6!! the Free Software Foundation; either version 2, or (at your option)
7!! any later version.
8!!
9!! This program is distributed in the hope that it will be useful,
10!! but WITHOUT ANY WARRANTY; without even the implied warranty of
11!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12!! GNU General Public License for more details.
13!!
14!! You should have received a copy of the GNU General Public License
15!! along with this program; if not, write to the Free Software
16!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17!! 02110-1301, USA.
18!!
19
20#include "global.h"
21
22module phase_oct_m
23 use accel_oct_m
24 use batch_oct_m
27 use comm_oct_m
28 use debug_oct_m
32 use global_oct_m
33 use grid_oct_m
36 use math_oct_m
37 use mesh_oct_m
39 use mpi_oct_m
41 use space_oct_m
45 use types_oct_m
47
48 implicit none
49
50 private
51
52 public :: &
54
  !> A container for the Bloch phase e^{-i k.r} and the related boundary and
  !! spin-spiral phase corrections, together with their device (GPU) buffers.
  type phase_t
    private
    !> Bloch phase e^{-i k.x}; allocated as (1:np_part, kpt%start:kpt%end)
    complex(real64), allocatable :: phase(:, :)
    !> Phase correction for the boundary points only; allocated as
    !! (np+1:np_part, kpt%start:kpt%end)
    complex(real64), public, allocatable :: phase_corr(:,:)
    !> Spin-spiral phases for boundary points, two spin channels;
    !! allocated as (1:np_part-sp, 1:2) where sp skips inner (and ghost) points
    complex(real64), allocatable :: phase_spiral(:,:)
    type(accel_mem_t) :: buff_phase              ! device copy of phase
    type(accel_mem_t) :: buff_phase_spiral       ! device copy of phase_spiral
    type(accel_mem_t), public :: buff_phase_corr ! device copy of phase_corr
    !> First local k-point index (kpt%start); used to offset into the device buffers
    integer :: buff_phase_qn_start
    !> Spin components per state/k-point; set externally — assumed shape
    !! (1:3, ist, ik), used by the spiral phase (TODO confirm against setter)
    real(real64), public, pointer :: spin(:,:,:) => null()
  contains
    procedure :: init => phase_init_phases
    procedure :: update => phase_update_phases
    procedure :: end => phase_end
    procedure :: set_phase_corr => phase_set_phase_corr
    procedure :: unset_phase_corr => phase_unset_phase_corr
    procedure :: apply_to => phase_apply_batch
    procedure :: apply_to_single => phase_apply_mf
    procedure :: apply_phase_spiral => phase_phase_spiral
    procedure :: is_allocated => phase_is_allocated
    procedure :: copy_and_set_phase => phase_copy_and_set_phase
  end type phase_t
122
123contains
124
125 ! ---------------------------------------------------------
  !> Initialize the phase arrays: the Bloch phase e^{-i k.x} for all local
  !! k-points, the boundary-point phase correction, and (for spiral boundary
  !! conditions) the spin-spiral phases. Copies the data to the GPU when
  !! acceleration is enabled. Returns early for Gamma-point-only calculations.
  subroutine phase_init_phases(phase, gr, kpt, kpoints, d, space)
    class(phase_t), intent(inout) :: phase
    class(mesh_t), intent(in) :: gr
    type(distributed_t), intent(in) :: kpt      ! local k-point distribution (kpt%start:kpt%end)
    type(kpoints_t), intent(in) :: kpoints
    type(states_elec_dim_t), intent(in) :: d
    type(space_t), intent(in) :: space

    integer :: ip, ik, sp
    integer(int64) :: ip_inner_global
    real(real64) :: kpoint(space%dim), x_global(space%dim)

    push_sub(phase_init_phases)

    ! no e^ik phase needed for Gamma-point-only periodic calculations
    ! unless for velocity-gauge for lasers
    if (accel_is_enabled()) then
      phase%buff_phase_qn_start = kpt%start
    end if
    if(kpoints%gamma_only()) then
      pop_sub(phase_init_phases)
      return
    end if

    safe_allocate(phase%phase(1:gr%np_part, kpt%start:kpt%end))
    safe_allocate(phase%phase_corr(gr%np+1:gr%np_part, kpt%start:kpt%end))
    ! Default the correction to one for all points outside the inner mesh;
    ! true boundary points are overwritten further below (ghost points keep one).
    !$omp parallel private(ip)
    do ik = kpt%start, kpt%end
      !$omp do
      do ip = gr%np + 1, gr%np_part
        phase%phase_corr(ip, ik) = m_one
      end do
      !$omp end do nowait
    end do
    !$omp end parallel

    ! Only when gr is a grid_t type, we can access gr%der
    select type(gr)
    class is(grid_t)
      if (gr%der%boundaries%spiralBC) then
        ! sp = number of points to skip: inner points plus ghost points
        ! when the mesh is distributed over domains
        sp = gr%np
        if (gr%parallel_in_domains) sp = gr%np + gr%pv%np_ghost

        ! We decided to allocate the array from 1:np_part-sp as this is less error prone when passing
        ! the array to other routines, or in particular creating a C-style pointer from phase_spiral(1,1).
        ! We will also update phase_corr and possible other similar arrays.

        safe_allocate(phase%phase_spiral(1:gr%np_part-sp, 1:2))

        ! loop over boundary points
        do ip = sp + 1, gr%np_part
          ! get corresponding inner point
          ip_inner_global = mesh_periodic_point(gr, space, ip)
          x_global = mesh_x_global(gr, ip_inner_global)
          ! e^{+/- i q.(x - x_global)} for the two spin channels
          phase%phase_spiral(ip-sp, 1) = &
            exp(m_zi * sum((gr%x(1:space%dim, ip)-x_global(1:space%dim)) * gr%der%boundaries%spiral_q(1:space%dim)))
          phase%phase_spiral(ip-sp, 2) = &
            exp(-m_zi * sum((gr%x(1:space%dim, ip)-x_global(1:space%dim)) * gr%der%boundaries%spiral_q(1:space%dim)))
        end do

        if (accel_is_enabled()) then
          call accel_create_buffer(phase%buff_phase_spiral, accel_mem_read_only, type_cmplx, (gr%np_part-sp)*2)
          call accel_write_buffer(phase%buff_phase_spiral, gr%np_part-sp, 2, phase%phase_spiral)
        end if
      end if
    class default
      ! Do nothing
    end select

    kpoint(1:space%dim) = m_zero

    ! skip ghost points for the boundary loop below
    sp = gr%np
    if (gr%parallel_in_domains) sp = gr%np + gr%pv%np_ghost
    !$omp parallel private(ip, ip_inner_global, x_global, kpoint)
    do ik = kpt%start, kpt%end
      kpoint(1:space%dim) = kpoints%get_point(d%get_kpoint_index(ik))
      !$omp do
      do ip = 1, gr%np_part
        ! Bloch phase e^{-i k.x} at every point (including boundary region)
        phase%phase(ip, ik) = exp(-m_zi * sum(gr%x(1:space%dim, ip) * kpoint(1:space%dim)))
      end do
      !$omp end do

      ! loop over boundary points
      !$omp do
      do ip = sp + 1, gr%np_part
        ! get corresponding inner point
        ip_inner_global = mesh_periodic_point(gr, space, ip)

        ! compute phase correction from global coordinate (opposite sign!)
        x_global = mesh_x_global(gr, ip_inner_global)
        phase%phase_corr(ip, ik) = phase%phase(ip, ik)* &
          exp(m_zi * sum(x_global(1:space%dim) * kpoint(1:space%dim)))
      end do
      !$omp end do nowait
    end do
    !$omp end parallel

    ! Mirror the host arrays on the device
    if (accel_is_enabled()) then
      call accel_create_buffer(phase%buff_phase, accel_mem_read_write, type_cmplx, gr%np_part*kpt%nlocal)
      call accel_write_buffer(phase%buff_phase, gr%np_part, kpt%nlocal, phase%phase)
      call accel_create_buffer(phase%buff_phase_corr, accel_mem_read_write, type_cmplx, (gr%np_part - gr%np)*kpt%nlocal)
      call accel_write_buffer(phase%buff_phase_corr, gr%np_part - gr%np, kpt%nlocal, phase%phase_corr)
    end if

    pop_sub(phase_init_phases)
  end subroutine phase_init_phases
235
236 ! ----------------------------------------------------------------------------------
  !> Update the phases when a uniform vector potential is present:
  !! the effective k-point becomes k + A, and both phase and phase_corr are
  !! recomputed (on the CPU, or on the GPU via the 'update_phases' kernel).
  !! No-op when no uniform vector potential is allocated.
  subroutine phase_update_phases(phase, mesh, kpt, kpoints, d, space, uniform_vector_potential)
    class(phase_t), intent(inout) :: phase
    class(mesh_t), intent(in) :: mesh
    type(distributed_t), intent(in) :: kpt
    type(kpoints_t), intent(in) :: kpoints
    type(states_elec_dim_t), intent(in) :: d
    type(space_t), intent(in) :: space
    real(real64), allocatable, intent(in) :: uniform_vector_potential(:)  ! may be unallocated -> early return

    integer :: ik, ip, sp
    integer(int64), dimension(2) :: np, gsize, bsize
    integer(int64) :: ip_inner_global
    real(real64) :: kpoint(space%dim)
    real(real64), allocatable :: x_global(:,:), kpt_vec_pot(:,:)
    type(accel_mem_t) :: buff_vec_pot, buff_x_global, buff_x
    type(accel_kernel_t), save :: kernel
    real(real64) :: tmp_sum

    ! Early return before push: nothing to update without a vector potential
    if (.not. allocated(uniform_vector_potential)) return

    push_sub_with_profile(phase_update_phases)


    ! Lazily allocate host array and device buffer on first use
    if (.not. allocated(phase%phase)) then
      safe_allocate(phase%phase(1:mesh%np_part, kpt%start:kpt%end))
      if (accel_is_enabled()) then
        call accel_create_buffer(phase%buff_phase, accel_mem_read_write, type_cmplx, &
          mesh%np_part*kpt%nlocal)
      end if
    end if

    if (.not. allocated(phase%phase_corr)) then
      safe_allocate(phase%phase_corr(mesh%np+1:mesh%np_part, kpt%start:kpt%end))
      if (accel_is_enabled()) then
        call accel_create_buffer(phase%buff_phase_corr, accel_mem_read_write, type_cmplx, &
          (mesh%np_part - mesh%np)*kpt%nlocal)
      end if
    end if

    ! TODO: We should not recompute this every time-step. We should store it.

    ! loop over boundary points
    sp = mesh%np
    ! skip ghost points
    if (mesh%parallel_in_domains) sp = mesh%np + mesh%pv%np_ghost

    safe_allocate(x_global(1:space%dim,(sp + 1):mesh%np_part))

    !$omp parallel do schedule(static) private(ip_inner_global)
    do ip = sp + 1, mesh%np_part
      ! get corresponding inner point
      ip_inner_global = mesh_periodic_point(mesh, space, ip)
      ! compute the difference between the global coordinate and the local coordinate
      x_global(:,ip) = mesh_x_global(mesh, ip_inner_global) - mesh%x(1:space%dim, ip)
    end do


    if (.not. accel_is_enabled()) then
      !$omp parallel private(ik, ip, kpoint, tmp_sum)
      do ik = kpt%start, kpt%end
        kpoint(1:space%dim) = kpoints%get_point(d%get_kpoint_index(ik))
        !We add the vector potential
        kpoint(1:space%dim) = kpoint(1:space%dim) + uniform_vector_potential(1:space%dim)

        !$omp do schedule(static)
        do ip = 1, mesh%np_part
          ! phase = e^{-i (k+A).x}
          tmp_sum = sum(mesh%x(1:space%dim, ip)*kpoint(1:space%dim))
          phase%phase(ip, ik) = cmplx(cos(tmp_sum), -sin(tmp_sum), real64)
        end do
        !$omp end do

        !$omp do schedule(static)
        do ip = sp + 1, mesh%np_part
          ! phase_corr = e^{+i (x_global - x).(k+A)} (opposite sign)
          tmp_sum = sum(x_global(1:space%dim, ip)*kpoint(1:space%dim))
          phase%phase_corr(ip, ik) = cmplx(cos(tmp_sum), sin(tmp_sum), real64)
        end do
        !$omp end do nowait
      end do
      !$omp end parallel

    else !accel_is enabled

      ! Upload k+A for every local k-point
      call accel_create_buffer(buff_vec_pot, accel_mem_read_only, type_float, space%dim*kpt%nlocal)
      safe_allocate(kpt_vec_pot(1:space%dim,kpt%start:kpt%end))
      do ik = kpt%start, kpt%end
        kpoint(1:space%dim) = kpoints%get_point(d%get_kpoint_index(ik))
        kpt_vec_pot(1:space%dim, ik) = kpoint(1:space%dim) + uniform_vector_potential(1:space%dim)
      end do
      call accel_write_buffer(buff_vec_pot, space%dim, kpt%nlocal, kpt_vec_pot, async=.true.)

      ! Note: this should be globally stored
      call accel_create_buffer(buff_x, accel_mem_read_only, type_float, space%dim*(mesh%np_part))
      call accel_write_buffer(buff_x, space%dim, mesh%np_part, mesh%x, async=.true.)

      call accel_create_buffer(buff_x_global, accel_mem_read_only, type_float, space%dim*(mesh%np_part-sp))
      call accel_write_buffer(buff_x_global, space%dim, mesh%np_part-sp, x_global(1:space%dim,(sp + 1):mesh%np_part), async=.true.)

      call accel_kernel_start_call(kernel, 'phase.cu', 'update_phases')

      call accel_set_kernel_arg(kernel, 0, space%dim)
      call accel_set_kernel_arg(kernel, 1, mesh%np)
      call accel_set_kernel_arg(kernel, 2, mesh%np_part)
      call accel_set_kernel_arg(kernel, 3, kpt%start)
      call accel_set_kernel_arg(kernel, 4, kpt%end)
      call accel_set_kernel_arg(kernel, 5, sp)
      call accel_set_kernel_arg(kernel, 6, buff_vec_pot)
      call accel_set_kernel_arg(kernel, 7, buff_x)
      call accel_set_kernel_arg(kernel, 8, buff_x_global)
      call accel_set_kernel_arg(kernel, 9, phase%buff_phase)
      call accel_set_kernel_arg(kernel, 10, phase%buff_phase_corr)

      ! Compute the grid size
      np = (/mesh%np_part, kpt%nlocal/)
      bsize = (/accel_kernel_block_size(kernel), 1/)
      call accel_grid_size(np, bsize, gsize)

      call accel_kernel_run(kernel, gsize, bsize)

      ! Mirror the device result back to the host arrays
      call accel_read_buffer(phase%buff_phase, mesh%np_part, kpt%nlocal, phase%phase, async=.true.)
      call accel_read_buffer(phase%buff_phase_corr, mesh%np_part - mesh%np, kpt%nlocal, phase%phase_corr)

      call accel_free_buffer(buff_vec_pot)
      call accel_free_buffer(buff_x)
      call accel_free_buffer(buff_x_global)
      safe_deallocate_a(kpt_vec_pot)
    end if

    safe_deallocate_a(x_global)

    pop_sub_with_profile(phase_update_phases)
  end subroutine phase_update_phases
369
370 ! ----------------------------------------------------------------------------------
372 subroutine phase_end(phase)
373 class(phase_t), intent(inout) :: phase
374
375 push_sub(phase_end)
376
377 if (phase%is_allocated() .and. accel_is_enabled()) then
378 call accel_free_buffer(phase%buff_phase)
379 call accel_free_buffer(phase%buff_phase_corr)
380 end if
381
382 if (allocated(phase%phase_spiral) .and. accel_is_enabled()) then
383 call accel_free_buffer(phase%buff_phase_spiral)
384 end if
385
386 safe_deallocate_a(phase%phase)
387 safe_deallocate_a(phase%phase_corr)
388 safe_deallocate_a(phase%phase_spiral)
389
390 pop_sub(phase_end)
391 end subroutine phase_end
392
393 ! ----------------------------------------------------------------------------------
395 !
396 subroutine phase_set_phase_corr(phase, mesh, psib, async)
397 class(phase_t), intent(in) :: phase
398 class(mesh_t), intent(in) :: mesh
399 type(wfs_elec_t), intent(inout) :: psib
400 logical, optional, intent(in) :: async
401
402
403 logical :: phase_correction
404
405 push_sub(phase_set_phase_corr)
406
407 ! check if we only want a phase correction for the boundary points
408 phase_correction = phase%is_allocated()
409
410 !We apply the phase only to np points, and the phase for the np+1 to np_part points
411 !will be treated as a phase correction in the Hamiltonian
412 if (phase_correction) then
413 call phase%apply_to(mesh, mesh%np, .false., psib, async=async)
414 end if
415
416 pop_sub(phase_set_phase_corr)
417 end subroutine phase_set_phase_corr
418
419 ! ----------------------------------------------------------------------------------
421 !
422 subroutine phase_unset_phase_corr(phase, mesh, psib, async)
423 class(phase_t), intent(in) :: phase
424 class(mesh_t), intent(in) :: mesh
425 type(wfs_elec_t), intent(inout) :: psib
426 logical, optional, intent(in) :: async
427
428 logical :: phase_correction
429
430 push_sub(phase_unset_phase_corr)
431
432 ! check if we only want a phase correction for the boundary points
433 phase_correction = phase%is_allocated()
434
435 !We apply the phase only to np points, and the phase for the np+1 to np_part points
436 !will be treated as a phase correction in the Hamiltonian
437 if (phase_correction) then
438 call phase%apply_to(mesh, mesh%np, .true., psib, async=async)
439 end if
440
442 end subroutine phase_unset_phase_corr
443
444 ! ---------------------------------------------------------------------------------------
446 !
447 subroutine phase_apply_batch(this, mesh, np, conjugate, psib, src, async)
448 class(phase_t), intent(in) :: this
449 class(mesh_t), intent(in) :: mesh
450 integer, intent(in) :: np
451 logical, intent(in) :: conjugate
452 type(wfs_elec_t), target, intent(inout) :: psib
453 type(wfs_elec_t), optional, target, intent(in) :: src
454 logical, optional, intent(in) :: async
455
456 integer :: ip, ii, sp
457 type(wfs_elec_t), pointer :: src_
458 complex(real64) :: phase
459 integer(int64), dimension(3) :: gsizes, bsizes
460 type(accel_kernel_t), save :: ker_phase
461
462 push_sub(phase_apply_batch)
463 call profiling_in("PHASE_APPLY_BATCH")
464
465 call profiling_count_operations(6*np*psib%nst_linear)
466
467 assert(np <= mesh%np_part)
468 assert(psib%type() == type_cmplx)
469 assert(psib%ik >= lbound(this%phase, dim=2))
470 assert(psib%ik <= ubound(this%phase, dim=2))
471
472 src_ => psib
473 if (present(src)) src_ => src
474
475 assert(src_%has_phase .eqv. conjugate)
476 assert(src_%ik == psib%ik)
477 assert(src_%type() == type_cmplx)
478
479 ! We want to skip the ghost points for setting the phase
480 sp = min(np, mesh%np)
481 if (np > mesh%np .and. mesh%parallel_in_domains) sp = mesh%np + mesh%pv%np_ghost
482
483 select case (psib%status())
484 case (batch_packed)
485
486 if (conjugate) then
487
488 !$omp parallel private(ii, phase)
489 !$omp do
490 do ip = 1, min(mesh%np, np)
491 phase = conjg(this%phase(ip, psib%ik))
492 !$omp simd
493 do ii = 1, psib%nst_linear
494 psib%zff_pack(ii, ip) = phase*src_%zff_pack(ii, ip)
495 end do
496 end do
497 !$omp end do nowait
498
499 ! Boundary points, if requested
500 !$omp do
501 do ip = sp+1, np
502 phase = conjg(this%phase(ip, psib%ik))
503 !$omp simd
504 do ii = 1, psib%nst_linear
505 psib%zff_pack(ii, ip) = phase*src_%zff_pack(ii, ip)
506 end do
507 end do
508 !$omp end parallel
509
510 else
511
512 !$omp parallel private(ii, phase)
513 !$omp do
514 do ip = 1, min(mesh%np, np)
515 phase = this%phase(ip, psib%ik)
516 !$omp simd
517 do ii = 1, psib%nst_linear
518 psib%zff_pack(ii, ip) = phase*src_%zff_pack(ii, ip)
519 end do
520 end do
521 !$omp end do nowait
522
523 ! Boundary points, if requested
524 !$omp do
525 do ip = sp+1, np
526 phase = this%phase(ip, psib%ik)
527 !$omp simd
528 do ii = 1, psib%nst_linear
529 psib%zff_pack(ii, ip) = phase*src_%zff_pack(ii, ip)
530 end do
531 end do
532 !$omp end parallel
533
534 end if
535
536 case (batch_not_packed)
537
538 if (conjugate) then
539
540 !$omp parallel private(ii, ip)
541 do ii = 1, psib%nst_linear
542 !$omp do simd
543 do ip = 1, min(mesh%np, np)
544 psib%zff_linear(ip, ii) = conjg(this%phase(ip, psib%ik))*src_%zff_linear(ip, ii)
545 end do
546 !$omp end do simd nowait
547
548 ! Boundary points, if requested
549 !$omp do simd
550 do ip = sp+1, np
551 psib%zff_linear(ip, ii) = conjg(this%phase(ip, psib%ik))*src_%zff_linear(ip, ii)
552 end do
553 !$omp end do simd nowait
554 end do
555 !$omp end parallel
556
557 else
558 !$omp parallel private(ii, ip)
559 do ii = 1, psib%nst_linear
560 !$omp do simd
561 do ip = 1, min(mesh%np, np)
562 psib%zff_linear(ip, ii) = this%phase(ip, psib%ik)*src_%zff_linear(ip, ii)
563 end do
564 !$omp end do simd nowait
565
566 ! Boundary points, if requested
567 !$omp do simd
568 do ip = sp+1, np
569 psib%zff_linear(ip, ii) = this%phase(ip, psib%ik)*src_%zff_linear(ip, ii)
570 end do
571 !$omp end do simd nowait
572
573 end do
574 !$omp end parallel
575
576 end if
577
579 call accel_kernel_start_call(ker_phase, 'phase.cu', 'phase_hamiltonian')
580
581 if (conjugate) then
582 call accel_set_kernel_arg(ker_phase, 0, 1_4)
583 else
584 call accel_set_kernel_arg(ker_phase, 0, 0_4)
585 end if
586
587 call accel_set_kernel_arg(ker_phase, 1, (psib%ik - this%buff_phase_qn_start)*mesh%np_part)
588 call accel_set_kernel_arg(ker_phase, 2, np)
589 call accel_set_kernel_arg(ker_phase, 3, this%buff_phase)
590 call accel_set_kernel_arg(ker_phase, 4, src_%ff_device)
591 call accel_set_kernel_arg(ker_phase, 5, log2(int(src_%pack_size(1), int32)))
592 call accel_set_kernel_arg(ker_phase, 6, psib%ff_device)
593 call accel_set_kernel_arg(ker_phase, 7, log2(int(psib%pack_size(1), int32)))
594
595 ! Compute the grid (extend to another dimensions if the size of the problem is too big)
596 call accel_grid_size_extend_dim(int(np, int64), psib%pack_size(1), gsizes, bsizes, ker_phase)
597
598 call accel_kernel_run(ker_phase, gsizes, bsizes)
599
600 if(.not. optional_default(async, .false.)) call accel_finish()
601 end select
602
603 psib%has_phase = .not. conjugate
604
605 call profiling_out("PHASE_APPLY_BATCH")
606 pop_sub(phase_apply_batch)
607 end subroutine phase_apply_batch
608
614 !
  !> Apply (conjugate = .false.) or remove (conjugate = .true.) the phase
  !! on a single mesh function psi(1:np, 1:dim) for k-point index ik.
  subroutine phase_apply_mf(this, psi, np, dim, ik, conjugate)
    class(phase_t), intent(in) :: this
    complex(real64), intent(inout) :: psi(:, :)
    integer, intent(in) :: np
    integer, intent(in) :: dim
    integer, intent(in) :: ik
    logical, intent(in) :: conjugate

    integer :: idim, ip

    push_sub(phase_apply_mf)

    assert(ik >= lbound(this%phase, dim=2))
    assert(ik <= ubound(this%phase, dim=2))

    call profiling_in("PHASE_APPLY_SINGLE")

    ! NOTE(review): the original branch comments were swapped; they are
    ! corrected here. phase = e^{-i k.x}, so multiplying by conjg removes it.
    if (conjugate) then
      ! Remove (conjugate of) the phase that contains both the k-point and vector-potential terms.
      do idim = 1, dim
        !$omp parallel do
        do ip = 1, np
          psi(ip, idim) = conjg(this%phase(ip, ik))*psi(ip, idim)
        end do
        !$omp end parallel do
      end do
    else
      ! Apply the phase that contains both the k-point and vector-potential terms.
      do idim = 1, dim
        !$omp parallel do
        do ip = 1, np
          psi(ip, idim) = this%phase(ip, ik)*psi(ip, idim)
        end do
        !$omp end parallel do
      end do
    end if

    call profiling_out("PHASE_APPLY_SINGLE")

    pop_sub(phase_apply_mf)
  end subroutine phase_apply_mf
656
657
658 ! ---------------------------------------------------------------------------------------
660 !
661 subroutine phase_phase_spiral(this, der, psib)
662 class(phase_t), intent(in) :: this
663 type(derivatives_t), intent(in) :: der
664 class(wfs_elec_t), intent(inout) :: psib
665
666 integer :: ip, ii, sp
667 integer, allocatable :: spin_label(:)
668 type(accel_mem_t) :: spin_label_buffer
669 integer(int64) :: bsize
670 integer(int64), dimension(2) :: np, gsizes, bsizes
671
672 push_sub(phase_phase_spiral)
673 call profiling_in("PBC_PHASE_SPIRAL")
674
675 call profiling_count_operations(6*(der%mesh%np_part-der%mesh%np)*psib%nst_linear)
676
677 assert(der%boundaries%spiral)
678 assert(psib%type() == type_cmplx)
679
680 sp = der%mesh%np
681 if (der%mesh%parallel_in_domains) sp = der%mesh%np + der%mesh%pv%np_ghost
682
683
684 select case (psib%status())
685 case (batch_packed)
686
687 !$omp parallel do private(ip, ii)
688 do ip = sp + 1, der%mesh%np_part
689 do ii = 1, psib%nst_linear, 2
690 if (this%spin(3,psib%linear_to_ist(ii), psib%ik)>0) then
691 psib%zff_pack(ii+1, ip) = psib%zff_pack(ii+1, ip)*this%phase_spiral(ip-sp, 1)
692 else
693 psib%zff_pack(ii, ip) = psib%zff_pack(ii, ip)*this%phase_spiral(ip-sp, 2)
694 end if
695 end do
696 end do
697 !$omp end parallel do
698
699 case (batch_not_packed)
700
701 !$omp parallel private(ii, ip)
702 do ii = 1, psib%nst_linear, 2
703 if (this%spin(3,psib%linear_to_ist(ii), psib%ik)>0) then
704 !$omp do
705 do ip = sp + 1, der%mesh%np_part
706 psib%zff_linear(ip, ii+1) = psib%zff_linear(ip, ii+1)*this%phase_spiral(ip-sp, 1)
707 end do
708 !$omp end do nowait
709 else
710 !$omp do
711 do ip = sp + 1, der%mesh%np_part
712 psib%zff_linear(ip, ii) = psib%zff_linear(ip, ii)*this%phase_spiral(ip-sp, 2)
713 end do
714 !$omp end do nowait
715 end if
716 end do
717 !$omp end parallel
718
720
721 assert(accel_is_enabled())
722
723 ! generate array of offsets for access of psib and phase_spiral:
724 ! TODO: Move this to the routine where spin(:,:,:) is generated
725 ! and also move the buffer to the GPU at this point to
726 ! avoid unecessary latency here!
727
728 safe_allocate(spin_label(1:psib%nst_linear))
729 spin_label = 0
730 do ii = 1, psib%nst_linear, 2
731 if (this%spin(3, psib%linear_to_ist(ii), psib%ik) > 0) spin_label(ii)=1
732 end do
733
734 call accel_create_buffer(spin_label_buffer, accel_mem_read_only, type_integer, psib%nst_linear)
735 call accel_write_buffer(spin_label_buffer, psib%nst_linear, spin_label)
736
737 call accel_kernel_start_call(kernel_phase_spiral, 'phase_spiral.cu', 'phase_spiral_apply')
738
741 call accel_set_kernel_arg(kernel_phase_spiral, 2, der%mesh%np_part)
742 call accel_set_kernel_arg(kernel_phase_spiral, 3, psib%ff_device)
743 call accel_set_kernel_arg(kernel_phase_spiral, 4, log2(psib%pack_size(1)))
744 call accel_set_kernel_arg(kernel_phase_spiral, 5, this%buff_phase_spiral)
745 call accel_set_kernel_arg(kernel_phase_spiral, 6, spin_label_buffer)
746
747 ! Compute the grid size
748 bsize = accel_kernel_block_size(kernel_phase_spiral)/psib%pack_size(1)
749 np = (/psib%pack_size(1)/2_int64, int(der%mesh%np_part - sp, int64)/)
750 bsizes = (/psib%pack_size(1)/2, 2*bsize/)
751 call accel_grid_size(np, bsizes, gsizes)
752
753 call accel_kernel_run(kernel_phase_spiral, bsizes, gsizes)
754
755 call accel_finish()
757 call accel_free_buffer(spin_label_buffer)
758
759 safe_deallocate_a(spin_label)
760
761 end select
762
763 call profiling_out("PBC_PHASE_SPIRAL")
764 pop_sub(phase_phase_spiral)
765 end subroutine phase_phase_spiral
766
767
768 ! ---------------------------------------------------------------------------------------
  !> Returns .true. when the main phase array is allocated, i.e. when the
  !! phase (and therefore the boundary phase correction) is in use.
  logical pure function phase_is_allocated(this)
    class(phase_t), intent(in) :: this

    phase_is_allocated = allocated(this%phase)
  end function phase_is_allocated
774
775 !----------------------------------------------------------
782 subroutine phase_copy_and_set_phase(phase, gr, kpt, psib, psib_with_phase)
783 class(phase_t), intent(in) :: phase
784 type(grid_t), intent(in) :: gr
785 type(distributed_t), intent(in) :: kpt
786 type(wfs_elec_t), intent(in) :: psib
787 type(wfs_elec_t), intent(out) :: psib_with_phase
788
789 integer :: k_offset, n_boundary_points
790
792
793 call psib%copy_to(psib_with_phase)
794 if (phase%is_allocated()) then
795 call phase%apply_to(gr, gr%np, conjugate = .false., psib = psib_with_phase, src = psib, async=.true.)
796 ! apply phase correction while setting boundary -> memory needs to be
797 ! accessed only once
798 k_offset = psib%ik - kpt%start
799 n_boundary_points = int(gr%np_part - gr%np)
800 call boundaries_set(gr%der%boundaries, gr, psib_with_phase, phase_correction = phase%phase_corr(:, psib%ik), &
801 buff_phase_corr = phase%buff_phase_corr, offset=k_offset*n_boundary_points, async=.true.)
802 else
803 call psib%copy_data_to(gr%np, psib_with_phase)
804 call boundaries_set(gr%der%boundaries, gr, psib_with_phase)
805 end if
806
807 call psib_with_phase%do_pack(copy = .true.)
808
810 end subroutine phase_copy_and_set_phase
811
812
813end module phase_oct_m
814
815!! Local Variables:
816!! mode: f90
817!! coding: utf-8
818!! End:
double exp(double __x) __attribute__((__nothrow__
double sin(double __x) __attribute__((__nothrow__
double cos(double __x) __attribute__((__nothrow__
integer function, public accel_kernel_block_size(kernel)
Definition: accel.F90:1167
subroutine, public accel_free_buffer(this, async)
Definition: accel.F90:1004
subroutine, public accel_kernel_start_call(this, file_name, kernel_name, flags)
Definition: accel.F90:1392
subroutine, public accel_finish()
Definition: accel.F90:1077
integer, parameter, public accel_mem_read_write
Definition: accel.F90:184
type(accel_kernel_t), target, save, public kernel_phase_spiral
Definition: accel.F90:274
pure logical function, public accel_is_enabled()
Definition: accel.F90:401
integer, parameter, public accel_mem_read_only
Definition: accel.F90:184
This module implements batches of mesh functions.
Definition: batch.F90:135
integer, parameter, public batch_not_packed
functions are stored in CPU memory, unpacked order
Definition: batch.F90:286
integer, parameter, public batch_device_packed
functions are stored in device memory in packed order
Definition: batch.F90:286
integer, parameter, public batch_packed
functions are stored in CPU memory, in transposed (packed) order
Definition: batch.F90:286
This module implements common operations on batches of mesh functions.
Definition: batch_ops.F90:118
Module implementing boundary conditions in Octopus.
Definition: boundaries.F90:124
This module calculates the derivatives (gradients, Laplacians, etc.) of a function.
real(real64), parameter, public m_zero
Definition: global.F90:191
complex(real64), parameter, public m_zi
Definition: global.F90:205
real(real64), parameter, public m_one
Definition: global.F90:192
This module implements the underlying real-space grid.
Definition: grid.F90:119
This module is intended to contain "only mathematical" functions and procedures.
Definition: math.F90:117
This module defines the meshes, which are used in Octopus.
Definition: mesh.F90:120
integer(int64) function, public mesh_periodic_point(mesh, space, ip)
This function returns the point inside the grid corresponding to a boundary point when PBCs are used....
Definition: mesh.F90:724
real(real64) function, dimension(1:mesh%box%dim), public mesh_x_global(mesh, ipg)
Given a global point index, this function returns the coordinates of the point.
Definition: mesh.F90:817
subroutine phase_phase_spiral(this, der, psib)
apply spiral phase
Definition: phase.F90:757
subroutine phase_unset_phase_corr(phase, mesh, psib, async)
unset the phase correction (if necessary)
Definition: phase.F90:518
subroutine phase_copy_and_set_phase(phase, gr, kpt, psib, psib_with_phase)
Copy a batch to another batch and apply the Bloch phase to it.
Definition: phase.F90:878
subroutine phase_init_phases(phase, gr, kpt, kpoints, d, space)
Initiliaze the phase arrays and copy to GPU the data.
Definition: phase.F90:223
subroutine phase_end(phase)
Releases the memory of the phase object.
Definition: phase.F90:468
subroutine phase_update_phases(phase, mesh, kpt, kpoints, d, space, uniform_vector_potential)
Update the phases.
Definition: phase.F90:334
logical pure function phase_is_allocated(this)
Definition: phase.F90:865
subroutine phase_apply_batch(this, mesh, np, conjugate, psib, src, async)
apply (remove) the phase to the wave functions before (after) applying the Hamiltonian
Definition: phase.F90:543
subroutine phase_set_phase_corr(phase, mesh, psib, async)
set the phase correction (if necessary)
Definition: phase.F90:492
subroutine phase_apply_mf(this, psi, np, dim, ik, conjugate)
apply (or remove) the phase to a wave function psi
Definition: phase.F90:711
subroutine, public profiling_out(label)
Increment out counter and sum up difference between entry and exit time.
Definition: profiling.F90:631
subroutine, public profiling_in(label, exclude)
Increment in counter and save entry time.
Definition: profiling.F90:554
This module handles spin dimensions of the states and the k-point distribution.
type(type_t), public type_float
Definition: types.F90:135
type(type_t), public type_cmplx
Definition: types.F90:136
type(type_t), public type_integer
Definition: types.F90:137
class representing derivatives
Distribution of N instances over mpi_grpsize processes, for the local rank mpi_grprank....
Description of the grid, containing information on derivatives, stencil, and symmetries.
Definition: grid.F90:171
Describes mesh distribution to nodes.
Definition: mesh.F90:187
A container for the phase.
Definition: phase.F90:180
class for organizing spins and k-points
batches of electronic states
Definition: wfs_elec.F90:141
int true(void)