Octopus
nl_operator.F90
Go to the documentation of this file.
1!! Copyright (C) 2002-2006 M. Marques, A. Castro, A. Rubio, G. Bertsch
2!!
3!! This program is free software; you can redistribute it and/or modify
4!! it under the terms of the GNU General Public License as published by
5!! the Free Software Foundation; either version 2, or (at your option)
6!! any later version.
7!!
8!! This program is distributed in the hope that it will be useful,
9!! but WITHOUT ANY WARRANTY; without even the implied warranty of
10!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11!! GNU General Public License for more details.
12!!
13!! You should have received a copy of the GNU General Public License
14!! along with this program; if not, write to the Free Software
15!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16!! 02110-1301, USA.
17!!
18
19#include "global.h"
20
24 use accel_oct_m
25 use batch_oct_m
27 use debug_oct_m
28 use global_oct_m
29 use index_oct_m
30 use iso_c_binding
31 use math_oct_m
32 use mesh_oct_m
34 use mpi_oct_m
39 use parser_oct_m
41 use space_oct_m
42 use sort_oct_m
44 use types_oct_m
46
47 implicit none
48
49 private
50 public :: &
71
77 private
78 integer :: nri = 0
79 integer, allocatable :: imin(:)
80 integer, allocatable :: imax(:)
81 integer, allocatable :: ri(:, :)
82 integer, allocatable :: ri_pos(:,:,:)
83 integer, allocatable :: ri_neg(:,:,:)
84 end type nl_operator_index_t
85
86 integer, public, parameter :: &
87 OP_GENERAL = 1, &
88 op_symmetric = 2, &
90
92 type nl_operator_t
93 private
94 type(stencil_t), public :: stencil
95 type(mesh_t), pointer :: mesh => null()
96 integer, allocatable :: nn(:)
97 integer, public :: np = 0
98 ! When running in parallel mode, the next three arrays are unique on each node.
99 real(real64), allocatable, public :: w(:,:)
100
101 logical, public :: const_w = .true.
102
103 type(accel_mem_t), public :: buff_weights
104 type(accel_mem_t), public :: buff_half_weights
105
106 integer :: symmetry = op_general
107
108 character(len=40) :: label
109
111 integer, public :: nri = 0
112 integer, allocatable, public :: ri(:,:)
113 integer, allocatable, public :: rimap(:)
114 integer, allocatable, public :: rimap_inv(:)
115
117 integer :: npairs = 0
118 real(real64),allocatable :: wpair(:)
119 real(real64) :: wcenter
120 integer, allocatable :: ri_pos(:,:,:)
121 integer, allocatable :: ri_neg(:,:,:)
122 integer :: max_allocated_ri_pair = 0
123
124 integer :: ninner = 0
125 integer :: nouter = 0
126
127 type(nl_operator_index_t) :: inner
128 type(nl_operator_index_t) :: outer
129
130 type(accel_kernel_t) :: kernel
131 type(accel_mem_t) :: buff_imin
132 type(accel_mem_t) :: buff_imax
133 type(accel_mem_t) :: buff_ri
134 type(accel_mem_t) :: buff_map
135 type(accel_mem_t) :: buff_all
136 type(accel_mem_t) :: buff_inner
137 type(accel_mem_t) :: buff_outer
138 type(accel_mem_t) :: buff_stencil
139 type(accel_mem_t) :: buff_ip_to_xyz
140 type(accel_mem_t) :: buff_xyz_to_ip
141
143 type(accel_mem_t), public :: buff_wpair
144 type(accel_mem_t), public :: buff_half_wpair
145 type(accel_mem_t), public :: buff_ri_pos
146 type(accel_mem_t), public :: buff_ri_neg
147 type(accel_mem_t), public :: buff_map_sym
148 integer :: max_allocated_ri_pair_gpu = 0
149
150 ! For multigrid solvers
151 type(nl_operator_t), public, pointer :: coarser => null()
152
153 end type nl_operator_t
154
155 integer, parameter :: &
156 OP_FORTRAN = 0, &
157 op_vec = 1, &
158 op_min = op_fortran, &
159 op_max = op_vec
160
161 integer, parameter :: &
162 OP_INVMAP = 1, &
163 op_map = 2, &
164 op_nomap = 3
165
166 integer, public, parameter :: OP_ALL = 3, op_inner = 1, op_outer = 2
167
168 interface
169 integer function op_is_available(opid, type)
170 implicit none
171 integer, intent(in) :: opid, type
172 end function op_is_available
173 end interface
175 integer :: dfunction_global = -1
176 integer :: zfunction_global = -1
177 integer :: function_accel
178 logical :: use_symmetries = .false.
179
180contains
182 ! ---------------------------------------------------------
185 subroutine nl_operator_global_init(namespace)
186 type(namespace_t), intent(in) :: namespace
188 integer :: default
192 !%Variable OperateDouble
193 !%Type integer
194 !%Section Execution::Optimization
195 !%Default optimized
196 !%Description
197 !% This variable selects the subroutine used to apply non-local
198 !% operators over the grid for real functions.
199 !%Option fortran 0
200 !% The standard Fortran function.
201 !%Option optimized 1
202 !% This version is optimized using vector primitives (if available).
203 !%End
204
205 !%Variable OperateComplex
206 !%Type integer
207 !%Section Execution::Optimization
208 !%Default optimized
209 !%Description
210 !% This variable selects the subroutine used to apply non-local
211 !% operators over the grid for complex functions.
212 !%Option fortran 0
213 !% The standard Fortran function.
214 !%Option optimized 1
215 !% This version is optimized using vector primitives (if available).
216 !%End
218 default = op_vec
220 call parse_variable(namespace, 'OperateDouble', default, dfunction_global)
221 if (.not. varinfo_valid_option('OperateDouble', dfunction_global)) call messages_input_error(namespace, 'OperateDouble')
223 call parse_variable(namespace, 'OperateComplex', default, zfunction_global)
224 if (.not. varinfo_valid_option('OperateComplex', zfunction_global)) call messages_input_error(namespace, 'OperateComplex')
226 !%Variable OperateUseSymmetries
227 !%Type logical
228 !%Section Execution::Optimization
229 !%Default no
230 !%Description
231 !% This variable selects if the operators are built using symmetries or not.
232 !%End
233 call parse_variable(namespace, 'OperateUseSymmetries', .false., use_symmetries)
236
237 !%Variable OperateAccel
238 !%Type integer
239 !%Default map
240 !%Section Execution::Optimization
241 !%Description
242 !% This variable selects the subroutine used to apply non-local
243 !% operators over the grid when an accelerator device is used.
244 !%Option invmap 1
245 !% The standard implementation ported to GPUs.
246 !%Option map 2
247 !% A different version, more suitable for GPUs.
248 !%End
249 call parse_variable(namespace, 'OperateAccel', op_map, function_accel)
251 call messages_obsolete_variable(namespace, 'OperateOpenCL', 'OperateAccel')
252
253 end if
254
257
258 ! ---------------------------------------------------------
259
260 subroutine nl_operator_global_end()
262
265
266 ! ---------------------------------------------------------
270 subroutine nl_operator_init(op, label, symm)
271 type(nl_operator_t), intent(inout) :: op
272 character(len=*), intent(in) :: label
273 integer, optional, intent(in) :: symm
274
275 push_sub(nl_operator_init)
276
277 op%label = label
278 op%symmetry = op_general
279 if (use_symmetries) then
280 op%symmetry = optional_default(symm, op_general)
281 end if
282
283 pop_sub(nl_operator_init)
284 end subroutine nl_operator_init
285
286 ! ---------------------------------------------------------
296 subroutine nl_operator_build(space, mesh, op, np, const_w, regenerate)
297 class(space_t), intent(in) :: space
298 type(mesh_t), target, intent(in) :: mesh
299 type(nl_operator_t), intent(inout) :: op
300 integer, intent(in) :: np
301 logical, optional, intent(in) :: const_w
302 logical, optional, intent(in) :: regenerate
303
304 integer :: ii, jj, p1(space%dim), time, current
305 integer, allocatable :: st1(:), st2(:), st1r(:)
306 integer :: ir, maxp, iinner, iouter
307 logical :: change, force_change
308 character(len=200) :: flags
309 integer, allocatable :: inner_points(:), outer_points(:), all_points(:)
310
311 push_sub(nl_operator_build)
312
313 op%const_w = optional_default(const_w, .false.)
314 if (mesh%parallel_in_domains .and. .not. op%const_w) then
315 call messages_experimental('Domain parallelization with curvilinear coordinates')
316 end if
317
318 assert(np > 0)
319
320 ! store values in structure
321 op%np = np
322 op%mesh => mesh
323 if (.not. optional_default(regenerate, .false.)) then
324 op%const_w = optional_default(const_w, .false.)
325 end if
326
327 ! allocate weights op%w
328 if (op%const_w) then
329 safe_allocate(op%w(1:op%stencil%size, 1))
330 message(1) = 'Debug: nl_operator_build: working with constant weights.'
331 call messages_info(1, debug_only=.true.)
332 else
333 safe_allocate(op%w(1:op%stencil%size, 1:op%np))
334 message(1) = 'Debug: nl_operator_build: working with non-constant weights.'
335 call messages_info(1, debug_only=.true.)
336 end if
337
338 ! set initially to zero
339 op%w = m_zero
340
341 ! Build lookup table
342 safe_allocate(st1(1:op%stencil%size))
343 safe_allocate(st1r(1:op%stencil%size))
344 safe_allocate(st2(1:op%stencil%size))
345
346 op%nri = 0
347 do time = 1, 2
348 st2 = 0
349 do ii = 1, np
350 p1 = 0
351 call mesh_local_index_to_coords(mesh, ii, p1)
352
353 do jj = 1, op%stencil%size
354 ! Get local index of p1 plus current stencil point.
355 st1(jj) = mesh_local_index_from_coords(mesh, p1 + op%stencil%points(:, jj))
356
357 assert(st1(jj) > 0)
358 end do
359
360 st1(1:op%stencil%size) = st1(1:op%stencil%size) - ii
361
362 change = any(st1 /= st2)
363
364 !the next is to detect when we move from a point that does not
365 !have boundary points as neighbours to one that has
366 force_change = any(st1 + ii > mesh%np) .and. all(st2 + ii - 1 <= mesh%np)
367
368 ! if the stencil changes
369 if (change .or. force_change) then
370 !store it
371 st2(:) = st1(:)
372
373 !first time, just count
374 if (time == 1) op%nri = op%nri + 1
375
376 !second time, store
377 if (time == 2) then
378 current = current + 1
379 op%ri(1:op%stencil%size, current) = st1(1:op%stencil%size)
380 end if
381 end if
382
383 if (time == 2) op%rimap(ii) = current
384
385 end do
386
387 !after counting, allocate
388 if (time == 1) then
389 safe_deallocate_a(op%ri)
390 safe_deallocate_a(op%rimap)
391 safe_deallocate_a(op%rimap_inv)
392
393 safe_allocate(op%ri(1:op%stencil%size, 1:op%nri))
394 safe_allocate(op%rimap(1:op%np))
395 safe_allocate(op%rimap_inv(1:op%nri + 1))
396 op%ri = 0
397 op%rimap = 0
398 op%rimap_inv = 0
399 current = 0
400
401 ! the sizes
402 if (mesh%use_curvilinear) then
403 safe_allocate(op%nn(1:op%nri))
404 ! for the moment all the sizes are the same
405 op%nn = op%stencil%size
406 end if
407 end if
408
409 end do
410
411 !the inverse mapping
412 op%rimap_inv(1) = 0
413 do jj = 1, op%np
414 op%rimap_inv(op%rimap(jj) + 1) = jj
415 end do
416 op%rimap_inv(op%nri + 1) = op%np
417
418 safe_deallocate_a(st1)
419 safe_deallocate_a(st1r)
420 safe_deallocate_a(st2)
421
422 if (op%mesh%parallel_in_domains) then
423 !now build the arrays required to apply the nl_operator by parts
424
425 !count points
426 op%inner%nri = 0
427 op%outer%nri = 0
428 do ir = 1, op%nri
429 maxp = op%rimap_inv(ir + 1) + maxval(op%ri(1:op%stencil%size, ir))
430 if (maxp <= np) then
431 !inner point
432 op%inner%nri = op%inner%nri + 1
433 assert(op%inner%nri <= op%nri)
434 else
435 !outer point
436 op%outer%nri = op%outer%nri + 1
437 assert(op%outer%nri <= op%nri)
438 end if
439 end do
440
441 assert(op%inner%nri + op%outer%nri == op%nri)
442
443 if (optional_default(regenerate, .false.)) then
444 safe_deallocate_a(op%inner%imin)
445 safe_deallocate_a(op%inner%imax)
446 safe_deallocate_a(op%inner%ri)
447 safe_deallocate_a(op%outer%imin)
448 safe_deallocate_a(op%outer%imax)
449 safe_deallocate_a(op%outer%ri)
450 end if
451 safe_allocate(op%inner%imin(1:op%inner%nri + 1))
452 safe_allocate(op%inner%imax(1:op%inner%nri))
453 safe_allocate(op%inner%ri(1:op%stencil%size, 1:op%inner%nri))
454
455 safe_allocate(op%outer%imin(1:op%outer%nri + 1))
456 safe_allocate(op%outer%imax(1:op%outer%nri))
457 safe_allocate(op%outer%ri(1:op%stencil%size, 1:op%outer%nri))
458
459 !now populate the arrays
460 iinner = 0
461 iouter = 0
462 do ir = 1, op%nri
463 maxp = op%rimap_inv(ir + 1) + maxval(op%ri(1:op%stencil%size, ir))
464 if (maxp <= np) then
465 !inner point
466 iinner = iinner + 1
467 op%inner%imin(iinner) = op%rimap_inv(ir)
468 op%inner%imax(iinner) = op%rimap_inv(ir + 1)
469 op%inner%ri(1:op%stencil%size, iinner) = op%ri(1:op%stencil%size, ir)
470 else
471 !outer point
472 iouter = iouter + 1
473 op%outer%imin(iouter) = op%rimap_inv(ir)
474 op%outer%imax(iouter) = op%rimap_inv(ir + 1)
475 op%outer%ri(1:op%stencil%size, iouter) = op%ri(1:op%stencil%size, ir)
476 end if
477 end do
478
479 !verify that all points in the inner operator are actually inner
480 do ir = 1, op%inner%nri
481 do ii = op%inner%imin(ir) + 1, op%inner%imax(ir)
482 assert(all(ii + op%inner%ri(1:op%stencil%size, ir) <= mesh%np))
483 end do
484 end do
485
486 end if
487
488 if (accel_is_enabled() .and. op%const_w) then
489
490 write(flags, '(i5)') op%stencil%size
491 flags='-DNDIM=3 -DSTENCIL_SIZE='//trim(adjustl(flags))
492
493 if (op%symmetry /= op_general) then
494 block
495 character(len=16) :: npairs_str
496 write(npairs_str, '(i0)') op%stencil%size/2
497 flags = trim(flags)//' -DNPAIRS='//trim(adjustl(npairs_str))
498 end block
499 end if
500
501 if (op%mesh%parallel_in_domains) flags = '-DINDIRECT '//trim(flags)
502
503 select case (function_accel)
504 case (op_invmap)
505 if (op%symmetry /= op_general) then
506 call messages_not_implemented("OperateUseSymmetries=yes with OperateAccel=invmap")
507 end if
508 call accel_kernel_build(op%kernel, 'operate.cu', 'operate', flags)
509 case (op_map)
510 select case (op%symmetry)
511 case (op_general)
512 call accel_kernel_build(op%kernel, 'operate.cu', 'operate_map', flags)
513 case (op_symmetric)
514 call accel_kernel_build(op%kernel, 'operate.cu', 'operate_map_sym', flags)
515 case (op_antisymmetric)
516 call accel_kernel_build(op%kernel, 'operate.cu', 'operate_map_antisym', flags)
517 end select
518 end select
519
520 ! conversion to i8 needed to avoid integer overflow
521 call accel_create_buffer(op%buff_ri, accel_mem_read_only, type_integer, int(op%nri, int64)*op%stencil%size)
522 call accel_write_buffer(op%buff_ri, op%stencil%size, op%nri, op%ri)
523
524 select case (function_accel)
525 case (op_invmap)
526 call accel_create_buffer(op%buff_imin, accel_mem_read_only, type_integer, op%nri)
527 call accel_write_buffer(op%buff_imin, op%nri, op%rimap_inv(1:))
528 call accel_create_buffer(op%buff_imax, accel_mem_read_only, type_integer, op%nri)
529 call accel_write_buffer(op%buff_imax, op%nri, op%rimap_inv(2:))
530
531 case (op_map)
532
534 call accel_write_buffer(op%buff_map, op%mesh%np, (op%rimap - 1)*op%stencil%size)
535
536 if (op%symmetry /= op_general) then
537 call accel_create_buffer(op%buff_map_sym, accel_mem_read_only, type_integer, pad(op%mesh%np, accel_max_block_size()))
538 call accel_write_buffer(op%buff_map_sym, op%mesh%np, (op%rimap - 1)*(op%stencil%size/2))
539 end if
540
541 if (op%mesh%parallel_in_domains) then
542
543 safe_allocate(inner_points(1:op%mesh%np))
544 safe_allocate(outer_points(1:op%mesh%np))
545 safe_allocate(all_points(1:op%mesh%np))
546
547 op%ninner = 0
548 op%nouter = 0
549
550 do ii = 1, op%mesh%np
551 all_points(ii) = ii - 1
552 maxp = ii + maxval(op%ri(1:op%stencil%size, op%rimap(ii)))
553 if (maxp <= op%mesh%np) then
554 op%ninner = op%ninner + 1
555 inner_points(op%ninner) = ii - 1
556 else
557 op%nouter = op%nouter + 1
558 outer_points(op%nouter) = ii - 1
559 end if
560 end do
561
563 call accel_write_buffer(op%buff_all, op%mesh%np, all_points)
564
566 call accel_write_buffer(op%buff_inner, op%ninner, inner_points)
567
569 call accel_write_buffer(op%buff_outer, op%nouter, outer_points)
570
571 safe_deallocate_a(inner_points)
572 safe_deallocate_a(outer_points)
573 safe_deallocate_a(all_points)
574
575 end if
576 end select
577 end if
578
579 pop_sub(nl_operator_build)
580
581 end subroutine nl_operator_build
582
583 ! ---------------------------------------------------------
584 subroutine nl_operator_output_weights(this)
585 type(nl_operator_t), intent(inout) :: this
586
587 integer :: istencil, idir
588
590
591 write(message(1), '(3a)') 'Debug info: Finite difference weights for ', trim(this%label), '.'
592 write(message(2), '(a)') ' Spacing:'
593 do idir = 1, this%mesh%box%dim
594 write(message(2), '(a,f16.8)') trim(message(2)), this%mesh%spacing(idir)
595 end do
596 call messages_info(2, debug_only=.true.)
597
598 do istencil = 1, this%stencil%size
599 select case(this%mesh%box%dim)
600 case(1)
601 write(message(1), '(a,i3,1i4,f25.10)') ' ', istencil, this%stencil%points(1:1, istencil), this%w(istencil, 1)
602 case(2)
603 write(message(1), '(a,i3,2i4,f25.10)') ' ', istencil, this%stencil%points(1:2, istencil), this%w(istencil, 1)
604 case(3)
605 write(message(1), '(a,i3,3i4,f25.10)') ' ', istencil, this%stencil%points(1:3, istencil), this%w(istencil, 1)
606 end select
607 call messages_info(1, debug_only=.true.)
608 end do
609
611
612 end subroutine nl_operator_output_weights
613
614 ! ---------------------------------------------------------
615 subroutine nl_operator_end(op)
616 type(nl_operator_t), intent(inout) :: op
617
618 push_sub(nl_operator_end)
619
620 if (accel_is_enabled() .and. op%const_w) then
622 end if
623
624 safe_deallocate_a(op%inner%imin)
625 safe_deallocate_a(op%inner%imax)
626 safe_deallocate_a(op%inner%ri)
627 safe_deallocate_a(op%outer%imin)
628 safe_deallocate_a(op%outer%imax)
629 safe_deallocate_a(op%outer%ri)
630
631 safe_deallocate_a(op%w)
632
633 safe_deallocate_a(op%ri)
634 safe_deallocate_a(op%rimap)
635 safe_deallocate_a(op%rimap_inv)
636 safe_deallocate_a(op%nn)
637
638 safe_deallocate_a(op%wpair)
639 safe_deallocate_a(op%ri_pos)
640 safe_deallocate_a(op%ri_neg)
641
642 safe_deallocate_a(op%inner%ri_pos)
643 safe_deallocate_a(op%inner%ri_neg)
644 safe_deallocate_a(op%outer%ri_pos)
645 safe_deallocate_a(op%outer%ri_neg)
646 call stencil_end(op%stencil)
647
648 pop_sub(nl_operator_end)
649 end subroutine nl_operator_end
650
651
652 subroutine nl_operator_clear_gpu_buffers(op)
653 type(nl_operator_t), intent(inout) :: op
654
656
657 call accel_free_buffer(op%buff_ri)
658 select case (function_accel)
659 case (op_invmap)
660 call accel_free_buffer(op%buff_imin)
661 call accel_free_buffer(op%buff_imax)
662
663 case (op_map)
664 call accel_free_buffer(op%buff_map)
665 if (op%symmetry /= op_general) then
666 call accel_free_buffer(op%buff_map_sym)
667 end if
668 if (op%mesh%parallel_in_domains) then
669 call accel_free_buffer(op%buff_all)
670 call accel_free_buffer(op%buff_inner)
671 call accel_free_buffer(op%buff_outer)
672 end if
673
674 case (op_nomap)
675 call accel_free_buffer(op%buff_map)
676 call accel_free_buffer(op%buff_stencil)
677 call accel_free_buffer(op%buff_xyz_to_ip)
678 call accel_free_buffer(op%buff_ip_to_xyz)
679 end select
680
681 call accel_free_buffer(op%buff_weights)
682 call accel_free_buffer(op%buff_half_weights)
683
684 if (op%symmetry /= op_general) then
685 call accel_free_buffer(op%buff_wpair)
686 call accel_free_buffer(op%buff_half_wpair)
687 if (op%max_allocated_ri_pair_gpu > 0) then
688 call accel_free_buffer(op%buff_ri_pos)
689 call accel_free_buffer(op%buff_ri_neg)
690 op%max_allocated_ri_pair_gpu = 0
691 end if
692 end if
693
695 end subroutine nl_operator_clear_gpu_buffers
696
697 ! ---------------------------------------------------------
698 integer pure function nl_operator_get_index(op, is, ip) result(res)
699 type(nl_operator_t), intent(in) :: op
700 integer, intent(in) :: is
701 integer, intent(in) :: ip
702
703 res = ip + op%ri(is, op%rimap(ip))
704 end function nl_operator_get_index
705
706 ! ---------------------------------------------------------
707
709 type(nl_operator_t), intent(inout) :: op
712
713 ! Update the GPU weights
714 if (accel_is_enabled() .and. op%const_w) then
715 call accel_create_buffer(op%buff_weights, accel_mem_read_only, type_float, op%stencil%size)
716 call accel_create_buffer(op%buff_half_weights, accel_mem_read_only, type_float, op%stencil%size)
717 if (op%symmetry /= op_general) then
718 call accel_create_buffer(op%buff_wpair, accel_mem_read_only, type_float, op%stencil%size/2)
719 call accel_create_buffer(op%buff_half_wpair, accel_mem_read_only, type_float, op%stencil%size/2)
720 end if
721 end if
722
725
726
727 ! ---------------------------------------------------------
728
729 subroutine nl_operator_update_gpu_buffers(op)
730 type(nl_operator_t), intent(inout) :: op
731
732 integer(int64) :: buf_size
733
735
736 ! Update the GPU weights
737 if (accel_is_enabled() .and. op%const_w) then
738 call accel_write_buffer(op%buff_weights, op%stencil%size, op%w(:, 1))
739 call accel_write_buffer(op%buff_half_weights, op%stencil%size, -m_half*op%w(:, 1))
740
741 if (op%symmetry /= op_general) then
742 call accel_write_buffer(op%buff_wpair, op%npairs, op%wpair)
743 call accel_write_buffer(op%buff_half_wpair, op%npairs, -m_half*op%wpair)
744
745 ! (Re)allocate the pair-index buffers if max_allocated_ri_pair has grown
746 if (op%max_allocated_ri_pair > op%max_allocated_ri_pair_gpu) then
747 if (op%max_allocated_ri_pair_gpu > 0) then
748 call accel_free_buffer(op%buff_ri_pos)
749 call accel_free_buffer(op%buff_ri_neg)
750 end if
751 buf_size = int(op%npairs, int64)*op%nri*op%max_allocated_ri_pair
752 call accel_create_buffer(op%buff_ri_pos, accel_mem_read_only, type_integer, buf_size)
753 call accel_create_buffer(op%buff_ri_neg, accel_mem_read_only, type_integer, buf_size)
754 op%max_allocated_ri_pair_gpu = op%max_allocated_ri_pair
755 end if
756
757 if (op%max_allocated_ri_pair > 0) then
758 call accel_write_buffer(op%buff_ri_pos, op%npairs, op%nri, op%max_allocated_ri_pair, op%ri_pos)
759 call accel_write_buffer(op%buff_ri_neg, op%npairs, op%nri, op%max_allocated_ri_pair, op%ri_neg)
760 end if
761 end if
762 end if
763
765 end subroutine nl_operator_update_gpu_buffers
766
767 ! ---------------------------------------------------------
768
769 integer pure function nl_operator_np_zero_bc(op) result(np_bc)
770 type(nl_operator_t), intent(in) :: op
771
772 integer :: jj, ii
773
774 np_bc = 0
775 do jj = 1, op%nri
776 ii = op%rimap_inv(jj + 1) + maxval(op%ri(1:op%stencil%size, jj))
777 np_bc = max(np_bc, ii)
778 end do
779
780 end function nl_operator_np_zero_bc
781
782
783 ! ---------------------------------------------------------
785 subroutine nl_operator_remove_zero_weight_points(op, space, mesh)
786 type(nl_operator_t), intent(inout) :: op
787 type(space_t), intent(in) :: space
788 class(mesh_t), intent(in) :: mesh
789
790 integer :: ip, size
791 real(real64), parameter :: tol = 1.0e-14_real64
792 real(real64) :: max_weight, new_w(op%stencil%size)
793 integer :: new_points(space%dim, op%stencil%size)
794
795 if (.not. op%const_w) return
796
798
799 max_weight = maxval(abs(op%w(:, 1)))
800 size = 0
801 do ip = 1, op%stencil%size
802 if (abs(op%w(ip, 1)) > tol * max_weight) then
803 size = size +1
804 new_w(size) = op%w(ip, 1)
805 new_points(:,size) = op%stencil%points(:, ip)
806 end if
807 end do
808
809 ! We regenerate the stencil without the zero-weight points
810 op%stencil%size = size
811 safe_deallocate_a(op%stencil%points)
812 safe_allocate(op%stencil%points(space%dim, op%stencil%size))
813 op%stencil%points(:, :) = new_points(:, 1:size)
814 safe_deallocate_a(op%w)
816 call nl_operator_build(space, mesh, op, mesh%np, const_w=op%const_w, regenerate=.true.)
817 op%w(1:size, 1) = new_w(1:size)
818
819 !Update Stencil%center
820 call stencil_init_center(op%stencil)
821
826 subroutine group_by_pairs_sym(size, ldf, offsets, wre, ri, nri, npairs, wpair, pair_pos, pair_neg, wcenter)
827 integer, intent(in) :: size
828 integer, intent(in) :: ldf
829 integer, intent(in) :: offsets(:, :)
830 real(real64), intent(in) :: wre(:)
831 integer, intent(in) :: ri(:, :)
832 integer, intent(in) :: nri
833 integer, intent(out) :: npairs
834 real(real64), intent(inout) :: wpair(:)
835 integer, intent(inout) :: pair_pos(:,:), pair_neg(:,:)
836 real(real64), intent(out) :: wcenter
837
838 logical, allocatable :: used(:)
839 integer :: i, j, ndim, s
840 logical :: same
841 integer, allocatable :: idx(:)
842
843 real(real64), parameter :: tol = 1.0e-12_real64
844
845 push_sub(group_by_pairs_sym)
846
847 assert(mod(size,2) == 1)
848
849 safe_allocate(used(1:size))
850 used = .false.
851 npairs = 0
852
853 ndim = ubound(offsets, dim=1)
854
855 safe_allocate(idx(1:size))
856 call robust_sort_by_abs(wre, offsets, idx)
857
858 do i = 1, size
859 if (used(i)) cycle
860
861 if (all(offsets(:, idx(i))==0)) then
862 wcenter = wre(idx(i))
863 used(i) = .true.
864 cycle
865 end if
866
867 ! Try to find symmetric partner j
868 do j = i+1, size
869 if (used(j)) cycle
870
871 ! Weight equality
872 same = abs(wre(idx(i)) - wre(idx(j))) <= tol
873 if (.not. same) cycle
874
875 ! Offsets equal and opposite
876 if (any(offsets(:,idx(j))+offsets(:, idx(i)) /= 0)) cycle
877
878 npairs = npairs + 1
879 do s = 1, nri
880 pair_pos(npairs, s) = ri(idx(i), s) * 2**ldf
881 pair_neg(npairs, s) = ri(idx(j), s) * 2**ldf
882 end do
883 wpair(npairs) = m_half*(wre(idx(i)) + wre(idx(j)))
884
885 used(i) = .true.
886 used(j) = .true.
887 exit
888 end do
889 end do
890
891 assert(npairs == size/2)
892
893 safe_deallocate_a(idx)
894
895 pop_sub(group_by_pairs_sym)
896 end subroutine group_by_pairs_sym
897
899 subroutine group_by_pairs_antisym(size, ldf, offsets, wre, ri, nri, npairs, wpair, pair_pos, pair_neg)
900 integer, intent(in) :: size
901 integer, intent(in) :: ldf
902 integer, intent(in) :: offsets(:, :)
903 real(real64), intent(in) :: wre(:)
904 integer, intent(in) :: ri(:, :)
905 integer, intent(in) :: nri
906 integer, intent(out) :: npairs
907 real(real64), intent(inout) :: wpair(:)
908 integer, intent(inout) :: pair_pos(:,:), pair_neg(:,:)
909
910 logical, allocatable :: used(:)
911 integer :: i, j, ndim, s
912 logical :: same
913 integer, allocatable :: idx(:)
914
915 real(real64), parameter :: tol = 1.0e-12_real64
916
917 push_sub(group_by_pairs_antisym)
918
919 assert(mod(size,2) == 0)
920
921 safe_allocate(used(1:size))
922 used = .false.
923 npairs = 0
924
925 ndim = ubound(offsets, dim=1)
926
927 safe_allocate(idx(1:size))
928 call robust_sort_by_abs(wre, offsets, idx)
929
930 ! Max pairs = n/2
931 do i = 1, size
932 if (used(i)) cycle
933
934 ! Try to find symmetric partner j
935 do j = i+1, size
936 if (used(j)) cycle
937
938 ! Weight equality
939 same = abs(wre(idx(i)) + wre(idx(j))) <= tol
940 if (.not. same) cycle
941
942 ! Offsets equal and opposite
943 if (any(offsets(:,idx(j))+offsets(:, idx(i)) /= 0)) cycle
944
945 npairs = npairs + 1
946 do s = 1, nri
947 pair_pos(npairs, s) = ri(idx(i), s) * 2**ldf
948 pair_neg(npairs, s) = ri(idx(j), s) * 2**ldf
949 end do
950 wpair(npairs) = m_half*(wre(idx(i)) - wre(idx(j)))
951
952 used(i) = .true.
953 used(j) = .true.
954 exit
955 end do
956 end do
957
958 assert(npairs == size/2)
959
960 safe_deallocate_a(idx)
962 end subroutine group_by_pairs_antisym
963
965 subroutine nl_operator_build_symmetric_weights(op, max_size)
966 type(nl_operator_t), intent(inout) :: op
967 integer, optional, intent(in) :: max_size
968
969 integer :: ldf, start, end, ipair
970
971 if (op%symmetry == op_general) return
972
974
975 assert(op%const_w)
976 assert(conf%target_states_block_size > 0)
977
978 if(present(max_size)) then
979 start = op%max_allocated_ri_pair + 1
980 end = max_size
981 call reallocate_array(op%ri_pos, op%stencil%size/2, op%nri, op%max_allocated_ri_pair, end)
982 call reallocate_array(op%ri_neg, op%stencil%size/2, op%nri, op%max_allocated_ri_pair, end)
983 if (op%mesh%parallel_in_domains) then
984 call reallocate_array(op%inner%ri_pos, op%stencil%size/2, op%inner%nri, op%max_allocated_ri_pair, end)
985 call reallocate_array(op%inner%ri_neg, op%stencil%size/2, op%inner%nri, op%max_allocated_ri_pair, end)
986 call reallocate_array(op%outer%ri_pos, op%stencil%size/2, op%outer%nri, op%max_allocated_ri_pair, end)
987 call reallocate_array(op%outer%ri_neg, op%stencil%size/2, op%outer%nri, op%max_allocated_ri_pair, end)
988 end if
989 else
990 ! Max pairs = n/2
991 safe_allocate(op%wpair(1:op%stencil%size/2))
992 start = 1
993 end = log2(conf%target_states_block_size)+1
995 safe_allocate(op%ri_pos(1:op%stencil%size/2, 1:op%nri, 1:end))
996 safe_allocate(op%ri_neg(1:op%stencil%size/2, 1:op%nri, 1:end))
997 if (op%mesh%parallel_in_domains) then
998 safe_allocate(op%inner%ri_pos(1:op%stencil%size/2, 1:op%inner%nri, 1:end))
999 safe_allocate(op%inner%ri_neg(1:op%stencil%size/2, 1:op%inner%nri, 1:end))
1000 safe_allocate(op%outer%ri_pos(1:op%stencil%size/2, 1:op%outer%nri, 1:end))
1001 safe_allocate(op%outer%ri_neg(1:op%stencil%size/2, 1:op%outer%nri, 1:end))
1002 end if
1003 end if
1004 op%max_allocated_ri_pair = end
1005
1006 do ldf = start-1, end-1
1007 select case(op%symmetry)
1008 case(op_symmetric)
1009 call group_by_pairs_sym(op%stencil%size, ldf, op%stencil%points, op%w(:,1), op%ri, op%nri, &
1010 op%npairs, op%wpair, op%ri_pos(:,:,ldf+1), op%ri_neg(:,:,ldf+1), op%wcenter)
1011 if (op%mesh%parallel_in_domains) then
1012 call group_by_pairs_sym(op%stencil%size, ldf, op%stencil%points, op%w(:,1), op%inner%ri, op%inner%nri, &
1013 op%npairs, op%wpair, op%inner%ri_pos(:,:,ldf+1), op%inner%ri_neg(:,:,ldf+1), op%wcenter)
1014 call group_by_pairs_sym(op%stencil%size, ldf, op%stencil%points, op%w(:,1), op%outer%ri, op%outer%nri, &
1015 op%npairs, op%wpair, op%outer%ri_pos(:,:,ldf+1), op%outer%ri_neg(:,:,ldf+1), op%wcenter)
1016 end if
1017 case(op_antisymmetric)
1018 call group_by_pairs_antisym(op%stencil%size, ldf, op%stencil%points, op%w(:,1), op%ri, op%nri, &
1019 op%npairs, op%wpair, op%ri_pos(:,:,ldf+1), op%ri_neg(:,:,ldf+1))
1020 if (op%mesh%parallel_in_domains) then
1021 call group_by_pairs_antisym(op%stencil%size, ldf, op%stencil%points, op%w(:,1), op%inner%ri, op%inner%nri, &
1022 op%npairs, op%wpair, op%inner%ri_pos(:,:,ldf+1), op%inner%ri_neg(:,:,ldf+1))
1023 call group_by_pairs_antisym(op%stencil%size, ldf, op%stencil%points, op%w(:,1), op%outer%ri, op%outer%nri, &
1024 op%npairs, op%wpair, op%outer%ri_pos(:,:,ldf+1), op%outer%ri_neg(:,:,ldf+1))
1025 end if
1026 end select
1027 end do
1028
1029 if (.not. present(max_size)) then
1030 write(message(1), '(3a)') 'Debug info: Sorted weights for ', trim(op%label), '.'
1031 call messages_info(1, debug_only=.true.)
1032
1033 do ipair = 1, op%npairs
1034 write(message(1), '(a,i3,f25.10,2(1x,i4))') ' ', ipair, op%wpair(ipair), op%ri_pos(ipair,1,1), op%ri_neg(ipair,1,1)
1035 call messages_info(1, debug_only=.true.)
1036 end do
1037 end if
1038
1041
1043 subroutine reallocate_array(ri, stencil_size, nri, old_size, new_size)
1044 integer, allocatable, intent(inout) :: ri(:,:,:)
1045 integer, intent(in) :: stencil_size, nri
1046 integer, intent(in) :: old_size, new_size
1047
1048 integer, allocatable :: tmp(:,:,:)
1049
1050 safe_allocate_source_a(tmp, ri)
1051 safe_deallocate_a(ri)
1052 safe_allocate(ri(1:stencil_size, 1:nri, 1:new_size))
1053 ri(:,:,1:old_size) = tmp
1054 safe_deallocate_a(tmp)
1055 end subroutine reallocate_array
1056
1057#include "undef.F90"
1058#include "real.F90"
1059#include "nl_operator_inc.F90"
1061#include "undef.F90"
1062#include "complex.F90"
1063#include "nl_operator_inc.F90"
1064
1065end module nl_operator_oct_m
1066
1067!! Local Variables:
1068!! mode: f90
1069!! coding: utf-8
1070!! End:
subroutine, public accel_kernel_build(this, file_name, kernel_name, flags)
Definition: accel.F90:1371
pure logical function, public accel_is_enabled()
Definition: accel.F90:402
integer, parameter, public accel_mem_read_only
Definition: accel.F90:185
integer pure function, public accel_max_block_size()
Definition: accel.F90:1182
This module implements batches of mesh functions.
Definition: batch.F90:135
Module implementing boundary conditions in Octopus.
Definition: boundaries.F90:124
real(real64), parameter, public m_zero
Definition: global.F90:200
This module implements the index, used for the mesh points.
Definition: index.F90:124
This module is intended to contain "only mathematical" functions and procedures.
Definition: math.F90:117
This module defines the meshes, which are used in Octopus.
Definition: mesh.F90:120
integer function, public mesh_local_index_from_coords(mesh, ix)
This function returns the local index of the point for a given vector of integer coordinates.
Definition: mesh.F90:938
subroutine, public mesh_local_index_to_coords(mesh, ip, ix)
Given a local point index, this function returns the set of integer coordinates of the point.
Definition: mesh.F90:950
subroutine, public messages_not_implemented(feature, namespace)
Definition: messages.F90:1068
subroutine, public messages_obsolete_variable(namespace, name, rep)
Definition: messages.F90:1000
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
Definition: messages.F90:162
subroutine, public messages_input_error(namespace, var, details, row, column)
Definition: messages.F90:691
subroutine, public messages_experimental(name, namespace)
Definition: messages.F90:1040
subroutine, public messages_info(no_lines, iunit, debug_only, stress, all_nodes, namespace)
Definition: messages.F90:594
This module handles the communicators for the various parallelization strategies.
Definition: multicomm.F90:147
This module defines non-local operators.
subroutine, public dnl_operator_operate_diag(op, fo)
integer, parameter op_map
integer, parameter op_max
integer, parameter op_vec
subroutine, public nl_operator_init(op, label, symm)
initialize an instance of a non-local operator by setting the label
subroutine, public dnl_operator_operate_batch(op, fi, fo, ghost_update, profile, points, factor, async)
subroutine nl_operator_clear_gpu_buffers(op)
subroutine, public nl_operator_build_symmetric_weights(op, max_size)
Builds (or rebuild) the necessary arrays for symmetric and antisymmetric stencils.
integer dfunction_global
subroutine group_by_pairs_sym(size, ldf, offsets, wre, ri, nri, npairs, wpair, pair_pos, pair_neg, wcenter)
Take a list of weights and offsets and build pairs of symmetric points with common weights.
subroutine group_by_pairs_antisym(size, ldf, offsets, wre, ri, nri, npairs, wpair, pair_pos, pair_neg)
Take a list of weights and offsets and build pairs of symmetric points with common weights.
subroutine, public dnl_operator_operate(op, fi, fo, ghost_update, profile, points)
subroutine, public nl_operator_update_gpu_buffers(op)
subroutine, public nl_operator_global_init(namespace)
initialize global settings for non-local operators
subroutine, public nl_operator_output_weights(this)
integer, parameter, public op_general
subroutine, public nl_operator_end(op)
integer, parameter, public op_inner
subroutine, public znl_operator_operate(op, fi, fo, ghost_update, profile, points)
subroutine, public nl_operator_remove_zero_weight_points(op, space, mesh)
Removes the zero-weight points for constant weight stencils.
integer zfunction_global
integer, parameter op_invmap
integer, parameter, public op_symmetric
subroutine, public nl_operator_global_end()
integer, parameter, public op_outer
subroutine, public nl_operator_build(space, mesh, op, np, const_w, regenerate)
Creates the nonlocal operators for the stencils used for finite differences.
subroutine, public znl_operator_operate_batch(op, fi, fo, ghost_update, profile, points, factor, async)
integer, parameter op_nomap
integer pure function, public nl_operator_np_zero_bc(op)
integer, parameter, public op_antisymmetric
subroutine, public znl_operator_operate_diag(op, fo)
subroutine reallocate_array(ri, stencil_size, nri, old_size, new_size)
Reallocate an ri array.
integer pure function, public nl_operator_get_index(op, is, ip)
subroutine, public nl_operator_allocate_gpu_buffers(op)
integer, parameter op_min
This module contains interfaces for routines in operate.c.
Definition: operate_f.F90:119
Some general things and nomenclature:
Definition: par_vec.F90:173
This module is intended to contain "only mathematical" functions and procedures.
Definition: sort.F90:119
This module defines stencils used in Octopus.
Definition: stencil.F90:137
type(type_t), public type_integer
Definition: types.F90:137
Describes mesh distribution to nodes.
Definition: mesh.F90:187
index type for non-local operators
data type for non local operators
int true(void)