68 integer,
public :: nst
69 integer,
public :: dim
72 integer,
allocatable :: ist_idim_index(:, :)
77 integer,
allocatable,
public :: ist(:)
84 logical :: is_allocated
87 integer,
public :: nst_linear
93 integer :: status_host
102 type(type_t) :: type_of
103 integer :: device_buffer_count
104 integer :: host_buffer_count
105 logical :: special_memory
106 logical :: needs_finish_unpack
110 real(real64),
pointer,
contiguous,
public :: dff(:, :, :)
112 complex(real64),
pointer,
contiguous,
public :: zff(:, :, :)
114 real(real64),
pointer,
contiguous,
public :: dff_linear(:, :)
116 complex(real64),
pointer,
contiguous,
public :: zff_linear(:, :)
120 real(real64),
pointer,
contiguous,
public :: dff_pack(:, :)
122 complex(real64),
pointer,
contiguous,
public :: zff_pack(:, :)
125 integer(int64),
public :: pack_size(1:2)
127 integer(int64),
public :: pack_size_real(1:2)
131 type(accel_mem_t),
public :: ff_device
141 generic :: do_pack => do_pack_generic, do_pack_target
191 integer,
public,
parameter :: &
192 batch_not_packed = 0, & !< functions are stored in CPU memory, unpacked order
210 class(
batch_t),
intent(inout) :: this
211 logical,
optional,
intent(in) :: copy
216 if (this%own_memory .and. this%is_packed())
then
219 call this%deallocate_packed_device()
222 call this%deallocate_packed_host()
226 this%host_buffer_count = 0
227 this%device_buffer_count = 0
232 if (this%is_allocated)
then
233 call this%deallocate_unpacked_host()
236 safe_deallocate_a(this%ist_idim_index)
237 safe_deallocate_a(this%ist)
252 this%is_allocated = .false.
254 if (this%special_memory)
then
255 if (
associated(this%dff))
then
258 if (
associated(this%zff))
then
262 safe_deallocate_p(this%dff)
263 safe_deallocate_p(this%zff)
266 nullify(this%dff_linear)
268 nullify(this%zff_linear)
279 class(batch_t),
intent(inout) :: this
283 if (this%special_memory)
then
284 if (
associated(this%dff_pack))
then
287 if (
associated(this%zff_pack))
then
291 safe_deallocate_p(this%dff_pack)
292 safe_deallocate_p(this%zff_pack)
294 nullify(this%dff_pack)
295 nullify(this%zff_pack)
319 class(
batch_t),
intent(inout) :: this
324 call this%dallocate_unpacked_host()
326 call this%zallocate_unpacked_host()
338 class(
batch_t),
intent(inout) :: this
343 call this%dallocate_packed_host()
345 call this%zallocate_packed_host()
357 class(
batch_t),
intent(inout) :: this
362 product(this%pack_size))
375 type(
batch_t),
intent(out) :: this
376 integer,
intent(in) :: dim
377 integer,
intent(in) :: nst
378 integer,
intent(in) :: np
382 this%is_allocated = .false.
383 this%own_memory = .false.
384 this%special_memory = .false.
385 this%needs_finish_unpack = .false.
390 this%nst_linear = nst*dim
393 this%device_buffer_count = 0
394 this%host_buffer_count = 0
399 safe_allocate(this%ist_idim_index(1:this%nst_linear, 1:this%ndims))
400 safe_allocate(this%ist(1:this%nst))
402 nullify(this%dff, this%zff, this%dff_linear, this%zff_linear)
403 nullify(this%dff_pack, this%zff_pack)
414 subroutine batch_clone_to(this, dest, pack, copy_data, new_np, special, dest_type)
415 class(
batch_t),
intent(in) :: this
416 class(
batch_t),
allocatable,
intent(out) :: dest
417 logical,
optional,
intent(in) :: pack
419 logical,
optional,
intent(in) :: copy_data
421 integer,
optional,
intent(in) :: new_np
422 logical,
optional,
intent(in) :: special
424 type(
type_t),
optional,
intent(in) :: dest_type
428 if (.not.
allocated(dest))
then
429 safe_allocate_type(
batch_t, dest)
431 message(1) =
"Internal error: destination batch in batch_clone_to has been previously allocated."
435 call this%copy_to(dest, pack, copy_data, new_np, special, dest_type)
442 subroutine batch_clone_to_array(this, dest, n_batches, pack, copy_data, new_np, special, dest_type)
443 class(
batch_t),
intent(in) :: this
444 class(
batch_t),
allocatable,
intent(out) :: dest(:)
445 integer,
intent(in) :: n_batches
446 logical,
optional,
intent(in) :: pack
448 logical,
optional,
intent(in) :: copy_data
450 integer,
optional,
intent(in) :: new_np
451 logical,
optional,
intent(in) :: special
453 type(
type_t),
optional,
intent(in) :: dest_type
459 if (.not.
allocated(dest))
then
460 safe_allocate_type_array(
batch_t, dest, (1:n_batches))
462 message(1) =
"Internal error: destination batch in batch_clone_to_array has been previously allocated."
467 call this%copy_to(dest(ib), pack, copy_data, new_np, special, dest_type)
478 subroutine batch_copy_to(this, dest, pack, copy_data, new_np, special, dest_type)
479 class(
batch_t),
intent(in) :: this
480 class(
batch_t),
intent(out) :: dest
481 logical,
optional,
intent(in) :: pack
483 logical,
optional,
intent(in) :: copy_data
485 integer,
optional,
intent(in) :: new_np
486 logical,
optional,
intent(in) :: special
488 type(
type_t),
optional,
intent(in) :: dest_type
490 logical :: host_packed, special_
498 host_packed = this%host_buffer_count > 0
502 if (
present(special))
then
503 special_ = this%special_memory
505 special_ = this%special_memory .and. .not. this%device_buffer_count > 0
508 if (
present(dest_type))
then
515 call dbatch_init(dest, this%dim, 1, this%nst, np_, packed=host_packed, special=special_)
517 call zbatch_init(dest, this%dim, 1, this%nst, np_, packed=host_packed, special=special_)
519 message(1) =
"Internal error: unknown batch type in batch_copy_to."
523 if (this%status() /= dest%status() .and.
optional_default(pack, this%is_packed()))
call dest%do_pack(copy = .false.)
525 dest%ist_idim_index(1:this%nst_linear, 1:this%ndims) = this%ist_idim_index(1:this%nst_linear, 1:this%ndims)
526 dest%ist(1:this%nst) = this%ist(1:this%nst)
529 assert(np_ == this%np)
530 call this%copy_data_to(min(this%np, np_), dest)
541 type(
type_t)
pure function batch_type(this) result(btype)
542 class(
batch_t),
intent(in) :: this
550 integer pure function batch_type_as_integer(this) result(itype)
551 class(
batch_t),
intent(in) :: this
553 type(type_t) :: btype
557 if (btype == type_float) itype = 1
558 if (btype == type_cmplx) itype = 2
567 integer pure function batch_status(this) result(bstatus)
568 class(
batch_t),
intent(in) :: this
570 bstatus = this%status_of
575 logical pure function batch_is_packed(this) result(in_buffer)
576 class(
batch_t),
intent(in) :: this
578 in_buffer = (this%device_buffer_count > 0) .or. (this%host_buffer_count > 0)
584 class(
batch_t),
intent(inout) :: this
587 if (accel_is_enabled())
size = accel_padded_size(size)
588 size = size*pad_pow2(this%nst_linear)*types_get_size(this%type())
600 class(
batch_t),
intent(inout) :: this
601 logical,
optional,
intent(in) :: copy
602 logical,
optional,
intent(in) :: async
605 integer :: source, target
610 source = this%status()
613 if (accel_is_enabled())
then
622 call this%do_pack(
target, copy, async)
631 class(
batch_t),
intent(inout) :: this
632 integer,
intent(in) ::
target
633 logical,
optional,
intent(in) :: copy
634 logical,
optional,
intent(in) :: async
636 logical,
optional,
intent(in) :: cpu_only
644 call profiling_in(
"BATCH_DO_PACK")
646 copy_ = optional_default(copy, .
true.)
648 async_ = optional_default(async, .false.)
651 source = this%status()
654 if (source /=
target)
then
657 call this%allocate_packed_device()
671 call this%allocate_packed_host()
676 if (this%type() == type_float)
then
678 else if (this%type() == type_cmplx)
then
682 if (this%own_memory)
call this%deallocate_unpacked_host()
684 call messages_not_implemented(
"Error: batch_do_pack called with BATCH_NOT_PACKED as target.")
690 this%device_buffer_count = this%device_buffer_count + 1
692 this%host_buffer_count = this%host_buffer_count + 1
695 call profiling_out(
"BATCH_DO_PACK")
705 class(
batch_t),
intent(inout) :: this
706 logical,
optional,
intent(in) :: copy
707 logical,
optional,
intent(in) :: force
708 logical,
optional,
intent(in) :: async
711 logical :: copy_, force_, async_
712 integer :: source, target
716 call profiling_in(
"BATCH_DO_UNPACK")
718 copy_ = optional_default(copy, .
true.)
720 force_ = optional_default(force, .false.)
722 async_ = optional_default(async, .false.)
725 source = this%status()
732 target = this%status_host
736 if (source /=
target)
then
739 if (this%host_buffer_count == 1 .or. force_)
then
740 if (this%own_memory)
call this%allocate_unpacked_host()
742 if (copy_ .or. this%own_memory)
then
743 if (this%type() == type_float)
then
745 else if (this%type() == type_cmplx)
then
749 call this%deallocate_packed_host()
750 this%status_host =
target
751 this%status_of =
target
752 this%host_buffer_count = 1
754 this%host_buffer_count = this%host_buffer_count - 1
756 if (this%device_buffer_count == 1 .or. force_)
then
768 this%needs_finish_unpack = .
true.
770 call this%deallocate_packed_device()
772 this%status_of =
target
773 this%device_buffer_count = 1
775 this%device_buffer_count = this%device_buffer_count - 1
779 call profiling_out(
"BATCH_DO_UNPACK")
787 class(
batch_t),
intent(inout) :: this
790 if (this%needs_finish_unpack)
then
792 call this%deallocate_packed_device()
793 this%needs_finish_unpack = .false.
801 class(
batch_t),
intent(inout) :: this
804 integer(int64) :: unroll
805 type(accel_mem_t) :: tmp
806 type(accel_kernel_t),
pointer :: kernel
810 call profiling_in(
"BATCH_WRT_UNPACK_ACCEL")
811 if (this%nst_linear == 1)
then
813 if (this%type() == type_float)
then
814 call accel_write_buffer(this%ff_device, ubound(this%dff_linear, dim=1), this%dff_linear(:, 1))
815 else if (this%type() == type_cmplx)
then
816 call accel_write_buffer(this%ff_device, ubound(this%zff_linear, dim=1), this%zff_linear(:, 1))
824 if (this%type() == type_float)
then
832 call accel_create_buffer(tmp, accel_mem_read_only, this%type(), unroll*this%pack_size(2))
834 do ist = 1, this%nst_linear, int(unroll, int32)
837 do ist2 = ist, min(ist + int(unroll, int32) - 1, this%nst_linear)
839 if (this%type() == type_float)
then
840 call accel_write_buffer(tmp, ubound(this%dff_linear, dim=1, kind=int64), this%dff_linear(:, ist2), &
841 offset = (ist2 - ist)*this%pack_size(2))
843 call accel_write_buffer(tmp, ubound(this%zff_linear, dim=1, kind=int64), this%zff_linear(:, ist2), &
844 offset = (ist2 - ist)*this%pack_size(2))
849 call accel_set_kernel_arg(kernel, 0, int(this%pack_size(1), int32))
850 call accel_set_kernel_arg(kernel, 1, this%np)
851 call accel_set_kernel_arg(kernel, 2, this%nst_linear)
852 call accel_set_kernel_arg(kernel, 3, ist - 1)
853 call accel_set_kernel_arg(kernel, 4, tmp)
854 call accel_set_kernel_arg(kernel, 5, this%ff_device)
856 call profiling_in(
"CL_PACK")
857 call accel_kernel_run(kernel, (/this%pack_size(2), unroll/), (/accel_max_workgroup_size()/unroll, unroll/))
859 if (this%type() == type_float)
then
860 call profiling_count_transfers(unroll*this%pack_size(2), m_one)
862 call profiling_count_transfers(unroll*this%pack_size(2), m_zi)
866 call profiling_out(
"CL_PACK")
870 call accel_release_buffer(tmp)
874 call profiling_out(
"BATCH_WRT_UNPACK_ACCEL")
884 integer(int64) :: unroll
885 type(accel_mem_t) :: tmp
886 type(accel_kernel_t),
pointer :: kernel
889 call profiling_in(
"BATCH_READ_UNPACKED_ACCEL")
891 if (this%nst_linear == 1)
then
893 if (this%type() == type_float)
then
894 call accel_read_buffer(this%ff_device, ubound(this%dff_linear, dim=1), this%dff_linear(:, 1))
896 call accel_read_buffer(this%ff_device, ubound(this%zff_linear, dim=1), this%zff_linear(:, 1))
903 call accel_create_buffer(tmp, accel_mem_write_only, this%type(), unroll*this%pack_size(2))
905 if (this%type() == type_float)
then
911 do ist = 1, this%nst_linear, int(unroll, int32)
912 call accel_set_kernel_arg(kernel, 0, int(this%pack_size(1), int32))
913 call accel_set_kernel_arg(kernel, 1, this%np)
914 call accel_set_kernel_arg(kernel, 2, this%nst_linear)
915 call accel_set_kernel_arg(kernel, 3, ist - 1)
916 call accel_set_kernel_arg(kernel, 4, this%ff_device)
917 call accel_set_kernel_arg(kernel, 5, tmp)
919 call profiling_in(
"CL_UNPACK")
920 call accel_kernel_run(kernel, (/unroll, this%pack_size(2)/), (/unroll, accel_max_workgroup_size()/unroll/))
922 if (this%type() == type_float)
then
923 call profiling_count_transfers(unroll*this%pack_size(2), m_one)
925 call profiling_count_transfers(unroll*this%pack_size(2), m_zi)
929 call profiling_out(
"CL_UNPACK")
932 do ist2 = ist, min(ist + int(unroll, int32) - 1, this%nst_linear)
934 if (this%type() == type_float)
then
935 call accel_read_buffer(tmp, ubound(this%dff_linear, dim=1, kind=int64), this%dff_linear(:, ist2), &
936 offset = (ist2 - ist)*this%pack_size(2))
938 call accel_read_buffer(tmp, ubound(this%zff_linear, dim=1, kind=int64), this%zff_linear(:, ist2), &
939 offset = (ist2 - ist)*this%pack_size(2))
945 call accel_release_buffer(tmp)
948 call profiling_out(
"BATCH_READ_UNPACKED_ACCEL")
954 class(
batch_t),
intent(inout) :: this
955 logical,
optional,
intent(in) :: async
960 call profiling_in(
"BATCH_WRITE_PACKED_ACCEL")
961 if (this%type() == type_float)
then
962 call accel_write_buffer(this%ff_device, this%pack_size(1), this%pack_size(2), this%dff_pack, async=async)
964 call accel_write_buffer(this%ff_device, this%pack_size(1), this%pack_size(2), this%zff_pack, async=async)
966 call profiling_out(
"BATCH_WRITE_PACKED_ACCEL")
973 class(
batch_t),
intent(inout) :: this
974 logical,
optional,
intent(in) :: async
979 call profiling_in(
"BATCH_READ_PACKED_ACCEL")
980 if (this%type() == type_float)
then
981 call accel_read_buffer(this%ff_device, this%pack_size(1), this%pack_size(2), this%dff_pack, async=async)
983 call accel_read_buffer(this%ff_device, this%pack_size(1), this%pack_size(2), this%zff_pack, async=async)
985 call profiling_out(
"BATCH_READ_PACKED_ACCEL")
996 class(
batch_t),
intent(in) :: this
997 integer,
intent(in) :: cind(:)
999 do index = 1, this%nst_linear
1000 if (all(cind(1:this%ndims) == this%ist_idim_index(index, 1:this%ndims)))
exit
1003 assert(index <= this%nst_linear)
1012 integer pure function batch_ist_idim_to_linear(this, cind) result(index)
1013 class(
batch_t),
intent(in) :: this
1014 integer,
intent(in) :: cind(:)
1016 if (ubound(cind, dim = 1) == 1)
then
1019 index = (cind(1) - 1)*this%dim + cind(2)
1030 integer pure function batch_linear_to_ist(this, linear_index) result(ist)
1031 class(
batch_t),
intent(in) :: this
1032 integer,
intent(in) :: linear_index
1034 ist = this%ist_idim_index(linear_index, 1)
1041 integer pure function batch_linear_to_idim(this, linear_index) result(idim)
1042 class(
batch_t),
intent(in) :: this
1043 integer,
intent(in) :: linear_index
1045 idim = this%ist_idim_index(linear_index, 2)
1060 class(
batch_t),
intent(inout) :: this
1061 type(mpi_grp_t),
intent(in) :: mpi_grp
1062 type(mpi_win),
intent(out) :: rma_win
1066 if (mpi_grp%size > 1)
then
1068 assert(.not. accel_is_enabled())
1072 if (this%type() == type_cmplx)
then
1074 call mpi_win_create(this%zff_pack(1, 1), int(product(this%pack_size)*types_get_size(this%type()), mpi_address_kind), &
1075 types_get_size(this%type()), mpi_info_null, mpi_grp%comm, rma_win)
1077 else if (this%type() == type_float)
then
1079 call mpi_win_create(this%dff_pack(1, 1), int(product(this%pack_size)*types_get_size(this%type()), mpi_address_kind), &
1080 types_get_size(this%type()), mpi_info_null, mpi_grp%comm, rma_win)
1083 message(1) =
"Internal error: unknown batch type in batch_remote_access_start."
1084 call messages_fatal(1)
1088 rma_win = mpi_win_null
1102 class(
batch_t),
intent(inout) :: this
1103 type(mpi_win),
intent(inout) :: rma_win
1107 if (rma_win /= mpi_win_null)
then
1109 call mpi_win_free(rma_win)
1111 call this%do_unpack()
1121 class(
batch_t),
intent(in) :: this
1122 integer,
intent(in) :: np
1123 class(
batch_t),
intent(inout) :: dest
1124 logical,
optional,
intent(in) :: async
1126 integer(int64) :: localsize, dim2, dim3
1130 call profiling_in(
"BATCH_COPY_DATA_TO")
1133 call this%check_compatibility_with(dest, type_check=.false.)
1135 if (this%type() == dest%type())
then
1136 select case (this%status())
1138 call accel_set_kernel_arg(kernel_copy, 0, np)
1139 call accel_set_kernel_arg(kernel_copy, 1, this%ff_device)
1140 call accel_set_kernel_arg(kernel_copy, 2,
log2(int(this%pack_size_real(1), int32)))
1141 call accel_set_kernel_arg(kernel_copy, 3, dest%ff_device)
1142 call accel_set_kernel_arg(kernel_copy, 4,
log2(int(dest%pack_size_real(1), int32)))
1144 localsize = accel_kernel_workgroup_size(kernel_copy)/dest%pack_size_real(1)
1146 dim3 = np/(accel_max_size_per_dim(2)*localsize) + 1
1147 dim2 = min(accel_max_size_per_dim(2)*localsize, pad(int(np, int64), localsize))
1149 call accel_kernel_run(kernel_copy, (/dest%pack_size_real(1), dim2, dim3/), (/dest%pack_size_real(1), localsize, 1_int64/))
1151 if(.not. optional_default(async, .false.))
call accel_finish()
1154 if (np*this%pack_size(1) > huge(0_int32))
then
1157 if (dest%type() == type_float)
then
1158 call blas_copy(int(this%pack_size(1), int32), this%dff_pack(1, ip), 1, dest%dff_pack(1, ip), 1)
1160 call blas_copy(int(this%pack_size(1), int32), this%zff_pack(1, ip), 1, dest%zff_pack(1, ip), 1)
1164 if (dest%type() == type_float)
then
1165 call blas_copy(int(this%pack_size(1)*np, int32), this%dff_pack(1, 1), 1, dest%dff_pack(1, 1), 1)
1167 call blas_copy(int(this%pack_size(1)*np, int32), this%zff_pack(1, 1), 1, dest%zff_pack(1, 1), 1)
1172 do ist = 1, dest%nst_linear
1173 if (dest%type() == type_cmplx)
then
1174 call blas_copy(np, this%zff_linear(1, ist), 1, dest%zff_linear(1, ist), 1)
1176 call blas_copy(np, this%dff_linear(1, ist), 1, dest%dff_linear(1, ist), 1)
1181 else if (this%type() == type_cmplx)
then
1183 select case (this%status())
1185 call accel_set_kernel_arg(kernel_copy_complex_to_real, 0, np)
1186 call accel_set_kernel_arg(kernel_copy_complex_to_real, 1, this%ff_device)
1187 call accel_set_kernel_arg(kernel_copy_complex_to_real, 2,
log2(int(this%pack_size_real(1), int32)))
1188 call accel_set_kernel_arg(kernel_copy_complex_to_real, 3, dest%ff_device)
1189 call accel_set_kernel_arg(kernel_copy_complex_to_real, 4,
log2(int(dest%pack_size_real(1), int32)))
1191 localsize = accel_kernel_workgroup_size(kernel_copy_complex_to_real)/dest%pack_size_real(1)
1193 dim3 = np/(accel_max_size_per_dim(2)*localsize) + 1
1194 dim2 = min(accel_max_size_per_dim(2)*localsize, pad(int(np, int64), localsize))
1196 call accel_kernel_run(kernel_copy_complex_to_real, &
1197 (/dest%pack_size_real(1), dim2, dim3/), (/dest%pack_size_real(1), localsize, 1_int64/))
1199 if(.not. optional_default(async, .false.))
call accel_finish()
1205 do ist = 1, dest%nst_linear
1206 dest%dff_pack(ist, ip) = real(this%zff_pack(ist, ip), real64)
1212 do ist = 1, dest%nst_linear
1215 dest%dff_linear(ip, ist) = real(this%zff_linear(ip, ist), real64)
1221 else if (this%type() == type_float)
then
1223 select case (this%status())
1225 call accel_set_kernel_arg(kernel_copy_real_to_complex, 0, np)
1226 call accel_set_kernel_arg(kernel_copy_real_to_complex, 1, this%ff_device)
1227 call accel_set_kernel_arg(kernel_copy_real_to_complex, 2,
log2(int(this%pack_size_real(1), int32)))
1228 call accel_set_kernel_arg(kernel_copy_real_to_complex, 3, dest%ff_device)
1229 call accel_set_kernel_arg(kernel_copy_real_to_complex, 4,
log2(int(dest%pack_size_real(1), int32)))
1231 localsize = accel_kernel_workgroup_size(kernel_copy_real_to_complex)/this%pack_size_real(1)
1233 dim3 = np/(accel_max_size_per_dim(2)*localsize) + 1
1234 dim2 = min(accel_max_size_per_dim(2)*localsize, pad(int(np, int64), localsize))
1236 call accel_kernel_run(kernel_copy_real_to_complex, &
1237 (/this%pack_size_real(1), dim2, dim3/), (/this%pack_size_real(1), localsize, 1_int64/))
1239 if(.not. optional_default(async, .false.))
call accel_finish()
1245 do ist = 1, dest%nst_linear
1246 dest%zff_pack(ist, ip) = cmplx(this%dff_pack(ist, ip), m_zero, real64)
1252 do ist = 1, dest%nst_linear
1255 dest%zff_linear(ip, ist) = cmplx(this%dff_linear(ip, ist), m_zero, real64)
1262 message(1) =
"Error! This should not happen."
1263 call messages_fatal(1)
1266 call profiling_out(
"BATCH_COPY_DATA_TO")
1274 class(
batch_t),
intent(in) :: this
1275 class(
batch_t),
intent(in) :: target
1276 logical,
optional,
intent(in) :: only_check_dim
1277 logical,
optional,
intent(in) :: type_check
1281 if (optional_default(type_check, .
true.))
then
1282 assert(this%type() ==
target%type())
1284 if (.not. optional_default(only_check_dim, .false.))
then
1285 assert(this%nst_linear ==
target%nst_linear)
1287 assert(this%status() ==
target%status())
1288 assert(this%dim ==
target%dim)
1298 class(
batch_t),
intent(inout) :: this
1299 integer,
intent(in) :: st_start
1300 integer,
intent(in) :: st_end
1302 integer :: idim, ii, ist
1306 do ist = st_start, st_end
1308 do idim = 1, this%dim
1309 ii = this%dim*(ist - st_start) + idim
1310 this%ist_idim_index(ii, 1) = ist
1311 this%ist_idim_index(ii, 2) = idim
1313 this%ist(ist - st_start + 1) = ist
1317 this%pack_size(1) = pad_pow2(this%nst_linear)
1318 this%pack_size(2) = this%np
1319 if (accel_is_enabled()) this%pack_size(2) = accel_padded_size(this%pack_size(2))
1321 this%pack_size_real = this%pack_size
1322 if (type_is_complex(this%type())) this%pack_size_real(1) = 2*this%pack_size_real(1)
1329#include "batch_inc.F90"
1332#include "complex.F90"
1333#include "batch_inc.F90"
initialize a batch with existing memory
double log2(double __x) __attribute__((__nothrow__
integer, parameter, public accel_mem_read_write
subroutine, public accel_release_buffer(this, async)
This module contains interfaces for routines in allocate_hardware_aware.c.
subroutine, public deallocate_hardware_aware(array, size)
This module implements batches of mesh functions.
type(type_t) pure function batch_type(this)
return the type of a batch
subroutine zbatch_pack_copy(this)
copy data from the unpacked to the packed arrays
integer, parameter, public batch_not_packed
functions are stored in CPU memory, unpacked order
integer, parameter, public batch_device_packed
functions are stored in device memory in packed order
subroutine zbatch_init_with_memory_3(this, dim, st_start, st_end, psi)
initialize a batch with an rank-3 array of TYPE_CMPLX valued mesh functions psi.
subroutine batch_check_compatibility_with(this, target, only_check_dim, type_check)
check whether two batches have compatible dimensions (and type)
integer, parameter cl_pack_max_buffer_size
this value controls the size (in number of wave-functions) of the buffer used to copy states to the o...
logical pure function batch_is_packed(this)
subroutine dbatch_unpack_copy(this)
copy data from the packed to the unpacked arrays
subroutine dbatch_init_with_memory_1(this, psi)
initialize a batch with an rank-1 array of TYPE_FLOAT valued mesh functions psi.
subroutine batch_write_unpacked_to_device(this)
subroutine batch_do_unpack(this, copy, force, async)
unpack a batch
subroutine batch_finish_unpack(this)
finish the unpacking if do_unpack() was called with async=.true.
subroutine zbatch_allocate_unpacked_host(this)
allocate host (CPU) memory for unpacked data of type TYPE_CMPLX
subroutine batch_deallocate_packed_device(this)
release packed device memory
integer pure function batch_type_as_integer(this)
For debuging purpose only.
subroutine batch_do_pack_generic(this, copy, async)
pack the data in a batch
integer function batch_inv_index(this, cind)
inverse index lookup
subroutine dbatch_init_with_memory_2(this, dim, st_start, st_end, psi)
initialize a batch with an rank-2 array of TYPE_FLOAT valued mesh functions psi.
subroutine batch_allocate_packed_host(this)
allocate host (CPU) memory for packed data
subroutine, public zbatch_init(this, dim, st_start, st_end, np, special, packed)
initialize a TYPE_CMPLX valued batch to given size without providing external memory
subroutine zbatch_allocate_packed_host(this)
allocate host (CPU) memory for packed data of type TYPE_CMPLX
subroutine batch_clone_to(this, dest, pack, copy_data, new_np, special, dest_type)
clone a batch to a new batch
subroutine batch_remote_access_stop(this, rma_win)
stop the remote access to the batch
subroutine batch_read_device_to_unpacked(this)
subroutine zbatch_init_with_memory_1(this, psi)
initialize a batch with an rank-1 array of TYPE_CMPLX valued mesh functions psi.
subroutine dbatch_allocate_packed_host(this)
allocate host (CPU) memory for packed data of type TYPE_FLOAT
subroutine batch_clone_to_array(this, dest, n_batches, pack, copy_data, new_np, special, dest_type)
subroutine batch_allocate_packed_device(this)
allocate device (GPU) memory for packed data
subroutine batch_build_indices(this, st_start, st_end)
build the index ist(:) and ist_idim_index(:,:) and set pack_size
integer pure function batch_ist_idim_to_linear(this, cind)
direct index lookup
subroutine batch_do_pack_target(this, target, copy, async, cpu_only)
pack the data in a batch
integer pure function batch_linear_to_ist(this, linear_index)
get state index ist from linear (combined dim and nst) index
subroutine batch_copy_to(this, dest, pack, copy_data, new_np, special, dest_type)
make a copy of a batch
subroutine, public batch_read_device_to_packed(this, async)
subroutine batch_write_packed_to_device(this, async)
subroutine dbatch_init_with_memory_3(this, dim, st_start, st_end, psi)
initialize a batch with an rank-3 array of TYPE_FLOAT valued mesh functions psi.
subroutine batch_allocate_unpacked_host(this)
allocate host (CPU) memory for unpacked data
subroutine batch_init_empty(this, dim, nst, np)
initialize an empty batch
subroutine, public dbatch_init(this, dim, st_start, st_end, np, special, packed)
initialize a TYPE_FLOAT valued batch to given size without providing external memory
subroutine zbatch_init_with_memory_2(this, dim, st_start, st_end, psi)
initialize a batch with an rank-2 array of TYPE_CMPLX valued mesh functions psi.
integer pure function batch_linear_to_idim(this, linear_index)
extract idim from linear index
subroutine batch_remote_access_start(this, mpi_grp, rma_win)
start remote access to a batch on another node
subroutine batch_copy_data_to(this, np, dest, async)
copy data to another batch.
subroutine dbatch_allocate_unpacked_host(this)
allocate host (CPU) memory for unpacked data of type TYPE_FLOAT
subroutine dbatch_pack_copy(this)
copy data from the unpacked to the packed arrays
integer pure function batch_status(this)
return the status of a batch
subroutine batch_deallocate_unpacked_host(this)
release unpacked host memory
integer, parameter, public batch_packed
functions are stored in CPU memory, in transposed (packed) order
subroutine batch_deallocate_packed_host(this)
release packed host memory
integer(int64) function batch_pack_total_size(this)
subroutine batch_end(this, copy)
finalize a batch and release allocated memory, if necessary
subroutine zbatch_unpack_copy(this)
copy data from the packed to the unpacked arrays
This module contains interfaces for BLAS routines You should not use these routines directly....
This module is intended to contain "only mathematical" functions and procedures.
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
subroutine, public messages_fatal(no_lines, only_root_writes, namespace)
type(type_t), public type_float
type(type_t), public type_cmplx
type(type_t), public type_none
Class defining batches of mesh functions.