68 integer,
public :: nst
69 integer,
public :: dim
72 integer,
allocatable :: ist_idim_index(:, :)
77 integer,
allocatable,
public :: ist(:)
84 logical :: is_allocated
87 integer,
public :: nst_linear
93 integer :: status_host
102 type(type_t) :: type_of
103 integer :: device_buffer_count
104 integer :: host_buffer_count
105 logical :: special_memory
106 logical :: needs_finish_unpack
110 real(real64),
pointer,
contiguous,
public :: dff(:, :, :)
112 complex(real64),
pointer,
contiguous,
public :: zff(:, :, :)
114 real(real64),
pointer,
contiguous,
public :: dff_linear(:, :)
116 complex(real64),
pointer,
contiguous,
public :: zff_linear(:, :)
120 real(real64),
pointer,
contiguous,
public :: dff_pack(:, :)
122 complex(real64),
pointer,
contiguous,
public :: zff_pack(:, :)
125 integer(int64),
public :: pack_size(1:2)
127 integer(int64),
public :: pack_size_real(1:2)
131 type(accel_mem_t),
public :: ff_device
141 generic :: do_pack => do_pack_generic, do_pack_target
191 integer,
public,
parameter :: &
192 batch_not_packed = 0, & !< functions are stored in CPU memory, unpacked order
210 class(
batch_t),
intent(inout) :: this
211 logical,
optional,
intent(in) :: copy
216 if (this%own_memory .and. this%is_packed())
then
219 call this%deallocate_packed_device()
222 call this%deallocate_packed_host()
226 this%host_buffer_count = 0
227 this%device_buffer_count = 0
232 if (this%is_allocated)
then
233 call this%deallocate_unpacked_host()
236 safe_deallocate_a(this%ist_idim_index)
237 safe_deallocate_a(this%ist)
252 this%is_allocated = .false.
254 if (this%special_memory)
then
255 if (
associated(this%dff))
then
258 if (
associated(this%zff))
then
262 safe_deallocate_p(this%dff)
263 safe_deallocate_p(this%zff)
266 nullify(this%dff_linear)
268 nullify(this%zff_linear)
279 class(batch_t),
intent(inout) :: this
283 if (this%special_memory)
then
284 if (
associated(this%dff_pack))
then
287 if (
associated(this%zff_pack))
then
291 safe_deallocate_p(this%dff_pack)
292 safe_deallocate_p(this%zff_pack)
294 nullify(this%dff_pack)
295 nullify(this%zff_pack)
319 class(
batch_t),
intent(inout) :: this
324 call this%dallocate_unpacked_host()
326 call this%zallocate_unpacked_host()
338 class(
batch_t),
intent(inout) :: this
343 call this%dallocate_packed_host()
345 call this%zallocate_packed_host()
357 class(
batch_t),
intent(inout) :: this
362 product(this%pack_size))
375 type(
batch_t),
intent(out) :: this
376 integer,
intent(in) :: dim
377 integer,
intent(in) :: nst
378 integer,
intent(in) :: np
382 this%is_allocated = .false.
383 this%own_memory = .false.
384 this%special_memory = .false.
385 this%needs_finish_unpack = .false.
390 this%nst_linear = nst*dim
393 this%device_buffer_count = 0
394 this%host_buffer_count = 0
399 safe_allocate(this%ist_idim_index(1:this%nst_linear, 1:this%ndims))
400 safe_allocate(this%ist(1:this%nst))
402 nullify(this%dff, this%zff, this%dff_linear, this%zff_linear)
403 nullify(this%dff_pack, this%zff_pack)
414 subroutine batch_clone_to(this, dest, pack, copy_data, new_np, special, dest_type)
415 class(
batch_t),
intent(in) :: this
416 class(
batch_t),
allocatable,
intent(out) :: dest
417 logical,
optional,
intent(in) :: pack
419 logical,
optional,
intent(in) :: copy_data
421 integer,
optional,
intent(in) :: new_np
422 logical,
optional,
intent(in) :: special
424 type(
type_t),
optional,
intent(in) :: dest_type
428 if (.not.
allocated(dest))
then
429 safe_allocate_type(
batch_t, dest)
431 message(1) =
"Internal error: destination batch in batch_clone_to has been previously allocated."
435 call this%copy_to(dest, pack, copy_data, new_np, special, dest_type)
442 subroutine batch_clone_to_array(this, dest, n_batches, pack, copy_data, new_np, special, dest_type)
443 class(
batch_t),
intent(in) :: this
444 class(
batch_t),
allocatable,
intent(out) :: dest(:)
445 integer,
intent(in) :: n_batches
446 logical,
optional,
intent(in) :: pack
448 logical,
optional,
intent(in) :: copy_data
450 integer,
optional,
intent(in) :: new_np
451 logical,
optional,
intent(in) :: special
453 type(
type_t),
optional,
intent(in) :: dest_type
459 if (.not.
allocated(dest))
then
460 safe_allocate_type_array(
batch_t, dest, (1:n_batches))
462 message(1) =
"Internal error: destination batch in batch_clone_to_array has been previously allocated."
467 call this%copy_to(dest(ib), pack, copy_data, new_np, special, dest_type)
478 subroutine batch_copy_to(this, dest, pack, copy_data, new_np, special, dest_type)
479 class(
batch_t),
intent(in) :: this
480 class(
batch_t),
intent(out) :: dest
481 logical,
optional,
intent(in) :: pack
483 logical,
optional,
intent(in) :: copy_data
485 integer,
optional,
intent(in) :: new_np
486 logical,
optional,
intent(in) :: special
488 type(
type_t),
optional,
intent(in) :: dest_type
490 logical :: host_packed, special_
498 host_packed = this%host_buffer_count > 0
502 if (
present(special))
then
503 special_ = this%special_memory
505 special_ = this%special_memory .and. .not. this%device_buffer_count > 0
508 if (
present(dest_type))
then
515 call dbatch_init(dest, this%dim, 1, this%nst, np_, packed=host_packed, special=special_)
517 call zbatch_init(dest, this%dim, 1, this%nst, np_, packed=host_packed, special=special_)
519 message(1) =
"Internal error: unknown batch type in batch_copy_to."
523 if (this%status() /= dest%status() .and.
optional_default(pack, this%is_packed()))
call dest%do_pack(copy = .false.)
525 dest%ist_idim_index(1:this%nst_linear, 1:this%ndims) = this%ist_idim_index(1:this%nst_linear, 1:this%ndims)
526 dest%ist(1:this%nst) = this%ist(1:this%nst)
529 assert(np_ == this%np)
530 call this%copy_data_to(min(this%np, np_), dest)
541 type(
type_t)
pure function batch_type(this) result(btype)
542 class(
batch_t),
intent(in) :: this
550 integer pure function batch_type_as_integer(this) result(itype)
551 class(
batch_t),
intent(in) :: this
553 type(type_t) :: btype
557 if (btype == type_float) itype = 1
558 if (btype == type_cmplx) itype = 2
567 integer pure function batch_status(this) result(bstatus)
568 class(
batch_t),
intent(in) :: this
570 bstatus = this%status_of
575 logical pure function batch_is_packed(this) result(in_buffer)
576 class(
batch_t),
intent(in) :: this
578 in_buffer = (this%device_buffer_count > 0) .or. (this%host_buffer_count > 0)
584 class(
batch_t),
intent(inout) :: this
587 if (accel_is_enabled())
size = accel_padded_size(size)
588 size = size*pad_pow2(this%nst_linear)*types_get_size(this%type())
600 class(
batch_t),
intent(inout) :: this
601 logical,
optional,
intent(in) :: copy
602 logical,
optional,
intent(in) :: async
605 integer :: source, target
610 source = this%status()
613 if (accel_is_enabled())
then
622 call this%do_pack(
target, copy, async)
631 class(
batch_t),
intent(inout) :: this
632 integer,
intent(in) ::
target
633 logical,
optional,
intent(in) :: copy
634 logical,
optional,
intent(in) :: async
636 logical,
optional,
intent(in) :: cpu_only
644 call profiling_in(
"BATCH_DO_PACK")
646 copy_ = optional_default(copy, .
true.)
648 async_ = optional_default(async, .false.)
651 source = this%status()
654 if (source /=
target)
then
657 call this%allocate_packed_device()
671 call this%allocate_packed_host()
676 if (this%type() == type_float)
then
678 else if (this%type() == type_cmplx)
then
682 if (this%own_memory)
call this%deallocate_unpacked_host()
684 call messages_not_implemented(
"Error: batch_do_pack called with BATCH_NOT_PACKED as target.")
690 this%device_buffer_count = this%device_buffer_count + 1
692 this%host_buffer_count = this%host_buffer_count + 1
695 call profiling_out(
"BATCH_DO_PACK")
705 class(
batch_t),
intent(inout) :: this
706 logical,
optional,
intent(in) :: copy
707 logical,
optional,
intent(in) :: force
708 logical,
optional,
intent(in) :: async
711 logical :: copy_, force_, async_
712 integer :: source, target
716 call profiling_in(
"BATCH_DO_UNPACK")
718 copy_ = optional_default(copy, .
true.)
720 force_ = optional_default(force, .false.)
722 async_ = optional_default(async, .false.)
725 source = this%status()
732 target = this%status_host
736 if (source /=
target)
then
739 if (this%host_buffer_count == 1 .or. force_)
then
740 if (this%own_memory)
call this%allocate_unpacked_host()
742 if (copy_ .or. this%own_memory)
then
743 if (this%type() == type_float)
then
745 else if (this%type() == type_cmplx)
then
749 call this%deallocate_packed_host()
750 this%status_host =
target
751 this%status_of =
target
752 this%host_buffer_count = 1
754 this%host_buffer_count = this%host_buffer_count - 1
756 if (this%device_buffer_count == 1 .or. force_)
then
768 this%needs_finish_unpack = .
true.
770 call this%deallocate_packed_device()
772 this%status_of =
target
773 this%device_buffer_count = 1
775 this%device_buffer_count = this%device_buffer_count - 1
779 call profiling_out(
"BATCH_DO_UNPACK")
787 class(
batch_t),
intent(inout) :: this
790 if (this%needs_finish_unpack)
then
792 call this%deallocate_packed_device()
793 this%needs_finish_unpack = .false.
801 class(
batch_t),
intent(inout) :: this
804 integer(int64) :: unroll, bsize, gsize
805 type(accel_mem_t) :: tmp
806 type(accel_kernel_t),
pointer :: kernel
810 call profiling_in(
"BATCH_WRT_UNPACK_ACCEL")
811 if (this%nst_linear == 1)
then
813 if (this%type() == type_float)
then
814 call accel_write_buffer(this%ff_device, ubound(this%dff_linear, dim=1), this%dff_linear(:, 1))
815 else if (this%type() == type_cmplx)
then
816 call accel_write_buffer(this%ff_device, ubound(this%zff_linear, dim=1), this%zff_linear(:, 1))
824 if (this%type() == type_float)
then
832 call accel_create_buffer(tmp, accel_mem_read_only, this%type(), unroll*this%pack_size(2))
834 do ist = 1, this%nst_linear, int(unroll, int32)
837 do ist2 = ist, min(ist + int(unroll, int32) - 1, this%nst_linear)
839 if (this%type() == type_float)
then
840 call accel_write_buffer(tmp, ubound(this%dff_linear, dim=1, kind=int64), this%dff_linear(:, ist2), &
841 offset = (ist2 - ist)*this%pack_size(2))
843 call accel_write_buffer(tmp, ubound(this%zff_linear, dim=1, kind=int64), this%zff_linear(:, ist2), &
844 offset = (ist2 - ist)*this%pack_size(2))
849 call accel_set_kernel_arg(kernel, 0, int(this%pack_size(1), int32))
850 call accel_set_kernel_arg(kernel, 1, this%np)
851 call accel_set_kernel_arg(kernel, 2, this%nst_linear)
852 call accel_set_kernel_arg(kernel, 3, ist - 1)
853 call accel_set_kernel_arg(kernel, 4, tmp)
854 call accel_set_kernel_arg(kernel, 5, this%ff_device)
856 call profiling_in(
"GPU_PACK")
859 bsize = accel_max_block_size()/unroll
860 call accel_grid_size(this%pack_size(2), bsize, gsize)
862 call accel_kernel_run(kernel, (/gsize, 1_int64/), (/bsize, unroll/))
864 if (this%type() == type_float)
then
865 call profiling_count_transfers(unroll*this%pack_size(2), m_one)
867 call profiling_count_transfers(unroll*this%pack_size(2), m_zi)
871 call profiling_out(
"GPU_PACK")
875 call accel_free_buffer(tmp)
879 call profiling_out(
"BATCH_WRT_UNPACK_ACCEL")
886 class(
batch_t),
intent(inout) :: this
889 integer(int64) :: unroll, bsize, gsize
890 type(accel_mem_t) :: tmp
891 type(accel_kernel_t),
pointer :: kernel
894 call profiling_in(
"BATCH_READ_UNPACKED_ACCEL")
896 if (this%nst_linear == 1)
then
898 if (this%type() == type_float)
then
899 call accel_read_buffer(this%ff_device, ubound(this%dff_linear, dim=1), this%dff_linear(:, 1))
901 call accel_read_buffer(this%ff_device, ubound(this%zff_linear, dim=1), this%zff_linear(:, 1))
908 call accel_create_buffer(tmp, accel_mem_write_only, this%type(), unroll*this%pack_size(2))
910 if (this%type() == type_float)
then
916 do ist = 1, this%nst_linear, int(unroll, int32)
917 call accel_set_kernel_arg(kernel, 0, int(this%pack_size(1), int32))
918 call accel_set_kernel_arg(kernel, 1, this%np)
919 call accel_set_kernel_arg(kernel, 2, this%nst_linear)
920 call accel_set_kernel_arg(kernel, 3, ist - 1)
921 call accel_set_kernel_arg(kernel, 4, this%ff_device)
922 call accel_set_kernel_arg(kernel, 5, tmp)
924 call profiling_in(
"GPU_UNPACK")
927 bsize = accel_max_block_size()/unroll
928 call accel_grid_size(this%pack_size(2), bsize, gsize)
930 call accel_kernel_run(kernel, (/1_int64, gsize/), (/unroll, bsize/))
932 if (this%type() == type_float)
then
933 call profiling_count_transfers(unroll*this%pack_size(2), m_one)
935 call profiling_count_transfers(unroll*this%pack_size(2), m_zi)
939 call profiling_out(
"GPU_UNPACK")
942 do ist2 = ist, min(ist + int(unroll, int32) - 1, this%nst_linear)
944 if (this%type() == type_float)
then
945 call accel_read_buffer(tmp, ubound(this%dff_linear, dim=1, kind=int64), this%dff_linear(:, ist2), &
946 offset = (ist2 - ist)*this%pack_size(2))
948 call accel_read_buffer(tmp, ubound(this%zff_linear, dim=1, kind=int64), this%zff_linear(:, ist2), &
949 offset = (ist2 - ist)*this%pack_size(2))
955 call accel_free_buffer(tmp)
958 call profiling_out(
"BATCH_READ_UNPACKED_ACCEL")
964 class(
batch_t),
intent(inout) :: this
965 logical,
optional,
intent(in) :: async
970 call profiling_in(
"BATCH_WRITE_PACKED_ACCEL")
971 if (this%type() == type_float)
then
972 call accel_write_buffer(this%ff_device, this%pack_size(1), this%pack_size(2), this%dff_pack, async=async)
974 call accel_write_buffer(this%ff_device, this%pack_size(1), this%pack_size(2), this%zff_pack, async=async)
976 call profiling_out(
"BATCH_WRITE_PACKED_ACCEL")
983 class(
batch_t),
intent(inout) :: this
984 logical,
optional,
intent(in) :: async
989 call profiling_in(
"BATCH_READ_PACKED_ACCEL")
990 if (this%type() == type_float)
then
991 call accel_read_buffer(this%ff_device, this%pack_size(1), this%pack_size(2), this%dff_pack, async=async)
993 call accel_read_buffer(this%ff_device, this%pack_size(1), this%pack_size(2), this%zff_pack, async=async)
995 call profiling_out(
"BATCH_READ_PACKED_ACCEL")
1006 class(
batch_t),
intent(in) :: this
1007 integer,
intent(in) :: cind(:)
1009 do index = 1, this%nst_linear
1010 if (all(cind(1:this%ndims) == this%ist_idim_index(index, 1:this%ndims)))
exit
1013 assert(index <= this%nst_linear)
1022 integer pure function batch_ist_idim_to_linear(this, cind) result(index)
1023 class(
batch_t),
intent(in) :: this
1024 integer,
intent(in) :: cind(:)
1026 if (ubound(cind, dim = 1) == 1)
then
1029 index = (cind(1) - 1)*this%dim + cind(2)
1040 integer pure function batch_linear_to_ist(this, linear_index) result(ist)
1041 class(
batch_t),
intent(in) :: this
1042 integer,
intent(in) :: linear_index
1044 ist = this%ist_idim_index(linear_index, 1)
1051 integer pure function batch_linear_to_idim(this, linear_index) result(idim)
1052 class(
batch_t),
intent(in) :: this
1053 integer,
intent(in) :: linear_index
1055 idim = this%ist_idim_index(linear_index, 2)
1070 class(
batch_t),
intent(inout) :: this
1071 type(mpi_grp_t),
intent(in) :: mpi_grp
1072 type(mpi_win),
intent(out) :: rma_win
1076 if (mpi_grp%size > 1)
then
1078 assert(.not. accel_is_enabled())
1082 if (this%type() == type_cmplx)
then
1084 call mpi_win_create(this%zff_pack(1, 1), int(product(this%pack_size)*types_get_size(this%type()), mpi_address_kind), &
1085 types_get_size(this%type()), mpi_info_null, mpi_grp%comm, rma_win)
1087 else if (this%type() == type_float)
then
1089 call mpi_win_create(this%dff_pack(1, 1), int(product(this%pack_size)*types_get_size(this%type()), mpi_address_kind), &
1090 types_get_size(this%type()), mpi_info_null, mpi_grp%comm, rma_win)
1093 message(1) =
"Internal error: unknown batch type in batch_remote_access_start."
1094 call messages_fatal(1)
1098 rma_win = mpi_win_null
1112 class(
batch_t),
intent(inout) :: this
1113 type(mpi_win),
intent(inout) :: rma_win
1117 if (rma_win /= mpi_win_null)
then
1119 call mpi_win_free(rma_win)
1121 call this%do_unpack()
1131 class(
batch_t),
intent(in) :: this
1132 integer,
intent(in) :: np
1133 class(
batch_t),
intent(inout) :: dest
1134 logical,
optional,
intent(in) :: async
1136 integer(int64),
dimension(3) :: gsizes, bsizes
1140 call profiling_in(
"BATCH_COPY_DATA_TO")
1143 call this%check_compatibility_with(dest, type_check=.false.)
1145 if (this%type() == dest%type())
then
1146 select case (this%status())
1148 call accel_set_kernel_arg(kernel_copy, 0, np)
1149 call accel_set_kernel_arg(kernel_copy, 1, this%ff_device)
1150 call accel_set_kernel_arg(kernel_copy, 2,
log2(int(this%pack_size_real(1), int32)))
1151 call accel_set_kernel_arg(kernel_copy, 3, dest%ff_device)
1152 call accel_set_kernel_arg(kernel_copy, 4,
log2(int(dest%pack_size_real(1), int32)))
1155 call accel_grid_size_extend_dim(int(np, int64), dest%pack_size_real(1), gsizes, bsizes, kernel_copy)
1157 call accel_kernel_run(kernel_copy, gsizes, bsizes)
1159 if(.not. optional_default(async, .false.))
call accel_finish()
1162 if (np*this%pack_size(1) > huge(0_int32))
then
1165 if (dest%type() == type_float)
then
1166 call blas_copy(int(this%pack_size(1), int32), this%dff_pack(1, ip), 1, dest%dff_pack(1, ip), 1)
1168 call blas_copy(int(this%pack_size(1), int32), this%zff_pack(1, ip), 1, dest%zff_pack(1, ip), 1)
1172 if (dest%type() == type_float)
then
1173 call blas_copy(int(this%pack_size(1)*np, int32), this%dff_pack(1, 1), 1, dest%dff_pack(1, 1), 1)
1175 call blas_copy(int(this%pack_size(1)*np, int32), this%zff_pack(1, 1), 1, dest%zff_pack(1, 1), 1)
1180 do ist = 1, dest%nst_linear
1181 if (dest%type() == type_cmplx)
then
1182 call blas_copy(np, this%zff_linear(1, ist), 1, dest%zff_linear(1, ist), 1)
1184 call blas_copy(np, this%dff_linear(1, ist), 1, dest%dff_linear(1, ist), 1)
1189 else if (this%type() == type_cmplx)
then
1191 select case (this%status())
1193 call accel_set_kernel_arg(kernel_copy_complex_to_real, 0, np)
1194 call accel_set_kernel_arg(kernel_copy_complex_to_real, 1, this%ff_device)
1195 call accel_set_kernel_arg(kernel_copy_complex_to_real, 2,
log2(int(this%pack_size_real(1), int32)))
1196 call accel_set_kernel_arg(kernel_copy_complex_to_real, 3, dest%ff_device)
1197 call accel_set_kernel_arg(kernel_copy_complex_to_real, 4,
log2(int(dest%pack_size_real(1), int32)))
1200 call accel_grid_size_extend_dim(int(np, int64), dest%pack_size_real(1), gsizes, bsizes, kernel_copy_complex_to_real)
1202 call accel_kernel_run(kernel_copy_complex_to_real, gsizes, bsizes)
1204 if(.not. optional_default(async, .false.))
call accel_finish()
1210 do ist = 1, dest%nst_linear
1211 dest%dff_pack(ist, ip) = real(this%zff_pack(ist, ip), real64)
1217 do ist = 1, dest%nst_linear
1220 dest%dff_linear(ip, ist) = real(this%zff_linear(ip, ist), real64)
1226 else if (this%type() == type_float)
then
1228 select case (this%status())
1230 call accel_set_kernel_arg(kernel_copy_real_to_complex, 0, np)
1231 call accel_set_kernel_arg(kernel_copy_real_to_complex, 1, this%ff_device)
1232 call accel_set_kernel_arg(kernel_copy_real_to_complex, 2,
log2(int(this%pack_size_real(1), int32)))
1233 call accel_set_kernel_arg(kernel_copy_real_to_complex, 3, dest%ff_device)
1234 call accel_set_kernel_arg(kernel_copy_real_to_complex, 4,
log2(int(dest%pack_size_real(1), int32)))
1237 call accel_grid_size_extend_dim(int(np, int64), this%pack_size_real(1), gsizes, bsizes, kernel_copy_real_to_complex)
1239 call accel_kernel_run(kernel_copy_real_to_complex, gsizes, bsizes)
1241 if(.not. optional_default(async, .false.))
call accel_finish()
1247 do ist = 1, dest%nst_linear
1248 dest%zff_pack(ist, ip) = cmplx(this%dff_pack(ist, ip), m_zero, real64)
1254 do ist = 1, dest%nst_linear
1257 dest%zff_linear(ip, ist) = cmplx(this%dff_linear(ip, ist), m_zero, real64)
1264 message(1) =
"Error! This should not happen."
1265 call messages_fatal(1)
1268 call profiling_out(
"BATCH_COPY_DATA_TO")
1276 class(
batch_t),
intent(in) :: this
1277 class(
batch_t),
intent(in) :: target
1278 logical,
optional,
intent(in) :: only_check_dim
1279 logical,
optional,
intent(in) :: type_check
1283 if (optional_default(type_check, .
true.))
then
1284 assert(this%type() ==
target%type())
1286 if (.not. optional_default(only_check_dim, .false.))
then
1287 assert(this%nst_linear ==
target%nst_linear)
1289 assert(this%status() ==
target%status())
1290 assert(this%dim ==
target%dim)
1300 class(
batch_t),
intent(inout) :: this
1301 integer,
intent(in) :: st_start
1302 integer,
intent(in) :: st_end
1304 integer :: idim, ii, ist
1308 do ist = st_start, st_end
1310 do idim = 1, this%dim
1311 ii = this%dim*(ist - st_start) + idim
1312 this%ist_idim_index(ii, 1) = ist
1313 this%ist_idim_index(ii, 2) = idim
1315 this%ist(ist - st_start + 1) = ist
1319 this%pack_size(1) = pad_pow2(this%nst_linear)
1320 this%pack_size(2) = this%np
1321 if (accel_is_enabled()) this%pack_size(2) = accel_padded_size(this%pack_size(2))
1323 this%pack_size_real = this%pack_size
1324 if (type_is_complex(this%type())) this%pack_size_real(1) = 2*this%pack_size_real(1)
1331#include "batch_inc.F90"
1334#include "complex.F90"
1335#include "batch_inc.F90"
initialize a batch with existing memory
double log2(double __x) __attribute__((__nothrow__
subroutine, public accel_free_buffer(this, async)
integer, parameter, public accel_mem_read_write
This module contains interfaces for routines in allocate_hardware_aware.c.
subroutine, public deallocate_hardware_aware(array, size)
This module implements batches of mesh functions.
type(type_t) pure function batch_type(this)
return the type of a batch
subroutine zbatch_pack_copy(this)
copy data from the unpacked to the packed arrays
integer, parameter, public batch_not_packed
functions are stored in CPU memory, unpacked order
integer, parameter, public batch_device_packed
functions are stored in device memory in packed order
subroutine zbatch_init_with_memory_3(this, dim, st_start, st_end, psi)
initialize a batch with an rank-3 array of TYPE_CMPLX valued mesh functions psi.
subroutine batch_check_compatibility_with(this, target, only_check_dim, type_check)
check whether two batches have compatible dimensions (and type)
logical pure function batch_is_packed(this)
subroutine dbatch_unpack_copy(this)
copy data from the packed to the unpacked arrays
subroutine dbatch_init_with_memory_1(this, psi)
initialize a batch with an rank-1 array of TYPE_FLOAT valued mesh functions psi.
subroutine batch_write_unpacked_to_device(this)
subroutine batch_do_unpack(this, copy, force, async)
unpack a batch
subroutine batch_finish_unpack(this)
finish the unpacking if do_unpack() was called with async=.true.
subroutine zbatch_allocate_unpacked_host(this)
allocate host (CPU) memory for unpacked data of type TYPE_CMPLX
subroutine batch_deallocate_packed_device(this)
release packed device memory
integer pure function batch_type_as_integer(this)
For debuging purpose only.
subroutine batch_do_pack_generic(this, copy, async)
pack the data in a batch
integer function batch_inv_index(this, cind)
inverse index lookup
subroutine dbatch_init_with_memory_2(this, dim, st_start, st_end, psi)
initialize a batch with an rank-2 array of TYPE_FLOAT valued mesh functions psi.
subroutine batch_allocate_packed_host(this)
allocate host (CPU) memory for packed data
subroutine, public zbatch_init(this, dim, st_start, st_end, np, special, packed)
initialize a TYPE_CMPLX valued batch to given size without providing external memory
subroutine zbatch_allocate_packed_host(this)
allocate host (CPU) memory for packed data of type TYPE_CMPLX
subroutine batch_clone_to(this, dest, pack, copy_data, new_np, special, dest_type)
clone a batch to a new batch
subroutine batch_remote_access_stop(this, rma_win)
stop the remote access to the batch
subroutine batch_read_device_to_unpacked(this)
subroutine zbatch_init_with_memory_1(this, psi)
initialize a batch with an rank-1 array of TYPE_CMPLX valued mesh functions psi.
subroutine dbatch_allocate_packed_host(this)
allocate host (CPU) memory for packed data of type TYPE_FLOAT
subroutine batch_clone_to_array(this, dest, n_batches, pack, copy_data, new_np, special, dest_type)
subroutine batch_allocate_packed_device(this)
allocate device (GPU) memory for packed data
subroutine batch_build_indices(this, st_start, st_end)
build the index ist(:) and ist_idim_index(:,:) and set pack_size
integer pure function batch_ist_idim_to_linear(this, cind)
direct index lookup
integer, parameter gpu_pack_max_buffer_size
this value controls the size (in number of wave-functions) of the buffer used to copy states to the G...
subroutine batch_do_pack_target(this, target, copy, async, cpu_only)
pack the data in a batch
integer pure function batch_linear_to_ist(this, linear_index)
get state index ist from linear (combined dim and nst) index
subroutine batch_copy_to(this, dest, pack, copy_data, new_np, special, dest_type)
make a copy of a batch
subroutine, public batch_read_device_to_packed(this, async)
subroutine batch_write_packed_to_device(this, async)
subroutine dbatch_init_with_memory_3(this, dim, st_start, st_end, psi)
initialize a batch with an rank-3 array of TYPE_FLOAT valued mesh functions psi.
subroutine batch_allocate_unpacked_host(this)
allocate host (CPU) memory for unpacked data
subroutine batch_init_empty(this, dim, nst, np)
initialize an empty batch
subroutine, public dbatch_init(this, dim, st_start, st_end, np, special, packed)
initialize a TYPE_FLOAT valued batch to given size without providing external memory
subroutine zbatch_init_with_memory_2(this, dim, st_start, st_end, psi)
initialize a batch with an rank-2 array of TYPE_CMPLX valued mesh functions psi.
integer pure function batch_linear_to_idim(this, linear_index)
extract idim from linear index
subroutine batch_remote_access_start(this, mpi_grp, rma_win)
start remote access to a batch on another node
subroutine batch_copy_data_to(this, np, dest, async)
copy data to another batch.
subroutine dbatch_allocate_unpacked_host(this)
allocate host (CPU) memory for unpacked data of type TYPE_FLOAT
subroutine dbatch_pack_copy(this)
copy data from the unpacked to the packed arrays
integer pure function batch_status(this)
return the status of a batch
subroutine batch_deallocate_unpacked_host(this)
release unpacked host memory
integer, parameter, public batch_packed
functions are stored in CPU memory, in transposed (packed) order
subroutine batch_deallocate_packed_host(this)
release packed host memory
integer(int64) function batch_pack_total_size(this)
subroutine batch_end(this, copy)
finalize a batch and release allocated memory, if necessary
subroutine zbatch_unpack_copy(this)
copy data from the packed to the unpacked arrays
This module contains interfaces for BLAS routines You should not use these routines directly....
This module is intended to contain "only mathematical" functions and procedures.
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
subroutine, public messages_fatal(no_lines, only_root_writes, namespace)
type(type_t), public type_float
type(type_t), public type_cmplx
type(type_t), public type_none
Class defining batches of mesh functions.