Octopus
fft.F90
Go to the documentation of this file.
1!! Copyright (C) 2002-2006 M. Marques, A. Castro, A. Rubio, G. Bertsch
2!! Copyright (C) 2011 J. Alberdi-Rodriguez, P. Garcia RisueƱo, M. Oliveira
3!!
4!! This program is free software; you can redistribute it and/or modify
5!! it under the terms of the GNU General Public License as published by
6!! the Free Software Foundation; either version 2, or (at your option)
7!! any later version.
8!!
9!! This program is distributed in the hope that it will be useful,
10!! but WITHOUT ANY WARRANTY; without even the implied warranty of
11!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12!! GNU General Public License for more details.
13!!
14!! You should have received a copy of the GNU General Public License
15!! along with this program; if not, write to the Free Software
16!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17!! 02110-1301, USA.
18!!
19
20#include "global.h"
21
25module fft_oct_m
26 use, intrinsic :: iso_c_binding
27 use, intrinsic :: ieee_arithmetic
28
29 use accel_oct_m
30 use fftw_oct_m
32 use debug_oct_m
33 use global_oct_m
34 use, intrinsic :: iso_fortran_env
39 use mpi_oct_m
41#ifdef HAVE_NFFT
42 use nfft_oct_m
43#endif
44#if defined(HAVE_OPENMP) && defined(HAVE_FFTW3_THREADS)
45 use omp_lib
46#endif
47 use parser_oct_m
48 use pfft_oct_m
50 use pnfft_oct_m
52 use types_oct_m
55
56 implicit none
57
58 private
59 public :: &
60 fft_t, &
63 fft_init, &
65 fft_end, &
66 fft_copy, &
68 pad_feq, &
76
77
79 integer, public, parameter :: &
80 FFT_NONE = 0, &
81 fft_real = 1, &
82 fft_complex = 2
83
84 integer, public, parameter :: &
85 FFTLIB_NONE = 0, &
86 fftlib_fftw = 1, &
87 fftlib_pfft = 2, &
88 fftlib_accel = 3, &
89 fftlib_nfft = 4, &
90 fftlib_pnfft = 5
91
92 integer, parameter :: &
93 FFT_MAX = 10, &
94 fft_null = -1
95
96 type fft_t
97 private
98 integer :: slot = 0
99
100 integer, public :: type
101 integer, public :: library
102
103 type(MPI_Comm) :: comm
104 integer :: rs_n_global(3)
105 integer :: fs_n_global(3)
106 integer :: rs_n(3)
107 integer :: fs_n(3)
108 integer :: rs_istart(1:3)
109 integer :: fs_istart(1:3)
110
111 integer, public :: stride_rs(1:3)
112 integer, public :: stride_fs(1:3)
113
114 type(c_ptr) :: planf
115 type(c_ptr) :: planb
116 !integer(ptrdiff_t_kind) :: pfft_planf !< PFFT plan for forward transform
117 !integer(ptrdiff_t_kind) :: pfft_planb !< PFFT plan for backward transform
118
121 real(real64), pointer, public :: drs_data(:,:,:)
122 complex(real64), pointer, public :: zrs_data(:,:,:)
123 complex(real64), pointer, public :: fs_data(:,:,:)
124 type(c_ptr) :: cuda_plan_fw
125 type(c_ptr) :: cuda_plan_bw
126#ifdef HAVE_NFFT
127 type(nfft_t), public :: nfft
128#endif
129 type(pnfft_t), public :: pnfft
130
131 logical, public :: aligned_memory
132 end type fft_t
133
134 interface dfft_forward
136 end interface dfft_forward
137
138 interface zfft_forward
140 end interface zfft_forward
141
142 interface dfft_backward
144 end interface dfft_backward
145
146 interface zfft_backward
148 end interface zfft_backward
149
150 logical, save, public :: fft_initialized = .false.
151 integer, save :: fft_refs(FFT_MAX)
152 type(fft_t), save :: fft_array(FFT_MAX)
153 logical :: fft_optimize
154 integer, save :: fft_prepare_plan
155 integer, public :: fft_default_lib = -1
156#ifdef HAVE_NFFT
157 type(nfft_t), save :: nfft_options
158#endif
159 type(pnfft_t), save :: pnfft_options
160
161 integer, parameter :: &
162 CUFFT_R2C = int(z'2a'), &
163 cufft_c2r = int(z'2c'), &
164 cufft_c2c = int(z'29'), &
165 cufft_d2z = int(z'6a'), &
166 cufft_z2d = int(z'6c'), &
167 cufft_z2z = int(z'69')
168
169contains
170
171 ! ---------------------------------------------------------
173 subroutine fft_all_init(namespace)
174 type(namespace_t), intent(in) :: namespace
175
176 integer :: ii, fft_default
177#if defined(HAVE_OPENMP) && defined(HAVE_FFTW3_THREADS)
178 integer :: iret
179#endif
180
181 push_sub(fft_all_init)
182
183 fft_initialized = .true.
184
185 !%Variable FFTOptimize
186 !%Type logical
187 !%Default yes
188 !%Section Mesh::FFTs
189 !%Description
190 !% Should <tt>octopus</tt> optimize the FFT dimensions?
191 !% This means that the mesh to which FFTs are applied is not taken to be as small
192 !% as possible: some points may be added to each direction in order to get a "good number"
193 !% for the performance of the FFT algorithm.
194 !% The best FFT grid dimensions are given by <math>2^a 3^b 5^c 7^d 11^e 13^f</math>
195 !% where <math>a,b,c,d</math> are arbitrary and <math>e,f</math> are 0 or 1.
196 !% (<a href=http://www.fftw.org/doc/Complex-DFTs.html>ref</a>).
197 !% In some cases, namely when using
198 !% the split-operator, or Suzuki-Trotter propagators, this option should be turned off.
199 !% For spatial FFTs in periodic directions, the grid is never optimized, but a warning will
200 !% be written if the number is not good, with a suggestion of a better one to use, so you
201 !% can try a different spacing if you want to get a good number.
202 !%End
203 call parse_variable(namespace, 'FFTOptimize', .true., fft_optimize)
204 do ii = 1, fft_max
205 fft_refs(ii) = fft_null
206 end do
208 !%Variable FFTPreparePlan
209 !%Type integer
210 !%Default fftw_measure
211 !%Section Mesh::FFTs
212 !%Description
213 !% The FFTs are performed in octopus with the help of <a href=http://www.fftw.org>FFTW</a> and similar packages.
214 !% Before doing the actual computations, this package prepares a "plan", which means that
215 !% the precise numerical strategy to be followed to compute the FFT is machine/compiler-dependent,
216 !% and therefore the software attempts to figure out which is this precise strategy (see the
217 !% FFTW documentation for details). This plan preparation, which has to be done for each particular
218 !% FFT shape, can be done exhaustively and carefully (slow), or merely estimated. Since this is
219 !% a rather critical numerical step, by default it is done carefully, which implies a longer initial
220 !% initialization, but faster subsequent computations. You can change this behaviour by changing
221 !% this <tt>FFTPreparePlan</tt> variable, and in this way you can force FFTW to do a fast guess or
222 !% estimation of which is the best way to perform the FFT.
223 !%Option fftw_measure 0
224 !% This plan implies a longer initialization, but involves a more careful analysis
225 !% of the strategy to follow, and therefore more efficient FFTs. A side effect of the runtime
226 !% choices is that this plan can introduce slight numerical fluctuations between runs.
227 !%Option fftw_estimate 64
228 !% This is the "fast initialization" scheme, in which the plan is merely guessed from "reasonable"
229 !% assumptions. This is the default option, as it guarantees stable results
230 !%Option fftw_patient 32
231 !% It is like fftw_measure, but considers a wider range of algorithms and often produces a
232 !% "more optimal" plan (especially for large transforms), but at the expense of several times
233 !% longer planning time (especially for large transforms).
234 !%Option fftw_exhaustive 8
235 !% It is like fftw_patient, but considers an even wider range of algorithms,
236 !% including many that we think are unlikely to be fast, to produce the most optimal
237 !% plan but with a substantially increased planning time.
238 !%End
239 call parse_variable(namespace, 'FFTPreparePlan', fftw_estimate, fft_prepare_plan)
240 if (.not. varinfo_valid_option('FFTPreparePlan', fft_prepare_plan)) then
241 call messages_input_error(namespace, 'FFTPreparePlan')
242 end if
243
244 !%Variable FFTLibrary
245 !%Type integer
246 !%Section Mesh::FFTs
247 !%Default fftw
248 !%Description
249 !% (experimental) You can select the FFT library to use.
250 !%Option fftw 1
251 !% Uses FFTW3 library.
252 !%Option pfft 2
253 !% (experimental) Uses PFFT library, which has to be linked.
254 !%Option accel 3
255 !% Uses a GPU accelerated library. This only
256 !% works if Octopus was compiled with HIP, or CUDA support.
257 !%End
258 fft_default = fftlib_fftw
259 if(accel_is_enabled()) then
260 fft_default = fftlib_accel
261 end if
262 call parse_variable(namespace, 'FFTLibrary', fft_default, fft_default_lib)
263
264 if (fft_default_lib == fftlib_accel) then
265#if ! defined(HAVE_CUDA)
266 call messages_write('You have selected the Accelerated FFT, but Octopus was compiled', new_line = .true.)
267 call messages_write('without CUDA support.')
269#endif
270 if (.not. accel_is_enabled()) then
271 call messages_write('You have selected the accelerated FFT, but acceleration is disabled.')
272 call messages_fatal()
273 end if
274 end if
275
276#if defined(HAVE_OPENMP) && defined(HAVE_FFTW3_THREADS)
277 if (omp_get_max_threads() > 1) then
278
279 call messages_write('Info: Initializing Multi-threaded FFTW')
280 call messages_info()
281
282 iret = fftw_init_threads()
283 if (iret == 0) then
284 call messages_write('Initialization of FFTW3 threads failed.')
285 call messages_fatal()
286 end if
287 call fftw_plan_with_nthreads(omp_get_max_threads())
288
289 end if
290#endif
291#ifdef HAVE_NFFT
292 call nfft_guru_options(nfft_options, namespace)
293#endif
294 call pnfft_guru_options(pnfft_options, namespace)
295
296 pop_sub(fft_all_init)
297 end subroutine fft_all_init
298
299
300 ! ---------------------------------------------------------
302 subroutine fft_all_end()
303 integer :: ii
304
305 push_sub(fft_all_end)
306
307 do ii = 1, fft_max
308 if (fft_refs(ii) /= fft_null) then
309 call fft_end(fft_array(ii))
310 end if
311 end do
312
313#ifdef HAVE_PFFT
314 call pfft_cleanup()
315#endif
316
317#if defined(HAVE_OPENMP) && defined(HAVE_FFTW3_THREADS)
318 call fftw_cleanup_threads()
319#else
320 call fftw_cleanup()
321#endif
322
323 fft_initialized = .false.
324
325 pop_sub(fft_all_end)
326 end subroutine fft_all_end
327
328 ! ---------------------------------------------------------
329 subroutine fft_init(this, nn, dim, type, library, optimize, optimize_parity, comm, mpi_grp, use_aligned)
330 type(fft_t), intent(inout) :: this
331 integer, intent(inout) :: nn(3)
332 integer, intent(in) :: dim
333 integer, intent(in) :: type
334 integer, intent(in) :: library
335 logical, intent(in) :: optimize(3)
336 integer, intent(in) :: optimize_parity(3)
338 type(mpi_comm), optional, intent(out) :: comm
339 type(mpi_grp_t), optional, intent(in) :: mpi_grp
340 logical, optional :: use_aligned
341
342 integer :: ii, jj, fft_dim, idir, column_size, row_size, n3
343 integer :: n_1, n_2, n_3, nn_temp(3)
344 integer :: library_
345 type(mpi_grp_t) :: mpi_grp_
346 integer(int64) :: number_points, alloc_size
347
348#ifdef HAVE_PFFT
349 integer :: ierror
350#endif
351
352 push_sub(fft_init)
353
354 assert(fft_initialized)
355
356 assert(type == fft_real .or. type == fft_complex)
357
358 mpi_grp_ = mpi_world
359 if (present(mpi_grp)) mpi_grp_ = mpi_grp
360
361 this%aligned_memory = optional_default(use_aligned, .false.)
362
363 ! First, figure out the dimensionality of the FFT.
364 fft_dim = 0
365 do ii = 1, dim
366 if (nn(ii) <= 1) exit
367 fft_dim = fft_dim + 1
368 end do
369
370 if (fft_dim == 0) then
371 message(1) = "Internal error in fft_init: apparently, a 1x1x1 FFT is required."
372 call messages_fatal(1)
373 end if
374
375 if (fft_dim > 3) call messages_not_implemented('FFT for dimension > 3')
376
377 library_ = library
378 nn_temp(1:fft_dim) = nn(1:fft_dim)
380 select case (library_)
381 case (fftlib_accel)
382 ! FFT optimization
383 if(any(optimize_parity(1:fft_dim) > 1)) then
384 message(1) = "Internal error in fft_init: optimize_parity must be negative, 0, or 1."
385 call messages_fatal(1)
386 end if
387
388 do ii = 1, fft_dim
389 nn_temp(ii) = fft_size(nn(ii), (/2, 3, 5, 7/), optimize_parity(ii))
390 if (fft_optimize .and. optimize(ii)) nn(ii) = nn_temp(ii)
391 end do
392
393 case (fftlib_nfft)
394
395 do ii = 1, fft_dim
396 !NFFT likes even grids
397 !The underlying FFT grids are optimized inside the nfft_init routine
398 if (int(nn(ii)/2)*2 /= nn(ii) .and. (fft_optimize .and. optimize(ii)))&
399 nn(ii)=nn(ii)+1
400 end do
401
402 case (fftlib_pnfft)
403
404 do ii = 1, fft_dim
405 !also PNFFT likes even grids
406 if (int(nn(ii)/2)*2 /= nn(ii)) nn(ii) = nn(ii) + 1
407 end do
408
409 if (fft_dim < 3) then
410 call messages_not_implemented('PNFFT support for dimension < 3')
411 end if
412
413 case default
414
415 if (fft_dim < 3 .and. library_ == fftlib_pfft) then
416 call messages_not_implemented('PFFT support for dimension < 3')
417 end if
418
419 ! FFT optimization
420 if (any(optimize_parity(1:fft_dim) > 1)) then
421 message(1) = "Internal error in fft_init: optimize_parity must be negative, 0, or 1."
422 call messages_fatal(1)
423 end if
424
425 do ii = 1, fft_dim
426 call loct_fft_optimize(nn_temp(ii), optimize_parity(ii))
427 if (fft_optimize .and. optimize(ii)) nn(ii) = nn_temp(ii)
428 end do
429
430 end select
431
432 ! find out if fft has already been allocated
433 jj = 0
434 do ii = fft_max, 1, -1
435 if (fft_refs(ii) /= fft_null) then
436 if (all(nn(1:dim) == fft_array(ii)%rs_n_global(1:dim)) .and. type == fft_array(ii)%type &
437 .and. library_ == fft_array(ii)%library .and. library_ /= fftlib_nfft &
438 .and. library_ /= fftlib_pnfft &
439 .and. this%aligned_memory .eqv. fft_array(ii)%aligned_memory) then
440
441 ! NFFT and PNFFT plans are always allocated from scratch since they
442 ! are very likely to be different
443 this = fft_array(ii) ! return a copy
444 fft_refs(ii) = fft_refs(ii) + 1 ! increment the ref count
445 if (present(comm)) comm = fft_array(ii)%comm ! also return the MPI communicator
446 pop_sub(fft_init)
447 return
448 end if
449 else
450 jj = ii
451 end if
452 end do
453
454 if (jj == 0) then
455 message(1) = "Not enough slots for FFTs."
456 message(2) = "Please increase FFT_MAX in fft.F90 and recompile."
457 call messages_fatal(2)
458 end if
459
460 ! jj now contains an empty slot
461 fft_refs(jj) = 1
462 fft_array(jj)%slot = jj
463 fft_array(jj)%type = type
464 fft_array(jj)%library = library_
465 fft_array(jj)%rs_n_global(1:dim) = nn(1:dim)
466 fft_array(jj)%rs_n_global(dim+1:) = 1
467 nullify(fft_array(jj)%drs_data)
468 nullify(fft_array(jj)%zrs_data)
469 nullify(fft_array(jj)%fs_data)
470
471 fft_array(jj)%aligned_memory = this%aligned_memory
472
473 ! Initialize parallel communicator
474 select case (library_)
475 case (fftlib_pfft)
476#ifdef HAVE_PFFT
477 call pfft_init()
478
479 call pfft_decompose(mpi_grp_%size, column_size, row_size)
480
481 ierror = pfft_create_procmesh_2d(mpi_grp_%comm%MPI_VAL, column_size, row_size, fft_array(jj)%comm%MPI_VAL)
482
483 if (ierror /= 0) then
484 message(1) = "The number of rows and columns in PFFT processor grid is not equal to "
485 message(2) = "the number of processor in the MPI communicator."
486 message(3) = "Please check it."
487 call messages_fatal(3)
488 end if
489#endif
490
491 case (fftlib_pnfft)
492#ifdef HAVE_PNFFT
493 call pnfft_init_procmesh(fft_array(jj)%pnfft, mpi_grp_, fft_array(jj)%comm)
494#endif
495 case default
496 fft_array(jj)%comm = mpi_comm_undefined
497
498 end select
499
500 if (present(comm)) comm = fft_array(jj)%comm
501
502 ! Get dimentions of arrays
503 select case (library_)
504 case (fftlib_fftw)
505 call fftw_get_dims(fft_array(jj)%rs_n_global, type == fft_real, fft_array(jj)%fs_n_global)
506 fft_array(jj)%rs_n = fft_array(jj)%rs_n_global
507 fft_array(jj)%fs_n = fft_array(jj)%fs_n_global
508 fft_array(jj)%rs_istart = 1
509 fft_array(jj)%fs_istart = 1
510
511 if (this%aligned_memory) then
512 call fftw_alloc_memory(fft_array(jj)%rs_n_global, type == fft_real, fft_array(jj)%fs_n_global, &
513 fft_array(jj)%drs_data, fft_array(jj)%zrs_data, fft_array(jj)%fs_data)
514 end if
515
516 case (fftlib_pfft)
517#ifdef HAVE_PFFT
518 call pfft_get_dims(fft_array(jj)%rs_n_global, comm%MPI_VAL, type == fft_real, &
519 alloc_size, fft_array(jj)%fs_n_global, fft_array(jj)%rs_n, &
520 fft_array(jj)%fs_n, fft_array(jj)%rs_istart, fft_array(jj)%fs_istart)
521 !write(*,"(6(A,3I4,/),A,I10,/)") "PFFT: rs_n_global = ",fft_array(jj)%rs_n_global,&
522 ! "fs_n_global = ",fft_array(jj)%fs_n_global,&
523 ! "rs_n = ",fft_array(jj)%rs_n,&
524 ! "fs_n = ",fft_array(jj)%fs_n,&
525 ! "rs_istart = ",fft_array(jj)%rs_istart,&
526 ! "fs_istart = ",fft_array(jj)%fs_istart,&
527 ! "alloc_size = ",alloc_size
528#endif
529
530 ! Allocate memory. Note that PFFT may need extra memory space
531 ! and that in fourier space the function will be transposed
532 if (type == fft_real) then
533 n_1 = max(1, fft_array(jj)%rs_n(1))
534 n_2 = max(1, fft_array(jj)%rs_n(2))
535 n_3 = max(1, fft_array(jj)%rs_n(3))
536
537 n3 = ceiling(real(2*alloc_size)/real(n_1*n_2))
538 safe_allocate(fft_array(jj)%drs_data(1:n_1, 1:n_2, 1:n3))
539 else
540 n3 = ceiling(real(alloc_size)/real(fft_array(jj)%rs_n(1)*fft_array(jj)%rs_n(2)))
541 safe_allocate(fft_array(jj)%zrs_data(1:fft_array(jj)%rs_n(1), 1:fft_array(jj)%rs_n(2), 1:n3))
542 end if
543
544 n_1 = max(1, fft_array(jj)%fs_n(1))
545 n_2 = max(1, fft_array(jj)%fs_n(2))
546 n_3 = max(1, fft_array(jj)%fs_n(3))
547
548 n3 = ceiling(real(alloc_size)/real(n_3*n_1))
549 safe_allocate(fft_array(jj)%fs_data(1:n_3, 1:n_1, 1:n3))
550
551 case (fftlib_accel)
552 call fftw_get_dims(fft_array(jj)%rs_n_global, (type == fft_real), fft_array(jj)%fs_n_global)
553 fft_array(jj)%rs_n = fft_array(jj)%rs_n_global
554 fft_array(jj)%fs_n = fft_array(jj)%fs_n_global
555 fft_array(jj)%rs_istart = 1
556 fft_array(jj)%fs_istart = 1
557
558 case (fftlib_nfft)
559 fft_array(jj)%fs_n_global = fft_array(jj)%rs_n_global
560 fft_array(jj)%rs_n = fft_array(jj)%rs_n_global
561 fft_array(jj)%fs_n = fft_array(jj)%fs_n_global
562 fft_array(jj)%rs_istart = 1
563 fft_array(jj)%fs_istart = 1
564
565 case (fftlib_pnfft)
566 fft_array(jj)%fs_n_global = fft_array(jj)%rs_n_global
567 fft_array(jj)%rs_n = fft_array(jj)%rs_n_global
568 fft_array(jj)%fs_n = fft_array(jj)%fs_n_global
569 fft_array(jj)%rs_istart = 1
570 fft_array(jj)%fs_istart = 1
571 ! indices partition is performed together with the plan preparation
572
573
574 end select
575
576 ! Prepare plans
577 select case (library_)
578 case (fftlib_fftw)
579 if (.not. this%aligned_memory) then
580 call fftw_prepare_plan(fft_array(jj)%planf, fft_dim, fft_array(jj)%rs_n_global, &
581 type == fft_real, fftw_forward, fft_prepare_plan+fftw_unaligned)
582 call fftw_prepare_plan(fft_array(jj)%planb, fft_dim, fft_array(jj)%rs_n_global, &
583 type == fft_real, fftw_backward, fft_prepare_plan+fftw_unaligned)
584 else
585 if (type == fft_real) then
586 call fftw_prepare_plan(fft_array(jj)%planf, fft_dim, fft_array(jj)%rs_n_global, &
587 type == fft_real, fftw_forward, fft_prepare_plan, &
588 din_=fft_array(jj)%drs_data, cout_=fft_array(jj)%fs_data)
589 call fftw_prepare_plan(fft_array(jj)%planb, fft_dim, fft_array(jj)%rs_n_global, &
590 type == fft_real, fftw_backward, fft_prepare_plan, &
591 din_=fft_array(jj)%drs_data, cout_=fft_array(jj)%fs_data)
592 else
593 call fftw_prepare_plan(fft_array(jj)%planf, fft_dim, fft_array(jj)%rs_n_global, &
594 type == fft_real, fftw_forward, fft_prepare_plan, &
595 cin_=fft_array(jj)%zrs_data, cout_=fft_array(jj)%fs_data)
596 call fftw_prepare_plan(fft_array(jj)%planb, fft_dim, fft_array(jj)%rs_n_global, &
597 type == fft_real, fftw_backward, fft_prepare_plan, &
598 cin_=fft_array(jj)%zrs_data, cout_=fft_array(jj)%fs_data)
599 end if
600 end if
601
602 case (fftlib_nfft)
603#ifdef HAVE_NFFT
604 call nfft_copy_info(this%nfft,fft_array(jj)%nfft) !copy default parameters set in the calling routine
605 call nfft_init(fft_array(jj)%nfft, nfft_options, fft_array(jj)%rs_n_global, &
606 fft_dim, fft_array(jj)%rs_n_global, optimize = .true.)
607#endif
608 case (fftlib_pfft)
609#ifdef HAVE_PFFT
610 if (type == fft_real) then
611 call pfft_prepare_plan_r2c(fft_array(jj)%planf, fft_array(jj)%rs_n_global, fft_array(jj)%drs_data, &
612 fft_array(jj)%fs_data, fftw_forward, fft_prepare_plan, comm%MPI_VAL)
613 call pfft_prepare_plan_c2r(fft_array(jj)%planb, fft_array(jj)%rs_n_global, fft_array(jj)%fs_data, &
614 fft_array(jj)%drs_data, fftw_backward, fft_prepare_plan, comm%MPI_VAL)
615 else
616 call pfft_prepare_plan_c2c(fft_array(jj)%planf, fft_array(jj)%rs_n_global, fft_array(jj)%zrs_data, &
617 fft_array(jj)%fs_data, fftw_forward, fft_prepare_plan, comm%MPI_VAL)
618 call pfft_prepare_plan_c2c(fft_array(jj)%planb, fft_array(jj)%rs_n_global, fft_array(jj)%fs_data, &
619 fft_array(jj)%zrs_data, fftw_backward, fft_prepare_plan, comm%MPI_VAL)
620 end if
621#endif
622 case (fftlib_pnfft)
623#ifdef HAVE_PNFFT
624 call pnfft_copy_params(this%pnfft, fft_array(jj)%pnfft) ! pass default parameters like in NFFT
625
626 ! NOTE:
627 ! PNFFT (likewise NFFT) breaks the symmetry between real space and Fourier space
628 ! by allowing the possibility to have an unstructured grid in rs and by
629 ! using different parallelizations (the rs is transposed w.r.t. fs).
630 ! Octopus, in fourier_space_m, uses the convention for which the mapping
631 ! between rs and fs is done with a forward transform (and fs->rs with backward).
632 ! This is exactly the opposite of the definitions used by all the libraries
633 ! performing FFTs (PNFFT and NFFT included) [see e.g. M. Frigo, and S. G. Johnson, Proc.
634 ! IEEE 93, 216-231 (2005)].
635 ! While this leads to no problem on ordinary ffts where fs and rs can be exchanged
636 ! it does makes a fundamental difference for PNFFT (for some reason I don`t know NFFT
637 ! is still symmetric).
638 ! Therefore, in order to perform rs->fs tranforms with PNFFT one should use the
639 ! backward transform.
640
641 call pnfft_init_plan(fft_array(jj)%pnfft, pnfft_options, comm, fft_array(jj)%fs_n_global, &
642 fft_array(jj)%fs_n, fft_array(jj)%fs_istart, fft_array(jj)%rs_n, fft_array(jj)%rs_istart)
643#endif
644 case (fftlib_accel)
645
646 fft_array(jj)%stride_rs(1) = 1
647 fft_array(jj)%stride_fs(1) = 1
648 do ii = 2, fft_dim
649 fft_array(jj)%stride_rs(ii) = fft_array(jj)%stride_rs(ii - 1)*fft_array(jj)%rs_n(ii - 1)
650 fft_array(jj)%stride_fs(ii) = fft_array(jj)%stride_fs(ii - 1)*fft_array(jj)%fs_n(ii - 1)
651 end do
652
653#ifdef HAVE_CUDA
654 if (type == fft_real) then
655 call cuda_fft_plan3d(fft_array(jj)%cuda_plan_fw, &
656 fft_array(jj)%rs_n_global(3), fft_array(jj)%rs_n_global(2), fft_array(jj)%rs_n_global(1), cufft_d2z, &
657 accel%cuda_stream)
658 call cuda_fft_plan3d(fft_array(jj)%cuda_plan_bw, &
659 fft_array(jj)%rs_n_global(3), fft_array(jj)%rs_n_global(2), fft_array(jj)%rs_n_global(1), cufft_z2d, &
660 accel%cuda_stream)
661 else
662 call cuda_fft_plan3d(fft_array(jj)%cuda_plan_fw, &
663 fft_array(jj)%rs_n_global(3), fft_array(jj)%rs_n_global(2), fft_array(jj)%rs_n_global(1), cufft_z2z, &
664 accel%cuda_stream)
665 call cuda_fft_plan3d(fft_array(jj)%cuda_plan_bw, &
666 fft_array(jj)%rs_n_global(3), fft_array(jj)%rs_n_global(2), fft_array(jj)%rs_n_global(1), cufft_z2z, &
667 accel%cuda_stream)
668 end if
669#endif
670
671 case default
672 call messages_write('Invalid FFT library.')
673 call messages_fatal()
674 end select
675
676 this = fft_array(jj)
677
678 ! Write information
679 if (.not. (library_ == fftlib_nfft .or. library_ == fftlib_pnfft)) then
680 call messages_write('Info: FFT grid dimensions =')
681 number_points = 1
682 do idir = 1, dim
683 call messages_write(fft_array(jj)%rs_n_global(idir))
684 if (idir < dim) call messages_write(" x ")
685 ! do the multiplication in a integer(int64) to avoid overflow for large grids
686 number_points = number_points * fft_array(jj)%rs_n_global(idir)
687 end do
688 call messages_new_line()
689
690 call messages_write(' Total grid size =')
691 call messages_write(number_points)
692 call messages_write(' (')
693 call messages_write(number_points*8.0_real64, units = unit_megabytes, fmt = '(f9.1)')
694 call messages_write(' )')
695 if (any(nn(1:fft_dim) /= nn_temp(1:fft_dim))) then
696 call messages_new_line()
697 call messages_write(' Inefficient FFT grid. A better grid would be: ')
698 do idir = 1, fft_dim
699 call messages_write(nn_temp(idir))
700 end do
701 end if
702 call messages_info()
703 end if
704
705 select case (library_)
706 case (fftlib_pfft)
707 write(message(1),'(a)') "Info: FFT library = PFFT"
708 write(message(2),'(a)') "Info: PFFT processor grid"
709 write(message(3),'(a, i9)') " No. of processors = ", mpi_grp_%size
710 write(message(4),'(a, i9)') " No. of columns in the proc. grid = ", column_size
711 write(message(5),'(a, i9)') " No. of rows in the proc. grid = ", row_size
712 write(message(6),'(a, i9)') " The size of integer is = ", c_intptr_t
713 call messages_info(6)
714
715 case (fftlib_pnfft)
716#ifdef HAVE_PNFFT
717 call messages_write("Info: FFT library = PNFFT")
718 call messages_info()
719 call pnfft_write_info(fft_array(jj)%pnfft)
720#endif
721 case (fftlib_nfft)
722#ifdef HAVE_NFFT
723 call messages_write("Info: FFT library = NFFT")
724 call messages_info()
725 call nfft_write_info(fft_array(jj)%nfft)
726#endif
727 end select
728
729 pop_sub(fft_init)
730 end subroutine fft_init
731
732 ! ---------------------------------------------------------
736 subroutine fft_init_stage1(this, namespace, XX, nn)
737 type(fft_t), intent(inout) :: this
740 type(namespace_t), intent(in) :: namespace
741 real(real64), intent(in) :: xx(:,:)
742 integer, optional, intent(in) :: nn(:)
743
744 integer :: slot
745
746 push_sub(fft_init_stage1)
747
748 assert(size(xx,2) == 3)
749
750 slot = this%slot
751 select case (fft_array(slot)%library)
752 case (fftlib_fftw)
753 !Do nothing
754 case (fftlib_nfft)
755#ifdef HAVE_NFFT
756 assert(present(nn))
757 call nfft_precompute(fft_array(slot)%nfft, &
758 xx(1:nn(1),1), xx(1:nn(2),2), xx(1:nn(3),3))
759#endif
760 case (fftlib_pfft)
761 !Do nothing
762 case (fftlib_accel)
763 !Do nothing
764 case (fftlib_pnfft)
765#ifdef HAVE_PNFFT
766 call pnfft_set_sp_nodes(fft_array(slot)%pnfft, namespace, xx)
767#endif
768 case default
769 call messages_write('Invalid FFT library.')
770 call messages_fatal()
771 end select
773
774
775 pop_sub(fft_init_stage1)
776 end subroutine fft_init_stage1
777 ! ---------------------------------------------------------
778 subroutine fft_end(this)
779 type(fft_t), intent(inout) :: this
780
781 integer :: ii
782
783 push_sub(fft_end)
784
785 ii = this%slot
786 if (fft_refs(ii) == fft_null) then
787 message(1) = "Trying to deallocate FFT that has not been allocated."
788 call messages_warning(1)
789 else
790 if (fft_refs(ii) > 1) then
791 fft_refs(ii) = fft_refs(ii) - 1
792 else
793 select case (fft_array(ii)%library)
794 case (fftlib_fftw)
795 call fftw_destroy_plan(fft_array(ii)%planf)
796 call fftw_destroy_plan(fft_array(ii)%planb)
797
798 if (this%aligned_memory) then
799 call fftw_free_memory(this%type == fft_real, &
800 fft_array(ii)%drs_data, fft_array(ii)%zrs_data, fft_array(ii)%fs_data)
801 end if
802
803 case (fftlib_pfft)
804#ifdef HAVE_PFFT
805 call pfft_destroy_plan(fft_array(ii)%planf)
806 call pfft_destroy_plan(fft_array(ii)%planb)
807#endif
808 safe_deallocate_p(fft_array(ii)%drs_data)
809 safe_deallocate_p(fft_array(ii)%zrs_data)
810 safe_deallocate_p(fft_array(ii)%fs_data)
811
812 case (fftlib_accel)
813#ifdef HAVE_CUDA
814 call cuda_fft_destroy(fft_array(ii)%cuda_plan_fw)
815 call cuda_fft_destroy(fft_array(ii)%cuda_plan_bw)
816#endif
817
818 case (fftlib_nfft)
819#ifdef HAVE_NFFT
820 call nfft_end(fft_array(ii)%nfft)
821#endif
822 case (fftlib_pnfft)
823#ifdef HAVE_PNFFT
824 call pnfft_end(fft_array(ii)%pnfft)
825#endif
826 end select
827 fft_refs(ii) = fft_null
828 end if
829 end if
830 this%slot = 0
831
832 pop_sub(fft_end)
833 end subroutine fft_end
834
835 ! ---------------------------------------------------------
836 subroutine fft_copy(fft_i, fft_o)
837 type(fft_t), intent(in) :: fft_i
838 type(fft_t), intent(inout) :: fft_o
839
840 push_sub(fft_copy)
841
842 if (fft_o%slot > 0) then
843 call fft_end(fft_o)
844 end if
845 assert(fft_i%slot >= 1.and.fft_i%slot <= fft_max)
846 assert(fft_refs(fft_i%slot) > 0)
847
848 fft_o = fft_i
849 fft_refs(fft_i%slot) = fft_refs(fft_i%slot) + 1
850
851 pop_sub(fft_copy)
852 end subroutine fft_copy
853
854 ! ---------------------------------------------------------
855 subroutine fft_get_dims(fft, rs_n_global, fs_n_global, rs_n, fs_n, rs_istart, fs_istart)
856 type(fft_t), intent(in) :: fft
857 integer, intent(out) :: rs_n_global(1:3)
858 integer, intent(out) :: fs_n_global(1:3)
859 integer, intent(out) :: rs_n(1:3)
860 integer, intent(out) :: fs_n(1:3)
861 integer, intent(out) :: rs_istart(1:3)
862 integer, intent(out) :: fs_istart(1:3)
863
864 integer :: slot
865
866 push_sub(fft_get_dims)
867
868 slot = fft%slot
869 rs_n_global(1:3) = fft_array(slot)%rs_n_global(1:3)
870 fs_n_global(1:3) = fft_array(slot)%fs_n_global(1:3)
871 rs_n(1:3) = fft_array(slot)%rs_n(1:3)
872 fs_n(1:3) = fft_array(slot)%fs_n(1:3)
873 rs_istart(1:3) = fft_array(slot)%rs_istart(1:3)
874 fs_istart(1:3) = fft_array(slot)%fs_istart(1:3)
876 pop_sub(fft_get_dims)
877 end subroutine fft_get_dims
878
879 ! ---------------------------------------------------------
881 pure function pad_feq(ii, nn, mode)
882 integer, intent(in) :: ii,nn
883 logical, intent(in) :: mode
884 integer :: pad_feq
885
886 ! no push_sub: called too frequently
887
888 if (mode) then ! index to frequency number
889 if (ii <= nn/2 + 1) then
890 pad_feq = ii - 1
891 else
892 pad_feq = ii - nn - 1
893 end if
894 else
895 if (ii >= 0) then
896 pad_feq = ii + 1
897 else
898 pad_feq = ii + nn + 1
899 end if
900 end if
901
902 end function pad_feq
903
904 ! -------------------------------------------------------
905
906 integer function fft_size(size, factors, parity)
907 integer, intent(in) :: size
908 integer, intent(in) :: factors(:)
909 integer, intent(in) :: parity
910
911 integer :: nfactors
912 integer :: nondiv
913 integer, allocatable :: exponents(:)
914
915 push_sub(fft_size)
916
917 nfactors = ubound(factors, dim = 1)
918
919 safe_allocate(exponents(1:nfactors))
920
921 fft_size = size
922 do
923 call get_exponents(fft_size, nfactors, factors, exponents, nondiv)
924 if (nondiv == 1 .and. mod(fft_size, 2) == parity) exit
925 fft_size = fft_size + 1
926 end do
927
928 safe_deallocate_a(exponents)
930 pop_sub(fft_size)
931 end function fft_size
932
933 ! -------------------------------------------------------
934
935 subroutine get_exponents(num, nfactors, factors, exponents, nondiv)
936 integer, intent(in) :: num
937 integer, intent(in) :: nfactors
938 integer, intent(in) :: factors(:)
939 integer, intent(out) :: exponents(:)
940 integer, intent(out) :: nondiv
941
942 integer :: ifactor
943
944 push_sub(get_exponents)
945
946 nondiv = num
947 do ifactor = 1, nfactors
948 exponents(ifactor) = 0
949 do
950 if (mod(nondiv, factors(ifactor)) /= 0) exit
951 nondiv = nondiv/factors(ifactor)
952 exponents(ifactor) = exponents(ifactor) + 1
953 end do
954 end do
955
957 end subroutine get_exponents
958
959
960 ! ----------------------------------------------------------
961
962 subroutine fft_operation_count(fft)
963 type(fft_t), intent(in) :: fft
964
965 real(real64) :: fullsize
966
967 push_sub(fft_operation_count)
968
969 fullsize = product(real(fft%fs_n(1:3), real64))
970 call profiling_count_operations(5.0_real64*fullsize*log(fullsize)/log(m_two))
971
972 pop_sub(fft_operation_count)
973 end subroutine fft_operation_count
974
975 !-----------------------------------------------------------------
976 subroutine fft_gg_transform(gg_in, temp, periodic_dim, latt, qq, gg, modg2)
977 integer, intent(in) :: gg_in(:)
978 real(real64), intent(in) :: temp(:)
979 integer, intent(in) :: periodic_dim
980 type(lattice_vectors_t), intent(in) :: latt
981 real(real64), intent(in) :: qq(:)
982 real(real64), intent(inout) :: gg(:)
983 real(real64), intent(out) :: modg2
984
985 ! no PUSH_SUB, called too frequently
986
987 gg(1:3) = gg_in(1:3)
988 gg(1:periodic_dim) = gg(1:periodic_dim) + qq(1:periodic_dim)
989 gg(1:3) = gg(1:3) * temp(1:3)
990 gg(1:3) = matmul(latt%klattice_primitive(1:3,1:3),gg(1:3))
991 modg2 = sum(gg(1:3)**2)
992
993 end subroutine fft_gg_transform
994
995 ! ----------------------------------------------------------
996
999 real(real64) pure function fft_scaling_factor(fft) result(scaling_factor)
1000 type(fft_t), intent(in) :: fft
1001
1002 ! for the moment this factor is handled by the backwards transform for most libraries
1003 scaling_factor = m_one
1004
1005 select case (fft_array(fft%slot)%library)
1006 case (fftlib_accel)
1007#ifdef HAVE_CUDA
1008 scaling_factor = m_one/real(fft_array(fft%slot)%rs_n_global(1), real64)
1009 scaling_factor = scaling_factor/real(fft_array(fft%slot)%rs_n_global(2), real64)
1010 scaling_factor = scaling_factor/real(fft_array(fft%slot)%rs_n_global(3), real64)
1011#endif
1012 end select
1013
1014 end function fft_scaling_factor
1016 ! ----------------------------------------------------------
1019 !
1020 ! Inspired by the routine bounds from Abinit
1021 real(real64) function fft_get_ecut_from_box(box_dim, fs_istart, latt, gspacing, periodic_dim, qq) result(ecut)
1022 integer, intent(in) :: box_dim(:)
1023 integer, intent(in) :: fs_istart(:)
1024 type(lattice_vectors_t), intent(in) :: latt
1025 real(real64), intent(in) :: gspacing(:)
1026 integer, intent(in) :: periodic_dim
1027 real(real64), intent(in) :: qq(:)
1028
1029 integer :: lx, ix, iy, iz, idir, idir2, idir3
1030 real(real64) :: dminsq, gg(3), modg2
1031 integer :: box_dim_(3), ixx(3)
1032 integer :: ming(3), maxg(3)
1033
1034 ! no PUSH_SUB, called too frequently
1035
1036 assert(periodic_dim > 0)
1037
1038 box_dim_(1:periodic_dim) = box_dim(1:periodic_dim)
1039 if (periodic_dim < 3) box_dim_(periodic_dim+1:3) = 1
1040
1041 ! We first need to remove asymetric planes for the case of even FFT grids
1042 ming = 1
1043 maxg = 1
1044 do idir = 1, periodic_dim
1045 do lx = 1, box_dim(idir)
1046 ix = fs_istart(idir) + lx - 1
1047 ixx(idir) = pad_feq(ix, box_dim(idir), .true.)
1048 ming(idir) = min(ming(idir), ixx(idir))
1049 maxg(idir) = max(maxg(idir), ixx(idir))
1050 end do
1051 maxg(idir) = min(abs(ming(idir)), maxg(idir))
1052 end do
1053
1054 ! Given the boundaries, we can search the min distance, which gives us the the cutoff energy
1055 dminsq = m_huge
1056 do idir = 1, periodic_dim
1057 idir2 = mod(idir, 3)+1
1058 idir3 = mod(idir+1, 3)+1
1059
1060 ! Negative plane
1061 ixx(idir) = -maxg(idir)
1062 do iy = -maxg(idir2), maxg(idir2)
1063 ixx(idir2) = iy
1064 do iz = -maxg(idir3), maxg(idir3)
1065 ixx(idir3) = iz
1066 call fft_gg_transform(ixx, gspacing, periodic_dim, latt, qq, gg, modg2)
1067 dminsq = min(dminsq, sum(gg(1:periodic_dim)**2))
1068 end do
1069 end do
1070 ! Positive plane
1071 ixx(idir) = maxg(idir)
1072 do iy = -maxg(idir2), maxg(idir2)
1073 ixx(idir2) = iy
1074 do iz = -maxg(idir3), maxg(idir3)
1075 ixx(idir3) = iz
1076 call fft_gg_transform(ixx, gspacing, periodic_dim, latt, qq, gg, modg2)
1077 dminsq = min(dminsq, sum(gg(1:periodic_dim)**2))
1078 end do
1079 end do
1080 end do
1081
1082 ecut = m_half * dminsq
1083
1084 end function fft_get_ecut_from_box
1085
1086#include "undef.F90"
1087#include "real.F90"
1088#include "fft_inc.F90"
1089
1090#include "undef.F90"
1091#include "complex.F90"
1092#include "fft_inc.F90"
1093
1094end module fft_oct_m
1095
1096!! Local Variables:
1097!! mode: f90
1098!! coding: utf-8
1099!! End:
subroutine optimize()
if write to the Free Software Franklin Fifth USA !If the compiler accepts long Fortran it is better to use that and build all the preprocessor definitions in one line In !this the debuggers will provide the right line numbers !If the compiler accepts line number then CARDINAL and ACARDINAL !will put them just a new line or a ampersand plus a new line !These macros should be used in macros that span several lines They should by !put immedialty before a line where a compilation error might occur and at the !end of the macro !Note that the cardinal and newline words are substituted by the program !preprocess pl by the ampersand and by a real new line just before compilation !The assertions are ignored if the code is compiled in not debug mode(NDEBUG ! is defined). Otherwise it is merely a logical assertion that
double log(double __x) __attribute__((__nothrow__
pure logical function, public accel_is_enabled()
Definition: accel.F90:380
type(accel_t), public accel
Definition: accel.F90:250
Fast Fourier Transform module. This module provides a single interface that works with different FFT ...
Definition: fft.F90:120
subroutine zfft_forward_accel(fft, in, out)
Definition: fft.F90:1528
subroutine dfft_backward_1d(fft, in, out)
Definition: fft.F90:1366
integer, parameter cufft_z2d
Definition: fft.F90:256
subroutine get_exponents(num, nfactors, factors, exponents, nondiv)
Definition: fft.F90:930
subroutine, public fft_init(this, nn, dim, type, library, optimize, optimize_parity, comm, mpi_grp, use_aligned)
Definition: fft.F90:400
subroutine, public fft_all_init(namespace)
initialize the table
Definition: fft.F90:269
real(real64) function, public fft_get_ecut_from_box(box_dim, fs_istart, latt, gspacing, periodic_dim, qq)
Given an fft box (fixed by the real-space grid), it returns the cutoff energy of the sphere that fits...
Definition: fft.F90:1016
subroutine dfft_forward_3d(fft, in, out, norm)
Definition: fft.F90:1151
subroutine dfft_forward_accel(fft, in, out)
Definition: fft.F90:1227
subroutine, public fft_end(this)
Definition: fft.F90:773
subroutine, public fft_gg_transform(gg_in, temp, periodic_dim, latt, qq, gg, modg2)
Definition: fft.F90:971
real(real64) pure function, public fft_scaling_factor(fft)
This function returns the factor required to normalize a function after a forward and backward transf...
Definition: fft.F90:994
integer, parameter cufft_z2z
Definition: fft.F90:256
pure integer function, public pad_feq(ii, nn, mode)
convert between array index and G-vector
Definition: fft.F90:876
subroutine zfft_backward_1d(fft, in, out)
Definition: fft.F90:1667
integer, parameter, public fftlib_accel
Definition: fft.F90:179
subroutine, public fft_all_end()
delete all plans
Definition: fft.F90:380
integer function fft_size(size, factors, parity)
Definition: fft.F90:901
subroutine zfft_backward_3d(fft, in, out, norm)
Definition: fft.F90:1567
subroutine fft_operation_count(fft)
Definition: fft.F90:957
subroutine zfft_backward_accel(fft, in, out)
Definition: fft.F90:1647
integer, parameter cufft_c2r
Definition: fft.F90:256
integer, parameter cufft_c2c
Definition: fft.F90:256
integer, parameter, public fft_real
Definition: fft.F90:174
subroutine, public fft_get_dims(fft, rs_n_global, fs_n_global, rs_n, fs_n, rs_istart, fs_istart)
Definition: fft.F90:850
integer, parameter, public fft_complex
Definition: fft.F90:174
integer, parameter, public fftlib_nfft
Definition: fft.F90:179
subroutine dfft_backward_3d(fft, in, out, norm)
Definition: fft.F90:1266
subroutine, public fft_copy(fft_i, fft_o)
Definition: fft.F90:831
subroutine dfft_forward_1d(fft, in, out)
Definition: fft.F90:1248
integer, parameter cufft_d2z
Definition: fft.F90:256
integer, parameter fft_null
Definition: fft.F90:187
integer, parameter, public fftlib_pnfft
Definition: fft.F90:179
subroutine zfft_forward_1d(fft, in, out)
Definition: fft.F90:1549
subroutine zfft_forward_3d(fft, in, out, norm)
Definition: fft.F90:1463
integer, parameter, public fftlib_pfft
Definition: fft.F90:179
subroutine dfft_backward_accel(fft, in, out)
Definition: fft.F90:1346
integer, parameter, public fftlib_fftw
Definition: fft.F90:179
subroutine, public fft_init_stage1(this, namespace, XX, nn)
Some fft-libraries (only NFFT for the moment) need an additional precomputation stage that depends on...
Definition: fft.F90:731
subroutine, public fftw_prepare_plan(plan, dim, n, is_real, sign, flags, din_, cin_, cout_)
Definition: fftw.F90:189
subroutine, public fftw_free_memory(is_real, drs_data, zrs_data, fs_data)
Definition: fftw.F90:346
subroutine, public fftw_get_dims(rs_n, is_real, fs_n)
Definition: fftw.F90:307
subroutine, public fftw_alloc_memory(rs_n, is_real, fs_n, drs_data, zrs_data, fs_data)
Definition: fftw.F90:321
real(real64), parameter, public m_two
Definition: global.F90:193
real(real64), parameter, public m_huge
Definition: global.F90:209
real(real64), parameter, public m_half
Definition: global.F90:197
real(real64), parameter, public m_one
Definition: global.F90:192
subroutine, public messages_not_implemented(feature, namespace)
Definition: messages.F90:1091
subroutine, public messages_warning(no_lines, all_nodes, namespace)
Definition: messages.F90:525
subroutine, public messages_new_line()
Definition: messages.F90:1112
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
Definition: messages.F90:162
subroutine, public messages_fatal(no_lines, only_root_writes, namespace)
Definition: messages.F90:410
subroutine, public messages_input_error(namespace, var, details, row, column)
Definition: messages.F90:691
subroutine, public messages_info(no_lines, iunit, debug_only, stress, all_nodes, namespace)
Definition: messages.F90:594
type(mpi_comm), parameter, public mpi_comm_undefined
used to indicate a communicator has not been initialized
Definition: mpi.F90:138
type(mpi_grp_t), public mpi_world
Definition: mpi.F90:272
subroutine, public nfft_write_info(nfft)
Definition: nfft.F90:325
subroutine, public nfft_end(nfft)
Definition: nfft.F90:388
subroutine, public nfft_init(nfft, nfft_options, N, dim, M, optimize)
Definition: nfft.F90:259
subroutine, public nfft_copy_info(in, out)
Definition: nfft.F90:402
subroutine, public nfft_precompute(nfft, X1, X2, X3)
Definition: nfft.F90:431
subroutine, public nfft_guru_options(nfft, namespace)
Definition: nfft.F90:192
The low level module to work with the PFFT library. http:
Definition: pfft.F90:128
subroutine, public pfft_prepare_plan_c2c(plan, n, in, out, sign, flags, mpi_comm)
Octopus subroutine to prepare a PFFT plan real to complex.
Definition: pfft.F90:262
subroutine, public pfft_prepare_plan_c2r(plan, n, in, out, sign, flags, mpi_comm)
Octopus subroutine to prepare a PFFT plan real to complex.
Definition: pfft.F90:227
subroutine, public pfft_decompose(n_proc, dim1, dim2)
Decompose all available processors in 2D processor grid, most equally possible.
Definition: pfft.F90:152
subroutine, public pfft_prepare_plan_r2c(plan, n, in, out, sign, flags, mpi_comm)
Octopus subroutine to prepare a PFFT plan real to complex.
Definition: pfft.F90:193
subroutine, public pfft_get_dims(rs_n_global, mpi_comm, is_real, alloc_size, fs_n_global, rs_n, fs_n, rs_istart, fs_istart)
Definition: pfft.F90:289
The includes for the PFFT.
Definition: pfft.F90:117
The low level module to work with the PNFFT library. http:
Definition: pnfft.F90:130
subroutine, public pnfft_copy_params(in, out)
Definition: pnfft.F90:306
subroutine, public pnfft_set_sp_nodes(pnfft, namespace, X)
Definition: pnfft.F90:498
subroutine, public pnfft_init_plan(pnfft, pnfft_options, comm, fs_n_global, fs_n, fs_istart, rs_n, rs_istart)
Definition: pnfft.F90:364
subroutine, public pnfft_write_info(pnfft)
Definition: pnfft.F90:321
subroutine, public pnfft_guru_options(pnfft, namespace)
Definition: pnfft.F90:204
subroutine, public pnfft_end(pnfft)
Definition: pnfft.F90:474
subroutine, public pnfft_init_procmesh(pnfft, mpi_grp, comm)
Definition: pnfft.F90:270
This module defines the unit system, used for input and output.
type(unit_t), public unit_megabytes
For large amounts of data (natural code units are bytes)
This is defined even when running serial.
Definition: mpi.F90:144
int true(void)