Octopus
sgfft.F90
Go to the documentation of this file.
1!! Copyright (C) 2011 X. Andrade
2!!
3!! This program is free software; you can redistribute it and/or modify
4!! it under the terms of the GNU General Public License as published by
5!! the Free Software Foundation; either version 2, or (at your option)
6!! any later version.
7!!
8!! This program is distributed in the hope that it will be useful,
9!! but WITHOUT ANY WARRANTY; without even the implied warranty of
10!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11!! GNU General Public License for more details.
12!!
13!! You should have received a copy of the GNU General Public License
14!! along with this program; if not, write to the Free Software
15!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16!! 02110-1301, USA.
17!!
18
19#include <global.h>
20
24module sgfft_oct_m
25 use, intrinsic :: iso_fortran_env
26 use global_oct_m
28 use mpi_oct_m
29#ifdef HAVE_OPENMP
30 use omp_lib
31#endif
33
34 implicit none
35
36 private
37
38 public :: &
39 fft, &
41 kernelfft, &
43
44contains
45
46 !!****h* BigDFT/fourier_dim
47 !! NAME
48 !! fourier_dim
49 !!
50 !! FUNCTION
51 !! Give a number n_next > n compatible for the FFT
52 !!
53 !! SOURCE
54 !!
55 subroutine fourier_dim(n,n_next)
56 integer, intent(in) :: n
57 integer, intent(out) :: n_next
58
59 !Local variables
60 integer, parameter :: ndata1024 = 149, ndata = 149
61 !Multiple of 2,3,5
62 integer, dimension(ndata), parameter :: idata = (/ &
63 3, 4, 5, 6, 8, 9, 12, 15, 16, 18, &
64 20, 24, 25, 27, 30, 32, 36, 40, 45, 48, &
65 54, 60, 64, 72, 75, 80, 81, 90, 96, 100, &
66 108, 120, 125, 128, 135, 144, 150, 160, 162, 180, &
67 192, 200, 216, 225, 240, 243, 256, 270, 288, 300, &
68 320, 324, 360, 375, 384, 400, 405, 432, 450, 480, &
69 486, 500, 512, 540, 576, 600, 625, 640, 648, 675, &
70 720, 729, 750, 768, 800, 810, 864, 900, 960, 972, &
71 1000, 1024, 1080, 1125, 1152, 1200, 1215, 1280, 1296, 1350,&
72 1440, 1458, 1500, 1536, 1600, 1620, 1728, 1800, 1875, 1920,&
73 1944, 2000, 2025, 2048, 2160, 2250, 2304, 2400, 2430, 2500,&
74 2560, 2592, 2700, 2880, 3000, 3072, 3125, 3200, 3240, 3375,&
75 3456, 3600, 3750, 3840, 3888, 4000, 4050, 4096, 4320, 4500,&
76 4608, 4800, 5000, 5120, 5184, 5400, 5625, 5760, 6000, 6144,&
77 6400, 6480, 6750, 6912, 7200, 7500, 7680, 8000, 8192 /)
78 integer :: i
79
80 loop_data: do i = 1,ndata1024
81 if (n <= idata(i)) then
82 n_next = idata(i)
83 return
84 end if
85 end do loop_data
86 write(unit=*,fmt=*) "fourier_dim: ",n," is bigger than ",idata(ndata1024)
87 stop
88 end subroutine fourier_dim
89 !!***
90
91
92 ! Copyright (C) Stefan Goedecker, CEA Grenoble, 2002
93 ! This file is distributed under the terms of the
94 ! GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
95
96
97 ! --------------------------------------------------------------
98 ! 3-dimensional complex-complex FFT routine:
99 ! When compared to the best vendor implementations on RISC architectures
100 ! it gives close to optimal performance (perhaps loosing 20 percent in speed)
101 ! and it is significanly faster than many not so good vendor implementations
102 ! as well as other portable FFT`s.
103 ! On all vector machines tested so far (Cray, NEC, Fujitsu) is
104 ! was significantly faster than the vendor routines
105 ! The theoretical background is described in :
106 ! 1) S. Goedecker: Rotating a three-dimensional array in optimal
107 ! positions for vector processing: Case study for a three-dimensional Fast
108 ! Fourier Transform, Comp. Phys. Commun. \underline{76}, 294 (1993)
109 ! Citing of this reference is greatly appreciated if the routines are used
110 ! for scientific work.
111
112
113 ! Presumably good compiler flags:
114 ! IBM, serial power 2: xlf -qarch=pwr2 -O2 -qmaxmem=-1
115 ! with OpenMP: IBM: xlf_r -qfree -O4 -qarch=pwr3 -qtune=pwr3 -qsmp=omp -qmaxmem=-1 ;
116 ! a.out
117 ! DEC: f90 -O3 -arch ev67 -pipeline
118 ! with OpenMP: DEC: f90 -O3 -arch ev67 -pipeline -omp -lelan ;
119 ! prun -N1 -c4 a.out
120
121
122 !-----------------------------------------------------------
123
124 ! FFT PART -----------------------------------------------------------------
125 ! CALCULATES THE DISCRETE FOURIER TRANSFORM F(I1,I2,I3)=
126 ! S_(j1,j2,j3) EXP(isign*i*2*pi*(j1*i1/n1+j2*i2/n2+j3*i3/n3)) R(j1,j2,j3)
127 ! with optimal performance on vector computer, workstations and
128 ! multiprocessor shared memory computers using OpenMP compiler directives
129 ! INPUT:
130 ! n1,n2,n3:physical dimension of the transform. It must be a
131 ! product of the prime factors 2,3,5, but greater than 3.
132 ! If two ni`s are equal it is recommended to place them
133 ! behind each other.
134 ! nd1,nd2,nd3:memory dimension of Z. ndi must always be greater or
135 ! equal than ni. On a vector machine, it is recomended
136 ! to chose ndi=ni if ni is odd and ndi=ni+1 if ni is
137 ! even to obtain optimal execution speed. On RISC
138 ! machines ndi=ni is usually fine for odd ni, for even
139 ! ni one should try ndi=ni+1, ni+2, ni+4 to find the
140 ! optimal performance.
141 ! inzee=1: first part of Z is data (input) array,
142 ! second part work array
143 ! inzee=2: first part of Z is work array, second part data array
144 ! Z(1,i1,i2,i3,inzee)=real(R(i1,i2,i3))
145 ! Z(2,i1,i2,i3,inzee)=imag(R(i1,i2,i3))
146 ! OUTPUT:
147 ! inzee=1: first part of Z is data (output) array,
148 ! second part work array
149 ! inzee=2: first part of Z is work array, second part data array
150 ! real(F(i1,i2,i3))=Z(1,i1,i2,i3,inzee)
151 ! imag(F(i1,i2,i3))=Z(2,i1,i2,i3,inzee)
152 ! inzee on output is in general different from inzee on input
153 ! The input data are always overwritten independently of the
154 ! value of inzee.
155 ! PERFORMANCE AND THE NCACHE
156 ! The most important feature for performance is the right choice of
157 ! the parameter ncache. On a vector machine ncache has to be put to 0.
158 ! On a RISC machine with cache, it is very important to find the optimal
159 ! value of NCACHE. NCACHE determines the size of the work array zw, that
160 ! has to fit into cache. It has therefore to be chosen to equal roughly
161 ! half the size of the physical cache in units of real*8 numbers.
162 ! If the machine has 2 cache levels it can not be predicted which
163 ! cache level will be the most relevant one for choosing ncache.
164 ! The optimal value of ncache can easily be determined by numerical
165 ! experimentation. A too large value of ncache leads to a dramatic
166 ! and sudden decrease of performance, a too small value to a to a
167 ! slow and less dramatic decrease of performance. If NCACHE is set
168 ! to a value so small, that not even a single one dimensional transform
169 ! can be done in the workarray zw, the program stops with an error
170 ! message.
171 ! Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
172 ! Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1995, 1999
173 ! Copyright (C) Stefan Goedecker, CEA Grenoble, 2002
174 ! This file is distributed under the terms of the
175 ! GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
176
177 subroutine fft(n1, n2, n3, nd1, nd2, nd3, z, isign, inzee)
178 integer, intent(in) :: n1, n2, n3, nd1, nd2, nd3
179 real(real64), intent(inout) :: z(1:2, 1:nd1*nd2*nd3, 1:2)
180 integer, intent(in) :: isign
181 integer, intent(inout) :: inzee
182
183 real(real64), allocatable :: zw(:, :, :)
184 real(real64), allocatable :: trig(:, :)
185 integer, allocatable :: after(:), now(:), before(:)
186
187 integer :: ncache, nfft, mm, i, ic
188 integer :: npr, iam, inzet, m, lot, nn, lotomp, j, jompa, jompb
189 integer :: n, ma, mb, jj, inzeep
190
191 call profiling_in("SGFFT")
192
193 if (max(n1,n2,n3) > 8192) stop 'fft:8192 limit reached'
194
195 ! some reasonable values of ncache:
196 ! IBM/RS6000/590: 16*1024 ; IBM/RS6000/390: 3*1024 ;
197 ! IBM/PwPC: 1*1024 ; SGI/MIPS/R8000: 16*1024 ; DEC/Alpha/EV5 and EV6 6*1024
198 ! But if you care about performance find the optimal value of ncache yourself!
199 ! On all vector machines: ncache=0
200
201 ncache = 8192
202 if (ncache /= 0 .and. ncache <= max(n1,n2,n3)*4) ncache=max(n1,n2,n3/2)*4
203
204 ! check whether input values are reasonable
205 if (inzee <= 0 .or. inzee >= 3) stop 'fft:wrong inzee'
206 if (isign /= 1 .and. isign /= -1) stop 'fft:wrong isign'
207 if (n1 > nd1) stop 'fft:n1>nd1'
208 if (n2 > nd2) stop 'fft:n2>nd2'
209 if (n3 > nd3) stop 'fft:n3>nd3'
210
211
212 ! vector computer with memory banks:
213 if (ncache == 0) then
214 safe_allocate(trig(1:2,1:8192))
215 safe_allocate(after(1:20))
216 safe_allocate(now(1:20))
217 safe_allocate(before(1:20))
218
219 call ctrig(n3,trig,after,before,now,isign,ic)
220 nfft = nd1*n2
221 mm = nd1*nd2
222
223 do i = 1,ic-1
224 call fftstp(mm,nfft,nd3,mm,nd3,z(1,1,inzee),z(1,1,3-inzee), &
225 trig,after(i),now(i),before(i),isign)
226 inzee = 3-inzee
227 end do
228
229 i = ic
230
231 call fftrot(mm,nfft,nd3,mm,nd3,z(1,1,inzee),z(1,1,3-inzee), &
232 trig,after(i),now(i),before(i),isign)
233
234 inzee = 3-inzee
235
236 if (n2 /= n3) call ctrig(n2,trig,after,before,now,isign,ic)
237 nfft = nd3*n1
238 mm = nd3*nd1
239
240 do i = 1,ic-1
241 call fftstp(mm,nfft,nd2,mm,nd2,z(1,1,inzee),z(1,1,3-inzee), &
242 trig,after(i),now(i),before(i),isign)
243 inzee = 3-inzee
244 end do
245
246 i = ic
247 call fftrot(mm,nfft,nd2,mm,nd2,z(1,1,inzee),z(1,1,3-inzee), &
248 trig,after(i),now(i),before(i),isign)
249 inzee = 3-inzee
250
251 if (n1 /= n2) call ctrig(n1,trig,after,before,now,isign,ic)
252 nfft = nd2*n3
253 mm = nd2*nd3
254
255 do i = 1,ic-1
256 call fftstp(mm,nfft,nd1,mm,nd1,z(1,1,inzee),z(1,1,3-inzee), &
257 trig,after(i),now(i),before(i),isign)
258 inzee = 3-inzee
259 end do
260
261 i = ic
262
263 call fftrot(mm,nfft,nd1,mm,nd1,z(1,1,inzee),z(1,1,3-inzee), &
264 trig,after(i),now(i),before(i),isign)
265
266 inzee = 3-inzee
267
268 ! RISC machine with cache:
269 else
270 ! INtel IFC does not understand default(private)
271
272 !$omp parallel &
273 !$omp private(zw,trig,before,after,now,i,j,iam,npr,jj,ma,mb,mm,ic,n,m,jompa,jompb,lot,lotomp,inzeep,inzet,nn,nfft) &
274 !$omp shared(n1,n2,n3,nd1,nd2,nd3,z,isign,inzee,ncache)
275 npr = 1
276!$ npr = omp_get_num_threads()
277 iam = 0
278!$ iam = omp_get_thread_num()
279
280 allocate(zw(1:2,1:ncache/4,1:2))
281 allocate(trig(1:2,1:1024))
282 allocate(after(1:20))
283 allocate(now(1:20))
284 allocate(before(1:20))
285
286 inzet = inzee
287 ! TRANSFORM ALONG Z AXIS
288
289 mm = nd1*nd2
290 m = nd3
291 lot = max(1,ncache/(4*n3))
292 nn = lot
293 n = n3
294 if (2*n*lot*2 > ncache) stop 'fft:ncache1'
295
296 call ctrig(n3,trig,after,before,now,isign,ic)
297
298 if (ic == 1) then
299 i = ic
300 lotomp=(nd1*n2)/npr+1
301 ma = iam*lotomp+1
302 mb = min((iam+1)*lotomp,nd1*n2)
303 nfft = mb-ma+1
304 j = ma
305 jj = j*nd3-nd3+1
306 call fftrot(mm,nfft,m,mm,m,z(1,j,inzet),z(1,jj,3-inzet), &
307 trig,after(i),now(i),before(i),isign)
308
309 else
310
311 lotomp = (nd1*n2)/npr+1
312 jompa = iam*lotomp+1
313 jompb = min((iam+1)*lotomp,nd1*n2)
314 do j = jompa,jompb,lot
315 ma = j
316 mb = min(j+(lot-1),jompb)
317 nfft = mb-ma+1
318 jj = j*nd3-nd3+1
319
320 i = 1
321 inzeep = 2
322 call fftstp(mm,nfft,m,nn,n,z(1,j,inzet),zw(1,1,3-inzeep), &
323 trig,after(i),now(i),before(i),isign)
324 inzeep = 1
325
326 do i = 2,ic-1
327 call fftstp(nn,nfft,n,nn,n,zw(1,1,inzeep),zw(1,1,3-inzeep), &
328 trig,after(i),now(i),before(i),isign)
329 inzeep = 3-inzeep
330 end do
331 i = ic
332 call fftrot(nn,nfft,n,mm,m,zw(1,1,inzeep),z(1,jj,3-inzet), &
333 trig,after(i),now(i),before(i),isign)
334 end do
335 end if
336
337 inzet = 3-inzet
338
339 !$omp barrier
340
341 ! TRANSFORM ALONG Y AXIS
342 mm = nd3*nd1
343 m = nd2
344 lot = max(1,ncache/(4*n2))
345 nn = lot
346 n = n2
347 if (2*n*lot*2 > ncache) stop 'fft:ncache2'
348
349 if (n2 /= n3) call ctrig(n2,trig,after,before,now,isign,ic)
350
351 if (ic == 1) then
352 i = ic
353 lotomp = (nd3*n1)/npr+1
354 ma = iam*lotomp+1
355 mb = min((iam+1)*lotomp,nd3*n1)
356 nfft = mb-ma+1
357 j = ma
358 jj = j*nd2-nd2+1
359 call fftrot(mm,nfft,m,mm,m,z(1,j,inzet),z(1,jj,3-inzet), &
360 trig,after(i),now(i),before(i),isign)
361
362 else
363
364 lotomp = (nd3*n1)/npr+1
365 jompa = iam*lotomp+1
366 jompb = min((iam+1)*lotomp,nd3*n1)
367 do j = jompa,jompb,lot
368 ma = j
369 mb = min(j+(lot-1),jompb)
370 nfft = mb-ma+1
371 jj = j*nd2-nd2+1
372
373 i = 1
374 inzeep = 2
375 call fftstp(mm,nfft,m,nn,n,z(1,j,inzet),zw(1,1,3-inzeep), &
376 trig,after(i),now(i),before(i),isign)
377 inzeep = 1
378
379 do i = 2,ic-1
380 call fftstp(nn,nfft,n,nn,n,zw(1,1,inzeep),zw(1,1,3-inzeep), &
381 trig,after(i),now(i),before(i),isign)
382 inzeep = 3 - inzeep
383 end do
384
385 i = ic
386 call fftrot(nn,nfft,n,mm,m,zw(1,1,inzeep),z(1,jj,3-inzet), &
387 trig,after(i),now(i),before(i),isign)
388 end do
389 end if
390 inzet = 3 - inzet
391
392 !$omp barrier
393
394 ! TRANSFORM ALONG X AXIS
395 mm = nd2*nd3
396 m = nd1
397 lot = max(1,ncache/(4*n1))
398 nn = lot
399 n = n1
400 if (2*n*lot*2 > ncache) stop 'fft:ncache3'
401
402 if (n1 /= n2) call ctrig(n1,trig,after,before,now,isign,ic)
403
404 if (ic == 1) then
405 i = ic
406 lotomp = (nd2*n3)/npr+1
407 ma = iam*lotomp+1
408 mb = min((iam+1)*lotomp,nd2*n3)
409 nfft = mb-ma+1
410 j = ma
411 jj = j*nd1-nd1+1
412 call fftrot(mm,nfft,m,mm,m,z(1,j,inzet),z(1,jj,3-inzet), &
413 trig,after(i),now(i),before(i),isign)
414
415 else
416
417 lotomp=(nd2*n3)/npr+1
418 jompa=iam*lotomp+1
419 jompb=min((iam+1)*lotomp,nd2*n3)
420 do j=jompa,jompb,lot
421 ma=j
422 mb=min(j+(lot-1),jompb)
423 nfft=mb-ma+1
424 jj=j*nd1-nd1+1
425
426 i=1
427 inzeep=2
428 call fftstp(mm,nfft,m,nn,n,z(1,j,inzet),zw(1,1,3-inzeep), &
429 trig,after(i),now(i),before(i),isign)
430 inzeep=1
431
432 do i=2,ic-1
433 call fftstp(nn,nfft,n,nn,n,zw(1,1,inzeep),zw(1,1,3-inzeep), &
434 trig,after(i),now(i),before(i),isign)
435 inzeep=3-inzeep
436 end do
437 i=ic
438 call fftrot(nn,nfft,n,mm,m,zw(1,1,inzeep),z(1,jj,3-inzet), &
439 trig,after(i),now(i),before(i),isign)
440 end do
441 end if
442 inzet=3-inzet
443
444 deallocate(zw, trig, after, now, before)
445 if (iam == 0) inzee=inzet
446 !$omp end parallel
447
448 end if
449
450 call profiling_out("SGFFT")
451
452 end subroutine fft
453
454 ! ---------------------------------------------------------------------------------
455
456 ! Copyright (C) Stefan Goedecker, Lausanne, Switzerland, August 1, 1991
457 ! Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
458 ! Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
459 ! This file is distributed under the terms of the
460 ! GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
461
462 ! Different factorizations affect the performance
463 ! Factoring 64 as 4*4*4 might for example be faster on some machines than 8*8.
464 subroutine ctrig(n,trig,after,before,now,isign,ic)
465 integer :: n, isign, ic
466 integer :: now(7), after(7), before(7)
467 real(real64) :: trig(:,:)
468
469 integer :: i, j, itt, nh
470 real(real64) :: angle, trigc, trigs, twopi
471 INTEGER, DIMENSION(7,149) :: idata
472
473 ! The factor 6 is only allowed in the first place!
474 data ((idata(i,j),i=1,7),j=1,74) / &
475 3, 3, 1, 1, 1, 1, 1, 4, 4, 1, 1, 1, 1, 1, &
476 5, 5, 1, 1, 1, 1, 1, 6, 6, 1, 1, 1, 1, 1, &
477 8, 8, 1, 1, 1, 1, 1, 9, 3, 3, 1, 1, 1, 1, &
478 12, 4, 3, 1, 1, 1, 1, 15, 5, 3, 1, 1, 1, 1, &
479 16, 4, 4, 1, 1, 1, 1, 18, 6, 3, 1, 1, 1, 1, &
480 20, 5, 4, 1, 1, 1, 1, 24, 8, 3, 1, 1, 1, 1, &
481 25, 5, 5, 1, 1, 1, 1, 27, 3, 3, 3, 1, 1, 1, &
482 30, 6, 5, 1, 1, 1, 1, 32, 8, 4, 1, 1, 1, 1, &
483 36, 4, 3, 3, 1, 1, 1, 40, 8, 5, 1, 1, 1, 1, &
484 45, 5, 3, 3, 1, 1, 1, 48, 4, 4, 3, 1, 1, 1, &
485 54, 6, 3, 3, 1, 1, 1, 60, 5, 4, 3, 1, 1, 1, &
486 64, 8, 8, 1, 1, 1, 1, 72, 8, 3, 3, 1, 1, 1, &
487 75, 5, 5, 3, 1, 1, 1, 80, 5, 4, 4, 1, 1, 1, &
488 81, 3, 3, 3, 3, 1, 1, 90, 6, 5, 3, 1, 1, 1, &
489 96, 8, 4, 3, 1, 1, 1, 100, 5, 5, 4, 1, 1, 1, &
490 108, 4, 3, 3, 3, 1, 1, 120, 8, 5, 3, 1, 1, 1, &
491 125, 5, 5, 5, 1, 1, 1, 128, 8, 4, 4, 1, 1, 1, &
492 135, 5, 3, 3, 3, 1, 1, 144, 6, 8, 3, 1, 1, 1, &
493 150, 6, 5, 5, 1, 1, 1, 160, 8, 5, 4, 1, 1, 1, &
494 162, 6, 3, 3, 3, 1, 1, 180, 5, 4, 3, 3, 1, 1, &
495 192, 6, 8, 4, 1, 1, 1, 200, 8, 5, 5, 1, 1, 1, &
496 216, 8, 3, 3, 3, 1, 1, 225, 5, 5, 3, 3, 1, 1, &
497 240, 6, 8, 5, 1, 1, 1, 243, 3, 3, 3, 3, 3, 1, &
498 256, 8, 8, 4, 1, 1, 1, 270, 6, 5, 3, 3, 1, 1, &
499 288, 8, 4, 3, 3, 1, 1, 300, 5, 5, 4, 3, 1, 1, &
500 320, 5, 4, 4, 4, 1, 1, 324, 4, 3, 3, 3, 3, 1, &
501 360, 8, 5, 3, 3, 1, 1, 375, 5, 5, 5, 3, 1, 1, &
502 384, 8, 4, 4, 3, 1, 1, 400, 5, 5, 4, 4, 1, 1, &
503 405, 5, 3, 3, 3, 3, 1, 432, 4, 4, 3, 3, 3, 1, &
504 450, 6, 5, 5, 3, 1, 1, 480, 8, 5, 4, 3, 1, 1, &
505 486, 6, 3, 3, 3, 3, 1, 500, 5, 5, 5, 4, 1, 1, &
506 512, 8, 8, 8, 1, 1, 1, 540, 5, 4, 3, 3, 3, 1, &
507 576, 4, 4, 4, 3, 3, 1, 600, 8, 5, 5, 3, 1, 1, &
508 625, 5, 5, 5, 5, 1, 1, 640, 8, 5, 4, 4, 1, 1, &
509 648, 8, 3, 3, 3, 3, 1, 675, 5, 5, 3, 3, 3, 1, &
510 720, 5, 4, 4, 3, 3, 1, 729, 3, 3, 3, 3, 3, 3, &
511 750, 6, 5, 5, 5, 1, 1, 768, 4, 4, 4, 4, 3, 1/
512
513 data ((idata(i,j),i=1,7),j=75,149) / &
514 800, 8, 5, 5, 4, 1, 1, 810, 6, 5, 3, 3, 3, 1, &
515 864, 8, 4, 3, 3, 3, 1, 900, 5, 5, 4, 3, 3, 1, &
516 960, 5, 4, 4, 4, 3, 1, 972, 4, 3, 3, 3, 3, 3, &
517 1000, 8, 5, 5, 5, 1, 1, 1024, 4, 4, 4, 4, 4, 1, &
518 1080, 6, 5, 4, 3, 3, 1, 1125, 5, 5, 5, 3, 3, 1, &
519 1152, 6, 4, 4, 4, 3, 1, 1200, 6, 8, 5, 5, 1, 1, &
520 1215, 5, 3, 3, 3, 3, 3, 1280, 8, 8, 5, 4, 1, 1, &
521 1296, 6, 8, 3, 3, 3, 1, 1350, 6, 5, 5, 3, 3, 1, &
522 1440, 6, 5, 4, 4, 3, 1, 1458, 6, 3, 3, 3, 3, 3, &
523 1500, 5, 5, 5, 4, 3, 1, 1536, 6, 8, 8, 4, 1, 1, &
524 1600, 8, 8, 5, 5, 1, 1, 1620, 5, 4, 3, 3, 3, 3, &
525 1728, 6, 8, 4, 3, 3, 1, 1800, 6, 5, 5, 4, 3, 1, &
526 1875, 5, 5, 5, 5, 3, 1, 1920, 6, 5, 4, 4, 4, 1, &
527 1944, 6, 4, 3, 3, 3, 3, 2000, 5, 5, 5, 4, 4, 1, &
528 2025, 5, 5, 3, 3, 3, 3, 2048, 8, 4, 4, 4, 4, 1, &
529 2160, 6, 8, 5, 3, 3, 1, 2250, 6, 5, 5, 5, 3, 1, &
530 2304, 6, 8, 4, 4, 3, 1, 2400, 6, 5, 5, 4, 4, 1, &
531 2430, 6, 5, 3, 3, 3, 3, 2500, 5, 5, 5, 5, 4, 1, &
532 2560, 8, 5, 4, 4, 4, 1, 2592, 6, 4, 4, 3, 3, 3, &
533 2700, 5, 5, 4, 3, 3, 3, 2880, 6, 8, 5, 4, 3, 1, &
534 3000, 6, 5, 5, 5, 4, 1, 3072, 6, 8, 4, 4, 4, 1, &
535 3125, 5, 5, 5, 5, 5, 1, 3200, 8, 5, 5, 4, 4, 1, &
536 3240, 6, 5, 4, 3, 3, 3, 3375, 5, 5, 5, 3, 3, 3, &
537 3456, 6, 4, 4, 4, 3, 3, 3600, 6, 8, 5, 5, 3, 1, &
538 3750, 6, 5, 5, 5, 5, 1, 3840, 6, 8, 5, 4, 4, 1, &
539 3888, 6, 8, 3, 3, 3, 3, 4000, 8, 5, 5, 5, 4, 1, &
540 4050, 6, 5, 5, 3, 3, 3, 4096, 8, 8, 4, 4, 4, 1, &
541 4320, 6, 5, 4, 4, 3, 3, 4500, 5, 5, 5, 4, 3, 3, &
542 4608, 6, 8, 8, 4, 3, 1, 4800, 6, 8, 5, 5, 4, 1, &
543 5000, 8, 5, 5, 5, 5, 1, 5120, 8, 8, 5, 4, 4, 1, &
544 5184, 6, 8, 4, 3, 3, 3, 5400, 6, 5, 5, 4, 3, 3, &
545 5625, 5, 5, 5, 5, 3, 3, 5760, 6, 8, 8, 5, 3, 1, &
546 6000, 6, 8, 5, 5, 5, 1, 6144, 6, 8, 8, 4, 4, 1, &
547 6400, 8, 8, 5, 5, 4, 1, 6480, 6, 8, 5, 3, 3, 3, &
548 6750, 6, 5, 5, 5, 3, 3, 6912, 6, 8, 4, 4, 3, 3, &
549 7200, 6, 5, 5, 4, 4, 3, 7500, 5, 5, 5, 5, 4, 3, &
550 7680, 6, 8, 8, 5, 4, 1, 8000, 8, 8, 5, 5, 5, 1, &
551 8192, 8, 8, 8, 4, 4, 1 /
552
553 do i = 1, 149
554 if (n == idata(1,i)) then
555 ic=0
556 do j = 1,6
557 itt=idata(1+j,i)
558 if (itt > 1) then
559 ic=ic+1
560 now(j)=idata(1+j,i)
561 else
562 goto 1000
563 end if
564 end do
565 goto 1000
566 end if
567 end do
568
569 print*,'VALUE OF',n,'NOT ALLOWED FOR FFT, ALLOWED VALUES ARE:'
57037 format(15(i5))
571 write(6,37) (idata(1,i),i=1,149)
572 stop
5731000 continue
574
575 after(1)=1
576 before(ic)=1
577 do i = 2, ic
578 after(i)=after(i-1)*now(i-1)
579 before(ic-i+1)=before(ic-i+2)*now(ic-i+2)
580 end do
581
582 twopi=6.283185307179586_real64
583 angle=isign*twopi/n
584
585 if (mod(n,2) == 0) then
586 nh=n/2
587 trig(1,1)=1.0_8
588 trig(2,1)=0.0_8
589 trig(1,nh+1)=-1.0_8
590 trig(2,nh+1)=0.0_8
591 do i = 1, nh - 1
592 trigc=cos(i*angle)
593 trigs=sin(i*angle)
594 trig(1,i+1)=trigc
595 trig(2,i+1)=trigs
596 trig(1,n-i+1)=trigc
597 trig(2,n-i+1)=-trigs
598 end do
599 else
600 nh=(n-1)/2
601 trig(1,1)=1.0_8
602 trig(2,1)=0.0_8
603 do i = 1, nh
604 trigc=cos(i*angle)
605 trigs=sin(i*angle)
606 trig(1,i+1)=trigc
607 trig(2,i+1)=trigs
608 trig(1,n-i+1)=trigc
609 trig(2,n-i+1)=-trigs
610 end do
611 end if
612
613 end subroutine ctrig
614
615
616 ! ------------------------------------------------------------------------
617 ! Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
618 ! Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1995, 1999
619 ! This file is distributed under the terms of the
620 ! GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
621
622 subroutine fftstp(mm,nfft,m,nn,n,zin,zout,trig,after,now,before,isign)
623 integer :: mm, nfft, m, nn, n, isign
624 integer :: after, before, atn, atb, now
625 real(real64) :: trig(:,:)
626 real(real64) :: zin(2, mm, m), zout(2, nn, n)
627
628 real(real64) :: rt2i, dp, cp, cm, ci5, cr5, ci6, cr6, am, ap, ci8, cr8
629 real(real64) :: r, r1, r2, r3, r4, r5, r6, r7, r8_, r25, r34
630 real(real64) :: s, s1, s2, s3, s4, s5, s6, s7, s8, s25, s34
631 real(real64) :: bb, bm, dm, bp
632 real(real64) :: cr2, ci2, cr3, ci3, cr4, ci4, cr7, ci7
633 real(real64) :: cos2, sin2, cos4, sin4
634 real(real64) :: ur1, ur2, ur3, ui1, ui2, ui3
635 real(real64) :: vr1, vr2, vr3, vi1, vi2, vi3
636 integer :: ia, ib, j, ias, itrig, itt
637 integer :: nin1, nin2, nin3, nin4, nin5, nin6, nin7, nin8
638 integer :: nout1, nout2, nout3, nout4, nout5, nout6, nout7, nout8
639
640 atn=after*now
641 atb=after*before
642
643 ! sqrt(.5_real64)
644 rt2i=0.7071067811865475_real64
645 if (now == 2) then
646 ia=1
647 nin1=ia-after
648 nout1=ia-atn
649 do ib = 1, before
650 nin1=nin1+after
651 nin2=nin1+atb
652 nout1=nout1+atn
653 nout2=nout1+after
654 do j = 1, nfft
655 r1=zin(1,j,nin1)
656 s1=zin(2,j,nin1)
657 r2=zin(1,j,nin2)
658 s2=zin(2,j,nin2)
659 zout(1,j,nout1)= r2 + r1
660 zout(2,j,nout1)= s2 + s1
661 zout(1,j,nout2)= r1 - r2
662 zout(2,j,nout2)= s1 - s2
663 end do
664 end do
665 do ia = 2, after
666 ias=ia-1
667 if (2*ias == after) then
668 if (isign == 1) then
669 nin1=ia-after
670 nout1=ia-atn
671 do ib=1,before
672 nin1=nin1+after
673 nin2=nin1+atb
674 nout1=nout1+atn
675 nout2=nout1+after
676 do j = 1,nfft
677 r1=zin(1,j,nin1)
678 s1=zin(2,j,nin1)
679 r2=zin(2,j,nin2)
680 s2=zin(1,j,nin2)
681 zout(1,j,nout1)= r1 - r2
682 zout(2,j,nout1)= s2 + s1
683 zout(1,j,nout2)= r2 + r1
684 zout(2,j,nout2)= s1 - s2
685 end do
686 end do
687 else
688 nin1=ia-after
689 nout1=ia-atn
690 do ib=1,before
691 nin1=nin1+after
692 nin2=nin1+atb
693 nout1=nout1+atn
694 nout2=nout1+after
695 do j = 1,nfft
696 r1=zin(1,j,nin1)
697 s1=zin(2,j,nin1)
698 r2=zin(2,j,nin2)
699 s2=zin(1,j,nin2)
700 zout(1,j,nout1)= r2 + r1
701 zout(2,j,nout1)= s1 - s2
702 zout(1,j,nout2)= r1 - r2
703 zout(2,j,nout2)= s2 + s1
704 end do
705 end do
706 end if
707 else if (4*ias == after) then
708 if (isign == 1) then
709 nin1=ia-after
710 nout1=ia-atn
711 do ib = 1, before
712 nin1=nin1+after
713 nin2=nin1+atb
714 nout1=nout1+atn
715 nout2=nout1+after
716 do j = 1, nfft
717 r1=zin(1,j,nin1)
718 s1=zin(2,j,nin1)
719 r=zin(1,j,nin2)
720 s=zin(2,j,nin2)
721 r2=(r - s)*rt2i
722 s2=(r + s)*rt2i
723 zout(1,j,nout1)= r2 + r1
724 zout(2,j,nout1)= s2 + s1
725 zout(1,j,nout2)= r1 - r2
726 zout(2,j,nout2)= s1 - s2
727 end do
728 end do
729 else
730 nin1=ia-after
731 nout1=ia-atn
732 do ib=1,before
733 nin1=nin1+after
734 nin2=nin1+atb
735 nout1=nout1+atn
736 nout2=nout1+after
737 do j = 1,nfft
738 r1=zin(1,j,nin1)
739 s1=zin(2,j,nin1)
740 r=zin(1,j,nin2)
741 s=zin(2,j,nin2)
742 r2=(r + s)*rt2i
743 s2=(s - r)*rt2i
744 zout(1,j,nout1)= r2 + r1
745 zout(2,j,nout1)= s2 + s1
746 zout(1,j,nout2)= r1 - r2
747 zout(2,j,nout2)= s1 - s2
748 end do
749 end do
750 end if
751 else if (4*ias == 3*after) then
752 if (isign == 1) then
753 nin1=ia-after
754 nout1=ia-atn
755 do ib = 1, before
756 nin1=nin1+after
757 nin2=nin1+atb
758 nout1=nout1+atn
759 nout2=nout1+after
760 do j = 1, nfft
761 r1=zin(1,j,nin1)
762 s1=zin(2,j,nin1)
763 r=zin(1,j,nin2)
764 s=zin(2,j,nin2)
765 r2=(r + s)*rt2i
766 s2=(r - s)*rt2i
767 zout(1,j,nout1)= r1 - r2
768 zout(2,j,nout1)= s2 + s1
769 zout(1,j,nout2)= r2 + r1
770 zout(2,j,nout2)= s1 - s2
771 end do
772 end do
773 else
774 nin1=ia-after
775 nout1=ia-atn
776 do ib=1,before
777 nin1=nin1+after
778 nin2=nin1+atb
779 nout1=nout1+atn
780 nout2=nout1+after
781 do j = 1,nfft
782 r1=zin(1,j,nin1)
783 s1=zin(2,j,nin1)
784 r=zin(1,j,nin2)
785 s=zin(2,j,nin2)
786 r2=(s - r)*rt2i
787 s2=(r + s)*rt2i
788 zout(1,j,nout1)= r2 + r1
789 zout(2,j,nout1)= s1 - s2
790 zout(1,j,nout2)= r1 - r2
791 zout(2,j,nout2)= s2 + s1
792 end do
793 end do
794 end if
795 else
796 itrig=ias*before+1
797 cr2=trig(1,itrig)
798 ci2=trig(2,itrig)
799 nin1=ia-after
800 nout1=ia-atn
801 do ib=1,before
802 nin1=nin1+after
803 nin2=nin1+atb
804 nout1=nout1+atn
805 nout2=nout1+after
806 do j = 1,nfft
807 r1=zin(1,j,nin1)
808 s1=zin(2,j,nin1)
809 r=zin(1,j,nin2)
810 s=zin(2,j,nin2)
811 r2=r*cr2 - s*ci2
812 s2=r*ci2 + s*cr2
813 zout(1,j,nout1)= r2 + r1
814 zout(2,j,nout1)= s2 + s1
815 zout(1,j,nout2)= r1 - r2
816 zout(2,j,nout2)= s1 - s2
817 end do
818 end do
819 end if
820 end do
821 else if (now == 4) then
822 if (isign == 1) then
823 ia=1
824 nin1=ia-after
825 nout1=ia-atn
826 do ib = 1, before
827 nin1=nin1+after
828 nin2=nin1+atb
829 nin3=nin2+atb
830 nin4=nin3+atb
831 nout1=nout1+atn
832 nout2=nout1+after
833 nout3=nout2+after
834 nout4=nout3+after
835 do j = 1, nfft
836 r1=zin(1,j,nin1)
837 s1=zin(2,j,nin1)
838 r2=zin(1,j,nin2)
839 s2=zin(2,j,nin2)
840 r3=zin(1,j,nin3)
841 s3=zin(2,j,nin3)
842 r4=zin(1,j,nin4)
843 s4=zin(2,j,nin4)
844 r=r1 + r3
845 s=r2 + r4
846 zout(1,j,nout1) = r + s
847 zout(1,j,nout3) = r - s
848 r=r1 - r3
849 s=s2 - s4
850 zout(1,j,nout2) = r - s
851 zout(1,j,nout4) = r + s
852 r=s1 + s3
853 s=s2 + s4
854 zout(2,j,nout1) = r + s
855 zout(2,j,nout3) = r - s
856 r=s1 - s3
857 s=r2 - r4
858 zout(2,j,nout2) = r + s
859 zout(2,j,nout4) = r - s
860 end do
861 end do
862 do ia = 2, after
863 ias=ia-1
864 if (2*ias == after) then
865 nin1=ia-after
866 nout1=ia-atn
867 do ib = 1, before
868 nin1=nin1+after
869 nin2=nin1+atb
870 nin3=nin2+atb
871 nin4=nin3+atb
872 nout1=nout1+atn
873 nout2=nout1+after
874 nout3=nout2+after
875 nout4=nout3+after
876 do j = 1, nfft
877 r1=zin(1,j,nin1)
878 s1=zin(2,j,nin1)
879 r=zin(1,j,nin2)
880 s=zin(2,j,nin2)
881 r2=(r-s)*rt2i
882 s2=(r+s)*rt2i
883 r3=zin(2,j,nin3)
884 s3=zin(1,j,nin3)
885 r=zin(1,j,nin4)
886 s=zin(2,j,nin4)
887 r4=(r + s)*rt2i
888 s4=(r - s)*rt2i
889 r=r1 - r3
890 s=r2 - r4
891 zout(1,j,nout1) = r + s
892 zout(1,j,nout3) = r - s
893 r=r1 + r3
894 s=s2 - s4
895 zout(1,j,nout2) = r - s
896 zout(1,j,nout4) = r + s
897 r=s1 + s3
898 s=s2 + s4
899 zout(2,j,nout1) = r + s
900 zout(2,j,nout3) = r - s
901 r=s1 - s3
902 s=r2 + r4
903 zout(2,j,nout2) = r + s
904 zout(2,j,nout4) = r - s
905 end do
906 end do
907 else
908 itt=ias*before
909 itrig=itt+1
910 cr2=trig(1,itrig)
911 ci2=trig(2,itrig)
912 itrig=itrig+itt
913 cr3=trig(1,itrig)
914 ci3=trig(2,itrig)
915 itrig=itrig+itt
916 cr4=trig(1,itrig)
917 ci4=trig(2,itrig)
918 nin1=ia-after
919 nout1=ia-atn
920 do ib = 1, before
921 nin1=nin1+after
922 nin2=nin1+atb
923 nin3=nin2+atb
924 nin4=nin3+atb
925 nout1=nout1+atn
926 nout2=nout1+after
927 nout3=nout2+after
928 nout4=nout3+after
929 do j = 1, nfft
930 r1=zin(1,j,nin1)
931 s1=zin(2,j,nin1)
932 r=zin(1,j,nin2)
933 s=zin(2,j,nin2)
934 r2=r*cr2 - s*ci2
935 s2=r*ci2 + s*cr2
936 r=zin(1,j,nin3)
937 s=zin(2,j,nin3)
938 r3=r*cr3 - s*ci3
939 s3=r*ci3 + s*cr3
940 r=zin(1,j,nin4)
941 s=zin(2,j,nin4)
942 r4=r*cr4 - s*ci4
943 s4=r*ci4 + s*cr4
944 r=r1 + r3
945 s=r2 + r4
946 zout(1,j,nout1) = r + s
947 zout(1,j,nout3) = r - s
948 r=r1 - r3
949 s=s2 - s4
950 zout(1,j,nout2) = r - s
951 zout(1,j,nout4) = r + s
952 r=s1 + s3
953 s=s2 + s4
954 zout(2,j,nout1) = r + s
955 zout(2,j,nout3) = r - s
956 r=s1 - s3
957 s=r2 - r4
958 zout(2,j,nout2) = r + s
959 zout(2,j,nout4) = r - s
960 end do
961 end do
962 end if
963 end do
964 else
965 ia=1
966 nin1=ia-after
967 nout1=ia-atn
968 do ib = 1, before
969 nin1=nin1+after
970 nin2=nin1+atb
971 nin3=nin2+atb
972 nin4=nin3+atb
973 nout1=nout1+atn
974 nout2=nout1+after
975 nout3=nout2+after
976 nout4=nout3+after
977 do j = 1, nfft
978 r1=zin(1,j,nin1)
979 s1=zin(2,j,nin1)
980 r2=zin(1,j,nin2)
981 s2=zin(2,j,nin2)
982 r3=zin(1,j,nin3)
983 s3=zin(2,j,nin3)
984 r4=zin(1,j,nin4)
985 s4=zin(2,j,nin4)
986 r=r1 + r3
987 s=r2 + r4
988 zout(1,j,nout1) = r + s
989 zout(1,j,nout3) = r - s
990 r=r1 - r3
991 s=s2 - s4
992 zout(1,j,nout2) = r + s
993 zout(1,j,nout4) = r - s
994 r=s1 + s3
995 s=s2 + s4
996 zout(2,j,nout1) = r + s
997 zout(2,j,nout3) = r - s
998 r=s1 - s3
999 s=r2 - r4
1000 zout(2,j,nout2) = r - s
1001 zout(2,j,nout4) = r + s
1002 end do
1003 end do
1004 do ia = 2, after
1005 ias=ia-1
1006 if (2*ias == after) then
1007 nin1=ia-after
1008 nout1=ia-atn
1009 do ib = 1, before
1010 nin1=nin1+after
1011 nin2=nin1+atb
1012 nin3=nin2+atb
1013 nin4=nin3+atb
1014 nout1=nout1+atn
1015 nout2=nout1+after
1016 nout3=nout2+after
1017 nout4=nout3+after
1018 do j = 1,nfft
1019 r1=zin(1,j,nin1)
1020 s1=zin(2,j,nin1)
1021 r=zin(1,j,nin2)
1022 s=zin(2,j,nin2)
1023 r2=(r + s)*rt2i
1024 s2=(s - r)*rt2i
1025 r3=zin(2,j,nin3)
1026 s3=zin(1,j,nin3)
1027 r=zin(1,j,nin4)
1028 s=zin(2,j,nin4)
1029 r4=(s - r)*rt2i
1030 s4=(r + s)*rt2i
1031 r=r1 + r3
1032 s=r2 + r4
1033 zout(1,j,nout1) = r + s
1034 zout(1,j,nout3) = r - s
1035 r=r1 - r3
1036 s=s2 + s4
1037 zout(1,j,nout2) = r + s
1038 zout(1,j,nout4) = r - s
1039 r=s1 - s3
1040 s=s2 - s4
1041 zout(2,j,nout1) = r + s
1042 zout(2,j,nout3) = r - s
1043 r=s1 + s3
1044 s=r2 - r4
1045 zout(2,j,nout2) = r - s
1046 zout(2,j,nout4) = r + s
1047 end do
1048 end do
1049 else
1050 itt=ias*before
1051 itrig=itt+1
1052 cr2=trig(1,itrig)
1053 ci2=trig(2,itrig)
1054 itrig=itrig+itt
1055 cr3=trig(1,itrig)
1056 ci3=trig(2,itrig)
1057 itrig=itrig+itt
1058 cr4=trig(1,itrig)
1059 ci4=trig(2,itrig)
1060 nin1=ia-after
1061 nout1=ia-atn
1062 do ib = 1, before
1063 nin1=nin1+after
1064 nin2=nin1+atb
1065 nin3=nin2+atb
1066 nin4=nin3+atb
1067 nout1=nout1+atn
1068 nout2=nout1+after
1069 nout3=nout2+after
1070 nout4=nout3+after
1071 do j = 1, nfft
1072 r1=zin(1,j,nin1)
1073 s1=zin(2,j,nin1)
1074 r=zin(1,j,nin2)
1075 s=zin(2,j,nin2)
1076 r2=r*cr2 - s*ci2
1077 s2=r*ci2 + s*cr2
1078 r=zin(1,j,nin3)
1079 s=zin(2,j,nin3)
1080 r3=r*cr3 - s*ci3
1081 s3=r*ci3 + s*cr3
1082 r=zin(1,j,nin4)
1083 s=zin(2,j,nin4)
1084 r4=r*cr4 - s*ci4
1085 s4=r*ci4 + s*cr4
1086 r=r1 + r3
1087 s=r2 + r4
1088 zout(1,j,nout1) = r + s
1089 zout(1,j,nout3) = r - s
1090 r=r1 - r3
1091 s=s2 - s4
1092 zout(1,j,nout2) = r + s
1093 zout(1,j,nout4) = r - s
1094 r=s1 + s3
1095 s=s2 + s4
1096 zout(2,j,nout1) = r + s
1097 zout(2,j,nout3) = r - s
1098 r=s1 - s3
1099 s=r2 - r4
1100 zout(2,j,nout2) = r - s
1101 zout(2,j,nout4) = r + s
1102 end do
1103 end do
1104 end if
1105 end do
1106 end if
1107 else if (now == 8) then
1108 if (isign == -1) then
1109 ia=1
1110 nin1=ia-after
1111 nout1=ia-atn
1112 do ib = 1, before
1113 nin1=nin1+after
1114 nin2=nin1+atb
1115 nin3=nin2+atb
1116 nin4=nin3+atb
1117 nin5=nin4+atb
1118 nin6=nin5+atb
1119 nin7=nin6+atb
1120 nin8=nin7+atb
1121 nout1=nout1+atn
1122 nout2=nout1+after
1123 nout3=nout2+after
1124 nout4=nout3+after
1125 nout5=nout4+after
1126 nout6=nout5+after
1127 nout7=nout6+after
1128 nout8=nout7+after
1129 do j = 1, nfft
1130 r1=zin(1,j,nin1)
1131 s1=zin(2,j,nin1)
1132 r2=zin(1,j,nin2)
1133 s2=zin(2,j,nin2)
1134 r3=zin(1,j,nin3)
1135 s3=zin(2,j,nin3)
1136 r4=zin(1,j,nin4)
1137 s4=zin(2,j,nin4)
1138 r5=zin(1,j,nin5)
1139 s5=zin(2,j,nin5)
1140 r6=zin(1,j,nin6)
1141 s6=zin(2,j,nin6)
1142 r7=zin(1,j,nin7)
1143 s7=zin(2,j,nin7)
1144 r8_=zin(1,j,nin8)
1145 s8=zin(2,j,nin8)
1146 r=r1 + r5
1147 s=r3 + r7
1148 ap=r + s
1149 am=r - s
1150 r=r2 + r6
1151 s=r4 + r8_
1152 bp=r + s
1153 bm=r - s
1154 r=s1 + s5
1155 s=s3 + s7
1156 cp=r + s
1157 cm=r - s
1158 r=s2 + s6
1159 s=s4 + s8
1160 dp=r + s
1161 dm=r - s
1162 zout(1,j,nout1) = ap + bp
1163 zout(2,j,nout1) = cp + dp
1164 zout(1,j,nout5) = ap - bp
1165 zout(2,j,nout5) = cp - dp
1166 zout(1,j,nout3) = am + dm
1167 zout(2,j,nout3) = cm - bm
1168 zout(1,j,nout7) = am - dm
1169 zout(2,j,nout7) = cm + bm
1170 r=r1 - r5
1171 s=s3 - s7
1172 ap=r + s
1173 am=r - s
1174 r=s1 - s5
1175 s=r3 - r7
1176 bp=r + s
1177 bm=r - s
1178 r=s4 - s8
1179 s=r2 - r6
1180 cp=r + s
1181 cm=r - s
1182 r=s2 - s6
1183 s=r4 - r8_
1184 dp=r + s
1185 dm=r - s
1186 r = ( cp + dm)*rt2i
1187 s = ( dm - cp)*rt2i
1188 cp= ( cm + dp)*rt2i
1189 dp = ( cm - dp)*rt2i
1190 zout(1,j,nout2) = ap + r
1191 zout(2,j,nout2) = bm + s
1192 zout(1,j,nout6) = ap - r
1193 zout(2,j,nout6) = bm - s
1194 zout(1,j,nout4) = am + cp
1195 zout(2,j,nout4) = bp + dp
1196 zout(1,j,nout8) = am - cp
1197 zout(2,j,nout8) = bp - dp
1198 end do
1199 end do
1200 do ia = 2, after
1201 ias=ia-1
1202 itt=ias*before
1203 itrig=itt+1
1204 cr2=trig(1,itrig)
1205 ci2=trig(2,itrig)
1206 itrig=itrig+itt
1207 cr3=trig(1,itrig)
1208 ci3=trig(2,itrig)
1209 itrig=itrig+itt
1210 cr4=trig(1,itrig)
1211 ci4=trig(2,itrig)
1212 itrig=itrig+itt
1213 cr5=trig(1,itrig)
1214 ci5=trig(2,itrig)
1215 itrig=itrig+itt
1216 cr6=trig(1,itrig)
1217 ci6=trig(2,itrig)
1218 itrig=itrig+itt
1219 cr7=trig(1,itrig)
1220 ci7=trig(2,itrig)
1221 itrig=itrig+itt
1222 cr8=trig(1,itrig)
1223 ci8=trig(2,itrig)
1224 nin1=ia-after
1225 nout1=ia-atn
1226 do ib = 1, before
1227 nin1=nin1+after
1228 nin2=nin1+atb
1229 nin3=nin2+atb
1230 nin4=nin3+atb
1231 nin5=nin4+atb
1232 nin6=nin5+atb
1233 nin7=nin6+atb
1234 nin8=nin7+atb
1235 nout1=nout1+atn
1236 nout2=nout1+after
1237 nout3=nout2+after
1238 nout4=nout3+after
1239 nout5=nout4+after
1240 nout6=nout5+after
1241 nout7=nout6+after
1242 nout8=nout7+after
1243 do j = 1, nfft
1244 r1=zin(1,j,nin1)
1245 s1=zin(2,j,nin1)
1246 r=zin(1,j,nin2)
1247 s=zin(2,j,nin2)
1248 r2=r*cr2 - s*ci2
1249 s2=r*ci2 + s*cr2
1250 r=zin(1,j,nin3)
1251 s=zin(2,j,nin3)
1252 r3=r*cr3 - s*ci3
1253 s3=r*ci3 + s*cr3
1254 r=zin(1,j,nin4)
1255 s=zin(2,j,nin4)
1256 r4=r*cr4 - s*ci4
1257 s4=r*ci4 + s*cr4
1258 r=zin(1,j,nin5)
1259 s=zin(2,j,nin5)
1260 r5=r*cr5 - s*ci5
1261 s5=r*ci5 + s*cr5
1262 r=zin(1,j,nin6)
1263 s=zin(2,j,nin6)
1264 r6=r*cr6 - s*ci6
1265 s6=r*ci6 + s*cr6
1266 r=zin(1,j,nin7)
1267 s=zin(2,j,nin7)
1268 r7=r*cr7 - s*ci7
1269 s7=r*ci7 + s*cr7
1270 r=zin(1,j,nin8)
1271 s=zin(2,j,nin8)
1272 r8_=r*cr8 - s*ci8
1273 s8=r*ci8 + s*cr8
1274 r=r1 + r5
1275 s=r3 + r7
1276 ap=r + s
1277 am=r - s
1278 r=r2 + r6
1279 s=r4 + r8_
1280 bp=r + s
1281 bm=r - s
1282 r=s1 + s5
1283 s=s3 + s7
1284 cp=r + s
1285 cm=r - s
1286 r=s2 + s6
1287 s=s4 + s8
1288 dp=r + s
1289 dm=r - s
1290 zout(1,j,nout1) = ap + bp
1291 zout(2,j,nout1) = cp + dp
1292 zout(1,j,nout5) = ap - bp
1293 zout(2,j,nout5) = cp - dp
1294 zout(1,j,nout3) = am + dm
1295 zout(2,j,nout3) = cm - bm
1296 zout(1,j,nout7) = am - dm
1297 zout(2,j,nout7) = cm + bm
1298 r=r1 - r5
1299 s=s3 - s7
1300 ap=r + s
1301 am=r - s
1302 r=s1 - s5
1303 s=r3 - r7
1304 bp=r + s
1305 bm=r - s
1306 r=s4 - s8
1307 s=r2 - r6
1308 cp=r + s
1309 cm=r - s
1310 r=s2 - s6
1311 s=r4 - r8_
1312 dp=r + s
1313 dm=r - s
1314 r = ( cp + dm)*rt2i
1315 s = ( dm - cp)*rt2i
1316 cp= ( cm + dp)*rt2i
1317 dp = ( cm - dp)*rt2i
1318 zout(1,j,nout2) = ap + r
1319 zout(2,j,nout2) = bm + s
1320 zout(1,j,nout6) = ap - r
1321 zout(2,j,nout6) = bm - s
1322 zout(1,j,nout4) = am + cp
1323 zout(2,j,nout4) = bp + dp
1324 zout(1,j,nout8) = am - cp
1325 zout(2,j,nout8) = bp - dp
1326 end do
1327 end do
1328 end do
1329 else
1330 ia=1
1331 nin1=ia-after
1332 nout1=ia-atn
1333 do ib = 1, before
1334 nin1=nin1+after
1335 nin2=nin1+atb
1336 nin3=nin2+atb
1337 nin4=nin3+atb
1338 nin5=nin4+atb
1339 nin6=nin5+atb
1340 nin7=nin6+atb
1341 nin8=nin7+atb
1342 nout1=nout1+atn
1343 nout2=nout1+after
1344 nout3=nout2+after
1345 nout4=nout3+after
1346 nout5=nout4+after
1347 nout6=nout5+after
1348 nout7=nout6+after
1349 nout8=nout7+after
1350 do j = 1, nfft
1351 r1=zin(1,j,nin1)
1352 s1=zin(2,j,nin1)
1353 r2=zin(1,j,nin2)
1354 s2=zin(2,j,nin2)
1355 r3=zin(1,j,nin3)
1356 s3=zin(2,j,nin3)
1357 r4=zin(1,j,nin4)
1358 s4=zin(2,j,nin4)
1359 r5=zin(1,j,nin5)
1360 s5=zin(2,j,nin5)
1361 r6=zin(1,j,nin6)
1362 s6=zin(2,j,nin6)
1363 r7=zin(1,j,nin7)
1364 s7=zin(2,j,nin7)
1365 r8_=zin(1,j,nin8)
1366 s8=zin(2,j,nin8)
1367 r=r1 + r5
1368 s=r3 + r7
1369 ap=r + s
1370 am=r - s
1371 r=r2 + r6
1372 s=r4 + r8_
1373 bp=r + s
1374 bm=r - s
1375 r=s1 + s5
1376 s=s3 + s7
1377 cp=r + s
1378 cm=r - s
1379 r=s2 + s6
1380 s=s4 + s8
1381 dp=r + s
1382 dm=r - s
1383 zout(1,j,nout1) = ap + bp
1384 zout(2,j,nout1) = cp + dp
1385 zout(1,j,nout5) = ap - bp
1386 zout(2,j,nout5) = cp - dp
1387 zout(1,j,nout3) = am - dm
1388 zout(2,j,nout3) = cm + bm
1389 zout(1,j,nout7) = am + dm
1390 zout(2,j,nout7) = cm - bm
1391 r= r1 - r5
1392 s=-s3 + s7
1393 ap=r + s
1394 am=r - s
1395 r=s1 - s5
1396 s=r7 - r3
1397 bp=r + s
1398 bm=r - s
1399 r=-s4 + s8
1400 s= r2 - r6
1401 cp=r + s
1402 cm=r - s
1403 r=-s2 + s6
1404 s= r4 - r8_
1405 dp=r + s
1406 dm=r - s
1407 r = ( cp + dm)*rt2i
1408 s = ( cp - dm)*rt2i
1409 cp= ( cm + dp)*rt2i
1410 dp= ( dp - cm)*rt2i
1411 zout(1,j,nout2) = ap + r
1412 zout(2,j,nout2) = bm + s
1413 zout(1,j,nout6) = ap - r
1414 zout(2,j,nout6) = bm - s
1415 zout(1,j,nout4) = am + cp
1416 zout(2,j,nout4) = bp + dp
1417 zout(1,j,nout8) = am - cp
1418 zout(2,j,nout8) = bp - dp
1419 end do
1420 end do
1421
1422 do ia = 2, after
1423 ias=ia-1
1424 itt=ias*before
1425 itrig=itt+1
1426 cr2=trig(1,itrig)
1427 ci2=trig(2,itrig)
1428 itrig=itrig+itt
1429 cr3=trig(1,itrig)
1430 ci3=trig(2,itrig)
1431 itrig=itrig+itt
1432 cr4=trig(1,itrig)
1433 ci4=trig(2,itrig)
1434 itrig=itrig+itt
1435 cr5=trig(1,itrig)
1436 ci5=trig(2,itrig)
1437 itrig=itrig+itt
1438 cr6=trig(1,itrig)
1439 ci6=trig(2,itrig)
1440 itrig=itrig+itt
1441 cr7=trig(1,itrig)
1442 ci7=trig(2,itrig)
1443 itrig=itrig+itt
1444 cr8=trig(1,itrig)
1445 ci8=trig(2,itrig)
1446 nin1=ia-after
1447 nout1=ia-atn
1448 do ib = 1, before
1449 nin1=nin1+after
1450 nin2=nin1+atb
1451 nin3=nin2+atb
1452 nin4=nin3+atb
1453 nin5=nin4+atb
1454 nin6=nin5+atb
1455 nin7=nin6+atb
1456 nin8=nin7+atb
1457 nout1=nout1+atn
1458 nout2=nout1+after
1459 nout3=nout2+after
1460 nout4=nout3+after
1461 nout5=nout4+after
1462 nout6=nout5+after
1463 nout7=nout6+after
1464 nout8=nout7+after
1465 do j = 1, nfft
1466 r1=zin(1,j,nin1)
1467 s1=zin(2,j,nin1)
1468 r=zin(1,j,nin2)
1469 s=zin(2,j,nin2)
1470 r2=r*cr2 - s*ci2
1471 s2=r*ci2 + s*cr2
1472 r=zin(1,j,nin3)
1473 s=zin(2,j,nin3)
1474 r3=r*cr3 - s*ci3
1475 s3=r*ci3 + s*cr3
1476 r=zin(1,j,nin4)
1477 s=zin(2,j,nin4)
1478 r4=r*cr4 - s*ci4
1479 s4=r*ci4 + s*cr4
1480 r=zin(1,j,nin5)
1481 s=zin(2,j,nin5)
1482 r5=r*cr5 - s*ci5
1483 s5=r*ci5 + s*cr5
1484 r=zin(1,j,nin6)
1485 s=zin(2,j,nin6)
1486 r6=r*cr6 - s*ci6
1487 s6=r*ci6 + s*cr6
1488 r=zin(1,j,nin7)
1489 s=zin(2,j,nin7)
1490 r7=r*cr7 - s*ci7
1491 s7=r*ci7 + s*cr7
1492 r=zin(1,j,nin8)
1493 s=zin(2,j,nin8)
1494 r8_=r*cr8 - s*ci8
1495 s8=r*ci8 + s*cr8
1496 r=r1 + r5
1497 s=r3 + r7
1498 ap=r + s
1499 am=r - s
1500 r=r2 + r6
1501 s=r4 + r8_
1502 bp=r + s
1503 bm=r - s
1504 r=s1 + s5
1505 s=s3 + s7
1506 cp=r + s
1507 cm=r - s
1508 r=s2 + s6
1509 s=s4 + s8
1510 dp=r + s
1511 dm=r - s
1512 zout(1,j,nout1) = ap + bp
1513 zout(2,j,nout1) = cp + dp
1514 zout(1,j,nout5) = ap - bp
1515 zout(2,j,nout5) = cp - dp
1516 zout(1,j,nout3) = am - dm
1517 zout(2,j,nout3) = cm + bm
1518 zout(1,j,nout7) = am + dm
1519 zout(2,j,nout7) = cm - bm
1520 r= r1 - r5
1521 s=-s3 + s7
1522 ap=r + s
1523 am=r - s
1524 r=s1 - s5
1525 s=r7 - r3
1526 bp=r + s
1527 bm=r - s
1528 r=-s4 + s8
1529 s= r2 - r6
1530 cp=r + s
1531 cm=r - s
1532 r=-s2 + s6
1533 s= r4 - r8_
1534 dp=r + s
1535 dm=r - s
1536 r = ( cp + dm)*rt2i
1537 s = ( cp - dm)*rt2i
1538 cp= ( cm + dp)*rt2i
1539 dp= ( dp - cm)*rt2i
1540 zout(1,j,nout2) = ap + r
1541 zout(2,j,nout2) = bm + s
1542 zout(1,j,nout6) = ap - r
1543 zout(2,j,nout6) = bm - s
1544 zout(1,j,nout4) = am + cp
1545 zout(2,j,nout4) = bp + dp
1546 zout(1,j,nout8) = am - cp
1547 zout(2,j,nout8) = bp - dp
1548 end do
1549 end do
1550 end do
1551
1552 end if
1553 else if (now == 3) then
1554 ! .5_real64*sqrt(3._real64)
1555 bb=isign*0.8660254037844387_real64
1556 ia=1
1557 nin1=ia-after
1558 nout1=ia-atn
1559 do ib = 1, before
1560 nin1=nin1+after
1561 nin2=nin1+atb
1562 nin3=nin2+atb
1563 nout1=nout1+atn
1564 nout2=nout1+after
1565 nout3=nout2+after
1566 do j = 1, nfft
1567 r1=zin(1,j,nin1)
1568 s1=zin(2,j,nin1)
1569 r2=zin(1,j,nin2)
1570 s2=zin(2,j,nin2)
1571 r3=zin(1,j,nin3)
1572 s3=zin(2,j,nin3)
1573 r=r2 + r3
1574 s=s2 + s3
1575 zout(1,j,nout1) = r + r1
1576 zout(2,j,nout1) = s + s1
1577 r1=r1 - .5_real64*r
1578 s1=s1 - .5_real64*s
1579 r2=bb*(r2-r3)
1580 s2=bb*(s2-s3)
1581 zout(1,j,nout2) = r1 - s2
1582 zout(2,j,nout2) = s1 + r2
1583 zout(1,j,nout3) = r1 + s2
1584 zout(2,j,nout3) = s1 - r2
1585 end do
1586 end do
1587 do ia = 2, after
1588 ias=ia-1
1589 if (4*ias == 3*after) then
1590 if (isign == 1) then
1591 nin1=ia-after
1592 nout1=ia-atn
1593 do ib = 1, before
1594 nin1=nin1+after
1595 nin2=nin1+atb
1596 nin3=nin2+atb
1597 nout1=nout1+atn
1598 nout2=nout1+after
1599 nout3=nout2+after
1600 do j = 1, nfft
1601 r1=zin(1,j,nin1)
1602 s1=zin(2,j,nin1)
1603 r2=zin(2,j,nin2)
1604 s2=zin(1,j,nin2)
1605 r3=zin(1,j,nin3)
1606 s3=zin(2,j,nin3)
1607 r=r3 + r2
1608 s=s2 - s3
1609 zout(1,j,nout1) = r1 - r
1610 zout(2,j,nout1) = s + s1
1611 r1=r1 + .5_real64*r
1612 s1=s1 - .5_real64*s
1613 r2=bb*(r2-r3)
1614 s2=bb*(s2+s3)
1615 zout(1,j,nout2) = r1 - s2
1616 zout(2,j,nout2) = s1 - r2
1617 zout(1,j,nout3) = r1 + s2
1618 zout(2,j,nout3) = s1 + r2
1619 end do
1620 end do
1621 else
1622 nin1=ia-after
1623 nout1=ia-atn
1624 do ib = 1, before
1625 nin1=nin1+after
1626 nin2=nin1+atb
1627 nin3=nin2+atb
1628 nout1=nout1+atn
1629 nout2=nout1+after
1630 nout3=nout2+after
1631 do j = 1, nfft
1632 r1=zin(1,j,nin1)
1633 s1=zin(2,j,nin1)
1634 r2=zin(2,j,nin2)
1635 s2=zin(1,j,nin2)
1636 r3=zin(1,j,nin3)
1637 s3=zin(2,j,nin3)
1638 r=r2 - r3
1639 s=s2 + s3
1640 zout(1,j,nout1) = r + r1
1641 zout(2,j,nout1) = s1 - s
1642 r1=r1 - .5_real64*r
1643 s1=s1 + .5_real64*s
1644 r2=bb*(r2+r3)
1645 s2=bb*(s2-s3)
1646 zout(1,j,nout2) = r1 + s2
1647 zout(2,j,nout2) = s1 + r2
1648 zout(1,j,nout3) = r1 - s2
1649 zout(2,j,nout3) = s1 - r2
1650 end do
1651 end do
1652 end if
1653 else if (8*ias == 3*after) then
1654 if (isign == 1) then
1655 nin1=ia-after
1656 nout1=ia-atn
1657 do ib = 1, before
1658 nin1=nin1+after
1659 nin2=nin1+atb
1660 nin3=nin2+atb
1661 nout1=nout1+atn
1662 nout2=nout1+after
1663 nout3=nout2+after
1664 do j = 1, nfft
1665 r1=zin(1,j,nin1)
1666 s1=zin(2,j,nin1)
1667 r=zin(1,j,nin2)
1668 s=zin(2,j,nin2)
1669 r2=(r - s)*rt2i
1670 s2=(r + s)*rt2i
1671 r3=zin(2,j,nin3)
1672 s3=zin(1,j,nin3)
1673 r=r2 - r3
1674 s=s2 + s3
1675 zout(1,j,nout1) = r + r1
1676 zout(2,j,nout1) = s + s1
1677 r1=r1 - .5_real64*r
1678 s1=s1 - .5_real64*s
1679 r2=bb*(r2+r3)
1680 s2=bb*(s2-s3)
1681 zout(1,j,nout2) = r1 - s2
1682 zout(2,j,nout2) = s1 + r2
1683 zout(1,j,nout3) = r1 + s2
1684 zout(2,j,nout3) = s1 - r2
1685 end do
1686 end do
1687 else
1688 nin1=ia-after
1689 nout1=ia-atn
1690 do ib = 1, before
1691 nin1=nin1+after
1692 nin2=nin1+atb
1693 nin3=nin2+atb
1694 nout1=nout1+atn
1695 nout2=nout1+after
1696 nout3=nout2+after
1697 do j = 1, nfft
1698 r1=zin(1,j,nin1)
1699 s1=zin(2,j,nin1)
1700 r=zin(1,j,nin2)
1701 s=zin(2,j,nin2)
1702 r2=(r + s)*rt2i
1703 s2=(s - r)*rt2i
1704 r3=zin(2,j,nin3)
1705 s3=zin(1,j,nin3)
1706 r=r2 + r3
1707 s=s2 - s3
1708 zout(1,j,nout1) = r + r1
1709 zout(2,j,nout1) = s + s1
1710 r1=r1 - .5_real64*r
1711 s1=s1 - .5_real64*s
1712 r2=bb*(r2-r3)
1713 s2=bb*(s2+s3)
1714 zout(1,j,nout2) = r1 - s2
1715 zout(2,j,nout2) = s1 + r2
1716 zout(1,j,nout3) = r1 + s2
1717 zout(2,j,nout3) = s1 - r2
1718 end do
1719 end do
1720 end if
1721 else
1722 itt=ias*before
1723 itrig=itt+1
1724 cr2=trig(1,itrig)
1725 ci2=trig(2,itrig)
1726 itrig=itrig+itt
1727 cr3=trig(1,itrig)
1728 ci3=trig(2,itrig)
1729 nin1=ia-after
1730 nout1=ia-atn
1731 do ib = 1, before
1732 nin1=nin1+after
1733 nin2=nin1+atb
1734 nin3=nin2+atb
1735 nout1=nout1+atn
1736 nout2=nout1+after
1737 nout3=nout2+after
1738 do j = 1, nfft
1739 r1=zin(1,j,nin1)
1740 s1=zin(2,j,nin1)
1741 r=zin(1,j,nin2)
1742 s=zin(2,j,nin2)
1743 r2=r*cr2 - s*ci2
1744 s2=r*ci2 + s*cr2
1745 r=zin(1,j,nin3)
1746 s=zin(2,j,nin3)
1747 r3=r*cr3 - s*ci3
1748 s3=r*ci3 + s*cr3
1749 r=r2 + r3
1750 s=s2 + s3
1751 zout(1,j,nout1) = r + r1
1752 zout(2,j,nout1) = s + s1
1753 r1=r1 - .5_real64*r
1754 s1=s1 - .5_real64*s
1755 r2=bb*(r2-r3)
1756 s2=bb*(s2-s3)
1757 zout(1,j,nout2) = r1 - s2
1758 zout(2,j,nout2) = s1 + r2
1759 zout(1,j,nout3) = r1 + s2
1760 zout(2,j,nout3) = s1 - r2
1761 end do
1762 end do
1763 end if
1764 end do
1765 else if (now == 5) then
1766 ! cos(2._real64*pi/5._real64)
1767 cos2=0.3090169943749474_real64
1768 ! cos(4._real64*pi/5._real64)
1769 cos4=-0.8090169943749474_real64
1770 ! sin(2._real64*pi/5._real64)
1771 sin2=isign*0.9510565162951536_real64
1772 ! sin(4._real64*pi/5._real64)
1773 sin4=isign*0.5877852522924731_real64
1774 ia=1
1775 nin1=ia-after
1776 nout1=ia-atn
1777 do ib = 1, before
1778 nin1=nin1+after
1779 nin2=nin1+atb
1780 nin3=nin2+atb
1781 nin4=nin3+atb
1782 nin5=nin4+atb
1783 nout1=nout1+atn
1784 nout2=nout1+after
1785 nout3=nout2+after
1786 nout4=nout3+after
1787 nout5=nout4+after
1788 do j = 1, nfft
1789 r1=zin(1,j,nin1)
1790 s1=zin(2,j,nin1)
1791 r2=zin(1,j,nin2)
1792 s2=zin(2,j,nin2)
1793 r3=zin(1,j,nin3)
1794 s3=zin(2,j,nin3)
1795 r4=zin(1,j,nin4)
1796 s4=zin(2,j,nin4)
1797 r5=zin(1,j,nin5)
1798 s5=zin(2,j,nin5)
1799 r25 = r2 + r5
1800 r34 = r3 + r4
1801 s25 = s2 - s5
1802 s34 = s3 - s4
1803 zout(1,j,nout1) = r1 + r25 + r34
1804 r = r1 + cos2*r25 + cos4*r34
1805 s = sin2*s25 + sin4*s34
1806 zout(1,j,nout2) = r - s
1807 zout(1,j,nout5) = r + s
1808 r = r1 + cos4*r25 + cos2*r34
1809 s = sin4*s25 - sin2*s34
1810 zout(1,j,nout3) = r - s
1811 zout(1,j,nout4) = r + s
1812 r25 = r2 - r5
1813 r34 = r3 - r4
1814 s25 = s2 + s5
1815 s34 = s3 + s4
1816 zout(2,j,nout1) = s1 + s25 + s34
1817 r = s1 + cos2*s25 + cos4*s34
1818 s = sin2*r25 + sin4*r34
1819 zout(2,j,nout2) = r + s
1820 zout(2,j,nout5) = r - s
1821 r = s1 + cos4*s25 + cos2*s34
1822 s = sin4*r25 - sin2*r34
1823 zout(2,j,nout3) = r + s
1824 zout(2,j,nout4) = r - s
1825 end do
1826 end do
1827 do ia = 2, after
1828 ias=ia-1
1829 if (8*ias == 5*after) then
1830 if (isign == 1) then
1831 nin1=ia-after
1832 nout1=ia-atn
1833 do ib = 1, before
1834 nin1=nin1+after
1835 nin2=nin1+atb
1836 nin3=nin2+atb
1837 nin4=nin3+atb
1838 nin5=nin4+atb
1839 nout1=nout1+atn
1840 nout2=nout1+after
1841 nout3=nout2+after
1842 nout4=nout3+after
1843 nout5=nout4+after
1844 do j = 1, nfft
1845 r1=zin(1,j,nin1)
1846 s1=zin(2,j,nin1)
1847 r=zin(1,j,nin2)
1848 s=zin(2,j,nin2)
1849 r2=(r - s)*rt2i
1850 s2=(r + s)*rt2i
1851 r3=zin(2,j,nin3)
1852 s3=zin(1,j,nin3)
1853 r=zin(1,j,nin4)
1854 s=zin(2,j,nin4)
1855 r4=(r + s)*rt2i
1856 s4=(r - s)*rt2i
1857 r5=zin(1,j,nin5)
1858 s5=zin(2,j,nin5)
1859 r25 = r2 - r5
1860 r34 = r3 + r4
1861 s25 = s2 + s5
1862 s34 = s3 - s4
1863 zout(1,j,nout1) = r1 + r25 - r34
1864 r = r1 + cos2*r25 - cos4*r34
1865 s = sin2*s25 + sin4*s34
1866 zout(1,j,nout2) = r - s
1867 zout(1,j,nout5) = r + s
1868 r = r1 + cos4*r25 - cos2*r34
1869 s = sin4*s25 - sin2*s34
1870 zout(1,j,nout3) = r - s
1871 zout(1,j,nout4) = r + s
1872 r25 = r2 + r5
1873 r34 = r4 - r3
1874 s25 = s2 - s5
1875 s34 = s3 + s4
1876 zout(2,j,nout1) = s1 + s25 + s34
1877 r = s1 + cos2*s25 + cos4*s34
1878 s = sin2*r25 + sin4*r34
1879 zout(2,j,nout2) = r + s
1880 zout(2,j,nout5) = r - s
1881 r = s1 + cos4*s25 + cos2*s34
1882 s = sin4*r25 - sin2*r34
1883 zout(2,j,nout3) = r + s
1884 zout(2,j,nout4) = r - s
1885 end do
1886 end do
1887 else
1888 nin1=ia-after
1889 nout1=ia-atn
1890 do ib = 1, before
1891 nin1=nin1+after
1892 nin2=nin1+atb
1893 nin3=nin2+atb
1894 nin4=nin3+atb
1895 nin5=nin4+atb
1896 nout1=nout1+atn
1897 nout2=nout1+after
1898 nout3=nout2+after
1899 nout4=nout3+after
1900 nout5=nout4+after
1901 do j = 1, nfft
1902 r1=zin(1,j,nin1)
1903 s1=zin(2,j,nin1)
1904 r=zin(1,j,nin2)
1905 s=zin(2,j,nin2)
1906 r2=(r + s)*rt2i
1907 s2=(s - r)*rt2i
1908 r3=zin(2,j,nin3)
1909 s3=zin(1,j,nin3)
1910 r=zin(1,j,nin4)
1911 s=zin(2,j,nin4)
1912 r4=(s - r)*rt2i
1913 s4=(r + s)*rt2i
1914 r5=zin(1,j,nin5)
1915 s5=zin(2,j,nin5)
1916 r25 = r2 - r5
1917 r34 = r3 + r4
1918 s25 = s2 + s5
1919 s34 = s4 - s3
1920 zout(1,j,nout1) = r1 + r25 + r34
1921 r = r1 + cos2*r25 + cos4*r34
1922 s = sin2*s25 + sin4*s34
1923 zout(1,j,nout2) = r - s
1924 zout(1,j,nout5) = r + s
1925 r = r1 + cos4*r25 + cos2*r34
1926 s = sin4*s25 - sin2*s34
1927 zout(1,j,nout3) = r - s
1928 zout(1,j,nout4) = r + s
1929 r25 = r2 + r5
1930 r34 = r3 - r4
1931 s25 = s2 - s5
1932 s34 = s3 + s4
1933 zout(2,j,nout1) = s1 + s25 - s34
1934 r = s1 + cos2*s25 - cos4*s34
1935 s = sin2*r25 + sin4*r34
1936 zout(2,j,nout2) = r + s
1937 zout(2,j,nout5) = r - s
1938 r = s1 + cos4*s25 - cos2*s34
1939 s = sin4*r25 - sin2*r34
1940 zout(2,j,nout3) = r + s
1941 zout(2,j,nout4) = r - s
1942 end do
1943 end do
1944 end if
1945 else
1946 ias=ia-1
1947 itt=ias*before
1948 itrig=itt+1
1949 cr2=trig(1,itrig)
1950 ci2=trig(2,itrig)
1951 itrig=itrig+itt
1952 cr3=trig(1,itrig)
1953 ci3=trig(2,itrig)
1954 itrig=itrig+itt
1955 cr4=trig(1,itrig)
1956 ci4=trig(2,itrig)
1957 itrig=itrig+itt
1958 cr5=trig(1,itrig)
1959 ci5=trig(2,itrig)
1960 nin1=ia-after
1961 nout1=ia-atn
1962 do ib = 1, before
1963 nin1=nin1+after
1964 nin2=nin1+atb
1965 nin3=nin2+atb
1966 nin4=nin3+atb
1967 nin5=nin4+atb
1968 nout1=nout1+atn
1969 nout2=nout1+after
1970 nout3=nout2+after
1971 nout4=nout3+after
1972 nout5=nout4+after
1973 do j = 1, nfft
1974 r1=zin(1,j,nin1)
1975 s1=zin(2,j,nin1)
1976 r=zin(1,j,nin2)
1977 s=zin(2,j,nin2)
1978 r2=r*cr2 - s*ci2
1979 s2=r*ci2 + s*cr2
1980 r=zin(1,j,nin3)
1981 s=zin(2,j,nin3)
1982 r3=r*cr3 - s*ci3
1983 s3=r*ci3 + s*cr3
1984 r=zin(1,j,nin4)
1985 s=zin(2,j,nin4)
1986 r4=r*cr4 - s*ci4
1987 s4=r*ci4 + s*cr4
1988 r=zin(1,j,nin5)
1989 s=zin(2,j,nin5)
1990 r5=r*cr5 - s*ci5
1991 s5=r*ci5 + s*cr5
1992 r25 = r2 + r5
1993 r34 = r3 + r4
1994 s25 = s2 - s5
1995 s34 = s3 - s4
1996 zout(1,j,nout1) = r1 + r25 + r34
1997 r = r1 + cos2*r25 + cos4*r34
1998 s = sin2*s25 + sin4*s34
1999 zout(1,j,nout2) = r - s
2000 zout(1,j,nout5) = r + s
2001 r = r1 + cos4*r25 + cos2*r34
2002 s = sin4*s25 - sin2*s34
2003 zout(1,j,nout3) = r - s
2004 zout(1,j,nout4) = r + s
2005 r25 = r2 - r5
2006 r34 = r3 - r4
2007 s25 = s2 + s5
2008 s34 = s3 + s4
2009 zout(2,j,nout1) = s1 + s25 + s34
2010 r = s1 + cos2*s25 + cos4*s34
2011 s = sin2*r25 + sin4*r34
2012 zout(2,j,nout2) = r + s
2013 zout(2,j,nout5) = r - s
2014 r = s1 + cos4*s25 + cos2*s34
2015 s = sin4*r25 - sin2*r34
2016 zout(2,j,nout3) = r + s
2017 zout(2,j,nout4) = r - s
2018 end do
2019 end do
2020 end if
2021 end do
2022 else if (now == 6) then
2023 ! .5_real64*sqrt(3._real64)
2024 bb=isign*0.8660254037844387_real64
2025
2026 ia=1
2027 nin1=ia-after
2028 nout1=ia-atn
2029 do ib = 1, before
2030 nin1=nin1+after
2031 nin2=nin1+atb
2032 nin3=nin2+atb
2033 nin4=nin3+atb
2034 nin5=nin4+atb
2035 nin6=nin5+atb
2036 nout1=nout1+atn
2037 nout2=nout1+after
2038 nout3=nout2+after
2039 nout4=nout3+after
2040 nout5=nout4+after
2041 nout6=nout5+after
2042 do j = 1, nfft
2043 r2=zin(1,j,nin3)
2044 s2=zin(2,j,nin3)
2045 r3=zin(1,j,nin5)
2046 s3=zin(2,j,nin5)
2047 r=r2 + r3
2048 s=s2 + s3
2049 r1=zin(1,j,nin1)
2050 s1=zin(2,j,nin1)
2051 ur1 = r + r1
2052 ui1 = s + s1
2053 r1=r1 - .5_real64*r
2054 s1=s1 - .5_real64*s
2055 r=r2-r3
2056 s=s2-s3
2057 ur2 = r1 - s*bb
2058 ui2 = s1 + r*bb
2059 ur3 = r1 + s*bb
2060 ui3 = s1 - r*bb
2061
2062 r2=zin(1,j,nin6)
2063 s2=zin(2,j,nin6)
2064 r3=zin(1,j,nin2)
2065 s3=zin(2,j,nin2)
2066 r=r2 + r3
2067 s=s2 + s3
2068 r1=zin(1,j,nin4)
2069 s1=zin(2,j,nin4)
2070 vr1 = r + r1
2071 vi1 = s + s1
2072 r1=r1 - .5_real64*r
2073 s1=s1 - .5_real64*s
2074 r=r2-r3
2075 s=s2-s3
2076 vr2 = r1 - s*bb
2077 vi2 = s1 + r*bb
2078 vr3 = r1 + s*bb
2079 vi3 = s1 - r*bb
2080
2081 zout(1,j,nout1)=ur1+vr1
2082 zout(2,j,nout1)=ui1+vi1
2083 zout(1,j,nout5)=ur2+vr2
2084 zout(2,j,nout5)=ui2+vi2
2085 zout(1,j,nout3)=ur3+vr3
2086 zout(2,j,nout3)=ui3+vi3
2087 zout(1,j,nout4)=ur1-vr1
2088 zout(2,j,nout4)=ui1-vi1
2089 zout(1,j,nout2)=ur2-vr2
2090 zout(2,j,nout2)=ui2-vi2
2091 zout(1,j,nout6)=ur3-vr3
2092 zout(2,j,nout6)=ui3-vi3
2093 end do
2094 end do
2095 else
2096 stop 'error fftstp'
2097 end if
2098
2099 end subroutine fftstp
2100
2101
2102
2103 ! Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
2104 ! Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1995, 1999
2105 ! This file is distributed under the terms of the
2106 ! GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
2107 subroutine fftrot(mm,nfft,m,nn,n,zin,zout,trig,after,now,before,isign)
2108 integer :: mm, nfft, m, nn, n, isign
2109 integer :: after, before, atn, atb, now
2110 real(real64) :: trig(:,:)
2111 real(real64) :: zin(2, mm, m), zout(2, n, nn)
2112
2113 real(real64) :: rt2i, dp, cp, cm, ci5, cr5, ci6, cr6, am, ap, ci8, cr8
2114 real(real64) :: r, r1, r2, r3, r4, r5, r6, r7, r8_, r25, r34
2115 real(real64) :: s, s1, s2, s3, s4, s5, s6, s7, s8, s25, s34
2116 real(real64) :: bb, bm, dm, bp
2117 real(real64) :: cr2, ci2, cr3, ci3, cr4, ci4, cr7, ci7
2118 real(real64) :: cos2, sin2, cos4, sin4
2119 real(real64) :: ur1, ur2, ur3, ui1, ui2, ui3
2120 real(real64) :: vr1, vr2, vr3, vi1, vi2, vi3
2121 integer :: ia, ib, ib1, j, ias, itrig, itt
2122 integer :: nin1, nin2, nin3, nin4, nin5, nin6, nin7, nin8
2123 integer :: nout1, nout2, nout3, nout4, nout5, nout6, nout7, nout8
2124
2125 atn=after*now
2126 atb=after*before
2127
2128 ! sqrt(.5_real64)
2129 rt2i=0.7071067811865475_real64
2130 if (now == 2) then
2131 ia=1
2132 nin1=ia-after
2133 nout1=ia-atn
2134 do ib=1,before
2135 nin1=nin1+after
2136 nin2=nin1+atb
2137 nout1=nout1+atn
2138 nout2=nout1+after
2139 do j=1,nfft
2140 r1=zin(1,j,nin1)
2141 s1=zin(2,j,nin1)
2142 r2=zin(1,j,nin2)
2143 s2=zin(2,j,nin2)
2144 zout(1,nout1,j)= r2 + r1
2145 zout(2,nout1,j)= s2 + s1
2146 zout(1,nout2,j)= r1 - r2
2147 zout(2,nout2,j)= s1 - s2
2148 end do
2149 end do
2150
2151 do ia=2,after
2152 ias=ia-1
2153 if (2*ias == after) then
2154 if (isign == 1) then
2155 nin1=ia-after
2156 nout1=ia-atn
2157 do ib=1,before
2158 nin1=nin1+after
2159 nin2=nin1+atb
2160 nout1=nout1+atn
2161 nout2=nout1+after
2162 do j=1,nfft
2163 r1=zin(1,j,nin1)
2164 s1=zin(2,j,nin1)
2165 r2=zin(2,j,nin2)
2166 s2=zin(1,j,nin2)
2167 zout(1,nout1,j)= r1 - r2
2168 zout(2,nout1,j)= s2 + s1
2169 zout(1,nout2,j)= r2 + r1
2170 zout(2,nout2,j)= s1 - s2
2171 end do
2172 end do
2173 else
2174 nin1=ia-after
2175 nout1=ia-atn
2176 do ib=1,before
2177 nin1=nin1+after
2178 nin2=nin1+atb
2179 nout1=nout1+atn
2180 nout2=nout1+after
2181 do j=1,nfft
2182 r1=zin(1,j,nin1)
2183 s1=zin(2,j,nin1)
2184 r2=zin(2,j,nin2)
2185 s2=zin(1,j,nin2)
2186 zout(1,nout1,j)= r2 + r1
2187 zout(2,nout1,j)= s1 - s2
2188 zout(1,nout2,j)= r1 - r2
2189 zout(2,nout2,j)= s2 + s1
2190 end do
2191 end do
2192 end if
2193 else if (4*ias == after) then
2194 if (isign == 1) then
2195 nin1=ia-after
2196 nout1=ia-atn
2197 do ib=1,before
2198 nin1=nin1+after
2199 nin2=nin1+atb
2200 nout1=nout1+atn
2201 nout2=nout1+after
2202 do j=1,nfft
2203 r1=zin(1,j,nin1)
2204 s1=zin(2,j,nin1)
2205 r=zin(1,j,nin2)
2206 s=zin(2,j,nin2)
2207 r2=(r - s)*rt2i
2208 s2=(r + s)*rt2i
2209 zout(1,nout1,j)= r2 + r1
2210 zout(2,nout1,j)= s2 + s1
2211 zout(1,nout2,j)= r1 - r2
2212 zout(2,nout2,j)= s1 - s2
2213 end do
2214 end do
2215 else
2216 nin1=ia-after
2217 nout1=ia-atn
2218 do ib=1,before
2219 nin1=nin1+after
2220 nin2=nin1+atb
2221 nout1=nout1+atn
2222 nout2=nout1+after
2223 do j=1,nfft
2224 r1=zin(1,j,nin1)
2225 s1=zin(2,j,nin1)
2226 r=zin(1,j,nin2)
2227 s=zin(2,j,nin2)
2228 r2=(r + s)*rt2i
2229 s2=(s - r)*rt2i
2230 zout(1,nout1,j)= r2 + r1
2231 zout(2,nout1,j)= s2 + s1
2232 zout(1,nout2,j)= r1 - r2
2233 zout(2,nout2,j)= s1 - s2
2234 end do
2235 end do
2236 end if
2237 else if (4*ias == 3*after) then
2238 if (isign == 1) then
2239 nin1=ia-after
2240 nout1=ia-atn
2241 do ib=1,before
2242 nin1=nin1+after
2243 nin2=nin1+atb
2244 nout1=nout1+atn
2245 nout2=nout1+after
2246 do j=1,nfft
2247 r1=zin(1,j,nin1)
2248 s1=zin(2,j,nin1)
2249 r=zin(1,j,nin2)
2250 s=zin(2,j,nin2)
2251 r2=(r + s)*rt2i
2252 s2=(r - s)*rt2i
2253 zout(1,nout1,j)= r1 - r2
2254 zout(2,nout1,j)= s2 + s1
2255 zout(1,nout2,j)= r2 + r1
2256 zout(2,nout2,j)= s1 - s2
2257 end do
2258 end do
2259 else
2260 nin1=ia-after
2261 nout1=ia-atn
2262 do ib=1,before
2263 nin1=nin1+after
2264 nin2=nin1+atb
2265 nout1=nout1+atn
2266 nout2=nout1+after
2267 do j=1,nfft
2268 r1=zin(1,j,nin1)
2269 s1=zin(2,j,nin1)
2270 r=zin(1,j,nin2)
2271 s=zin(2,j,nin2)
2272 r2=(s - r)*rt2i
2273 s2=(r + s)*rt2i
2274 zout(1,nout1,j)= r2 + r1
2275 zout(2,nout1,j)= s1 - s2
2276 zout(1,nout2,j)= r1 - r2
2277 zout(2,nout2,j)= s2 + s1
2278 end do
2279 end do
2280 end if
2281 else
2282 itrig=ias*before+1
2283 cr2=trig(1,itrig)
2284 ci2=trig(2,itrig)
2285 nin1=ia-after
2286 nout1=ia-atn
2287 do ib=1,before
2288 nin1=nin1+after
2289 nin2=nin1+atb
2290 nout1=nout1+atn
2291 nout2=nout1+after
2292 do j=1,nfft
2293 r1=zin(1,j,nin1)
2294 s1=zin(2,j,nin1)
2295 r=zin(1,j,nin2)
2296 s=zin(2,j,nin2)
2297 r2=r*cr2 - s*ci2
2298 s2=r*ci2 + s*cr2
2299 zout(1,nout1,j)= r2 + r1
2300 zout(2,nout1,j)= s2 + s1
2301 zout(1,nout2,j)= r1 - r2
2302 zout(2,nout2,j)= s1 - s2
2303 end do
2304 end do
2305 end if
2306 end do
2307 else if (now == 4) then
2308 if (isign == 1) then
2309 ia=1
2310 nin1=ia-after
2311 nout1=ia-atn
2312 do ib=1,before
2313 nin1=nin1+after
2314 nin2=nin1+atb
2315 nin3=nin2+atb
2316 nin4=nin3+atb
2317 nout1=nout1+atn
2318 nout2=nout1+after
2319 nout3=nout2+after
2320 nout4=nout3+after
2321 do j=1,nfft
2322 r1=zin(1,j,nin1)
2323 s1=zin(2,j,nin1)
2324 r2=zin(1,j,nin2)
2325 s2=zin(2,j,nin2)
2326 r3=zin(1,j,nin3)
2327 s3=zin(2,j,nin3)
2328 r4=zin(1,j,nin4)
2329 s4=zin(2,j,nin4)
2330 r=r1 + r3
2331 s=r2 + r4
2332 zout(1,nout1,j) = r + s
2333 zout(1,nout3,j) = r - s
2334 r=r1 - r3
2335 s=s2 - s4
2336 zout(1,nout2,j) = r - s
2337 zout(1,nout4,j) = r + s
2338 r=s1 + s3
2339 s=s2 + s4
2340 zout(2,nout1,j) = r + s
2341 zout(2,nout3,j) = r - s
2342 r=s1 - s3
2343 s=r2 - r4
2344 zout(2,nout2,j) = r + s
2345 zout(2,nout4,j) = r - s
2346 end do
2347 end do
2348 do ia=2,after
2349 ias=ia-1
2350 if (2*ias == after) then
2351 nin1=ia-after
2352 nout1=ia-atn
2353 do ib=1,before
2354 nin1=nin1+after
2355 nin2=nin1+atb
2356 nin3=nin2+atb
2357 nin4=nin3+atb
2358 nout1=nout1+atn
2359 nout2=nout1+after
2360 nout3=nout2+after
2361 nout4=nout3+after
2362 do j=1,nfft
2363 r1=zin(1,j,nin1)
2364 s1=zin(2,j,nin1)
2365 r=zin(1,j,nin2)
2366 s=zin(2,j,nin2)
2367 r2=(r-s)*rt2i
2368 s2=(r+s)*rt2i
2369 r3=zin(2,j,nin3)
2370 s3=zin(1,j,nin3)
2371 r=zin(1,j,nin4)
2372 s=zin(2,j,nin4)
2373 r4=(r + s)*rt2i
2374 s4=(r - s)*rt2i
2375 r=r1 - r3
2376 s=r2 - r4
2377 zout(1,nout1,j) = r + s
2378 zout(1,nout3,j) = r - s
2379 r=r1 + r3
2380 s=s2 - s4
2381 zout(1,nout2,j) = r - s
2382 zout(1,nout4,j) = r + s
2383 r=s1 + s3
2384 s=s2 + s4
2385 zout(2,nout1,j) = r + s
2386 zout(2,nout3,j) = r - s
2387 r=s1 - s3
2388 s=r2 + r4
2389 zout(2,nout2,j) = r + s
2390 zout(2,nout4,j) = r - s
2391 end do
2392 end do
2393 else
2394 itt=ias*before
2395 itrig=itt+1
2396 cr2=trig(1,itrig)
2397 ci2=trig(2,itrig)
2398 itrig=itrig+itt
2399 cr3=trig(1,itrig)
2400 ci3=trig(2,itrig)
2401 itrig=itrig+itt
2402 cr4=trig(1,itrig)
2403 ci4=trig(2,itrig)
2404 nin1=ia-after
2405 nout1=ia-atn
2406 do ib=1,before
2407 nin1=nin1+after
2408 nin2=nin1+atb
2409 nin3=nin2+atb
2410 nin4=nin3+atb
2411 nout1=nout1+atn
2412 nout2=nout1+after
2413 nout3=nout2+after
2414 nout4=nout3+after
2415 do j=1,nfft
2416 r1=zin(1,j,nin1)
2417 s1=zin(2,j,nin1)
2418 r=zin(1,j,nin2)
2419 s=zin(2,j,nin2)
2420 r2=r*cr2 - s*ci2
2421 s2=r*ci2 + s*cr2
2422 r=zin(1,j,nin3)
2423 s=zin(2,j,nin3)
2424 r3=r*cr3 - s*ci3
2425 s3=r*ci3 + s*cr3
2426 r=zin(1,j,nin4)
2427 s=zin(2,j,nin4)
2428 r4=r*cr4 - s*ci4
2429 s4=r*ci4 + s*cr4
2430 r=r1 + r3
2431 s=r2 + r4
2432 zout(1,nout1,j) = r + s
2433 zout(1,nout3,j) = r - s
2434 r=r1 - r3
2435 s=s2 - s4
2436 zout(1,nout2,j) = r - s
2437 zout(1,nout4,j) = r + s
2438 r=s1 + s3
2439 s=s2 + s4
2440 zout(2,nout1,j) = r + s
2441 zout(2,nout3,j) = r - s
2442 r=s1 - s3
2443 s=r2 - r4
2444 zout(2,nout2,j) = r + s
2445 zout(2,nout4,j) = r - s
2446 end do
2447 end do
2448 end if
2449 end do
2450 else
2451 ia=1
2452 nin1=ia-after
2453 nout1=ia-atn
2454 do ib=1,before
2455 nin1=nin1+after
2456 nin2=nin1+atb
2457 nin3=nin2+atb
2458 nin4=nin3+atb
2459 nout1=nout1+atn
2460 nout2=nout1+after
2461 nout3=nout2+after
2462 nout4=nout3+after
2463 do j=1,nfft
2464 r1=zin(1,j,nin1)
2465 s1=zin(2,j,nin1)
2466 r2=zin(1,j,nin2)
2467 s2=zin(2,j,nin2)
2468 r3=zin(1,j,nin3)
2469 s3=zin(2,j,nin3)
2470 r4=zin(1,j,nin4)
2471 s4=zin(2,j,nin4)
2472 r=r1 + r3
2473 s=r2 + r4
2474 zout(1,nout1,j) = r + s
2475 zout(1,nout3,j) = r - s
2476 r=r1 - r3
2477 s=s2 - s4
2478 zout(1,nout2,j) = r + s
2479 zout(1,nout4,j) = r - s
2480 r=s1 + s3
2481 s=s2 + s4
2482 zout(2,nout1,j) = r + s
2483 zout(2,nout3,j) = r - s
2484 r=s1 - s3
2485 s=r2 - r4
2486 zout(2,nout2,j) = r - s
2487 zout(2,nout4,j) = r + s
2488 end do
2489 end do
2490 do ia=2,after
2491 ias=ia-1
2492 if (2*ias == after) then
2493 nin1=ia-after
2494 nout1=ia-atn
2495 do ib=1,before
2496 nin1=nin1+after
2497 nin2=nin1+atb
2498 nin3=nin2+atb
2499 nin4=nin3+atb
2500 nout1=nout1+atn
2501 nout2=nout1+after
2502 nout3=nout2+after
2503 nout4=nout3+after
2504 do j=1,nfft
2505 r1=zin(1,j,nin1)
2506 s1=zin(2,j,nin1)
2507 r=zin(1,j,nin2)
2508 s=zin(2,j,nin2)
2509 r2=(r + s)*rt2i
2510 s2=(s - r)*rt2i
2511 r3=zin(2,j,nin3)
2512 s3=zin(1,j,nin3)
2513 r=zin(1,j,nin4)
2514 s=zin(2,j,nin4)
2515 r4=(s - r)*rt2i
2516 s4=(r + s)*rt2i
2517 r=r1 + r3
2518 s=r2 + r4
2519 zout(1,nout1,j) = r + s
2520 zout(1,nout3,j) = r - s
2521 r=r1 - r3
2522 s=s2 + s4
2523 zout(1,nout2,j) = r + s
2524 zout(1,nout4,j) = r - s
2525 r=s1 - s3
2526 s=s2 - s4
2527 zout(2,nout1,j) = r + s
2528 zout(2,nout3,j) = r - s
2529 r=s1 + s3
2530 s=r2 - r4
2531 zout(2,nout2,j) = r - s
2532 zout(2,nout4,j) = r + s
2533 end do
2534 end do
2535 else
2536 itt=ias*before
2537 itrig=itt+1
2538 cr2=trig(1,itrig)
2539 ci2=trig(2,itrig)
2540 itrig=itrig+itt
2541 cr3=trig(1,itrig)
2542 ci3=trig(2,itrig)
2543 itrig=itrig+itt
2544 cr4=trig(1,itrig)
2545 ci4=trig(2,itrig)
2546 nin1=ia-after
2547 nout1=ia-atn
2548 do ib=1,before
2549 nin1=nin1+after
2550 nin2=nin1+atb
2551 nin3=nin2+atb
2552 nin4=nin3+atb
2553 nout1=nout1+atn
2554 nout2=nout1+after
2555 nout3=nout2+after
2556 nout4=nout3+after
2557 do j=1,nfft
2558 r1=zin(1,j,nin1)
2559 s1=zin(2,j,nin1)
2560 r=zin(1,j,nin2)
2561 s=zin(2,j,nin2)
2562 r2=r*cr2 - s*ci2
2563 s2=r*ci2 + s*cr2
2564 r=zin(1,j,nin3)
2565 s=zin(2,j,nin3)
2566 r3=r*cr3 - s*ci3
2567 s3=r*ci3 + s*cr3
2568 r=zin(1,j,nin4)
2569 s=zin(2,j,nin4)
2570 r4=r*cr4 - s*ci4
2571 s4=r*ci4 + s*cr4
2572 r=r1 + r3
2573 s=r2 + r4
2574 zout(1,nout1,j) = r + s
2575 zout(1,nout3,j) = r - s
2576 r=r1 - r3
2577 s=s2 - s4
2578 zout(1,nout2,j) = r + s
2579 zout(1,nout4,j) = r - s
2580 r=s1 + s3
2581 s=s2 + s4
2582 zout(2,nout1,j) = r + s
2583 zout(2,nout3,j) = r - s
2584 r=s1 - s3
2585 s=r2 - r4
2586 zout(2,nout2,j) = r - s
2587 zout(2,nout4,j) = r + s
2588 end do
2589 end do
2590 end if
2591 end do
2592 end if
2593 else if (now == 8) then
2594 if (isign == -1) then
2595 ia=1
2596 nin1=ia-after
2597 nout1=ia-atn
2598 do ib1=1,before
2599 nin1=nin1+after
2600 nin2=nin1+atb
2601 nin3=nin2+atb
2602 nin4=nin3+atb
2603 nin5=nin4+atb
2604 nin6=nin5+atb
2605 nin7=nin6+atb
2606 nin8=nin7+atb
2607 nout1=nout1+atn
2608 nout2=nout1+after
2609 nout3=nout2+after
2610 nout4=nout3+after
2611 nout5=nout4+after
2612 nout6=nout5+after
2613 nout7=nout6+after
2614 nout8=nout7+after
2615 do j=1,nfft
2616 r1=zin(1,j,nin1)
2617 s1=zin(2,j,nin1)
2618 r2=zin(1,j,nin2)
2619 s2=zin(2,j,nin2)
2620 r3=zin(1,j,nin3)
2621 s3=zin(2,j,nin3)
2622 r4=zin(1,j,nin4)
2623 s4=zin(2,j,nin4)
2624 r5=zin(1,j,nin5)
2625 s5=zin(2,j,nin5)
2626 r6=zin(1,j,nin6)
2627 s6=zin(2,j,nin6)
2628 r7=zin(1,j,nin7)
2629 s7=zin(2,j,nin7)
2630 r8_=zin(1,j,nin8)
2631 s8=zin(2,j,nin8)
2632 r=r1 + r5
2633 s=r3 + r7
2634 ap=r + s
2635 am=r - s
2636 r=r2 + r6
2637 s=r4 + r8_
2638 bp=r + s
2639 bm=r - s
2640 r=s1 + s5
2641 s=s3 + s7
2642 cp=r + s
2643 cm=r - s
2644 r=s2 + s6
2645 s=s4 + s8
2646 dp=r + s
2647 dm=r - s
2648 zout(1,nout1,j) = ap + bp
2649 zout(2,nout1,j) = cp + dp
2650 zout(1,nout5,j) = ap - bp
2651 zout(2,nout5,j) = cp - dp
2652 zout(1,nout3,j) = am + dm
2653 zout(2,nout3,j) = cm - bm
2654 zout(1,nout7,j) = am - dm
2655 zout(2,nout7,j) = cm + bm
2656 r=r1 - r5
2657 s=s3 - s7
2658 ap=r + s
2659 am=r - s
2660 r=s1 - s5
2661 s=r3 - r7
2662 bp=r + s
2663 bm=r - s
2664 r=s4 - s8
2665 s=r2 - r6
2666 cp=r + s
2667 cm=r - s
2668 r=s2 - s6
2669 s=r4 - r8_
2670 dp=r + s
2671 dm=r - s
2672 r = ( cp + dm)*rt2i
2673 s = ( dm - cp)*rt2i
2674 cp= ( cm + dp)*rt2i
2675 dp= ( cm - dp)*rt2i
2676 zout(1,nout2,j) = ap + r
2677 zout(2,nout2,j) = bm + s
2678 zout(1,nout6,j) = ap - r
2679 zout(2,nout6,j) = bm - s
2680 zout(1,nout4,j) = am + cp
2681 zout(2,nout4,j) = bp + dp
2682 zout(1,nout8,j) = am - cp
2683 zout(2,nout8,j) = bp - dp
2684 end do
2685 end do
2686 do ia=2,after
2687 ias=ia-1
2688 itt=ias*before
2689 itrig=itt+1
2690 cr2=trig(1,itrig)
2691 ci2=trig(2,itrig)
2692 itrig=itrig+itt
2693 cr3=trig(1,itrig)
2694 ci3=trig(2,itrig)
2695 itrig=itrig+itt
2696 cr4=trig(1,itrig)
2697 ci4=trig(2,itrig)
2698 itrig=itrig+itt
2699 cr5=trig(1,itrig)
2700 ci5=trig(2,itrig)
2701 itrig=itrig+itt
2702 cr6=trig(1,itrig)
2703 ci6=trig(2,itrig)
2704 itrig=itrig+itt
2705 cr7=trig(1,itrig)
2706 ci7=trig(2,itrig)
2707 itrig=itrig+itt
2708 cr8=trig(1,itrig)
2709 ci8=trig(2,itrig)
2710 nin1=ia-after
2711 nout1=ia-atn
2712 do ib=1,before
2713 nin1=nin1+after
2714 nin2=nin1+atb
2715 nin3=nin2+atb
2716 nin4=nin3+atb
2717 nin5=nin4+atb
2718 nin6=nin5+atb
2719 nin7=nin6+atb
2720 nin8=nin7+atb
2721 nout1=nout1+atn
2722 nout2=nout1+after
2723 nout3=nout2+after
2724 nout4=nout3+after
2725 nout5=nout4+after
2726 nout6=nout5+after
2727 nout7=nout6+after
2728 nout8=nout7+after
2729 do j=1,nfft
2730 r1=zin(1,j,nin1)
2731 s1=zin(2,j,nin1)
2732 r=zin(1,j,nin2)
2733 s=zin(2,j,nin2)
2734 r2=r*cr2 - s*ci2
2735 s2=r*ci2 + s*cr2
2736 r=zin(1,j,nin3)
2737 s=zin(2,j,nin3)
2738 r3=r*cr3 - s*ci3
2739 s3=r*ci3 + s*cr3
2740 r=zin(1,j,nin4)
2741 s=zin(2,j,nin4)
2742 r4=r*cr4 - s*ci4
2743 s4=r*ci4 + s*cr4
2744 r=zin(1,j,nin5)
2745 s=zin(2,j,nin5)
2746 r5=r*cr5 - s*ci5
2747 s5=r*ci5 + s*cr5
2748 r=zin(1,j,nin6)
2749 s=zin(2,j,nin6)
2750 r6=r*cr6 - s*ci6
2751 s6=r*ci6 + s*cr6
2752 r=zin(1,j,nin7)
2753 s=zin(2,j,nin7)
2754 r7=r*cr7 - s*ci7
2755 s7=r*ci7 + s*cr7
2756 r=zin(1,j,nin8)
2757 s=zin(2,j,nin8)
2758 r8_=r*cr8 - s*ci8
2759 s8=r*ci8 + s*cr8
2760 r=r1 + r5
2761 s=r3 + r7
2762 ap=r + s
2763 am=r - s
2764 r=r2 + r6
2765 s=r4 + r8_
2766 bp=r + s
2767 bm=r - s
2768 r=s1 + s5
2769 s=s3 + s7
2770 cp=r + s
2771 cm=r - s
2772 r=s2 + s6
2773 s=s4 + s8
2774 dp=r + s
2775 dm=r - s
2776 zout(1,nout1,j) = ap + bp
2777 zout(2,nout1,j) = cp + dp
2778 zout(1,nout5,j) = ap - bp
2779 zout(2,nout5,j) = cp - dp
2780 zout(1,nout3,j) = am + dm
2781 zout(2,nout3,j) = cm - bm
2782 zout(1,nout7,j) = am - dm
2783 zout(2,nout7,j) = cm + bm
2784 r=r1 - r5
2785 s=s3 - s7
2786 ap=r + s
2787 am=r - s
2788 r=s1 - s5
2789 s=r3 - r7
2790 bp=r + s
2791 bm=r - s
2792 r=s4 - s8
2793 s=r2 - r6
2794 cp=r + s
2795 cm=r - s
2796 r=s2 - s6
2797 s=r4 - r8_
2798 dp=r + s
2799 dm=r - s
2800 r = ( cp + dm)*rt2i
2801 s = ( dm - cp)*rt2i
2802 cp= ( cm + dp)*rt2i
2803 dp= ( cm - dp)*rt2i
2804 zout(1,nout2,j) = ap + r
2805 zout(2,nout2,j) = bm + s
2806 zout(1,nout6,j) = ap - r
2807 zout(2,nout6,j) = bm - s
2808 zout(1,nout4,j) = am + cp
2809 zout(2,nout4,j) = bp + dp
2810 zout(1,nout8,j) = am - cp
2811 zout(2,nout8,j) = bp - dp
2812 end do
2813 end do
2814 end do
2815
2816 else
2817 ia=1
2818 nin1=ia-after
2819 nout1=ia-atn
2820 do ib=1,before
2821 nin1=nin1+after
2822 nin2=nin1+atb
2823 nin3=nin2+atb
2824 nin4=nin3+atb
2825 nin5=nin4+atb
2826 nin6=nin5+atb
2827 nin7=nin6+atb
2828 nin8=nin7+atb
2829 nout1=nout1+atn
2830 nout2=nout1+after
2831 nout3=nout2+after
2832 nout4=nout3+after
2833 nout5=nout4+after
2834 nout6=nout5+after
2835 nout7=nout6+after
2836 nout8=nout7+after
2837 do j=1,nfft
2838 r1=zin(1,j,nin1)
2839 s1=zin(2,j,nin1)
2840 r2=zin(1,j,nin2)
2841 s2=zin(2,j,nin2)
2842 r3=zin(1,j,nin3)
2843 s3=zin(2,j,nin3)
2844 r4=zin(1,j,nin4)
2845 s4=zin(2,j,nin4)
2846 r5=zin(1,j,nin5)
2847 s5=zin(2,j,nin5)
2848 r6=zin(1,j,nin6)
2849 s6=zin(2,j,nin6)
2850 r7=zin(1,j,nin7)
2851 s7=zin(2,j,nin7)
2852 r8_=zin(1,j,nin8)
2853 s8=zin(2,j,nin8)
2854 r=r1 + r5
2855 s=r3 + r7
2856 ap=r + s
2857 am=r - s
2858 r=r2 + r6
2859 s=r4 + r8_
2860 bp=r + s
2861 bm=r - s
2862 r=s1 + s5
2863 s=s3 + s7
2864 cp=r + s
2865 cm=r - s
2866 r=s2 + s6
2867 s=s4 + s8
2868 dp=r + s
2869 dm=r - s
2870 zout(1,nout1,j) = ap + bp
2871 zout(2,nout1,j) = cp + dp
2872 zout(1,nout5,j) = ap - bp
2873 zout(2,nout5,j) = cp - dp
2874 zout(1,nout3,j) = am - dm
2875 zout(2,nout3,j) = cm + bm
2876 zout(1,nout7,j) = am + dm
2877 zout(2,nout7,j) = cm - bm
2878 r= r1 - r5
2879 s=-s3 + s7
2880 ap=r + s
2881 am=r - s
2882 r=s1 - s5
2883 s=r7 - r3
2884 bp=r + s
2885 bm=r - s
2886 r=-s4 + s8
2887 s= r2 - r6
2888 cp=r + s
2889 cm=r - s
2890 r=-s2 + s6
2891 s= r4 - r8_
2892 dp=r + s
2893 dm=r - s
2894 r = ( cp + dm)*rt2i
2895 s = ( cp - dm)*rt2i
2896 cp= ( cm + dp)*rt2i
2897 dp= ( dp - cm)*rt2i
2898 zout(1,nout2,j) = ap + r
2899 zout(2,nout2,j) = bm + s
2900 zout(1,nout6,j) = ap - r
2901 zout(2,nout6,j) = bm - s
2902 zout(1,nout4,j) = am + cp
2903 zout(2,nout4,j) = bp + dp
2904 zout(1,nout8,j) = am - cp
2905 zout(2,nout8,j) = bp - dp
2906 end do
2907 end do
2908
2909 do ia=2,after
2910 ias=ia-1
2911 itt=ias*before
2912 itrig=itt+1
2913 cr2=trig(1,itrig)
2914 ci2=trig(2,itrig)
2915 itrig=itrig+itt
2916 cr3=trig(1,itrig)
2917 ci3=trig(2,itrig)
2918 itrig=itrig+itt
2919 cr4=trig(1,itrig)
2920 ci4=trig(2,itrig)
2921 itrig=itrig+itt
2922 cr5=trig(1,itrig)
2923 ci5=trig(2,itrig)
2924 itrig=itrig+itt
2925 cr6=trig(1,itrig)
2926 ci6=trig(2,itrig)
2927 itrig=itrig+itt
2928 cr7=trig(1,itrig)
2929 ci7=trig(2,itrig)
2930 itrig=itrig+itt
2931 cr8=trig(1,itrig)
2932 ci8=trig(2,itrig)
2933 nin1=ia-after
2934 nout1=ia-atn
2935 do ib=1,before
2936 nin1=nin1+after
2937 nin2=nin1+atb
2938 nin3=nin2+atb
2939 nin4=nin3+atb
2940 nin5=nin4+atb
2941 nin6=nin5+atb
2942 nin7=nin6+atb
2943 nin8=nin7+atb
2944 nout1=nout1+atn
2945 nout2=nout1+after
2946 nout3=nout2+after
2947 nout4=nout3+after
2948 nout5=nout4+after
2949 nout6=nout5+after
2950 nout7=nout6+after
2951 nout8=nout7+after
2952 do j=1,nfft
2953 r1=zin(1,j,nin1)
2954 s1=zin(2,j,nin1)
2955 r=zin(1,j,nin2)
2956 s=zin(2,j,nin2)
2957 r2=r*cr2 - s*ci2
2958 s2=r*ci2 + s*cr2
2959 r=zin(1,j,nin3)
2960 s=zin(2,j,nin3)
2961 r3=r*cr3 - s*ci3
2962 s3=r*ci3 + s*cr3
2963 r=zin(1,j,nin4)
2964 s=zin(2,j,nin4)
2965 r4=r*cr4 - s*ci4
2966 s4=r*ci4 + s*cr4
2967 r=zin(1,j,nin5)
2968 s=zin(2,j,nin5)
2969 r5=r*cr5 - s*ci5
2970 s5=r*ci5 + s*cr5
2971 r=zin(1,j,nin6)
2972 s=zin(2,j,nin6)
2973 r6=r*cr6 - s*ci6
2974 s6=r*ci6 + s*cr6
2975 r=zin(1,j,nin7)
2976 s=zin(2,j,nin7)
2977 r7=r*cr7 - s*ci7
2978 s7=r*ci7 + s*cr7
2979 r=zin(1,j,nin8)
2980 s=zin(2,j,nin8)
2981 r8_=r*cr8 - s*ci8
2982 s8=r*ci8 + s*cr8
2983 r=r1 + r5
2984 s=r3 + r7
2985 ap=r + s
2986 am=r - s
2987 r=r2 + r6
2988 s=r4 + r8_
2989 bp=r + s
2990 bm=r - s
2991 r=s1 + s5
2992 s=s3 + s7
2993 cp=r + s
2994 cm=r - s
2995 r=s2 + s6
2996 s=s4 + s8
2997 dp=r + s
2998 dm=r - s
2999 zout(1,nout1,j) = ap + bp
3000 zout(2,nout1,j) = cp + dp
3001 zout(1,nout5,j) = ap - bp
3002 zout(2,nout5,j) = cp - dp
3003 zout(1,nout3,j) = am - dm
3004 zout(2,nout3,j) = cm + bm
3005 zout(1,nout7,j) = am + dm
3006 zout(2,nout7,j) = cm - bm
3007 r= r1 - r5
3008 s=-s3 + s7
3009 ap=r + s
3010 am=r - s
3011 r=s1 - s5
3012 s=r7 - r3
3013 bp=r + s
3014 bm=r - s
3015 r=-s4 + s8
3016 s= r2 - r6
3017 cp=r + s
3018 cm=r - s
3019 r=-s2 + s6
3020 s= r4 - r8_
3021 dp=r + s
3022 dm=r - s
3023 r = ( cp + dm)*rt2i
3024 s = ( cp - dm)*rt2i
3025 cp= ( cm + dp)*rt2i
3026 dp= ( dp - cm)*rt2i
3027 zout(1,nout2,j) = ap + r
3028 zout(2,nout2,j) = bm + s
3029 zout(1,nout6,j) = ap - r
3030 zout(2,nout6,j) = bm - s
3031 zout(1,nout4,j) = am + cp
3032 zout(2,nout4,j) = bp + dp
3033 zout(1,nout8,j) = am - cp
3034 zout(2,nout8,j) = bp - dp
3035 end do
3036 end do
3037 end do
3038
3039 end if
3040 else if (now == 3) then
3041! .5_real64*sqrt(3._real64)
3042 bb=isign*0.8660254037844387_real64
3043 ia=1
3044 nin1=ia-after
3045 nout1=ia-atn
3046 do ib=1,before
3047 nin1=nin1+after
3048 nin2=nin1+atb
3049 nin3=nin2+atb
3050 nout1=nout1+atn
3051 nout2=nout1+after
3052 nout3=nout2+after
3053 do j=1,nfft
3054 r1=zin(1,j,nin1)
3055 s1=zin(2,j,nin1)
3056 r2=zin(1,j,nin2)
3057 s2=zin(2,j,nin2)
3058 r3=zin(1,j,nin3)
3059 s3=zin(2,j,nin3)
3060 r=r2 + r3
3061 s=s2 + s3
3062 zout(1,nout1,j) = r + r1
3063 zout(2,nout1,j) = s + s1
3064 r1=r1 - .5_real64*r
3065 s1=s1 - .5_real64*s
3066 r2=bb*(r2-r3)
3067 s2=bb*(s2-s3)
3068 zout(1,nout2,j) = r1 - s2
3069 zout(2,nout2,j) = s1 + r2
3070 zout(1,nout3,j) = r1 + s2
3071 zout(2,nout3,j) = s1 - r2
3072 end do
3073 end do
3074 do ia=2,after
3075 ias=ia-1
3076 if (4*ias == 3*after) then
3077 if (isign == 1) then
3078 nin1=ia-after
3079 nout1=ia-atn
3080 do ib=1,before
3081 nin1=nin1+after
3082 nin2=nin1+atb
3083 nin3=nin2+atb
3084 nout1=nout1+atn
3085 nout2=nout1+after
3086 nout3=nout2+after
3087 do j=1,nfft
3088 r1=zin(1,j,nin1)
3089 s1=zin(2,j,nin1)
3090 r2=zin(2,j,nin2)
3091 s2=zin(1,j,nin2)
3092 r3=zin(1,j,nin3)
3093 s3=zin(2,j,nin3)
3094 r=r2 + r3
3095 s=s2 - s3
3096 zout(1,nout1,j) = r1 - r
3097 zout(2,nout1,j) = s + s1
3098 r1=r1 + .5_real64*r
3099 s1=s1 - .5_real64*s
3100 r2=bb*(r2-r3)
3101 s2=bb*(s2+s3)
3102 zout(1,nout2,j) = r1 - s2
3103 zout(2,nout2,j) = s1 - r2
3104 zout(1,nout3,j) = r1 + s2
3105 zout(2,nout3,j) = s1 + r2
3106 end do
3107 end do
3108 else
3109 nin1=ia-after
3110 nout1=ia-atn
3111 do ib=1,before
3112 nin1=nin1+after
3113 nin2=nin1+atb
3114 nin3=nin2+atb
3115 nout1=nout1+atn
3116 nout2=nout1+after
3117 nout3=nout2+after
3118 do j=1,nfft
3119 r1=zin(1,j,nin1)
3120 s1=zin(2,j,nin1)
3121 r2=zin(2,j,nin2)
3122 s2=zin(1,j,nin2)
3123 r3=zin(1,j,nin3)
3124 s3=zin(2,j,nin3)
3125 r=r2 - r3
3126 s=s2 + s3
3127 zout(1,nout1,j) = r + r1
3128 zout(2,nout1,j) = s1 - s
3129 r1=r1 - .5_real64*r
3130 s1=s1 + .5_real64*s
3131 r2=bb*(r2+r3)
3132 s2=bb*(s2-s3)
3133 zout(1,nout2,j) = r1 + s2
3134 zout(2,nout2,j) = s1 + r2
3135 zout(1,nout3,j) = r1 - s2
3136 zout(2,nout3,j) = s1 - r2
3137 end do
3138 end do
3139 end if
3140 else if (8*ias == 3*after) then
3141 if (isign == 1) then
3142 nin1=ia-after
3143 nout1=ia-atn
3144 do ib=1,before
3145 nin1=nin1+after
3146 nin2=nin1+atb
3147 nin3=nin2+atb
3148 nout1=nout1+atn
3149 nout2=nout1+after
3150 nout3=nout2+after
3151 do j=1,nfft
3152 r1=zin(1,j,nin1)
3153 s1=zin(2,j,nin1)
3154 r=zin(1,j,nin2)
3155 s=zin(2,j,nin2)
3156 r2=(r - s)*rt2i
3157 s2=(r + s)*rt2i
3158 r3=zin(2,j,nin3)
3159 s3=zin(1,j,nin3)
3160 r=r2 - r3
3161 s=s2 + s3
3162 zout(1,nout1,j) = r + r1
3163 zout(2,nout1,j) = s + s1
3164 r1=r1 - .5_real64*r
3165 s1=s1 - .5_real64*s
3166 r2=bb*(r2+r3)
3167 s2=bb*(s2-s3)
3168 zout(1,nout2,j) = r1 - s2
3169 zout(2,nout2,j) = s1 + r2
3170 zout(1,nout3,j) = r1 + s2
3171 zout(2,nout3,j) = s1 - r2
3172 end do
3173 end do
3174 else
3175 nin1=ia-after
3176 nout1=ia-atn
3177 do ib=1,before
3178 nin1=nin1+after
3179 nin2=nin1+atb
3180 nin3=nin2+atb
3181 nout1=nout1+atn
3182 nout2=nout1+after
3183 nout3=nout2+after
3184 do j=1,nfft
3185 r1=zin(1,j,nin1)
3186 s1=zin(2,j,nin1)
3187 r=zin(1,j,nin2)
3188 s=zin(2,j,nin2)
3189 r2=(r + s)*rt2i
3190 s2=(s - r)*rt2i
3191 r3=zin(2,j,nin3)
3192 s3=zin(1,j,nin3)
3193 r=r2 + r3
3194 s=s2 - s3
3195 zout(1,nout1,j) = r + r1
3196 zout(2,nout1,j) = s + s1
3197 r1=r1 - .5_real64*r
3198 s1=s1 - .5_real64*s
3199 r2=bb*(r2-r3)
3200 s2=bb*(s2+s3)
3201 zout(1,nout2,j) = r1 - s2
3202 zout(2,nout2,j) = s1 + r2
3203 zout(1,nout3,j) = r1 + s2
3204 zout(2,nout3,j) = s1 - r2
3205 end do
3206 end do
3207 end if
3208 else
3209 itt=ias*before
3210 itrig=itt+1
3211 cr2=trig(1,itrig)
3212 ci2=trig(2,itrig)
3213 itrig=itrig+itt
3214 cr3=trig(1,itrig)
3215 ci3=trig(2,itrig)
3216 nin1=ia-after
3217 nout1=ia-atn
3218 do ib=1,before
3219 nin1=nin1+after
3220 nin2=nin1+atb
3221 nin3=nin2+atb
3222 nout1=nout1+atn
3223 nout2=nout1+after
3224 nout3=nout2+after
3225 do j=1,nfft
3226 r1=zin(1,j,nin1)
3227 s1=zin(2,j,nin1)
3228 r=zin(1,j,nin2)
3229 s=zin(2,j,nin2)
3230 r2=r*cr2 - s*ci2
3231 s2=r*ci2 + s*cr2
3232 r=zin(1,j,nin3)
3233 s=zin(2,j,nin3)
3234 r3=r*cr3 - s*ci3
3235 s3=r*ci3 + s*cr3
3236 r=r2 + r3
3237 s=s2 + s3
3238 zout(1,nout1,j) = r + r1
3239 zout(2,nout1,j) = s + s1
3240 r1=r1 - .5_real64*r
3241 s1=s1 - .5_real64*s
3242 r2=bb*(r2-r3)
3243 s2=bb*(s2-s3)
3244 zout(1,nout2,j) = r1 - s2
3245 zout(2,nout2,j) = s1 + r2
3246 zout(1,nout3,j) = r1 + s2
3247 zout(2,nout3,j) = s1 - r2
3248 end do
3249 end do
3250 end if
3251 end do
3252 else if (now == 5) then
3253! cos(2._real64*pi/5._real64)
3254 cos2=0.3090169943749474_real64
3255! cos(4._real64*pi/5._real64)
3256 cos4=-0.8090169943749474_real64
3257! sin(2._real64*pi/5._real64)
3258 sin2=isign*0.9510565162951536_real64
3259! sin(4._real64*pi/5._real64)
3260 sin4=isign*0.5877852522924731_real64
3261 ia=1
3262 nin1=ia-after
3263 nout1=ia-atn
3264 do ib=1,before
3265 nin1=nin1+after
3266 nin2=nin1+atb
3267 nin3=nin2+atb
3268 nin4=nin3+atb
3269 nin5=nin4+atb
3270 nout1=nout1+atn
3271 nout2=nout1+after
3272 nout3=nout2+after
3273 nout4=nout3+after
3274 nout5=nout4+after
3275 do j=1,nfft
3276 r1=zin(1,j,nin1)
3277 s1=zin(2,j,nin1)
3278 r2=zin(1,j,nin2)
3279 s2=zin(2,j,nin2)
3280 r3=zin(1,j,nin3)
3281 s3=zin(2,j,nin3)
3282 r4=zin(1,j,nin4)
3283 s4=zin(2,j,nin4)
3284 r5=zin(1,j,nin5)
3285 s5=zin(2,j,nin5)
3286 r25 = r2 + r5
3287 r34 = r3 + r4
3288 s25 = s2 - s5
3289 s34 = s3 - s4
3290 zout(1,nout1,j) = r1 + r25 + r34
3291 r = r1 + cos2*r25 + cos4*r34
3292 s = sin2*s25 + sin4*s34
3293 zout(1,nout2,j) = r - s
3294 zout(1,nout5,j) = r + s
3295 r = r1 + cos4*r25 + cos2*r34
3296 s = sin4*s25 - sin2*s34
3297 zout(1,nout3,j) = r - s
3298 zout(1,nout4,j) = r + s
3299 r25 = r2 - r5
3300 r34 = r3 - r4
3301 s25 = s2 + s5
3302 s34 = s3 + s4
3303 zout(2,nout1,j) = s1 + s25 + s34
3304 r = s1 + cos2*s25 + cos4*s34
3305 s = sin2*r25 + sin4*r34
3306 zout(2,nout2,j) = r + s
3307 zout(2,nout5,j) = r - s
3308 r = s1 + cos4*s25 + cos2*s34
3309 s = sin4*r25 - sin2*r34
3310 zout(2,nout3,j) = r + s
3311 zout(2,nout4,j) = r - s
3312 end do
3313 end do
3314 do ia=2,after
3315 ias=ia-1
3316 if (8*ias == 5*after) then
3317 if (isign == 1) then
3318 nin1=ia-after
3319 nout1=ia-atn
3320 do ib=1,before
3321 nin1=nin1+after
3322 nin2=nin1+atb
3323 nin3=nin2+atb
3324 nin4=nin3+atb
3325 nin5=nin4+atb
3326 nout1=nout1+atn
3327 nout2=nout1+after
3328 nout3=nout2+after
3329 nout4=nout3+after
3330 nout5=nout4+after
3331 do j=1,nfft
3332 r1=zin(1,j,nin1)
3333 s1=zin(2,j,nin1)
3334 r=zin(1,j,nin2)
3335 s=zin(2,j,nin2)
3336 r2=(r - s)*rt2i
3337 s2=(r + s)*rt2i
3338 r3=zin(2,j,nin3)
3339 s3=zin(1,j,nin3)
3340 r=zin(1,j,nin4)
3341 s=zin(2,j,nin4)
3342 r4=(r + s)*rt2i
3343 s4=(r - s)*rt2i
3344 r5=zin(1,j,nin5)
3345 s5=zin(2,j,nin5)
3346 r25 = r2 - r5
3347 r34 = r3 + r4
3348 s25 = s2 + s5
3349 s34 = s3 - s4
3350 zout(1,nout1,j) = r1 + r25 - r34
3351 r = r1 + cos2*r25 - cos4*r34
3352 s = sin2*s25 + sin4*s34
3353 zout(1,nout2,j) = r - s
3354 zout(1,nout5,j) = r + s
3355 r = r1 + cos4*r25 - cos2*r34
3356 s = sin4*s25 - sin2*s34
3357 zout(1,nout3,j) = r - s
3358 zout(1,nout4,j) = r + s
3359 r25 = r2 + r5
3360 r34 = r4 - r3
3361 s25 = s2 - s5
3362 s34 = s3 + s4
3363 zout(2,nout1,j) = s1 + s25 + s34
3364 r = s1 + cos2*s25 + cos4*s34
3365 s = sin2*r25 + sin4*r34
3366 zout(2,nout2,j) = r + s
3367 zout(2,nout5,j) = r - s
3368 r = s1 + cos4*s25 + cos2*s34
3369 s = sin4*r25 - sin2*r34
3370 zout(2,nout3,j) = r + s
3371 zout(2,nout4,j) = r - s
3372 end do
3373 end do
3374 else
3375 nin1=ia-after
3376 nout1=ia-atn
3377 do ib=1,before
3378 nin1=nin1+after
3379 nin2=nin1+atb
3380 nin3=nin2+atb
3381 nin4=nin3+atb
3382 nin5=nin4+atb
3383 nout1=nout1+atn
3384 nout2=nout1+after
3385 nout3=nout2+after
3386 nout4=nout3+after
3387 nout5=nout4+after
3388 do j=1,nfft
3389 r1=zin(1,j,nin1)
3390 s1=zin(2,j,nin1)
3391 r=zin(1,j,nin2)
3392 s=zin(2,j,nin2)
3393 r2=(r + s)*rt2i
3394 s2=(s - r)*rt2i
3395 r3=zin(2,j,nin3)
3396 s3=zin(1,j,nin3)
3397 r=zin(1,j,nin4)
3398 s=zin(2,j,nin4)
3399 r4=(s - r)*rt2i
3400 s4=(r + s)*rt2i
3401 r5=zin(1,j,nin5)
3402 s5=zin(2,j,nin5)
3403 r25 = r2 - r5
3404 r34 = r3 + r4
3405 s25 = s2 + s5
3406 s34 = s4 - s3
3407 zout(1,nout1,j) = r1 + r25 + r34
3408 r = r1 + cos2*r25 + cos4*r34
3409 s = sin2*s25 + sin4*s34
3410 zout(1,nout2,j) = r - s
3411 zout(1,nout5,j) = r + s
3412 r = r1 + cos4*r25 + cos2*r34
3413 s = sin4*s25 - sin2*s34
3414 zout(1,nout3,j) = r - s
3415 zout(1,nout4,j) = r + s
3416 r25 = r2 + r5
3417 r34 = r3 - r4
3418 s25 = s2 - s5
3419 s34 = s3 + s4
3420 zout(2,nout1,j) = s1 + s25 - s34
3421 r = s1 + cos2*s25 - cos4*s34
3422 s = sin2*r25 + sin4*r34
3423 zout(2,nout2,j) = r + s
3424 zout(2,nout5,j) = r - s
3425 r = s1 + cos4*s25 - cos2*s34
3426 s = sin4*r25 - sin2*r34
3427 zout(2,nout3,j) = r + s
3428 zout(2,nout4,j) = r - s
3429 end do
3430 end do
3431 end if
3432 else
3433 ias=ia-1
3434 itt=ias*before
3435 itrig=itt+1
3436 cr2=trig(1,itrig)
3437 ci2=trig(2,itrig)
3438 itrig=itrig+itt
3439 cr3=trig(1,itrig)
3440 ci3=trig(2,itrig)
3441 itrig=itrig+itt
3442 cr4=trig(1,itrig)
3443 ci4=trig(2,itrig)
3444 itrig=itrig+itt
3445 cr5=trig(1,itrig)
3446 ci5=trig(2,itrig)
3447 nin1=ia-after
3448 nout1=ia-atn
3449 do ib=1,before
3450 nin1=nin1+after
3451 nin2=nin1+atb
3452 nin3=nin2+atb
3453 nin4=nin3+atb
3454 nin5=nin4+atb
3455 nout1=nout1+atn
3456 nout2=nout1+after
3457 nout3=nout2+after
3458 nout4=nout3+after
3459 nout5=nout4+after
3460 do j=1,nfft
3461 r1=zin(1,j,nin1)
3462 s1=zin(2,j,nin1)
3463 r=zin(1,j,nin2)
3464 s=zin(2,j,nin2)
3465 r2=r*cr2 - s*ci2
3466 s2=r*ci2 + s*cr2
3467 r=zin(1,j,nin3)
3468 s=zin(2,j,nin3)
3469 r3=r*cr3 - s*ci3
3470 s3=r*ci3 + s*cr3
3471 r=zin(1,j,nin4)
3472 s=zin(2,j,nin4)
3473 r4=r*cr4 - s*ci4
3474 s4=r*ci4 + s*cr4
3475 r=zin(1,j,nin5)
3476 s=zin(2,j,nin5)
3477 r5=r*cr5 - s*ci5
3478 s5=r*ci5 + s*cr5
3479 r25 = r2 + r5
3480 r34 = r3 + r4
3481 s25 = s2 - s5
3482 s34 = s3 - s4
3483 zout(1,nout1,j) = r1 + r25 + r34
3484 r = r1 + cos2*r25 + cos4*r34
3485 s = sin2*s25 + sin4*s34
3486 zout(1,nout2,j) = r - s
3487 zout(1,nout5,j) = r + s
3488 r = r1 + cos4*r25 + cos2*r34
3489 s = sin4*s25 - sin2*s34
3490 zout(1,nout3,j) = r - s
3491 zout(1,nout4,j) = r + s
3492 r25 = r2 - r5
3493 r34 = r3 - r4
3494 s25 = s2 + s5
3495 s34 = s3 + s4
3496 zout(2,nout1,j) = s1 + s25 + s34
3497 r = s1 + cos2*s25 + cos4*s34
3498 s = sin2*r25 + sin4*r34
3499 zout(2,nout2,j) = r + s
3500 zout(2,nout5,j) = r - s
3501 r = s1 + cos4*s25 + cos2*s34
3502 s = sin4*r25 - sin2*r34
3503 zout(2,nout3,j) = r + s
3504 zout(2,nout4,j) = r - s
3505 end do
3506 end do
3507 end if
3508 end do
3509 else if (now == 6) then
3510! .5_real64*sqrt(3._real64)
3511 bb=isign*0.8660254037844387_real64
3512
3513 ia=1
3514 nin1=ia-after
3515 nout1=ia-atn
3516 do ib=1,before
3517 nin1=nin1+after
3518 nin2=nin1+atb
3519 nin3=nin2+atb
3520 nin4=nin3+atb
3521 nin5=nin4+atb
3522 nin6=nin5+atb
3523 nout1=nout1+atn
3524 nout2=nout1+after
3525 nout3=nout2+after
3526 nout4=nout3+after
3527 nout5=nout4+after
3528 nout6=nout5+after
3529 do j=1,nfft
3530 r2=zin(1,j,nin3)
3531 s2=zin(2,j,nin3)
3532 r3=zin(1,j,nin5)
3533 s3=zin(2,j,nin5)
3534 r=r2 + r3
3535 s=s2 + s3
3536 r1=zin(1,j,nin1)
3537 s1=zin(2,j,nin1)
3538 ur1 = r + r1
3539 ui1 = s + s1
3540 r1=r1 - .5_real64*r
3541 s1=s1 - .5_real64*s
3542 r=r2-r3
3543 s=s2-s3
3544 ur2 = r1 - s*bb
3545 ui2 = s1 + r*bb
3546 ur3 = r1 + s*bb
3547 ui3 = s1 - r*bb
3548
3549 r2=zin(1,j,nin6)
3550 s2=zin(2,j,nin6)
3551 r3=zin(1,j,nin2)
3552 s3=zin(2,j,nin2)
3553 r=r2 + r3
3554 s=s2 + s3
3555 r1=zin(1,j,nin4)
3556 s1=zin(2,j,nin4)
3557 vr1 = r + r1
3558 vi1 = s + s1
3559 r1=r1 - .5_real64*r
3560 s1=s1 - .5_real64*s
3561 r=r2-r3
3562 s=s2-s3
3563 vr2 = r1 - s*bb
3564 vi2 = s1 + r*bb
3565 vr3 = r1 + s*bb
3566 vi3 = s1 - r*bb
3567
3568 zout(1,nout1,j)=ur1+vr1
3569 zout(2,nout1,j)=ui1+vi1
3570 zout(1,nout5,j)=ur2+vr2
3571 zout(2,nout5,j)=ui2+vi2
3572 zout(1,nout3,j)=ur3+vr3
3573 zout(2,nout3,j)=ui3+vi3
3574 zout(1,nout4,j)=ur1-vr1
3575 zout(2,nout4,j)=ui1-vi1
3576 zout(1,nout2,j)=ur2-vr2
3577 zout(2,nout2,j)=ui2-vi2
3578 zout(1,nout6,j)=ur3-vr3
3579 zout(2,nout6,j)=ui3-vi3
3580 end do
3581 end do
3582
3583 else
3584 stop 'error fftrot'
3585 end if
3586
3587 end subroutine
3588
3589
3590 ! FFT PART RELATED TO THE CONVOLUTION--------------------------------------------
3591
3592 integer function ncache_optimal()
3593 ncache_optimal = 8*1024
3594 end function ncache_optimal
3595
3596!!!HERE POT MUST BE THE KERNEL (BEWARE THE HALF DIMENSION)
3597
3598 !!****h* BigDFT/convolxc_off
3599 !! NAME
3600 !! convolxc_off
3601 !!
3602 !! FUNCTION
3603 !! (Based on suitable modifications of S.Goedecker routines)
3604 !! Applies the local FFT space Kernel to the density in Real space.
3605 !! Does NOT calculate the LDA exchange-correlation terms
3606 !!
3607 !! SYNOPSIS
3608 !! zf: Density (input/output)
3609 !! ZF(i1,i3,i2)
3610 !! i1=1,md1 , i2=1,md2/nproc , i3=1,md3
3611 !! pot: Kernel, only the distributed part (REAL)
3612 !! POT(i1,i2,i3)
3613 !! i1=1,nd1 , i2=1,nd2 , i3=1,nd3/nproc
3614 !! nproc: number of processors used as returned by MPI_COMM_SIZE
3615 !! iproc: [0:nproc-1] number of processor as returned by MPI_COMM_RANK
3616 !! n1,n2,n3: logical dimension of the transform. As transform lengths
3617 !! most products of the prime factors 2,3,5 are allowed.
3618 !! The detailed table with allowed transform lengths can
3619 !! be found in subroutine CTRIG
3620 !! md1,md2,md3: Dimension of ZF
3621 !! nd1,nd2,nd3: Dimension of POT
3622 !! scal: factor of renormalization of the FFT in order to acheve unitarity
3623 !! and the correct dimension
3624 !! hgrid: grid spacing, used for integrating eharthree
3625 !! comm: MPI communicator to use
3626 !!
3627 !! RESTRICTIONS on USAGE
3628 !! Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
3629 !! Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
3630 !! Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
3631 !! This file is distributed under the terms of the
3632 !! GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
3633 !!
3634 !! AUTHORS
3635 !! S. Goedecker, L. Genovese
3636 !!
3637 !! CREATION DATE
3638 !! February 2006
3639 !!
3640 !! SOURCE
3641 !!
3642 subroutine convolxc_off(n1,n2,n3,nd1,nd2,nd3,md1,md2,md3, &
3643 nproc,iproc,pot,zf,scal,comm)
3644 integer, intent(in) :: n1,n2,n3,nd1,nd2,nd3,md1,md2,md3,nproc,iproc
3645 real(real64), intent(in) :: scal
3646 real(real64), dimension(nd1,nd2,nd3/nproc), intent(in) :: pot
3647 real(real64), dimension(md1,md3,md2/nproc), intent(inout) :: zf
3648 type(mpi_comm), intent(in) :: comm
3649
3650 integer :: ncache,lzt,lot,ma,mb,nfft,ic1,ic2,ic3,Jp2stb,J2stb,Jp2stf,J2stf
3651 integer :: j2,j3,i1,i3,i,j,inzee
3652#if defined(HAVE_MPI)
3653 integer :: ierr
3654#endif
3655 real(real64) :: twopion
3656 !work arrays for transpositions
3657 real(real64), allocatable :: zt(:,:,:)
3658 !work arrays for MPI
3659 real(real64), allocatable :: zmpi1(:,:,:,:,:)
3660 real(real64), allocatable :: zmpi2(:,:,:,:)
3661 !cache work array
3662 real(real64), allocatable :: zw(:,:,:)
3663 !FFT work arrays
3664 real(real64), allocatable :: cosinarr(:,:)
3665 real(real64) :: btrig1(2,8192)
3666 real(real64) :: ftrig1(2,8192)
3667 integer :: after1(7)
3668 integer :: now1(7)
3669 integer :: before1(7)
3670 real(real64) :: btrig2(2,8192)
3671 real(real64) :: ftrig2(2,8192)
3672 integer :: after2(7)
3673 integer :: now2(7)
3674 integer :: before2(7)
3675 real(real64) :: btrig3(2,8192)
3676 real(real64) :: ftrig3(2,8192)
3677 integer :: after3(7)
3678 integer :: now3(7)
3679 integer :: before3(7)
3680
3681 call profiling_in("SG_PCONV")
3682
3683 ! check input
3684 if (mod(n1,2) /= 0) stop 'Parallel convolution:ERROR:n1'
3685 if (mod(n2,2) /= 0) stop 'Parallel convolution:ERROR:n2'
3686 if (mod(n3,2) /= 0) stop 'Parallel convolution:ERROR:n3'
3687 if (nd1 < n1/2+1) stop 'Parallel convolution:ERROR:nd1'
3688 if (nd2 < n2/2+1) stop 'Parallel convolution:ERROR:nd2'
3689 if (nd3 < n3/2+1) stop 'Parallel convolution:ERROR:nd3'
3690 if (md1 < n1/2) stop 'Parallel convolution:ERROR:md1'
3691 if (md2 < n2/2) stop 'Parallel convolution:ERROR:md2'
3692 if (md3 < n3/2) stop 'Parallel convolution:ERROR:md3'
3693 if (mod(nd3,nproc) /= 0) stop 'Parallel convolution:ERROR:nd3'
3694 if (mod(md2,nproc) /= 0) stop 'Parallel convolution:ERROR:md2'
3695
3696 !defining work arrays dimensions
3697 ncache = ncache_optimal()
3698 if (ncache <= max(n1,n2,n3/2)*4) ncache=max(n1,n2,n3/2)*4
3699
3700 ! if (timing_flag == 1 .and. iproc ==0) print *,'parallel ncache=',ncache
3701
3702 lzt = n2/2
3703 if (mod(n2/2,2) == 0) lzt=lzt+1
3704 if (mod(n2/2,4) == 0) lzt=lzt+1
3705
3706 safe_allocate(zw(1:2, 1:ncache/4, 1:2))
3707 safe_allocate(zt(1:2, 1:lzt, 1:n1))
3708 safe_allocate(zmpi2(1:2, 1:n1, 1:md2/nproc, 1:nd3))
3709 safe_allocate(cosinarr(1:2, 1:n3/2))
3710
3711 if (nproc > 1) then
3712 safe_allocate(zmpi1(1:2, 1:n1, 1:md2/nproc, 1:nd3/nproc, 1:nproc))
3713 end if
3714
3715 !calculating the FFT work arrays (beware on the HalFFT in n3 dimension)
3716 call ctrig(n3/2,btrig3,after3,before3,now3,1,ic3)
3717 call ctrig(n1,btrig1,after1,before1,now1,1,ic1)
3718 call ctrig(n2,btrig2,after2,before2,now2,1,ic2)
3719 do j=1,n1
3720 ftrig1(1,j)= btrig1(1,j)
3721 ftrig1(2,j)=-btrig1(2,j)
3722 end do
3723 do j=1,n2
3724 ftrig2(1,j)= btrig2(1,j)
3725 ftrig2(2,j)=-btrig2(2,j)
3726 end do
3727 do j=1,n3
3728 ftrig3(1,j)= btrig3(1,j)
3729 ftrig3(2,j)=-btrig3(2,j)
3730 end do
3731
3732 !Calculating array of phases for HalFFT decoding
3733 twopion=8._real64*datan(1.0_8)/real(n3, real64)
3734 do i3 = 1,n3/2
3735 cosinarr(1,i3)=dcos(twopion*(i3-1))
3736 cosinarr(2,i3)=-dsin(twopion*(i3-1))
3737 end do
3738
3739 !initializing integral
3740!!! ehartree=0.0_8
3741
3742 ! transform along z axis
3743 lot=ncache/(2*n3)
3744 if (lot < 1) then
3745 write(6,*) &
3746 'convolxc_off:ncache has to be enlarged to be able to hold at' // &
3747 'least one 1-d FFT of this size even though this will' // &
3748 'reduce the performance for shorter transform lengths'
3749 stop
3750 end if
3751
3752 do j2=1,md2/nproc
3753 !this condition ensures that we manage only the interesting part for the FFT
3754 if (iproc*(md2/nproc)+j2 <= n2/2) then
3755 do i1 = 1,(n1/2),lot
3756 ma=i1
3757 mb=min(i1+(lot-1),(n1/2))
3758 nfft=mb-ma+1
3759
3760 !inserting real data into complex array of half length
3761 call halfill_upcorn(md1,md3,lot,nfft,n3,zf(i1,1,j2),zw(1,1,1))
3762
3763 !performing FFT
3764 !input: I1,I3,J2,(Jp2)
3765 inzee=1
3766 do i = 1,ic3
3767 call fftstp(lot,nfft,n3/2,lot,n3/2,zw(1,1,inzee),zw(1,1,3-inzee), &
3768 btrig3,after3(i),now3(i),before3(i),1)
3769 inzee=3-inzee
3770 end do
3771 !output: I1,i3,J2,(Jp2)
3772
3773 !unpacking FFT in order to restore correct result,
3774 !while exchanging components
3775 !input: I1,i3,J2,(Jp2)
3776 call scramble_unpack(i1,j2,lot,nfft,n1/2,n3,md2,nproc,nd3,zw(1,1,inzee),zmpi2,cosinarr)
3777 !output: I1,J2,i3,(Jp2)
3778 end do
3779 end if
3780 end do
3781
3782 !Interprocessor data transposition
3783 !input: I1,J2,j3,jp3,(Jp2)
3784 if (nproc > 1) then
3785 call profiling_in("SG_ALLTOALL")
3786 !communication scheduling
3787#if defined(HAVE_MPI)
3788 call mpi_alltoall(zmpi2(1, 1, 1, 1), n1*(md2/nproc)*(nd3/nproc), &
3789 mpi_double_precision, &
3790 zmpi1(1, 1, 1, 1, 1), n1*(md2/nproc)*(nd3/nproc), &
3791 mpi_double_precision, comm, ierr)
3792#endif
3793 call profiling_out("SG_ALLTOALL")
3794 end if
3795 !output: I1,J2,j3,Jp2,(jp3)
3796
3797 !now each process perform complete convolution of its planes
3798 do j3=1,nd3/nproc
3799 !this condition ensures that we manage only the interesting part for the FFT
3800 if (iproc*(nd3/nproc)+j3 <= n3/2+1) then
3801 jp2stb=1
3802 j2stb=1
3803 jp2stf=1
3804 j2stf=1
3805
3806 ! transform along x axis
3807 lot=ncache/(4*n1)
3808 if (lot < 1) then
3809 write(6,*) &
3810 'convolxc_off:ncache has to be enlarged to be able to hold at' // &
3811 'least one 1-d FFT of this size even though this will' // &
3812 'reduce the performance for shorter transform lengths'
3813 stop
3814 end if
3815
3816 do j = 1,n2/2,lot
3817 ma=j
3818 mb=min(j+(lot-1),n2/2)
3819 nfft=mb-ma+1
3820
3821 !reverse index ordering, leaving the planes to be transformed at the end
3822 !input: I1,J2,j3,Jp2,(jp3)
3823 if (nproc == 1) then
3824 call mpiswitch_upcorn(j3,nfft,jp2stb,j2stb,lot,n1,md2,nd3,nproc,zmpi2,zw(1,1,1))
3825 else
3826 call mpiswitch_upcorn(j3,nfft,jp2stb,j2stb,lot,n1,md2,nd3,nproc,zmpi1,zw(1,1,1))
3827 end if
3828 !output: J2,Jp2,I1,j3,(jp3)
3829
3830 !performing FFT
3831 !input: I2,I1,j3,(jp3)
3832 inzee=1
3833 do i = 1,ic1-1
3834 call fftstp(lot,nfft,n1,lot,n1,zw(1,1,inzee),zw(1,1,3-inzee), &
3835 btrig1,after1(i),now1(i),before1(i),1)
3836 inzee=3-inzee
3837 end do
3838
3839 !storing the last step into zt array
3840 i=ic1
3841 call fftstp(lot,nfft,n1,lzt,n1,zw(1,1,inzee),zt(1,j,1), &
3842 btrig1,after1(i),now1(i),before1(i),1)
3843 !output: I2,i1,j3,(jp3)
3844 end do
3845
3846 !transform along y axis
3847 lot=ncache/(4*n2)
3848 if (lot < 1) then
3849 write(6,*) &
3850 'convolxc_off:ncache has to be enlarged to be able to hold at' // &
3851 'least one 1-d FFT of this size even though this will' // &
3852 'reduce the performance for shorter transform lengths'
3853 stop
3854 end if
3855
3856 do j = 1,n1,lot
3857 ma=j
3858 mb=min(j+(lot-1),n1)
3859 nfft=mb-ma+1
3860
3861 !reverse ordering
3862 !input: I2,i1,j3,(jp3)
3863 call switch_upcorn(nfft,n2,lot,n1,lzt,zt(1,1,j),zw(1,1,1))
3864 !output: i1,I2,j3,(jp3)
3865
3866 !performing FFT
3867 !input: i1,I2,j3,(jp3)
3868 inzee=1
3869 do i = 1,ic2
3870 call fftstp(lot,nfft,n2,lot,n2,zw(1,1,inzee),zw(1,1,3-inzee), &
3871 btrig2,after2(i),now2(i),before2(i),1)
3872 inzee=3-inzee
3873 end do
3874 !output: i1,i2,j3,(jp3)
3875
3876 !Multiply with kernel in fourier space
3877 call multkernel(nd1,nd2,n1,n2,lot,nfft,j,pot(1,1,j3),zw(1,1,inzee))
3878
3879 !TRANSFORM BACK IN REAL SPACE
3880
3881 !transform along y axis
3882 !input: i1,i2,j3,(jp3)
3883 do i = 1,ic2
3884 call fftstp(lot,nfft,n2,lot,n2,zw(1,1,inzee),zw(1,1,3-inzee), &
3885 ftrig2,after2(i),now2(i),before2(i),-1)
3886 inzee=3-inzee
3887 end do
3888
3889 !reverse ordering
3890 !input: i1,I2,j3,(jp3)
3891 call unswitch_downcorn(nfft,n2,lot,n1,lzt,zw(1,1,inzee),zt(1,1,j))
3892 !output: I2,i1,j3,(jp3)
3893 end do
3894
3895 !transform along x axis
3896 !input: I2,i1,j3,(jp3)
3897 lot=ncache/(4*n1)
3898 do j = 1,n2/2,lot
3899 ma=j
3900 mb=min(j+(lot-1),n2/2)
3901 nfft=mb-ma+1
3902
3903 !performing FFT
3904 i=1
3905 call fftstp(lzt,nfft,n1,lot,n1,zt(1,j,1),zw(1,1,1), &
3906 ftrig1,after1(i),now1(i),before1(i),-1)
3907
3908 inzee=1
3909 do i=2,ic1
3910 call fftstp(lot,nfft,n1,lot,n1,zw(1,1,inzee),zw(1,1,3-inzee), &
3911 ftrig1,after1(i),now1(i),before1(i),-1)
3912 inzee=3-inzee
3913 end do
3914 !output: I2,I1,j3,(jp3)
3915
3916 !reverse ordering
3917 !input: J2,Jp2,I1,j3,(jp3)
3918 if (nproc == 1) then
3919 call unmpiswitch_downcorn(j3,nfft,jp2stf,j2stf,lot,n1,md2,nd3,nproc,zw(1,1,inzee),zmpi2)
3920 else
3921 call unmpiswitch_downcorn(j3,nfft,jp2stf,j2stf,lot,n1,md2,nd3,nproc,zw(1,1,inzee),zmpi1)
3922 end if
3923 ! output: I1,J2,j3,Jp2,(jp3)
3924 end do
3925 end if
3926 end do
3927
3928
3929 !Interprocessor data transposition
3930 !input: I1,J2,j3,Jp2,(jp3)
3931 if (nproc > 1) then
3932 call profiling_in("SG_ALLTOALL")
3933 !communication scheduling
3934#if defined(HAVE_MPI)
3935 call mpi_alltoall(zmpi1(1, 1, 1, 1, 1), n1*(md2/nproc)*(nd3/nproc), &
3936 mpi_double_precision, &
3937 zmpi2(1, 1, 1, 1), n1*(md2/nproc)*(nd3/nproc), &
3938 mpi_double_precision, comm, ierr)
3939 !output: I1,J2,j3,jp3,(Jp2)
3940#endif
3941 call profiling_out("SG_ALLTOALL")
3942 end if
3943
3944 !transform along z axis
3945 !input: I1,J2,i3,(Jp2)
3946 lot=ncache/(2*n3)
3947 do j2=1,md2/nproc
3948 !this condition ensures that we manage only the interesting part for the FFT
3949 if (iproc*(md2/nproc)+j2 <= n2/2) then
3950 do i1 = 1,(n1/2),lot
3951 ma=i1
3952 mb=min(i1+(lot-1),(n1/2))
3953 nfft=mb-ma+1
3954
3955 !reverse ordering and repack the FFT data in order to be backward HalFFT transformed
3956 !input: I1,J2,i3,(Jp2)
3957 call unscramble_pack(i1,j2,lot,nfft,n1/2,n3,md2,nproc,nd3,zmpi2,zw(1,1,1),cosinarr)
3958 !output: I1,i3,J2,(Jp2)
3959
3960 !performing FFT
3961 !input: I1,i3,J2,(Jp2)
3962 inzee=1
3963 do i = 1,ic3
3964 call fftstp(lot,nfft,n3/2,lot,n3/2,zw(1,1,inzee),zw(1,1,3-inzee), &
3965 ftrig3,after3(i),now3(i),before3(i),-1)
3966 inzee=3-inzee
3967 end do
3968 !output: I1,I3,J2,(Jp2)
3969
3970 !calculates the Hartree energy locally and rebuild the output array
3971 call unfill_downcorn(md1, md3, lot, nfft, n3, zw(1,1,inzee), zf(i1,1,j2), scal)
3972
3973 end do
3974 end if
3975 end do
3976
3977 safe_deallocate_a(zmpi2)
3978 safe_deallocate_a(zw)
3979 safe_deallocate_a(zt)
3980 safe_deallocate_a(cosinarr)
3981 safe_deallocate_a(zmpi1)
3982
3983 call profiling_out("SG_PCONV")
3984
3985
3986 end subroutine convolxc_off
3987
3988
3989 !!****h* BigDFT/multkernel
3990 !! NAME
3991 !! multkernel
3992 !!
3993 !! FUNCTION
3994 !! (Based on suitable modifications of S.Goedecker routines)
3995 !! Multiply with the kernel taking into account its symmetry
3996 !! Conceived to be used into convolution loops
3997 !!
3998 !! SYNOPSIS
3999 !! pot: Kernel, symmetric and real, half the length
4000 !! zw: Work array (input/output)
4001 !! n1,n2: logical dimension of the FFT transform, reference for zw
4002 !! nd1,nd2: Dimensions of POT
4003 !! jS, nfft: starting point of the plane and number of remaining lines
4004 !!
4005 !! RESTRICTIONS on USAGE
4006 !! Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
4007 !! Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
4008 !! Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
4009 !! This file is distributed under the terms of the
4010 !! GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
4011 !!
4012 !! AUTHORS
4013 !! S. Goedecker, L. Genovese
4014 !!
4015 !! CREATION DATE
4016 !! February 2006
4017 !!
4018 !! SOURCE
4019 !!
4020 subroutine multkernel(nd1,nd2,n1,n2,lot,nfft,jS,pot,zw)
4021 integer, intent(in) :: nd1,nd2,n1,n2,lot,nfft,js
4022 real(real64), dimension(nd1,nd2), intent(in) :: pot
4023 real(real64), dimension(2,lot,n2), intent(inout) :: zw
4024
4025 !Local variables
4026 integer :: j,j1,i2,j2
4027
4028 !case i2=1
4029 do j = 1,nfft
4030 j1=n1/2+1-abs(n1/2+2-js-j)!this stands for j1=min(jS-1+j,n1+3-jS-j)
4031 zw(1,j,1)=zw(1,j,1)*pot(j1,1)
4032 zw(2,j,1)=zw(2,j,1)*pot(j1,1)
4033 end do
4034
4035 !generic case
4036 do i2=2,n2/2
4037 do j = 1,nfft
4038 j1=n1/2+1-abs(n1/2+2-js-j)
4039 j2=n2+2-i2
4040 zw(1,j,i2)=zw(1,j,i2)*pot(j1,i2)
4041 zw(2,j,i2)=zw(2,j,i2)*pot(j1,i2)
4042 zw(1,j,j2)=zw(1,j,j2)*pot(j1,i2)
4043 zw(2,j,j2)=zw(2,j,j2)*pot(j1,i2)
4044 end do
4045 end do
4046
4047 !case i2=n2/2+1
4048 do j = 1,nfft
4049 j1=n1/2+1-abs(n1/2+2-js-j)
4050 j2=n2/2+1
4051 zw(1,j,j2)=zw(1,j,j2)*pot(j1,j2)
4052 zw(2,j,j2)=zw(2,j,j2)*pot(j1,j2)
4053 end do
4054
4055 end subroutine multkernel
4056
4057
4058 subroutine switch_upcorn(nfft,n2,lot,n1,lzt,zt,zw)
4059 integer :: lot, n1, n2, lzt, j, nfft, i
4060 real(real64) :: zw(2,lot,n2), zt(2,lzt,n1)
4061
4062 ! WARNING: Assuming that high frequencies are in the corners
4063 ! and that n2 is multiple of 2
4064
4065 ! Low frequencies
4066 do j = 1, nfft
4067 do i = n2/2 + 1, n2
4068 zw(1,j,i)=zt(1,i-n2/2,j)
4069 zw(2,j,i)=zt(2,i-n2/2,j)
4070 end do
4071 end do
4072
4073 ! High frequencies
4074 do i = 1, n2/2
4075 do j = 1, nfft
4076 zw(1,j,i)=0.0_8
4077 zw(2,j,i)=0.0_8
4078 end do
4079 end do
4080
4081 end subroutine switch_upcorn
4082
4083 ! ----------------------------------------------------------------------------
4084
4085 subroutine mpiswitch_upcorn(j3,nfft,Jp2stb,J2stb,lot,n1,md2,nd3,nproc,zmpi1,zw)
4086 integer :: n1, nd3, nproc, lot, mfft, jp2, jp2stb, j2
4087 integer :: j2stb, nfft, i1, j3, md2
4088 real(real64) :: zmpi1(2,n1/2,md2/nproc,nd3/nproc,nproc),zw(2,lot,n1)
4089
4090 ! WARNING: Assuming that high frequencies are in the corners
4091 ! and that n1 is multiple of 2
4092
4093 mfft=0
4094 do jp2 = jp2stb, nproc
4095 do j2 = j2stb, md2/nproc
4096 mfft = mfft + 1
4097 if (mfft > nfft) then
4098 jp2stb=jp2
4099 j2stb=j2
4100 return
4101 end if
4102 do i1=1,n1/2
4103 zw(1,mfft,i1)=0.0_8
4104 zw(2,mfft,i1)=0.0_8
4105 end do
4106 do i1=n1/2+1,n1
4107 zw(1,mfft,i1)=zmpi1(1,i1-n1/2,j2,j3,jp2)
4108 zw(2,mfft,i1)=zmpi1(2,i1-n1/2,j2,j3,jp2)
4109 end do
4110 end do
4111 j2stb=1
4112 end do
4114 end subroutine mpiswitch_upcorn
4115
4116 ! -------------------------------------------------------------------
4117
4118 subroutine halfill_upcorn(md1,md3,lot,nfft,n3,zf,zw)
4119 integer :: lot, n3, md1, md3, i3, i1, nfft
4120 real(real64) :: zw(2,lot,n3/2),zf(md1,md3)
4121
4122 ! WARNING: Assuming that high frequencies are in the corners
4123 ! and that n3 is multiple of 4
4124 !in principle we can relax this condition
4125
4126 do i3 = 1, n3/4
4127 do i1 = 1,nfft
4128 zw(1,i1,i3)=0.0_8
4129 zw(2,i1,i3)=0.0_8
4130 end do
4131 end do
4132
4133 do i3 = n3/4+1,n3/2
4134 do i1 = 1,nfft
4135 zw(1,i1,i3)=zf(i1,2*i3-1-n3/2)
4136 zw(2,i1,i3)=zf(i1,2*i3-n3/2)
4137 end do
4138 end do
4139
4140 end subroutine halfill_upcorn
4141
4142 ! -------------------------------------------------------------------
4143 !!****h* BigDFT/scramble_unpack
4144 !! NAME
4145 !! scramble_unpack
4146 !!
4147 !! FUNCTION
4148 !! (Based on suitable modifications of S.Goedecker routines)
4149 !! Assign the correct planes to the work array zmpi2
4150 !! in order to prepare for interprocessor data transposition.
4151 !! In the meanwhile, it unpacks the data of the HalFFT in order to prepare for
4152 !! multiplication with the kernel
4153 !!
4154 !! SYNOPSIS
4155 !! zmpi2: Work array for multiprocessor manipulation (output)
4156 !! zw: Work array (input)
4157 !! cosinarr: Array of the phases needed for unpacking
4158 !! n1,n3: logical dimension of the FFT transform, reference for work arrays
4159 !! md2,nd3: Dimensions of real grid and of the kernel, respectively
4160 !! i1,j2,lot,nfft: Starting points of the plane and number of remaining lines
4161 !!
4162 !! RESTRICTIONS on USAGE
4163 !! Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
4164 !! Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
4165 !! Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
4166 !! This file is distributed under the terms of the
4167 !! GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
4168 !!
4169 !! AUTHORS
4170 !! S. Goedecker, L. Genovese
4171 !!
4172 !! CREATION DATE
4173 !! February 2006
4174 !!
4175 !! SOURCE
4176 !!
4177 subroutine scramble_unpack(i1,j2,lot,nfft,n1,n3,md2,nproc,nd3,zw,zmpi2,cosinarr)
4178 integer, intent(in) :: i1,j2,lot,nfft,n1,n3,md2,nproc,nd3
4179 real(real64), dimension(2,lot,n3/2), intent(in) :: zw
4180 real(real64), dimension(2,n3/2), intent(in) :: cosinarr
4181 real(real64), dimension(2,n1,md2/nproc,nd3), intent(out) :: zmpi2
4182
4183 integer :: i3,i,ind1,ind2
4184 real(real64) :: a,b,c,d,cp,sp,feR,feI,foR,foI,fR,fI
4185
4186 !case i3=1 and i3=n3/2+1
4187 do i=0,nfft-1
4188 a=zw(1,i+1,1)
4189 b=zw(2,i+1,1)
4190 zmpi2(1,i1+i,j2,1)=a+b
4191 zmpi2(2,i1+i,j2,1)=0.0_8
4192 zmpi2(1,i1+i,j2,n3/2+1)=a-b
4193 zmpi2(2,i1+i,j2,n3/2+1)=0.0_8
4194 end do
4195
4196 !case 2<=i3<=n3/2
4197 do i3 = 2,n3/2
4198 ind1 = i3
4199 ind2 = n3/2-i3+2
4200 cp = cosinarr(1,i3)
4201 sp = cosinarr(2,i3)
4202 do i = 0, nfft-1
4203 a = zw(1,i+1,ind1)
4204 b = zw(2,i+1,ind1)
4205 c = zw(1,i+1,ind2)
4206 d = zw(2,i+1,ind2)
4207 fer = .5_real64*(a+c)
4208 fei = .5_real64*(b-d)
4209 for = .5_real64*(a-c)
4210 foi = .5_real64*(b+d)
4211 fr = fer+cp*foi-sp*for
4212 fi = fei-cp*for-sp*foi
4213 zmpi2(1,i1+i,j2,ind1) = fr
4214 zmpi2(2,i1+i,j2,ind1) = fi
4215 end do
4216 end do
4217
4218 end subroutine scramble_unpack
4219
4220
4221 !!****h* BigDFT/unscramble_pack
4222 !! NAME
4223 !! unscramble_pack
4224 !!
4225 !! FUNCTION
4226 !! (Based on suitable modifications of S.Goedecker routines)
4227 !! Insert the correct planes of the work array zmpi2
4228 !! in order to prepare for backward FFT transform
4229 !! In the meanwhile, it packs the data in order to be transformed with the HalFFT
4230 !! procedure
4231 !!
4232 !! SYNOPSIS
4233 !! zmpi2: Work array for multiprocessor manipulation (input)
4234 !! zw: Work array (output)
4235 !! cosinarr: Array of the phases needed for packing
4236 !! n1,n3: logical dimension of the FFT transform, reference for work arrays
4237 !! md2,nd3: Dimensions of real grid and of the kernel, respectively
4238 !! i1,j2,lot,nfft: Starting points of the plane and number of remaining lines
4239 !!
4240 !! RESTRICTIONS on USAGE
4241 !! Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
4242 !! Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
4243 !! Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
4244 !! This file is distributed under the terms of the
4245 !! GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
4246 !!
4247 !! AUTHORS
4248 !! S. Goedecker, L. Genovese
4249 !!
4250 !! CREATION DATE
4251 !! February 2006
4252 !!
4253 !! SOURCE
4254 !!
4255 subroutine unscramble_pack(i1,j2,lot,nfft,n1,n3,md2,nproc,nd3,zmpi2,zw,cosinarr)
4256 integer, intent(in) :: i1,j2,lot,nfft,n1,n3,md2,nproc,nd3
4257 real(real64), dimension(2,lot,n3/2), intent(out) :: zw
4258 real(real64), dimension(2,n3/2), intent(in) :: cosinarr
4259 real(real64), dimension(2,n1,md2/nproc,nd3), intent(in) :: zmpi2
4260
4261 integer :: i3,i,indA,indB
4262 real(real64) :: a,b,c,d,cp,sp,re,ie,ro,io,rh,ih
4263
4264 do i3 = 1,n3/2
4265 inda=i3
4266 indb=n3/2+2-i3
4267 cp=cosinarr(1,i3)
4268 sp=cosinarr(2,i3)
4269 do i=0,nfft-1
4270 a=zmpi2(1,i1+i,j2,inda)
4271 b=zmpi2(2,i1+i,j2,inda)
4272 c= zmpi2(1,i1+i,j2,indb)
4273 d=-zmpi2(2,i1+i,j2,indb)
4274 re=(a+c)
4275 ie=(b+d)
4276 ro=(a-c)*cp-(b-d)*sp
4277 io=(a-c)*sp+(b-d)*cp
4278 rh=re-io
4279 ih=ie+ro
4280 zw(1,i+1,inda)=rh
4281 zw(2,i+1,inda)=ih
4282 end do
4283 end do
4284
4285 end subroutine unscramble_pack
4286
4287 ! --------------------------------------------------------------------
4288
4289 subroutine unswitch_downcorn(nfft,n2,lot,n1,lzt,zw,zt)
4290 integer :: lot, n2, lzt, n1, j, nfft, i
4291 real(real64) :: zw(2,lot,n2),zt(2,lzt,n1)
4292 ! WARNING: Assuming that high frequencies are in the corners
4293 ! and that n2 is multiple of 2
4294
4295 ! Low frequencies
4296 do j = 1, nfft
4297 do i = 1, n2/2
4298 zt(1,i,j)=zw(1,j,i)
4299 zt(2,i,j)=zw(2,j,i)
4300 end do
4301 end do
4302
4303 end subroutine unswitch_downcorn
4304
4305
4306 subroutine unmpiswitch_downcorn(j3,nfft,Jp2stf,J2stf,lot,n1,md2,nd3,nproc,zw,zmpi1)
4307 integer :: n1, md2, nproc, nd3, lot, mfft, i1, j3
4308 integer :: jp2, j2, nfft, jp2stf, j2stf
4309 real(real64) :: zmpi1(2,n1/2,md2/nproc,nd3/nproc,nproc),zw(2,lot,n1)
4310 ! WARNING: Assuming that high frequencies are in the corners
4311 ! and that n1 is multiple of 2
4312
4313 mfft=0
4314 do jp2=jp2stf,nproc
4315 do j2 = j2stf, md2/nproc
4316 mfft=mfft+1
4317 if (mfft > nfft) then
4318 jp2stf=jp2
4319 j2stf=j2
4320 return
4321 end if
4322 do i1 = 1, n1/2
4323 zmpi1(1,i1,j2,j3,jp2)=zw(1,mfft,i1)
4324 zmpi1(2,i1,j2,j3,jp2)=zw(2,mfft,i1)
4325 end do
4326 end do
4327 j2stf=1
4328 end do
4329 end subroutine unmpiswitch_downcorn
4330
4331
4332 !!****h* BigDFT/unfill_downcorn
4333 !! NAME
4334 !! unfill_downcorn
4335 !!
4336 !! FUNCTION
4337 !! (Based on suitable modifications of S.Goedecker routines)
4338 !! Restore data into output array, calculating in the meanwhile
4339 !! Hartree energy of the potential
4340 !!
4341 !! SYNOPSIS
4342 !! zf: Original distributed density as well as
4343 !! Distributed solution of the poisson equation (inout)
4344 !! zw: FFT work array
4345 !! n3: (twice the) dimension of the last FFTtransform.
4346 !! md1,md3: Dimensions of the undistributed part of the real grid
4347 !! nfft: number of planes
4348 !! scal: Needed to achieve unitarity and correct dimensions
4349 !!
4350 !! WARNING
4351 !! Assuming that high frequencies are in the corners
4352 !! and that n3 is multiple of 4
4353 !!
4354 !! RESTRICTIONS on USAGE
4355 !! Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
4356 !! Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
4357 !! Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
4358 !! This file is distributed under the terms of the
4359 !! GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
4360 !!
4361 !! AUTHORS
4362 !! S. Goedecker, L. Genovese
4363 !!
4364 !! CREATION DATE
4365 !! February 2006
4366 !!
4367 !! SOURCE
4368 !!
4369 subroutine unfill_downcorn(md1,md3,lot,nfft,n3,zw,zf, scal)
4370 integer, intent(in) :: md1,md3,lot,nfft,n3
4371 real(real64), dimension(2,lot,n3/2), intent(in) :: zw
4372 real(real64), dimension(md1,md3),intent(inout) :: zf
4373 real(real64), intent(in) :: scal
4374
4375 integer :: i3,i1
4376 real(real64) :: pot1
4377
4378 do i3 = 1,n3/4
4379 do i1 = 1,nfft
4380 pot1 = scal*zw(1,i1,i3)
4381 zf(i1, 2*i3 - 1) = pot1
4382 pot1 = scal*zw(2, i1, i3)
4383 zf(i1, 2*i3) = pot1
4384 end do
4385 end do
4386
4387 end subroutine unfill_downcorn
4388
4389! FFT PART RELATED TO THE KERNEL -----------------------------------------------------------------
4390!!****h* BigDFT/kernelfft
4391!! NAME
4392!! kernelfft
4393!!
4394!! FUNCTION
4395!! (Based on suitable modifications of S.Goedecker routines)
4396!! Calculates the FFT of the distributed kernel
4397!!
4398!! SYNOPSIS
4399!! zf: Real kernel (input)
4400!! zf(i1,i2,i3)
4401!! i1=1,nd1 , i2=1,nd2/nproc , i3=1,nd3
4402!! zr: Distributed Kernel FFT
4403!! zr(2,i1,i2,i3)
4404!! i1=1,nd1 , i2=1,nd2 , i3=1,nd3/nproc
4405!! nproc: number of processors used as returned by MPI_COMM_SIZE
4406!! iproc: [0:nproc-1] number of processor as returned by MPI_COMM_RANK
4407!! n1,n2,n3: logical dimension of the transform. As transform lengths
4408!! most products of the prime factors 2,3,5 are allowed.
4409!! The detailed table with allowed transform lengths can
4410!! be found in subroutine CTRIG
4411!! nd1,nd2,nd3: Dimensions of zr
4412!! comm: MPI communicator to use
4413!!
4414!! RESTRICTIONS on USAGE
4415!! Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
4416!! Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
4417!! Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
4418!! This file is distributed under the terms of the
4419!! GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
4420!!
4421!! AUTHORS
4422!! S. Goedecker, L. Genovese
4423!!
4424!! CREATION DATE
4425!! February 2006
4426!!
4427!! SOURCE
4428!!
4429 subroutine kernelfft(n1,n2,n3,nd1,nd2,nd3,nproc,iproc,zf,zr,comm)
4430 integer, intent(in) :: n1,n2,n3,nd1,nd2,nd3,nproc,iproc
4431 real(real64), dimension(nd1,n3,nd2/nproc), intent(in) :: zf
4432 real(real64), dimension(2,nd1,nd2,nd3/nproc), intent(out) :: zr
4433 type(mpi_comm), intent(in) :: comm
4434
4435 !Local variables
4436 integer :: ncache,lzt,lot,ma,mb,nfft,ic1,ic2,ic3,Jp2st,J2st
4437 integer :: j2,j3,i1,i3,i,j,inzee
4438#if defined(HAVE_MPI)
4439 integer :: ierr
4440#endif
4441 real(real64) :: twopion
4442 !work arrays for transpositions
4443 real(real64), dimension(:,:,:), allocatable :: zt
4444 !work arrays for MPI
4445 real(real64), dimension(:,:,:,:,:), allocatable :: zmpi1
4446 real(real64), dimension(:,:,:,:), allocatable :: zmpi2
4447 !cache work array
4448 real(real64), dimension(:,:,:), allocatable :: zw
4449 !FFT work arrays
4450 real(real64), dimension(:,:), allocatable :: trig1,trig2,trig3,cosinarr
4451 integer, dimension(:), allocatable :: after1,now1,before1, &
4452 after2,now2,before2,after3,now3,before3
4453
4454 !check input
4455 if (nd1 < n1) stop 'ERROR:nd1'
4456 if (nd2 < n2) stop 'ERROR:nd2'
4457 if (nd3 < n3/2+1) stop 'ERROR:nd3'
4458 if (mod(nd3,nproc) /= 0) stop 'ERROR:nd3'
4459 if (mod(nd2,nproc) /= 0) stop 'ERROR:nd2'
4460
4461 !defining work arrays dimensions
4463 if (ncache <= max(n1,n2,n3/2)*4) ncache=max(n1,n2,n3/2)*4
4464 lzt=n2
4465 if (mod(n2,2) == 0) lzt=lzt+1
4466 if (mod(n2,4) == 0) lzt=lzt+1
4467
4468 !Allocations
4469 safe_allocate(trig1(1:2,1:8192))
4470 safe_allocate(after1(1:7))
4471 safe_allocate(now1(1:7))
4472 safe_allocate(before1(1:7))
4473 safe_allocate(trig2(1:2,1:8192))
4474 safe_allocate(after2(1:7))
4475 safe_allocate(now2(1:7))
4476 safe_allocate(before2(1:7))
4477 safe_allocate(trig3(1:2,1:8192))
4478 safe_allocate(after3(1:7))
4479 safe_allocate(now3(1:7))
4480 safe_allocate(before3(1:7))
4481 safe_allocate(zw(1:2,1:ncache/4,1:2))
4482 safe_allocate(zt(1:2,1:lzt,1:n1))
4483 safe_allocate(zmpi2(1:2,1:n1,1:nd2/nproc,1:nd3))
4484 safe_allocate(cosinarr(1:2,1:n3/2))
4485 if (nproc > 1) then
4486 safe_allocate(zmpi1(1:2,1:n1,1:nd2/nproc,1:nd3/nproc,1:nproc))
4487 end if
4488
4489 !calculating the FFT work arrays (beware on the HalFFT in n3 dimension)
4490 call ctrig(n3/2,trig3,after3,before3,now3,1,ic3)
4491 call ctrig(n1,trig1,after1,before1,now1,1,ic1)
4492 call ctrig(n2,trig2,after2,before2,now2,1,ic2)
4493
4494 !Calculating array of phases for HalFFT decoding
4495 twopion=8._real64*datan(1.0_8)/real(n3, real64)
4496 do i3 = 1,n3/2
4497 cosinarr(1,i3)=dcos(twopion*(i3-1))
4498 cosinarr(2,i3)=-dsin(twopion*(i3-1))
4499 end do
4500
4501 !transform along z axis
4502
4503 lot=ncache/(2*n3)
4504 if (lot < 1) stop 'kernelfft:enlarge ncache for z'
4505
4506 do j2=1,nd2/nproc
4507 !this condition ensures that we manage only the interesting part for the FFT
4508 ! if (iproc*(nd2/nproc)+j2 <= n2) then
4509 do i1 = 1,n1,lot
4510 ma=i1
4511 mb=min(i1+(lot-1),n1)
4512 nfft = mb-ma+1
4513
4514 !inserting real data into complex array of half length
4515 !input: I1,I3,J2,(Jp2)
4516 call inserthalf(nd1,lot,nfft,n3,zf(i1,1,j2),zw(1,1,1))
4517
4518 !performing FFT
4519 inzee = 1
4520 do i = 1, ic3
4521 call fftstp(lot,nfft,n3/2,lot,n3/2,zw(1,1,inzee),zw(1,1,3-inzee), &
4522 trig3,after3(i),now3(i),before3(i),1)
4523 inzee = 3 - inzee
4524 end do
4525 !output: I1,i3,J2,(Jp2)
4526
4527 !unpacking FFT in order to restore correct result,
4528 !while exchanging components
4529 !input: I1,i3,J2,(Jp2)
4530 call scramble_unpack(i1,j2,lot,nfft,n1,n3,nd2,nproc,nd3,zw(1,1,inzee),zmpi2,cosinarr)
4531 !output: I1,J2,i3,(Jp2)
4532 end do
4533! end if
4534 end do
4535
4536 !Interprocessor data transposition
4537 !input: I1,J2,j3,jp3,(Jp2)
4538 if (nproc > 1) then
4539 !communication scheduling
4540#if defined(HAVE_MPI)
4541 call mpi_alltoall(zmpi2(:, 1, 1, 1),2*n1*(nd2/nproc)*(nd3/nproc), &
4542 mpi_double_precision, &
4543 zmpi1(:, 1, 1, 1, 1),2*n1*(nd2/nproc)*(nd3/nproc), &
4544 mpi_double_precision,comm,ierr)
4545 ! output: I1,J2,j3,Jp2,(jp3)
4546#endif
4547 end if
4548
4549
4550 do j3 = 1, nd3/nproc
4551 !this condition ensures that we manage only the interesting part for the FFT
4552 if (iproc*(nd3/nproc)+j3 <= n3/2+1) then
4553 jp2st = 1
4554 j2st = 1
4555
4556 !transform along x axis
4557 lot = ncache/(4*n1)
4558 if (lot < 1) stop 'kernelfft:enlarge ncache for x'
4559
4560 do j = 1, n2, lot
4561 ma = j
4562 mb = min(j+(lot-1),n2)
4563 nfft = mb-ma+1
4564
4565 !reverse ordering
4566 !input: I1,J2,j3,Jp2,(jp3)
4567 if (nproc == 1) then
4568 call mpiswitch(j3,nfft,jp2st,j2st,lot,n1,nd2,nd3,nproc,zmpi2,zw(1,1,1))
4569 else
4570 call mpiswitch(j3,nfft,jp2st,j2st,lot,n1,nd2,nd3,nproc,zmpi1,zw(1,1,1))
4571 end if
4572 !output: J2,Jp2,I1,j3,(jp3)
4573
4574 !performing FFT
4575 !input: I2,I1,j3,(jp3)
4576 inzee = 1
4577 do i = 1, ic1-1
4578 call fftstp(lot,nfft,n1,lot,n1,zw(1,1,inzee),zw(1,1,3-inzee), &
4579 trig1,after1(i),now1(i),before1(i),1)
4580 inzee = 3-inzee
4581 end do
4582 !storing the last step into zt
4583 i = ic1
4584 call fftstp(lot,nfft,n1,lzt,n1,zw(1,1,inzee),zt(1,j,1), &
4585 trig1,after1(i),now1(i),before1(i),1)
4586 !output: I2,i1,j3,(jp3)
4587 end do
4588
4589 !transform along y axis
4590 lot = ncache/(4*n2)
4591 if (lot < 1) stop 'kernelfft:enlarge ncache for y'
4592
4593 do j = 1, n1, lot
4594 ma = j
4595 mb = min(j+(lot-1), n1)
4596 nfft = mb - ma + 1
4597
4598 !reverse ordering
4599 !input: I2,i1,j3,(jp3)
4600 call switch(nfft,n2,lot,n1,lzt,zt(1,1,j),zw(1,1,1))
4601 !output: i1,I2,j3,(jp3)
4602
4603 !performing FFT
4604 !input: i1,I2,j3,(jp3)
4605 inzee = 1
4606 do i = 1, ic2-1
4607 call fftstp(lot,nfft,n2,lot,n2,zw(1,1,inzee),zw(1,1,3-inzee), &
4608 trig2,after2(i),now2(i),before2(i),1)
4609 inzee = 3 - inzee
4610 end do
4611 !storing the last step into output array
4612 i = ic2
4613 call fftstp(lot,nfft,n2,nd1,nd2,zw(1,1,inzee),zr(1,j,1,j3), &
4614 trig2,after2(i),now2(i),before2(i),1)
4615
4616 end do
4617 !output: i1,i2,j3,(jp3)
4618 end if
4619 end do
4620
4621 !De-allocations
4622 safe_deallocate_a(trig1)
4623 safe_deallocate_a(after1)
4624 safe_deallocate_a(now1)
4625 safe_deallocate_a(before1)
4626 safe_deallocate_a(trig2)
4627 safe_deallocate_a(after2)
4628 safe_deallocate_a(now2)
4629 safe_deallocate_a(before2)
4630 safe_deallocate_a(trig3)
4631 safe_deallocate_a(after3)
4632 safe_deallocate_a(now3)
4633 safe_deallocate_a(before3)
4634 safe_deallocate_a(zmpi2)
4635 safe_deallocate_a(zw)
4636 safe_deallocate_a(zt)
4637 safe_deallocate_a(cosinarr)
4638 if (nproc > 1) then
4639 safe_deallocate_a(zmpi1)
4640 end if
4641 end subroutine kernelfft
4642
4643 ! ---------------------------------------------------------------
4644
4645 subroutine switch(nfft,n2,lot,n1,lzt,zt,zw)
4646 integer :: lot, n2, lzt, n1, j, nfft, i
4647 real(real64) :: zw(1:2, 1:lot, 1:n2), zt(1:2, 1:lzt, 1:n1)
4648
4649 do j = 1, nfft
4650 do i = 1, n2
4651 zw(1,j,i) = zt(1,i,j)
4652 zw(2,j,i) = zt(2,i,j)
4653 end do
4654 end do
4655 end subroutine switch
4656
4657 ! ---------------------------------------------------------------
4658
4659 subroutine mpiswitch(j3,nfft,Jp2st,J2st,lot,n1,nd2,nd3,nproc,zmpi1,zw)
4660 integer :: n1, nd2, nproc, nd3, lot, mfft, jp2, jp2st
4661 integer :: j2st, nfft, i1, j3, j2
4662 real(real64) :: zmpi1(2,n1,nd2/nproc,nd3/nproc,nproc),zw(2,lot,n1)
4663
4664 mfft = 0
4665 do jp2 = jp2st, nproc
4666 do j2 = j2st, nd2/nproc
4667 mfft = mfft + 1
4668 if (mfft > nfft) then
4669 jp2st = jp2
4670 j2st = j2
4671 return
4672 end if
4673 do i1 = 1, n1
4674 zw(1,mfft,i1) = zmpi1(1, i1, j2, j3, jp2)
4675 zw(2,mfft,i1) = zmpi1(2, i1, j2, j3, jp2)
4676 end do
4677 end do
4678 j2st = 1
4679 end do
4680 end subroutine mpiswitch
4681
4682 ! ---------------------------------------------------------------
4683
4684 subroutine inserthalf(nd1,lot,nfft,n3,zf,zw)
4685 integer :: lot, n3, nd1, i3, i1, nfft
4686 real(real64) :: zw(1:2, 1:lot, 1:n3/2), zf(1:nd1, 1:n3)
4687
4688 do i3 = 1, n3/2
4689 do i1 = 1, nfft
4690 zw(1, i1, i3) = zf(i1, 2*i3 - 1)
4691 zw(2, i1, i3) = zf(i1, 2*i3)
4692 end do
4693 end do
4694
4695 end subroutine inserthalf
4696
4697end module sgfft_oct_m
4698
4699!! Local Variables:
4700!! mode: f90
4701!! coding: utf-8
4702!! End:
double sin(double __x) __attribute__((__nothrow__
double cos(double __x) __attribute__((__nothrow__
subroutine, public profiling_out(label)
Increment out counter and sum up difference between entry and exit time.
Definition: profiling.F90:623
subroutine, public profiling_in(label, exclude)
Increment in counter and save entry time.
Definition: profiling.F90:552
These routines are part of the ISF poisson solver, eventually they will be integrated with the other ...
Definition: sgfft.F90:117
subroutine, public fourier_dim(n, n_next)
Definition: sgfft.F90:149
subroutine mpiswitch_upcorn(j3, nfft, Jp2stb, J2stb, lot, n1, md2, nd3, nproc, zmpi1, zw)
Definition: sgfft.F90:4179
subroutine multkernel(nd1, nd2, n1, n2, lot, nfft, jS, pot, zw)
Definition: sgfft.F90:4114
subroutine ctrig(n, trig, after, before, now, isign, ic)
Definition: sgfft.F90:558
subroutine switch(nfft, n2, lot, n1, lzt, zt, zw)
Definition: sgfft.F90:4739
subroutine, public fft(n1, n2, n3, nd1, nd2, nd3, z, isign, inzee)
Definition: sgfft.F90:271
subroutine switch_upcorn(nfft, n2, lot, n1, lzt, zt, zw)
Definition: sgfft.F90:4152
subroutine halfill_upcorn(md1, md3, lot, nfft, n3, zf, zw)
Definition: sgfft.F90:4212
subroutine unmpiswitch_downcorn(j3, nfft, Jp2stf, J2stf, lot, n1, md2, nd3, nproc, zw, zmpi1)
Definition: sgfft.F90:4400
subroutine fftrot(mm, nfft, m, nn, n, zin, zout, trig, after, now, before, isign)
Definition: sgfft.F90:2201
subroutine unswitch_downcorn(nfft, n2, lot, n1, lzt, zw, zt)
Definition: sgfft.F90:4383
subroutine unscramble_pack(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zmpi2, zw, cosinarr)
Definition: sgfft.F90:4349
subroutine fftstp(mm, nfft, m, nn, n, zin, zout, trig, after, now, before, isign)
Definition: sgfft.F90:716
subroutine, public convolxc_off(n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, nproc, iproc, pot, zf, scal, comm)
Definition: sgfft.F90:3737
subroutine unfill_downcorn(md1, md3, lot, nfft, n3, zw, zf, scal)
Definition: sgfft.F90:4463
subroutine scramble_unpack(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zw, zmpi2, cosinarr)
Definition: sgfft.F90:4271
subroutine mpiswitch(j3, nfft, Jp2st, J2st, lot, n1, nd2, nd3, nproc, zmpi1, zw)
Definition: sgfft.F90:4753
subroutine, public kernelfft(n1, n2, n3, nd1, nd2, nd3, nproc, iproc, zf, zr, comm)
Definition: sgfft.F90:4523
subroutine inserthalf(nd1, lot, nfft, n3, zf, zw)
Definition: sgfft.F90:4778
integer function ncache_optimal()
Definition: sgfft.F90:3686