Octopus
debug.F90
Go to the documentation of this file.
1!! Copyright (C) 2016 X. Andrade
2!!
3!! This program is free software; you can redistribute it and/or modify
4!! it under the terms of the GNU General Public License as published by
5!! the Free Software Foundation; either version 2, or (at your option)
6!! any later version.
7!!
8!! This program is distributed in the hope that it will be useful,
9!! but WITHOUT ANY WARRANTY; without even the implied warranty of
10!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11!! GNU General Public License for more details.
12!!
13!! You should have received a copy of the GNU General Public License
14!! along with this program; if not, write to the Free Software
15!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16!! 02110-1301, USA.
17!!
18
19#include "global.h"
20
21module debug_oct_m
22 use global_oct_m
24 use mpi_oct_m
26 use loct_oct_m
27 use parser_oct_m
28
29 implicit none
30
31 private
32 public :: &
33 debug_t, &
34 debug_init, &
39 debug, &
41#ifndef NDEBUG
44#endif
46
47 type debug_t
48 private
49 logical, public :: info
50 logical, public :: trace
51 logical, public :: trace_term
52 logical, public :: trace_file
53 logical :: extra_checks
54 logical, public :: interaction_graph
55 logical, public :: interaction_graph_full
56 logical, public :: propagation_graph
57 logical, public :: instrument
58 integer :: bits
59 character(len=MAX_PATH_LEN), public :: instr_sub_name
60 integer, public :: instr_tool
61 end type debug_t
62
63 type(debug_t), save :: debug
64
66 integer, parameter :: unit_offset = 1000
67
68 interface
69 subroutine debug_verrou_start_instrumentation() bind(C)
71
72 subroutine debug_verrou_stop_instrumentation() bind(C)
74
75 subroutine debug_fenv_start_instrumentation() bind(C)
77
78 subroutine debug_fenv_stop_instrumentation() bind(C)
80 end interface
81
82contains
83
84 subroutine debug_init(this, namespace)
85 type(debug_t), intent(out) :: this
86 type(namespace_t), intent(in) :: namespace
87
88 character(len=256) :: node_hook
89 logical :: file_exists, mpi_debug_hook
90 integer :: sec, usec
91 type(block_t) :: blk
92 integer :: line
93
94 !%Variable Debug
95 !%Type flag
96 !%Default no
97 !%Section Execution::Debug
98 !%Description
99 !% This variable controls the amount of debugging information
100 !% generated by Octopus. You can use include more than one option
101 !% with the + operator.
102 !%Option no 0
103 !% (default) <tt>Octopus</tt> does not enter debug mode.
104 !%Option info 1
105 !% Octopus prints additional information to the terminal.
106 !%Option trace 2
107 !% Octopus generates a stack trace as it enters end exits
108 !% subroutines. This information is reported if Octopus stops with
109 !% an error.
110 !%Option trace_term 4
111 !% The trace is printed to the terminal as Octopus enters or exits subroutines. This slows down execution considerably.
112 !%Option trace_file 8
113 !% The trace is written to files in the <tt>debug</tt>
114 !% directory. For each node (when running in parallel) there is a file called
115 !% <tt>debug_trace.&lt;rank&gt;</tt>. Writing these files slows down the code by a huge factor and
116 !% it is usually only necessary for parallel runs.
117 !%Option extra_checks 16
118 !% This enables Octopus to perform some extra checks, to ensure
119 !% code correctness, that might be too costly for regular runs.
120 !%Option interaction_graph 32
121 !% Octopus generates a dot file containing the graph for a multisystem run.
122 !%Option interaction_graph_full 64
123 !% Octopus generates a dot file containing the graph for a multisystem run including ghost interactions.
124 !%Option propagation_graph 128
125 !% Octopus generates a file with information for the propagation diagram.
126 !%Option instrument 256
127 !% Octopus adds instrumentation to functions specified in an <tt>InstrumentFunctions</tt> block.
128 !%End
129 call parse_variable(namespace, 'Debug', option__debug__no, this%bits)
130
131 call from_bits(this)
132
133 ! Ensure instrumentation fields are initialized even if block is absent.
134 this%instr_sub_name = ''
135 this%instr_tool = 0
136
137 !%Variable InstrumentFunctions
138 !%Type block
139 !%Section Execution::Debug
140 !%Description
141 !% This input options controls which routines are going to be instrumented
142 !% for the tools selected using the <tt>Debug=instrument</tt> option.
143 !%
144 !% <br>%<tt>InstrumentFunctions
145 !% <br>&nbsp;&nbsp;"function_name" | instrumentation_tool
146 !% <br>%</tt>
147 !%
148 !% Here is an example to better understand how this works:
149 !%
150 !% <br>%<tt>InstrumentFunctions
151 !% <br>&nbsp;&nbsp;"grid/grid.F90.grid_init_from_grid_stage_1" | verrou
152 !% <br>%</tt>
153 !%
154 !% NOTE: Currently only a single function can be instrumented!
155 !%
156 !% Available instrumentation tools:
157 !%Option verrou 1
158 !% Verrou helps you look for floating-point round-off errors.
159 !%Option fenv 2
160 !% Enable floating-point exceptions. Requires Octopus to be compiled against glibc.
161 !%End
162 if (parse_block(namespace, "InstrumentFunctions", blk) == 0) then
163 ! TODO: Allow instrumentation of more than a single function
164 if (parse_block_n(blk) .gt. 1) then
165 write(stderr,'(a)') "Only single function can be instrumented!"
166 call mpi_world%abort()
167 end if
168
169 do line = 0, parse_block_n(blk) - 1
170 call parse_block_string(blk, line, 0, this%instr_sub_name)
171 call parse_block_integer(blk, line, 1, this%instr_tool)
172 select case (this%instr_tool)
173 case (option__instrumentfunctions__verrou)
174 write(stderr,'(a)') "Instrumenting " // trim(this%instr_sub_name) // " for Verrou"
175#if !defined(HAVE_VERROU)
176 write(stderr,'(a)') "requires VERROU but that library was not linked."
177 call mpi_world%abort()
178#endif
179 case (option__instrumentfunctions__fenv)
180 write(stderr,'(a)') "Instrumenting " // trim(this%instr_sub_name) // " with floating-point exceptions"
181 case default
182 assert(.false.) ! Should not happen
183 end select
184 end do
185 call parse_block_end(blk)
186 else if (this%instrument) then
187 write(stderr,'(a)') "Debug=instrument requires InstrumentFunctions block."
188 call mpi_world%abort()
189 end if
190
191 call mpi_debug_init(mpi_world%rank, this%info)
192
193 if (this%info) then
194 !%Variable MPIDebugHook
195 !%Type logical
196 !%Default no
197 !%Section Execution::Debug
198 !%Description
199 !% When debugging the code in parallel it is usually difficult to find the origin
200 !% of race conditions that appear in MPI communications. This variable introduces
201 !% a facility to control separate MPI processes. If set to yes, all nodes will
202 !% start up, but will get trapped in an endless loop. In every cycle of the loop
203 !% each node is sleeping for one second and is then checking if a file with the
204 !% name <tt>node_hook.xxx</tt> (where <tt>xxx</tt> denotes the node number) exists. A given node can
205 !% only be released from the loop if the corresponding file is created. This allows
206 !% to selectively run, <i>e.g.</i>, a compute node first followed by the master node. Or, by
207 !% reversing the file creation of the node hooks, to run the master first followed
208 !% by a compute node.
209 !%End
210 call parse_variable(global_namespace, 'MPIDebugHook', .false., mpi_debug_hook)
211 if (mpi_debug_hook) then
212 call loct_gettimeofday(sec, usec)
213 call epoch_time_diff(sec,usec)
214 write(stdout,'(a,i6,a,i6.6,20x,a)') '* I ',sec,'.',usec,' | MPI debug hook'
215
216 write(stdout,'(a,i3,a)') 'node:', mpi_world%rank, ' In debug hook'
217 write(node_hook,'(i3.3)') mpi_world%rank
218 file_exists = .false.
219
220 do while (.not. file_exists)
221 inquire(file='node_hook.'//node_hook, exist=file_exists)
222 call loct_nanosleep(1,0)
223 write(stdout,'(a,i3,a)') 'node:', mpi_world%rank, &
224 ' - still sleeping. To release me touch: node_hook.'//trim(node_hook)
225 end do
226
227 write(stdout,'(a,i3,a)') 'node:', mpi_world%rank, ' Leaving debug hook'
228 ! remove possible debug hooks
229 call loct_rm('node_hook.'//trim(node_hook))
230
231 call loct_gettimeofday(sec, usec)
232 call epoch_time_diff(sec,usec)
233 write(stdout,'(a,i6,a,i6.6,20x,a)') '* O ', sec, '.', usec,' | MPI debug hook'
234 end if
235 end if
236
237 end subroutine debug_init
238
239 !--------------------------------------------------
240
241 subroutine debug_enable(this)
242 type(debug_t), intent(inout) :: this
243
244 this%info = .true.
245 this%trace = .true.
246 this%trace_term = .true.
247 this%trace_file = .true.
248 this%interaction_graph = .true.
249 this%interaction_graph_full = .true.
250 this%propagation_graph = .true.
251
252 end subroutine debug_enable
253
254 !--------------------------------------------------
255
256 subroutine debug_disable(this)
257 type(debug_t), intent(inout) :: this
258
259 call from_bits(this)
260
261 end subroutine debug_disable
262
263 !--------------------------------------------------
264
265 subroutine debug_delete_trace()
266
267 integer :: iunit
268 character(len=6) :: filenum
269
270 iunit = mpi_world%rank + unit_offset
271 write(filenum, '(i6.6)') iunit - unit_offset
272 call loct_mkdir('debug')
273 call loct_rm('debug/debug_trace.node.'//filenum)
274
275 end subroutine debug_delete_trace
276
277 ! ---------------------------------------------------------
278
279 subroutine debug_open_trace(iunit)
280 integer, intent(out) :: iunit
281
282 character(len=6) :: filenum
283
284 iunit = mpi_world%rank + unit_offset
285 write(filenum, '(i6.6)') iunit - unit_offset
286 call loct_mkdir('debug')
287 open(iunit, file = 'debug/debug_trace.node.'//filenum, &
288 action='write', status='unknown', position='append')
289
290 end subroutine debug_open_trace
291
292 ! ---------------------------------------------------------
293
294 subroutine from_bits(this)
295 type(debug_t), intent(inout) :: this
296
297 this%info = (bitand(this%bits, option__debug__info) /= 0)
298 this%trace_term = (bitand(this%bits, option__debug__trace_term) /= 0)
299 this%trace_file = (bitand(this%bits, option__debug__trace_file) /= 0)
300 this%trace = (bitand(this%bits, option__debug__trace) /= 0) .or. this%trace_term .or. this%trace_file
301 this%extra_checks = (bitand(this%bits, option__debug__extra_checks) /= 0) .or. this%trace_term .or. this%trace_file
302 this%interaction_graph = (bitand(this%bits, option__debug__interaction_graph) /= 0)
303 this%interaction_graph_full = (bitand(this%bits, option__debug__interaction_graph_full) /= 0)
304 this%propagation_graph = (bitand(this%bits, option__debug__propagation_graph) /= 0)
305 this%instrument = (bitand(this%bits, option__debug__instrument) /= 0)
306
307 end subroutine from_bits
308
309
310 ! ---------------------------------------------------------
311 subroutine epoch_time_diff(sec, usec)
312 integer, intent(inout) :: sec
313 integer, intent(inout) :: usec
314
315 ! this is called by push/pop so there cannot be a push/pop in this routine
316
317 call time_diff(s_epoch_sec, s_epoch_usec, sec, usec)
318 end subroutine epoch_time_diff
319
320
321 ! ---------------------------------------------------------
324 subroutine time_diff(sec1, usec1, sec2, usec2)
325 integer, intent(in) :: sec1
326 integer, intent(in) :: usec1
327 integer, intent(inout) :: sec2
328 integer, intent(inout) :: usec2
329
330 ! this is called by push/pop so there cannot be a push/pop in this routine
331
332 ! Correct overflow.
333 if (usec2 - usec1 < 0) then
334 usec2 = 1000000 + usec2
335 if (sec2 >= sec1) then
336 sec2 = sec2 - 1
337 end if
338 end if
339
340 ! Replace values.
341 if (sec2 >= sec1) then
342 sec2 = sec2 - sec1
343 end if
344 usec2 = usec2 - usec1
345
346 end subroutine time_diff
347
348
349#ifndef NDEBUG
350 ! ---------------------------------------------------------
352 subroutine debug_push_sub(sub_name)
353 character(len=*), intent(in) :: sub_name
354
355 integer, parameter :: max_recursion_level = 50
356 integer iunit, sec, usec
357
358 if (debug%instrument) then
359 if (debug_clean_path(sub_name) == trim(debug%instr_sub_name)) then
360 select case (debug%instr_tool)
361 case (option__instrumentfunctions__verrou)
363 case (option__instrumentfunctions__fenv)
365 case default
366 assert(.false.) ! cannot happen
367 end select
368 end if
369 end if
370
371 if (.not. debug%trace) return
372
373 if (debug%trace_file .or. debug%trace_term) then
374 call loct_gettimeofday(sec, usec)
375 call epoch_time_diff(sec, usec)
376 end if
377
379 if (no_sub_stack >= max_recursion_level) then
380 sub_stack(max_recursion_level) = 'debug_push_sub'
381 write(stderr, '(a,i3,a)') 'Too many recursion levels in debug trace (max=', max_recursion_level, ')'
382 write(stderr, '(a,a)') 'Last sub name is ', trim(debug_clean_path(sub_name))
383 call mpi_world%abort()
384 end if
385
386 sub_stack(no_sub_stack) = trim(debug_clean_path(sub_name))
388
389 if (debug%trace_file) then
390 call debug_open_trace(iunit)
391 call push_sub_write(iunit)
392 ! close file to ensure flushing
393 close(iunit)
394 end if
395
396 if (debug%trace_term .and. mpi_world%is_root()) then
397 ! write to stderr if we are node 0
398 call push_sub_write(stderr)
399 end if
400
401 contains
402
403 subroutine push_sub_write(iunit_out)
404 integer, intent(in) :: iunit_out
405
406 integer :: ii
407 character(len=1000) :: tmpstr
408
409 write(tmpstr,'(a,i6,a,i6.6,f20.6,i8,a)') "* I ", &
410 sec, '.', usec, &
411 loct_clock(), &
412 loct_get_memory_usage() / 1024, " | "
413 do ii = no_sub_stack - 1, 1, -1
414 write(tmpstr, '(2a)') trim(tmpstr), "..|"
415 end do
416 write(tmpstr, '(2a)') trim(tmpstr), trim(debug_clean_path(sub_name))
417 write(iunit_out, '(a)') trim(tmpstr)
418
419 end subroutine push_sub_write
420
421 end subroutine debug_push_sub
422
423 ! ---------------------------------------------------------
425 subroutine debug_pop_sub(sub_name)
426 character(len=*), intent(in) :: sub_name
427
428 character(len=80) :: sub_name_short
429 integer iunit, sec, usec
430
431 if (debug%instrument) then
432 if (debug_clean_path(sub_name) == trim(debug%instr_sub_name)) then
433 select case (debug%instr_tool)
434 case (option__instrumentfunctions__verrou)
436 case (option__instrumentfunctions__fenv)
438 case default
439 assert(.false.) ! cannot happen
440 end select
441 end if
442 end if
443
444 if (.not. debug%trace) return
445
446 call loct_gettimeofday(sec, usec)
447 call epoch_time_diff(sec, usec)
448
449 if (no_sub_stack <= 0) then
450 no_sub_stack = 1
451 sub_stack(1) = 'pop_sub'
452 write(stderr, '(a)') 'Too few recursion levels in debug trace'
453 call mpi_world%abort()
454 end if
455
456 ! the name might be truncated in sub_stack, so we copy to a string
457 ! of the same size
458 sub_name_short = trim(debug_clean_path(sub_name))
459
460 if (sub_name_short /= sub_stack(no_sub_stack)) then
461 write(stderr, '(a)') 'Wrong sub name on pop_sub :'
462 write(stderr, '(2a)') ' got : ', sub_name_short
463 write(stderr, '(2a)') ' expected : ', sub_stack(no_sub_stack)
464 call mpi_world%abort()
465 end if
466
467 if (debug%trace_file) then
468 call debug_open_trace(iunit)
469 call pop_sub_write(iunit)
470 ! close file to ensure flushing
471 close(iunit)
472 end if
473
474 if (debug%trace_term .and. mpi_world%is_root()) then
475 ! write to stderr if we are node 0
476 call pop_sub_write(stderr)
477 end if
478
480
481 contains
482
483 subroutine pop_sub_write(iunit_out)
484 integer, intent(in) :: iunit_out
485
486 integer :: ii
487 character(len=1000) :: tmpstr
488
489 write(tmpstr,'(a,i6,a,i6.6,f20.6,i8, a)') "* O ", &
490 sec, '.', usec, &
492 loct_get_memory_usage() / 1024, " | "
493 do ii = no_sub_stack - 1, 1, -1
494 write(tmpstr,'(2a)') trim(tmpstr), "..|"
495 end do
496 write(tmpstr,'(2a)') trim(tmpstr), trim(sub_stack(no_sub_stack))
497
498 write(iunit_out, '(a)') trim(tmpstr)
499
500 end subroutine pop_sub_write
501
502 end subroutine debug_pop_sub
503#endif
504
505 ! -----------------------------------------------------------
507 character(len=MAX_PATH_LEN) function debug_clean_path(filename) result(clean_path)
508 character(len=*), intent(in) :: filename
509
510 integer :: pos
511
512 pos = index(filename, 'src/', back = .true.)
513 if (pos == 0) then
514 ! 'src/' does not occur
515 clean_path = filename
516 else
517 ! remove 'src/'
518 clean_path = filename(pos+4:)
519 end if
521 end function debug_clean_path
522
523end module debug_oct_m
524
525!! Local Variables:
526!! mode: f90
527!! coding: utf-8
528!! End:
subroutine pop_sub_write(iunit_out)
Definition: debug.F90:579
subroutine push_sub_write(iunit_out)
Definition: debug.F90:499
character(len=max_path_len) function, public debug_clean_path(filename)
Prune a filename path to only include subdirectories of the "src" directory.
Definition: debug.F90:603
subroutine, public debug_enable(this)
Definition: debug.F90:337
type(debug_t), save, public debug
Definition: debug.F90:158
subroutine, public debug_pop_sub(sub_name)
Pop a routine from the debug trace.
Definition: debug.F90:521
subroutine, public debug_open_trace(iunit)
Definition: debug.F90:375
subroutine, public epoch_time_diff(sec, usec)
Definition: debug.F90:407
subroutine from_bits(this)
Definition: debug.F90:390
subroutine, public debug_init(this, namespace)
Definition: debug.F90:180
subroutine, public debug_disable(this)
Definition: debug.F90:352
subroutine time_diff(sec1, usec1, sec2, usec2)
Computes t2 <- t2-t1. sec1,2 and usec1,2 are seconds,microseconds of t1,2.
Definition: debug.F90:420
subroutine, public debug_push_sub(sub_name)
Push a routine to the debug trace.
Definition: debug.F90:448
subroutine, public debug_delete_trace()
Definition: debug.F90:361
integer, public s_epoch_sec
global epoch time (time at startup)
Definition: global.F90:262
integer, public no_sub_stack
Definition: global.F90:267
real(real64), dimension(50), public time_stack
Definition: global.F90:266
character(len=80), dimension(50), public sub_stack
The stack.
Definition: global.F90:265
integer, public s_epoch_usec
Definition: global.F90:262
System information (time, memory, sysname)
Definition: loct.F90:117
subroutine, public loct_rm(name)
Definition: loct.F90:318
subroutine, public loct_mkdir(name)
Definition: loct.F90:304
subroutine, public mpi_debug_init(rank, info)
Definition: mpi_debug.F90:196
type(mpi_grp_t), public mpi_world
Definition: mpi.F90:272
type(namespace_t), public global_namespace
Definition: namespace.F90:135
subroutine, public parse_block_string(blk, l, c, res, convert_to_c)
Definition: parser.F90:818
integer function, public parse_block(namespace, name, blk, check_varinfo_)
Definition: parser.F90:623
int true(void)