MemoryTrackerLinux_service.cc
Go to the documentation of this file.
1 // vim: set sw=2 expandtab :
2 // ======================================================================
3 // MemoryTracker
4 //
5 // This MemoryTracker implementation is supported only for Linux
6 // systems. It relies on the proc file system to record VSize and RSS
7 // information throughout the course of an art process. It inserts
8 // memory information into an in-memory SQLite database, or an
9 // external file if the user provides a non-empty file name.
10 //
11 // Since information that procfs provides is process-specific, the
12 // MemoryTracker does not attempt to provide per-module information in
13 // the context of multi-threading. If more than one thread has been
14 // enabled for the art process, only the maximum RSS and VSize for the
15 // process are reported at the end of the job.
16 // ======================================================================
17 
18 #ifndef __linux__
19 #error "This source file can be built only for Linux platforms."
20 #endif
21 
32 #include "art/Utilities/Globals.h"
37 #include "cetlib/HorizontalRule.h"
40 #include "cetlib/sqlite/Ntuple.h"
41 #include "cetlib/sqlite/select.h"
42 #include "fhiclcpp/types/Atom.h"
46 
47 #include <iomanip>
48 #include <memory>
49 #include <sstream>
50 #include <string>
51 #include <tuple>
52 #include <vector>
53 
54 using namespace std;
55 using namespace string_literals;
56 using namespace cet;
57 
61 
62 namespace art {
63 
// Service class that records process memory usage (Vsize and RSS and,
// optionally, malloc statistics) into SQLite tables whose column names
// are listed in the name_array members below.
// NOTE(review): this listing appears to have dropped lines (the targets
// of the type aliases, the constructor declaration, and some table
// members); confirm against the original source before relying on it.
64  class MemoryTracker {
65  template <unsigned N>
68  using otherInfo_t =
70  using memEvent_t =
73  uint32_t,
74  uint32_t,
75  uint32_t,
76  string,
77  string,
78  string,
79  double,
80  double>;
82  uint32_t,
83  uint32_t,
84  uint32_t,
85  int,
86  int,
87  int,
88  int,
89  int,
90  int,
91  int>;
93  uint32_t,
94  uint32_t,
95  uint32_t,
96  string,
97  string,
98  string,
99  int,
100  int,
101  int,
102  int,
103  int,
104  int,
105  int>;
106 
107  public:
108  static constexpr bool service_handle_allowed{false};
109 
// Configuration description: a 'dbOutput' table holding 'filename'
// (default "") and 'overwrite' (default false), plus the top-level
// 'includeMallocInfo' flag (default false).
110  struct Config {
111  template <typename T>
113  using Name = fhicl::Name;
115  template <typename T>
117  struct DBoutput {
118  Atom<string> filename{Name{"filename"}, ""};
119  Atom<bool> overwrite{Name{"overwrite"}, false};
120  };
121  Table<DBoutput> dbOutput{Name{"dbOutput"}};
122  Atom<bool> includeMallocInfo{Name{"includeMallocInfo"}, false};
123  };
124 
127 
128  private:
129  void prePathProcessing(PathContext const& pc);
130  void recordOtherData(ModuleDescription const& md, string const& step);
131  void recordOtherData(ModuleContext const& mc, string const& step);
132  void recordEventData(Event const& e, string const& step);
133  void recordModuleData(ModuleContext const& mc, string const& step);
134  void postEndJob();
135  bool checkMallocConfig_(string const&, bool);
136  void recordPeakUsages_();
137  void flushTables_();
138  void summary_();
139 
140  LinuxProcMgr procInfo_{};
141  string const fileName_;
142  unique_ptr<cet::sqlite::Connection> const db_;
143  bool const overwriteContents_;
144  bool const includeMallocInfo_;
145 
146  // NB: using "current" semantics for the MemoryTracker is valid
147  // since per-module/event information are retrieved only in a
148  // sequential (i.e. single-threaded) context.
149  EventID currentEventID_{EventID::invalidEvent()};
// Column names for the tracker's tables; the number of names in each
// initializer matches the array's template argument.
150  name_array<3u> peakUsageColumns_{{"Name", "Value", "Description"}};
151  name_array<5u> otherInfoColumns_{
152  {"Step", "ModuleLabel", "ModuleType", "Vsize", "RSS"}};
153  name_array<6u> eventColumns_{
154  {"Step", "Run", "SubRun", "Event", "Vsize", "RSS"}};
155  name_array<9u> moduleColumns_{{"Step",
156  "Run",
157  "SubRun",
158  "Event",
159  "Path",
160  "ModuleLabel",
161  "ModuleType",
162  "Vsize",
163  "RSS"}};
164  name_array<11u> eventHeapColumns_{{"Step",
165  "Run",
166  "SubRun",
167  "Event",
168  "arena",
169  "ordblks",
170  "keepcost",
171  "hblkhd",
172  "hblks",
173  "uordblks",
174  "fordblks"}};
175  name_array<14u> moduleHeapColumns_{{"Step",
176  "Run",
177  "SubRun",
178  "Event",
179  "Path",
180  "ModuleLabel",
181  "ModuleType",
182  "arena",
183  "ordblks",
184  "keepcost",
185  "hblkhd",
186  "hblks",
187  "uordblks",
188  "fordblks"}};
// The malloc-info tables are held by pointer and may be null:
// flushTables_ tests each pointer before flushing it.
193  unique_ptr<memEventHeap_t> eventHeapTable_;
194  unique_ptr<memModuleHeap_t> moduleHeapTable_;
195  };
196 
// Constructor: initializes the members from the service configuration
// and registers callbacks with the ActivityRegistry.
//
// config: validated service configuration (see Config).
// iReg:   registry whose signals the tracker attaches its callbacks to.
//
// NOTE(review): parts of the member-initializer list appear to be missing
// from this listing (for example the heads of the db_ and
// includeMallocInfo_ initializers); confirm against the original source.
197  MemoryTracker::MemoryTracker(ServiceTable<Config> const& config,
198  ActivityRegistry& iReg)
199  : fileName_{config().dbOutput().filename()}
201  , overwriteContents_{config().dbOutput().overwrite()}
203  config().includeMallocInfo())}
204  // Fix so that a value of 'false' is an error if filename => in-memory db.
205  , peakUsageTable_{*db_, "PeakUsage", peakUsageColumns_, true}
206  // always recompute the peak usage
208  , eventTable_{*db_, "EventInfo", eventColumns_, overwriteContents_}
209  , moduleTable_{*db_, "ModuleInfo", moduleColumns_, overwriteContents_}
211  make_unique<memEventHeap_t>(*db_,
212  "EventMallocInfo",
214  nullptr}
216  make_unique<memModuleHeap_t>(*db_,
217  "ModuleMallocInfo",
219  nullptr}
220  {
// The end-of-job callback is registered unconditionally.
221  iReg.sPostEndJob.watch(this, &MemoryTracker::postEndJob);
222  auto const nthreads = Globals::instance()->nthreads();
// Warn whenever the configured thread count is anything other than one.
223  if (nthreads != 1) {
224  mf::LogWarning("MemoryTracker")
225  << "Since " << nthreads
226  << " threads have been configured, only process-level\n"
227  "memory usage will be recorded at the end of the job.";
228  }
229 
// Per-module and per-event callbacks are registered only when a
// database file name was supplied and exactly one thread is configured.
// Each callback forwards to a record* member with a fixed step label.
230  if (!fileName_.empty() && nthreads == 1u) {
231  iReg.sPreModuleConstruction.watch([this](auto const& md) {
232  this->recordOtherData(md, "PreModuleConstruction");
233  });
234  iReg.sPostModuleConstruction.watch([this](auto const& md) {
235  this->recordOtherData(md, "PostModuleConstruction");
236  });
237  iReg.sPreModuleBeginJob.watch(
238  [this](auto const& md) { this->recordOtherData(md, "PreBeginJob"); });
239  iReg.sPostModuleBeginJob.watch(
240  [this](auto const& md) { this->recordOtherData(md, "PostBeginJob"); });
241  iReg.sPreModuleBeginRun.watch(
242  [this](auto const& mc) { this->recordOtherData(mc, "PreBeginRun"); });
243  iReg.sPostModuleBeginRun.watch(
244  [this](auto const& mc) { this->recordOtherData(mc, "PostBeginRun"); });
245  iReg.sPreModuleBeginSubRun.watch([this](auto const& mc) {
246  this->recordOtherData(mc, "PreBeginSubRun");
247  });
248  iReg.sPostModuleBeginSubRun.watch([this](auto const& mc) {
249  this->recordOtherData(mc, "PostBeginSubRun");
250  });
251  iReg.sPreProcessEvent.watch([this](auto const& e, ScheduleContext) {
252  this->recordEventData(e, "PreProcessEvent");
253  });
254  iReg.sPostProcessEvent.watch([this](auto const& e, ScheduleContext) {
255  this->recordEventData(e, "PostProcessEvent");
256  });
257  iReg.sPreModule.watch([this](auto const& mc) {
258  this->recordModuleData(mc, "PreProcessModule");
259  });
260  iReg.sPostModule.watch([this](auto const& mc) {
261  this->recordModuleData(mc, "PostProcessModule");
262  });
263  iReg.sPreWriteEvent.watch([this](auto const& mc) {
264  this->recordModuleData(mc, "PreWriteEvent");
265  });
266  iReg.sPostWriteEvent.watch([this](auto const& mc) {
267  this->recordModuleData(mc, "PostWriteEvent");
268  });
269  iReg.sPreModuleEndSubRun.watch(
270  [this](auto const& mc) { this->recordOtherData(mc, "PreEndSubRun"); });
271  iReg.sPreModuleEndRun.watch(
272  [this](auto const& mc) { this->recordOtherData(mc, "PreEndRun"); });
273  iReg.sPreModuleEndJob.watch(
274  [this](auto const& md) { this->recordOtherData(md, "PreEndJob"); });
275  iReg.sPostModuleEndSubRun.watch(
276  [this](auto const& mc) { this->recordOtherData(mc, "PostEndSubRun"); });
277  iReg.sPostModuleEndRun.watch(
278  [this](auto const& mc) { this->recordOtherData(mc, "PostEndRun"); });
279  iReg.sPostModuleEndJob.watch(
280  [this](auto const& md) { this->recordOtherData(md, "PostEndJob"); });
281  }
282  }
283 
284  void
286  {
288  }
289 
// Inserts one row into otherInfoTable_: the step label, the module's
// label and name, and the current Vsize and RSS values converted with
// LinuxProcData::getValueInMB.
// NOTE(review): the signature line naming this function is missing from
// this listing; presumably MemoryTracker::recordOtherData taking a
// ModuleDescription const& md - confirm against the original source.
290  void
292  string const& step)
293  {
// Take a single snapshot so that both values come from the same reading.
294  auto const data = procInfo_.getCurrentData();
295  otherInfoTable_.insert(step,
296  md.moduleLabel(),
297  md.moduleName(),
298  LinuxProcData::getValueInMB<vsize_t>(data),
299  LinuxProcData::getValueInMB<rss_t>(data));
300  }
301 
// Stores the event's ID in currentEventID_, then inserts the current
// Vsize and RSS values into eventTable_. When includeMallocInfo_ is set,
// a row of mallinfo fields is also inserted into eventHeapTable_.
// NOTE(review): the signature line and some insert() arguments are
// missing from this listing; presumably MemoryTracker::recordEventData(
// Event const& e, string const& step) - confirm against the original.
302  void
304  {
305  currentEventID_ = e.id();
306  auto const currentMemory = procInfo_.getCurrentData();
307  eventTable_.insert(step,
311  LinuxProcData::getValueInMB<vsize_t>(currentMemory),
312  LinuxProcData::getValueInMB<rss_t>(currentMemory));
// Heap statistics are recorded only when malloc info was requested.
313  if (includeMallocInfo_) {
314  auto minfo = LinuxMallInfo{}.get();
315  eventHeapTable_->insert(step,
319  minfo.arena,
320  minfo.ordblks,
321  minfo.keepcost,
322  minfo.hblkhd,
323  minfo.hblks,
324  minfo.uordblks,
325  minfo.fordblks);
326  }
327  }
328 
// Inserts a row into moduleTable_ containing the step label, the path
// name, the module's label and name, and the current Vsize and RSS
// values. When includeMallocInfo_ is set, a row of mallinfo fields is
// also inserted into moduleHeapTable_.
// NOTE(review): the signature line and some insert() arguments are
// missing from this listing; presumably MemoryTracker::recordModuleData(
// ModuleContext const& mc, string const& step) - confirm against the
// original source.
329  void
331  {
332  auto const currentMemory = procInfo_.getCurrentData();
333  moduleTable_.insert(step,
337  mc.pathName(),
338  mc.moduleLabel(),
339  mc.moduleName(),
340  LinuxProcData::getValueInMB<vsize_t>(currentMemory),
341  LinuxProcData::getValueInMB<rss_t>(currentMemory));
// Heap statistics are recorded only when malloc info was requested.
342  if (includeMallocInfo_) {
343  auto minfo = LinuxMallInfo{}.get();
344  moduleHeapTable_->insert(step,
348  mc.pathName(),
349  mc.moduleLabel(),
350  mc.moduleName(),
351  minfo.arena,
352  minfo.ordblks,
353  minfo.keepcost,
354  minfo.hblkhd,
355  minfo.hblks,
356  minfo.uordblks,
357  minfo.fordblks);
358  }
359  }
360 
// Flushes the tables and then logs the summary.
// NOTE(review): the signature line and one statement are missing from
// this listing; presumably this is MemoryTracker::postEndJob() - confirm
// against the original source.
361  void
363  {
365  flushTables_();
366  summary_();
367  }
368 
369  bool
370  MemoryTracker::checkMallocConfig_(string const& dbfilename,
371  bool const include)
372  {
373  if (include && dbfilename.empty()) {
374  string const errmsg =
375  "\n'includeMallocInfo : true' is valid only if a nonempty db filename is specified:\n\n"s +
376  " MemoryTracker: {\n"
377  " includeMallocInfo: true\n"
378  " dbOutput: {\n"
379  " filename: \"your_filename.db\"\n"
380  " }\n"
381  " }\n\n";
382  throw Exception{errors::Configuration} << errmsg;
383  }
384  return include;
385  }
386 
// Supplies two name/value/description triples: "VmPeak" with
// procInfo_.getVmPeak() and "VmHWM" with procInfo_.getVmHWM().
// NOTE(review): the signature line and the call expressions that receive
// these arguments are missing from this listing; presumably they are
// peakUsageTable_.insert(...) calls inside
// MemoryTracker::recordPeakUsages_() - confirm against the original.
387  void
389  {
391  "VmPeak", procInfo_.getVmPeak(), "Peak virtual memory (MB)");
393  "VmHWM", procInfo_.getVmHWM(), "Peak resident set size (MB)");
394  }
395 
// Flushes eventTable_ and, when they exist, the two malloc-info tables.
// NOTE(review): the signature line and possibly further flush() calls
// are missing from this listing; presumably this is
// MemoryTracker::flushTables_() - confirm against the original source.
396  void
398  {
400  eventTable_.flush();
// The heap tables are held by pointer and are flushed only when non-null.
403  if (eventHeapTable_) {
404  eventHeapTable_->flush();
405  }
406  if (moduleHeapTable_) {
407  moduleHeapTable_->flush();
408  }
409  }
410 
// Queries the peak-usage table for the 'VmPeak' and 'VmHWM' values and
// writes a framed summary through mf::LogAbsolute.
// NOTE(review): the signature line is missing from this listing;
// presumably this is MemoryTracker::summary_() - confirm against the
// original source.
411  void
413  {
414  using namespace cet::sqlite;
415  using namespace std;
416  query_result<double> rVMax;
417  query_result<double> rRMax;
// One query per quantity, selected by the 'Name' column.
418  rVMax << select("Value")
419  .from(*db_, peakUsageTable_.name())
420  .where("Name='VmPeak'");
421  rRMax << select("Value")
422  .from(*db_, peakUsageTable_.name())
423  .where("Name='VmHWM'");
424  mf::LogAbsolute log{"MemoryTracker"};
425  HorizontalRule const rule{100};
426  log << '\n' << rule('=') << '\n';
427  log << std::left << "MemoryTracker summary (base-10 MB units used)\n\n";
428  log << " Peak virtual memory usage (VmPeak) : " << unique_value(rVMax)
429  << " MB\n"
430  << " Peak resident set size usage (VmHWM): " << unique_value(rRMax)
431  << " MB\n";
// The output location is mentioned only for a real file, i.e. not for an
// empty name and not for the ":memory:" name.
432  if (!(fileName_.empty() || fileName_ == ":memory:")) {
433  log << " Details saved in: '" << fileName_ << "'\n";
434  }
435  log << rule('=');
436  }
437 
438 } // namespace art
439 
void insert(Args const ...)
Definition: Ntuple.h:233
double getVmHWM() const noexcept(false)
Definition: LinuxProcMgr.h:35
name_array< 11u > eventHeapColumns_
T unique_value(query_result< T > const &r)
Definition: query_result.h:94
double getVmPeak() const noexcept(false)
Definition: LinuxProcMgr.h:30
auto const & pathName() const
Definition: ModuleContext.h:33
T * get() const
Definition: ServiceHandle.h:63
#define DEFINE_ART_SERVICE(svc)
Definition: ServiceMacros.h:88
art::LinuxProcData::rss_t rss_t
cet::sqlite::name_array< N > name_array
#define DECLARE_ART_SERVICE(svc, scope)
Definition: ServiceMacros.h:86
Definition: config.py:1
string filename
Definition: shutoffs.py:106
unique_ptr< memEventHeap_t > eventHeapTable_
RunNumber_t run() const
Definition: EventID.h:98
std::string const & name() const
Definition: Ntuple.h:147
LinuxProcData::proc_tuple getCurrentData() const noexcept(false)
ScheduleID::size_type nthreads() const
Definition: log.py:1
const XML_Char const XML_Char * data
Definition: expat.h:268
const XML_Char * s
Definition: expat.h:262
void recordEventData(Event const &e, string const &step)
auto const & moduleName() const
Definition: ModuleContext.h:48
std::string const & moduleName() const
unique_ptr< cet::sqlite::Connection > const db_
struct mallinfo get() const
Definition: LinuxMallInfo.h:42
void include(std::istream &in, std::string &result)
bool checkMallocConfig_(string const &, bool)
auto select(T const &...t)
Definition: select.h:146
std::string const & moduleLabel() const
EventNumber_t event() const
Definition: EventID.h:116
cet::coded_exception< errors::ErrorCodes, ExceptionDetail::translate > Exception
Definition: Exception.h:66
constexpr auto const & left(const_AssnsIter< L, R, D, Dir > const &a, const_AssnsIter< L, R, D, Dir > const &b)
Definition: AssnsIter.h:96
SubRunNumber_t subRun() const
Definition: EventID.h:110
art::LinuxProcData::vsize_t vsize_t
auto const & moduleDescription() const
Definition: ModuleContext.h:38
MaybeLogger_< ELseverityLevel::ELsev_warning, false > LogWarning
static Globals * instance()
auto const & moduleLabel() const
Definition: ModuleContext.h:43
std::array< std::string, N > name_array
Definition: column.h:40
Service to store calibration data products (CDP) in the SQLite3 metadatabase of a file...
Definition: FillParentInfo.h:8
name_array< 14u > moduleHeapColumns_
void recordModuleData(ModuleContext const &mc, string const &step)
Float_t e
Definition: plot.C:35
void recordOtherData(ModuleDescription const &md, string const &step)
unique_ptr< memModuleHeap_t > moduleHeapTable_
EventID id() const
enum BeamMode string