ProductionTestTools.py
Go to the documentation of this file.
1 ###########################
2 # Print the important environment variables
# Print the important environment variables.
# Runs at module import time: reports the value of each production-relevant
# environment variable, or notes that it is not defined.
if "os" not in dir(): import os
print "tools: --- Print Environment"
# Environment variables that matter for a production/release context.
env_variables = ["EXPERIMENT",
                 "SRT_BASE_RELEASE",
                 "SRT_QUAL",
                 "SRT_PUBLIC_CONTEXT",
                 "SRT_PRIVATE_CONTEXT",
                 "EXTERNALS",
                 "LHAPATH"
                 ]
for var in env_variables:
    if var in os.environ.keys():
        print "tools: %s: "%var,os.environ[var]
    else:
        print "tools: %s: Not defined"%var
19 ###########################
20 # Parse file sizes into human readable format
def convertSize(size):
    """Render a byte count as a human readable string, e.g. "1.5 MB".

    None and 0 both map to "0 B".
    """
    if size in [None, 0]:
        return "0 B"
    import math
    units = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
    # Pick the largest power of 1024 not exceeding the size.
    exponent = int(math.floor(math.log(size, 1024)))
    scaled = round(size / math.pow(1024, exponent), 2)
    if scaled > 0:
        return '%s %s' % (scaled, units[exponent])
    return '0 B'
32 ###########################
33 # Inspect a file
def inspectFile(file_name):
    """Return (human readable size, size in bytes) for a local file.

    A "*" in the name is resolved via findWildcardFile(); a missing or
    ambiguous file yields ("", 0).
    """
    import os
    if "*" in file_name:
        resolved = findWildcardFile(file_name)
        if not resolved:
            print("tools: File: %s doesn't exist locally." % file_name)
            return "", 0
        file_name = resolved
    try:
        input_size = os.path.getsize(file_name)
    except OSError:
        # Narrowed from a bare except: only a missing/unreadable path is
        # expected here; anything else should propagate.
        print("tools: File: %s doesn't exist locally." % file_name)
        return "", 0
    return convertSize(input_size), input_size
50 ###########################
# find a unique file matching a wildcard pattern
def findWildcardFile(file_name, verbose=False):
    """Resolve a wildcard pattern to exactly one file name.

    Returns the unique match, or False when there are zero or several.
    """
    from glob import glob
    if verbose:
        print("tools: --- findWildcardFile")
    matches = glob(file_name)
    # remove files that are not of this tier, this is an ugly hack
    if "caf" not in file_name:
        matches = [m for m in matches if "caf" not in m]
    if not matches:
        print("tools: No files found matching name: %s" % file_name)
        return False
    if len(matches) > 1:
        print("tools: More than one file found matching name: %s" % file_name)
        print("tools: %s" % repr(matches))
        return False
    if verbose:
        print("tools: Found unique file name: %s" % matches[0])
    return matches[0]
74 ###########################
75 # make a folder
def makeFolder(folder):
    """Create *folder* if it does not already exist.

    Uses os.mkdir directly instead of shelling out to `mkdir`, which was
    unsafe for paths containing spaces or shell metacharacters.
    NOTE(review): a later duplicate definition of makeFolder in this file
    shadows this one at import time.
    """
    import os
    if not os.path.exists(folder):
        os.mkdir(folder)
80 ###########################
81 # Get metadata from SAM
def getSAMMetaData(file_name):
    """Return the SAM metadata dictionary for *file_name*.

    Queries the SAM instance of the experiment named by the EXPERIMENT
    environment variable. Exits the whole process if samweb_client cannot
    be imported (i.e. SAM is not set up).
    """
    # Lazy import: samweb_client only exists once SAM has been set up.
    if "samweb_client" not in dir():
        try:
            import samweb_client
        except ImportError:
            print "ImportError caught in getSAMMetaData. This means that the samweb_client module cannot be found. The likely cause is that sam is not setup. Try setting up SAM with a command of the form:\n\nfunction setup_sam\n{\n export PRODUCTS=/grid/fermiapp/products/common/db/:$PRODUCTS\n setup sam_web_client\n export SAM_EXPERIMENT=nova\n}\n\nThen retrying."
            exit(1)
    if "os" not in dir(): import os
    # EXPERIMENT selects which experiment's SAM instance to query.
    assert("EXPERIMENT" in os.environ)
    sam = samweb_client.SAMWebClient(experiment=os.environ["EXPERIMENT"])
    metadata = sam.getMetadata(file_name)
    return metadata
94 ###########################
95 # get number of events in a file
def countEventsInRootFile(file_name, tree_name="Events", verbose=False):
    """Return the number of entries in a TTree of a ROOT file (0 on any failure).

    A "*" in file_name is resolved via findWildcardFile(). CAF files
    ("caf." in the name) are counted with the "recTree" tree instead of
    the default "Events".
    """
    import os
    if verbose: print "tools: --- Counting events in file: %s"%file_name
    if "*" in file_name:
        file_name = findWildcardFile(file_name, verbose=verbose)
        if not file_name:
            print "tools: File doesn't exist locally."
            return 0
    if (not os.path.exists(file_name)):
        print "tools: File %s not found"%file_name
        return 0

    # CAF files keep their entries in "recTree" rather than "Events".
    if "caf." in file_name: tree_name = "recTree"
    # Lazy ROOT import: it is slow to load and not always available.
    if "ROOT" not in dir():
        if verbose: print "tools: importing ROOT"
        import ROOT
        if verbose: print "tools: done"
        # Silence ROOT info/warning chatter; keep real errors.
        ROOT.gErrorIgnoreLevel = ROOT.kError

    if verbose: print "tools: opening file"
    root_file = ROOT.TFile.Open(file_name,"READ")
    if not root_file.IsOpen():
        print "tools: output file not found, returning 0"
        return 0

    if verbose: print "tools: open"
    tree = root_file.Get(tree_name)
    events = tree.GetEntries()
    if verbose: print "tools: found %i events in tree %s"%(events,tree)
    root_file.Close()
    return events
127 ###########################
128 # Detect cosmic veto efficiency
129 # This doesn't work because we can't extract the FilterList info
130 # from the root file. Also there is no product with a trivial
131 # relationship to the number of slices filtered.
def cosmicVetoEfficiencyDefunct(file_name, tree_name="Events", verbose=True):
    """Defunct attempt to measure the cosmic veto efficiency.

    Kept for reference: the FilterList info cannot be extracted from the
    ROOT file and no product maps trivially to the number of filtered
    slices. Returns False when no veto branch exists, otherwise the raw
    entry count of the tree.
    """
    if verbose: print "tools: --- Detecting cosmic veto efficiency: %s"%file_name
    # Lazy ROOT import; silence info/warning chatter.
    if "ROOT" not in dir():
        if verbose: print "tools: importing ROOT"
        import ROOT
        if verbose: print "tools: done"
        ROOT.gErrorIgnoreLevel = ROOT.kError

    if verbose: print "tools: opening file"
    root_file = ROOT.TFile.Open(file_name)
    assert root_file.IsOpen()

    if verbose: print "tools: open"
    tree = root_file.Get(tree_name)
    events = tree.GetEntries()
    if verbose: print "tools: found %i events in tree %s"%(events,tree)

    leaves = tree.GetListOfLeaves()
    if verbose: print "tools: found %i leaves"%(len(leaves))

    # check if the necessary products exist
    l_clusters = False
    l_veto = False
    for leaf in leaves:
        name = leaf.GetName()
        # veto
        if "FilterList_veto" in name:
            if verbose: print "tools: found veto branch: %s"%name
            l_veto = name

        # clusters
        if ("Clusters_slicer" in name) and (".obj.fView" in name):
            if verbose: print "tools: found cluster branch: %s"%name
            l_clusters = name

    if not l_veto:
        if verbose: print "tools: No veto object found"
        return False

    assert(l_clusters), "tools: veto object found: %s, but no clusters!"%l_veto

    # if there is a veto and there are clusters then we can work out the efficiency

    n_clusters = 0
    for i_entry in range(events):
        tree.GetEntry(i_entry)
        if verbose: print "tools: Entry[%i]"%i_entry

        this_n_clusters = tree.GetLeaf(l_clusters).GetLen()
        if verbose: print "tools: clusters: %i"%this_n_clusters
        n_clusters += this_n_clusters
    # NOTE(review): n_clusters is accumulated but never used; only the raw
    # entry count is returned -- part of why this function is defunct.
    return events
184 ###########################
185 # Make a folder
def makeFolder(folder):
    """Create *folder* if os.stat reports it missing (EAFP style).

    Catches OSError only (path absent) instead of the original bare
    except, so unrelated exceptions are no longer silently treated as
    "folder missing".
    NOTE(review): this duplicates and shadows an earlier makeFolder
    definition in this file.
    """
    import os
    try:
        os.stat(folder)
    except OSError:
        print("tools: Making folder: %s" % folder)
        os.mkdir(folder)
193 ###########################
194 # Open a file and write it to a string
def fileToString(the_file):
    """Return the full contents of *the_file* as a single string.

    Fixes a leaked file handle (the original never closed the file) and
    the quadratic line-by-line string concatenation.
    """
    with open(the_file, "r") as f:
        return f.read()
200 ###########################
201 # Find a FHiCL file
def findFHiCL(fcl, verbose=False):
    """Search the local directory and FHICL_FILE_PATH for *fcl*.

    Returns the first matching path, or "" when not found.

    Bug fix: search-path entries were concatenated directly onto the
    file name ("%s%s"), so any FHICL_FILE_PATH entry not ending in "/"
    could never match; os.path.join is used instead.
    """
    import os
    if verbose: print("tools: Searching FHICL_FILE_PATH for fcl")
    paths = ["./"]
    if "FHICL_FILE_PATH" not in os.environ:
        print("tools: Warning: FHICL_FILE_PATH environment variable not defined, therefore I can't search it. Will only try local directories.")
    else:
        paths.extend(os.environ["FHICL_FILE_PATH"].split(":"))
    if verbose: print("tools: Searching paths: %s" % paths)

    for path in paths:
        candidate = os.path.join(path, fcl)
        if os.path.exists(candidate):
            if verbose: print("tools: Found FHiCL in %s" % path)
            return candidate
    print("tools: Cannot find FHiCL")
    return ""
224 ###########################
225 # Set memory limits
def setLimits(memory):
    """Cap the resident-set-size rlimit (soft and hard) at *memory* bytes."""
    import os
    import resource
    # Intended usage: in a child process, after fork() but before exec().
    print("Setting resource limit in child (pid %d)" % os.getpid())
    resource.setrlimit(resource.RLIMIT_RSS, (memory, memory))
231 ###########################
232 # Get date from logical folder name
def dateFromFolder(folder):
    """Turn a RELEASE_Y-M-D-H-M-S folder name into "H:M:S D/M/Y"."""
    stamp = folder.split("_")[1].split("-")
    time_part = "%s:%s:%s" % (stamp[3], stamp[4], stamp[5])
    date_part = "%s/%s/%s" % (stamp[2], stamp[1], stamp[0])
    return time_part + " " + date_part
236 ###########################
237 # Get datetime from logical folder name
def datetimeFromFolder(folder):
    """Return a datetime (date part only) parsed from a logical folder name."""
    from datetime import datetime
    year, month, day = [int(t) for t in folder.split("_")[1].split("-")[:3]]
    return datetime(year, month, day)
242 ###########################
243 # Get date from logical time string
def dateFromTimeString(time_str):
    """Turn a "Y-M-D-H-M-S" time string into "H:M:S D/M/Y"."""
    parts = time_str.split("-")
    return "%s:%s:%s %s/%s/%s" % (parts[3], parts[4], parts[5],
                                  parts[2], parts[1], parts[0])
247 ###########################
248 # Get release from logical folder name
def releaseFromFolder(folder):
    """Extract the release token (text before the first "_") from a folder path."""
    if "/" in folder:
        # Use the last path component, or the one before a trailing slash.
        index = -2 if folder[-1] == "/" else -1
        folder = folder.split("/")[index]
    return folder.split("_")[0]
255 ###########################
256 # Determine that status of a batch log
257 # 0 = unknown, 1 = good, 2 = running, 3 = bad
def batchLogStatus(log):
    """Classify a batch log: 1 = good, 2 = running (default), 3 = bad."""
    for line in log.split("\n"):
        # A removal or abnormal-termination marker wins over anything later.
        if ("SYSTEM_PERIODIC_REMOVE" in line) or ("Abnormal termination" in line):
            return 3
        if "Normal termination" in line:
            return 1
    # No terminal marker seen: assume the job is still running.
    return 2
266 ###########################
267 # detect which chains have metrics
def detectCaching(folder, verbose=True):
    """Return the chain names in *folder* that already have metric HTML pages."""
    if verbose:
        print("tools: --- Detect Caching")
        print("tools: folder: %s" % folder)

    import glob
    import re
    # Only ND/FD pickle files correspond to chains.
    pickles = [g for g in glob.glob("%s/*.pkl" % folder) if re.search("(ND|FD)", g)]

    cached_chains = []
    metric_dir = "%s/metrics/" % folder

    for pickle_file in pickles:
        # get chain name from folder
        chain_name = chainNameFromFileName(pickle_file)
        # check for existing metrics
        metrics = glob.glob("%s%s*.html" % (metric_dir, chain_name))
        if len(metrics):
            if verbose: print("tools: %s has metrics in %s" % (chain_name, metric_dir))
            cached_chains.append(chain_name)
        elif verbose:
            print("tools: %s has no metrics in %s" % (chain_name, metric_dir))

    return cached_chains
290 ###########################
291 # get the name of a chain from the logical file name
def chainNameFromFileName(file_name):
    """Derive the chain name from a logical file name.

    Uses 3 leading tokens for data files, 4 for genie files, 2 otherwise;
    a trailing ".pkl" is stripped.
    """
    base_name = file_name.split("/")[-1]
    tokens = base_name.split("_")
    if "data" in tokens:
        chain_name = "%s_%s_%s" % (tokens[0], tokens[1], tokens[2])
    elif "genie" in tokens:
        chain_name = "%s_%s_%s_%s" % (tokens[0], tokens[1], tokens[2], tokens[3])
    else:
        chain_name = "%s_%s" % (tokens[0], tokens[1])
    if ".pkl" in chain_name:
        chain_name = chain_name[:-4]
    return chain_name
299 ###########################
300 # Does this folder follow our logical naming conventions
def isLogicalFolder(folder):
    """True if *folder* follows the logical RELEASE_DATETIME naming convention."""
    known_prefix = ((folder[0] == "S")
                    or (folder[:2] == "FA")
                    or (folder[0] == "R")
                    or (folder[:11] == "development")
                    or (folder[:9] == "first-ana"))
    return known_prefix and (len(folder.split("_")) == 2)
304 ###########################
305 # convert a chain name into chain and tier
307  print name
308  assert False
309 ###########################
310 # find a chain name from a launch jobs command
312  tokens = command.split(" ")
313  for token in tokens:
314  if ("ND" in token) or ("FD" in token):
315  chain = token
316  # if a tier selection was specified then the token will include these after a ":"
317  chain_name = token.split(":")[0]
318  #print command, chain, chain_name
319  return chain, chain_name
320  assert(False), "tools: Unable to find chain name from command: %s"%command
321 ###########################
322 # from a definition name get an example fcl
def getConfigFromDef(definition):
    """Fetch an example fcl configuration for a SAM definition.

    Picks the alphabetically-first file of the definition and runs the
    get_config.sh helper from $NOVAPRODVALID_DIR on it, then reads back
    the generated "<file>.log". Returns the fcl text, or "No files" when
    the definition is empty.
    """
    import samweb_client
    import os
    sam = samweb_client.SAMWebClient(experiment="nova")
    files = samweb_client.files.listFiles(sam, defname=definition)
    if len(files) > 0:
        files.sort()
        example_file = files[0]
        # NOTE(review): the file name is interpolated into a shell command
        # unquoted -- safe only for SAM-style file names with no spaces.
        cmd = 'bash -c "%s/scripts/get_config.sh %s %s.log"'%(os.environ["NOVAPRODVALID_DIR"],example_file,example_file)
        os.system(cmd)
        fcl = open("%s.log"%example_file,"r").read()
        return fcl
    else:
        return "No files"
337 ###########################
338 # Run Chris Backhouse's file size ana and return the output
def fileSizeAna(file_name):
    """Run Chris Backhouse's file_size_ana.py on *file_name*.

    Returns [tmp.txt text, rings.png bytes, bars.png bytes] with None for
    any missing output, or None when the script itself is not found under
    $NOVAPRODVALID_DIR.
    """
    import os
    # check that the program exists
    if not os.path.exists("%s/testing/file_size_ana.py"%os.environ["NOVAPRODVALID_DIR"]):
        print "tools: %s/testing/file_size_ana.py not found"%os.environ["NOVAPRODVALID_DIR"]
        return None
    commands = []
    # NOTE(review): ">&" is csh-style redirection but os.system uses
    # /bin/sh -- confirm stderr is actually captured here.
    commands.append("python %s/testing/file_size_ana.py -b -j %s >& tmp.txt")
    #commands.append("python file_size_ana.py -b -r %s")
    #commands.append("python file_size_ana.py -b -l %s")
    for command in commands:
        os.system(command%(os.environ["NOVAPRODVALID_DIR"],file_name))
    # Collect whatever outputs the script produced, in a fixed order.
    files = ["tmp.txt","rings.png","bars.png"]
    return_list = []
    for file in files:
        if os.path.exists(file):
            return_list.append(open(file,"r").read())
        else:
            return_list.append(None)
    return return_list
359 ###########################
360 # get number of events in a file using the command line tool
def cmdLineCountEventsInRootFile(file_name, tree_name="Events", verbose=False):
    """Count TTree entries via the countEventsInFile.py helper script.

    Avoids importing ROOT in this process: the helper writes the count to
    "<file_name>.log", which is read back and returned as an int.
    CAF files (".caf." in the name) are counted with "recTree".
    """
    import os
    if verbose: print "tools: --- Counting events in file: %s using command line"%file_name
    if ".caf." in file_name: tree_name = "recTree"
    cmd = 'bash -c "python %s/utils/countEventsInFile.py -i %s -t %s -o %s.log"'\
        %(os.environ["NOVAPRODVALID_DIR"], file_name, tree_name, file_name)
    if verbose: print "tools: running: %s"%cmd
    os.system(cmd)
    n_events = int(open("%s.log"%file_name, "r").read())
    if verbose: print "tools: counted %i events"%n_events
    return n_events
372 ###########################
373 # parse an error log to determine it's status
def parseErrorLog(log, verbose=True):
    """Scan an error log; return 4 on the first real error line, else None.

    Known-benign noise is filtered out: broken pipes, LOCK lines, date
    stamps, expected mkdir/chgrp/chmod output on /fife and /scratch,
    rm output for out.json/timing.db, and ROOT TCanvas Info lines.

    Bug fix: a whitespace-only line used to crash with IndexError on
    line.split()[0]; such lines are now skipped like empty ones.
    """
    if verbose: print("tools: ---Parsing error log")
    lines = log.split("\n")
    if verbose: print("tools: read %i lines" % len(lines))
    for i_l, line in enumerate(lines):
        # Skip empty and whitespace-only lines.
        if not line.strip():
            continue
        if verbose:
            print("tools: unfiltered error line[%i/%i]:" % (i_l + 1, len(lines)))
            print("tools: - %s" % repr(line))
        if "yes: standard output: Broken pipe" in line: continue
        if "yes: write error" in line: continue
        if line[:5] == "LOCK ": continue
        if line.split()[0] in ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]: continue
        if ("/fife" in line) and (line[:5] in ["mkdir", "chgrp", "chmod"]): continue
        if ("/scratch" in line) and (line[:5] in ["mkdir", "chgrp", "chmod"]): continue
        if ("out.json" in line) and (line[:3] in ["rm:"]): continue
        if ("timing.db" in line) and (line[:3] in ["rm:"]): continue
        if ("TCanvas" in line) and (line[:4] in ["Info"]): continue
        if verbose: print("tools: this is an error, returning 4")
        return 4
    return None
395 ###########################
396 # convert a logical folder name to a datetime objects
def folderNameToDateTime(name, tagged_release=False, hours=False):
    """Convert a logical folder name (RELEASE_DATETIME) to a datetime.

    With tagged_release=True the date comes from the release token
    (FAyy-mm-dd / Syy-mm-dd, year offset by 2000); otherwise from the
    six-field datetime token after the underscore. The *hours* flag is
    accepted for interface compatibility but unused.
    """
    from datetime import datetime
    assert len(name.split("_")) == 2, "Folder name %s does not follow logical convention of RELEASE_DATETIME"%name
    if tagged_release:
        assert name[:2]=="FA" or name[0]=="S", "Folder name %s is not a tagged release"%name
        fields = [int(t) for t in name.split("_")[0].strip("FA").strip("S").strip("a").split("-")]
        assert len(fields) == 3, "Folder datetime does not follow logical convention YEAR-MONTH-DAY-HOUR-MINUTE-SECOND"
        return datetime(2000 + fields[0], fields[1], fields[2])
    fields = [int(t) for t in name.split("_")[1].split("-")]
    assert len(fields) == 6, "Folder datetime does not follow logical convention YEAR-MONTH-DAY-HOUR-MINUTE-SECOND"
    return datetime(*fields)
411 ###########################
412 # convert a script line to a date
414  from datetime import datetime
415  assert len(line.split()) == 6, "Line: %s does not follow the datetime convention"%line
416  # format expected: Fri Jan 9 00:01:26 CST 2015
417  return datetime.strptime(line,"%a %b %d %X %Z %Y")
418 ###########################
419 # get the link to a station monitor from a log
421  link = False
422  for line in log:
423  if "Station monitor: " in line:
424  link = line.split("monitor: ")[1].strip()
425  if "Station monitor will be " in line:
426  link = line.split("will be ")[1].strip()
427  if not link:
428  print "tools: Couldn't find link in log:"
429  print log
430  assert False
431  # print link
432  return link
433 ###########################
434 # count the number of files in a directory
def countFilesInDirectory(directory, wildcard="*"):
    """Count the entries in *directory* matching *wildcard*."""
    import glob
    matches = glob.glob("%s/%s" % (directory, wildcard))
    return len(matches)
438 ###########################
439 # Get the sam definition summary
def samDefinitionSummary(input_def):
    """Return the SAM files summary for *input_def*, preferring the local cache."""
    from CachedSamDefinitionSummary import cached
    # Serve from the pre-computed cache when possible (avoids a SAM query).
    if input_def in cached.keys():
        return cached[input_def]
    # NOTE(review): the error text below says getSAMMetaData -- it was
    # copied from that function; the advice still applies.
    if "samweb_client" not in dir():
        try:
            import samweb_client
        except ImportError:
            print "ImportError caught in getSAMMetaData. This means that the samweb_client module cannot be found. The likely cause is that sam is not setup. Try setting up SAM with a command of the form:\n\nfunction setup_sam\n{\n export PRODUCTS=/grid/fermiapp/products/common/db/:$PRODUCTS\n setup sam_web_client\n export SAM_EXPERIMENT=nova\n}\n\nThen retrying."
            exit(1)
    if "os" not in dir(): import os
    sam = samweb_client.SAMWebClient(experiment="nova")
    return samweb_client.files.listFilesSummary(sam, defname=input_def)
453 ###########################
454 # Get the list of files in a sam definition
def getSamDefinitionFiles(input_def):
    """Return the list of file names in the SAM definition *input_def*."""
    # NOTE(review): the error text below says getSAMMetaData -- it was
    # copied from that function; the advice still applies.
    if "samweb_client" not in dir():
        try:
            import samweb_client
        except ImportError:
            print "ImportError caught in getSAMMetaData. This means that the samweb_client module cannot be found. The likely cause is that sam is not setup. Try setting up SAM with a command of the form:\n\nfunction setup_sam\n{\n export PRODUCTS=/grid/fermiapp/products/common/db/:$PRODUCTS\n setup sam_web_client\n export SAM_EXPERIMENT=nova\n}\n\nThen retrying."
            exit(1)
    if "os" not in dir(): import os
    sam = samweb_client.SAMWebClient(experiment="nova")
    return samweb_client.files.listFiles(sam,defname=input_def)
465 ###########################
466 # Compare two lists of files by run/subrun and return those missing
def findMissingFiles(test,nominal,verbose=True,only_nominal=True,cycles=True):
    """Compare two file lists by run/subrun; return nominal files missing a partner.

    test, nominal: lists of logical file names (parsed with findRun/findSubRun).
    only_nominal: skip run/subruns that have no nominal file at all.
    cycles: use the "s..._c..." pair as the subrun key (see findSubRun).
    Returns a list of nominal file-name lists, one per inconsistent run/subrun.
    """
    # all_runs[run][subrun]["nominal"/"test"] -> list of file names
    all_runs = {}

    # nominal files
    for nominal_file in nominal:
        run = findRun(nominal_file)
        subrun = findSubRun(nominal_file,cycles=cycles)
        if run not in all_runs.keys():
            all_runs[run] = {}
        if subrun not in all_runs[run].keys():
            all_runs[run][subrun] = {}

        if "nominal" not in all_runs[run][subrun].keys():
            all_runs[run][subrun]["nominal"] = []
        all_runs[run][subrun]["nominal"].append(nominal_file)
    if verbose: print "tools: nominal runs found: %i"%len(all_runs.keys())

    # test files
    for test_file in test:
        # strip any directory part before parsing run/subrun
        test_file = test_file.split("/")[-1]
        run = findRun(test_file)
        subrun = findSubRun(test_file,cycles=cycles)
        if run not in all_runs.keys():
            all_runs[run] = {}
        if subrun not in all_runs[run].keys():
            all_runs[run][subrun] = {}

        if "test" not in all_runs[run][subrun].keys():
            all_runs[run][subrun]["test"] = []
        all_runs[run][subrun]["test"].append(test_file)

    # check for missing files
    if verbose: print "tools: Checking for inconsistencies"
    missing = {}
    # NOTE(review): `duplicate` is never populated or used.
    duplicate = {}
    input_list = ["nominal","test"]
    for definition in input_list: missing[definition] = {}

    list_of_missing_files = []
    # NOTE(review): keys() returning a sortable list is Python 2 only.
    runs = all_runs.keys()
    runs.sort()
    for run in runs:
        subruns = all_runs[run].keys()
        subruns.sort()
        for subrun in subruns:
            definitions = all_runs[run][subrun]

            # Look for missing files: a run/subrun should have both sides.
            if len(definitions) != len(input_list):
                # print "run : missing file found for run %i, subrun %s"%(run,subrun)
                for definition in input_list:
                    if definition in definitions:
                        # print "run : - def:%s has file(s): %s"%(definition,repr(definitions[definition]))
                        pass
                    else:
                        # print "run : - def:%s doesn't have this run/subrun"%(definition)
                        if run not in missing[definition].keys():
                            missing[definition][run] = []
                        missing[definition][run].append(subrun)
                # Only report run/subruns that at least exist on the nominal side.
                if only_nominal and ("nominal" not in all_runs[run][subrun].keys()): continue
                list_of_missing_files.append(all_runs[run][subrun]["nominal"])
    return list_of_missing_files
529 
530 ###########################
531 # Find logical run name
def findRun(file_name):
    """Return the run number from the "r<digits>" token of a logical file name."""
    for token in stripUniqueifier(file_name).split("_"):
        if token[0] == "r":
            return int(token[1:])
    assert False, "Run can't be found in file: %s"%file_name
539 ###########################
540 # Find logical subrun name
def findSubRun(file_name,cycles=False):
    """Return the subrun from the "s..." token of a logical file name.

    With cycles=True the subrun key is the "s..._<next token>" pair
    (subrun plus cycle) returned as a string; otherwise an int.
    """
    tokens = stripUniqueifier(file_name).split("_")
    for i, token in enumerate(tokens):
        # "swap" also starts with "s" but is not a subrun token.
        if token[0] != "s" or token == "swap":
            continue
        if cycles:
            return "%s_%s"%(tokens[i],tokens[i+1])
        return int(token[1:])
    assert False, "Sub-run can't be found in file: %s"%file_name
555 ###########################
556 # Find logical cycle
def findCycle(file_name):
    """Return the cycle number: the "c<digits>" token following an "s" token."""
    tokens = stripUniqueifier(file_name).split("_")
    for i, token in enumerate(tokens):
        if token[0] == "c" and tokens[i-1][0] == "s":
            return int(token[1:])
    assert False, "Cycle can't be found in file: %s"%file_name
565 ###########################
566 # Find logical release name
def findRelease(file_name):
    """Return the release token of a logical file name.

    A release token has three dashes (R releases) or two (most others).
    """
    for token in stripUniqueifier(file_name).split("_"):
        if token.count("-") in (3, 2):
            return token
    assert False, "Release can't be found in file: %s"%file_name
576 ###########################
577 # check if a file is being written to
578 # from:
579 # http://stackoverflow.com/questions/21126108/ensuring-that-my-program-is-not-doing-a-concurrent-file-write/21149744#21149744
def isFileBeingWrittenInto(filename, max_dt = 120):
    """True if *filename* was modified within the last *max_dt* seconds.

    From:
    http://stackoverflow.com/questions/21126108/ensuring-that-my-program-is-not-doing-a-concurrent-file-write/21149744#21149744
    """
    import os
    import time
    age = time.time() - os.stat(filename).st_mtime
    return age < max_dt
591 ###########################
592 # Get the size of a FLAT directory in bytes
593 # Sauce: http://stackoverflow.com/questions/1392413/calculating-a-directory-size-using-python
594 # Sauce includes recursive varieties if needed
def flatDirectorySize(dir_path):
    """Total size in bytes of the entries directly inside *dir_path*.

    Flat, not recursive -- see
    http://stackoverflow.com/questions/1392413/calculating-a-directory-size-using-python
    for recursive varieties.
    """
    import os
    return sum(os.path.getsize("%s/%s" % (dir_path, entry))
               for entry in os.listdir(dir_path))
604 ###########################
605 # determine SAM project status from submission log
def getSAMProjectStatus(monitor_text,skip=False):
    """Return (status, n_active_processes) for the SAM project in a submission log.

    status codes: 1 started, 2 running, 3 ended complete,
    4 ended incomplete, 5 unknown (also returned when skip=True).
    """
    monitor_url = getSAMURL(monitor_text)
    # NOTE(review): the error text below says getSAMMetaData -- it was
    # copied from that function; the advice still applies.
    if "samweb_client" not in dir():
        try:
            import samweb_client
        except ImportError:
            print "ImportError caught in getSAMMetaData. This means that the samweb_client module cannot be found. The likely cause is that sam is not setup. Try setting up SAM with a command of the form:\n\nfunction setup_sam\n{\n export PRODUCTS=/grid/fermiapp/products/common/db/:$PRODUCTS\n setup sam_web_client\n export SAM_EXPERIMENT=nova\n}\n\nThen retrying."
            exit(1)
    if skip:
        return 5, 0

    if "os" not in dir(): import os
    sam = samweb_client.SAMWebClient(experiment="nova")
    project = samweb_client.projects.projectSummary(sam, monitor_url)
    # Count processes currently marked "active".
    n_processes = 0
    if ("processes" in project.keys()):
        for p in project["processes"]:
            if ("status" not in p.keys()): continue
            if p["status"] == "active": n_processes +=1

    #print project
    status = 5 # unknown
    if ("project_status" in project.keys()) and (project["project_status"] == "ended incomplete"):
        status = 4 # ended incomplete
    elif ("project_status" in project.keys()) and (project["project_status"] == "ended complete"):
        status = 3 # ended
    elif ("processes" in project.keys()) and ("project_status" in project.keys()) and (len(project["processes"]) > 0):
        status = 2 #running
    elif ("project_status" in project.keys()) and (project["project_status"] == "running"):
        status = 1 # started
    return status, n_processes
637 ###########################
638 # get the SAM URL from submission log
def getSAMURL(monitor_text):
    """Extract the SAM project monitoring URL from submission-log lines.

    Asserts if no URL-bearing line is present.
    """
    marker = "got back string result: "
    for line in monitor_text:
        if marker in line:
            url = line.split(marker)[1].strip()
            # Normalise to the plain-http monitoring port.
            return url.replace("8483","8480").replace("https","http")
        if "http://samweb.fnal.gov:8480/sam/nova/api/running/projects" in line:
            return line.strip()
    assert False, "Couldn't determine URL from log"
647 ###########################
648 # Scan a directory and return a set of Gantt inputs
def directoryToGantt(wildcard, status, to_skip=[], verbose=False,gantt=False):
    """Build (or extend) a Gantt-chart input dict from cron log files.

    wildcard: glob pattern selecting cronlog files.
    status: dict keyed by file name -> (done, error, delta_time, start_date, end_date).
    to_skip: substrings; files containing any of them are skipped.
    gantt: an existing gantt dict to extend, or False to start a new one.
    NOTE(review): the mutable default to_skip=[] is shared between calls.
    """

    if not gantt:
        gantt = {}
        gantt["start"] = []
        gantt["end"] = []
        gantt["name"] = []
        gantt["class"] = []

    import glob

    file_list = glob.glob(wildcard)
    file_list.sort()
    for f in file_list:
        skip = False
        for s in to_skip:
            if s in f: skip = True
        if skip: continue
        # Files without a status entry cannot be charted.
        try:
            this_status = status[f]
        except KeyError:
            print f, "has no status, skipping"
            continue

        # Chart label: the part after "cronlog_", extension dropped.
        name = f.split("/")[-1].split("cronlog_")[1][:-4].replace("_"," ")
        gantt["name"].append(name)

        # format: done, error, delta_time, start_date, end_date

        if (this_status[0] != False) and (this_status[1] ==False):
            gantt["class"].append("good")
        elif this_status[1] == False:
            gantt["class"].append("running")
        else:
            gantt["class"].append("bad")

        if this_status[3] != None:
            gantt["start"].append({"hour":this_status[3].hour,"minute":this_status[3].minute})
        else:
            gantt["start"].append({"hour":0,"minute":0})

        if this_status[4] != None:
            gantt["end"].append({"hour":this_status[4].hour,"minute":this_status[4].minute})
        else:
            gantt["end"].append({"hour":0,"minute":0})

    return gantt
696 ###########################
697 # Scan a crontab and return a set of Gantt inputs
def crontabToGantt(crontab, to_skip=[], verbose=False):
    """Build Gantt-chart inputs ("pending" entries) from a crontab's schedule.

    Each matching cron line yields one entry per scheduled (hour, minute),
    with the end time taken from a per-task duration estimate (default 10
    minutes).
    NOTE(review): the mutable default to_skip=[] is shared between calls,
    and the SHELL/MAILTO appends below grow it on every call.
    """
    gantt = {}
    gantt["start"] = []
    gantt["end"] = []
    gantt["name"] = []
    gantt["class"] = []

    # Rough (hours, minutes) duration estimates per known task name.
    time_estimates = {}
    time_estimates["make keep up page"] = (0,20)
    time_estimates["make testing page quick"] = (0,5)
    time_estimates["make first ana page"] = (0,15)
    time_estimates["submit development tests"] = (0,5)
    time_estimates["submit tagged release tests"] = (0,5)
    time_estimates["clean production tests"] = (0,5)
    time_estimates["my sam"] = (0,5)

    # Crontab environment lines are not jobs.
    to_skip.append("SHELL")
    to_skip.append("MAILTO")
    for line in crontab.split("\n"):
        if len(line) and (line[0] == "#"): continue
        skip = False
        for s in to_skip:
            if s in line: skip = True
        if skip: continue

        tokens = line.split()
        # Only lines with the expected token count look like job lines.
        if len(tokens) not in [8,9]:continue
        # print line
        # print tokens
        # Task name: the part of the log path after "cronlog_", extension dropped.
        name = tokens[-1].split("/")[-1].split("cronlog_")[1][:-4].replace("_"," ")
        minutes = tokens[0]
        # Expand "*/N" step syntax into an explicit comma-separated list.
        if "*/" in minutes: minutes = ",".join([str(a) for a in range(24) if a%(int(minutes.split("/")[-1]))==0])
        hours = tokens[1]
        if hours == "*": hours = "0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23"
        if "*/" in hours: hours = ",".join([str(a) for a in range(24) if a%(int(hours.split("/")[-1]))==0])
        # if hours == "*/2": hours = "0,2,4,6,8,10,12,14,16,18,20,22"
        # if hours == "*/1": hours = "0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23"
        for hour in hours.split(","):
            for minute in minutes.split(","):
                gantt["name"].append(name)
                gantt["class"].append("pending")
                gantt["start"].append({"hour":int(hour),"minute":int(minute)})
                if name in time_estimates.keys():
                    offset = time_estimates[name]
                else:
                    offset = (0,10)
                gantt["end"].append({"hour":int(hour)+offset[0],"minute":int(minute)+offset[1]})
    return gantt
746 ###########################
747 # Get jobsub client job id from submission log
def getJobID(log):
    """Return the jobsub job id from submission-log lines, or False if absent."""
    job_id = False
    for line in log:
        if "JobsubJobId of first job" in line:
            job_id = line.split(":")[1].strip()
    if job_id == False:
        print("tools: getJobID failed. Given log:")
        for line in log:
            print(line.strip())
    return job_id
757 ###########################
758 # Add definitions from a dictionary of dictionaries of definitions to a list
def addDefs(all_defs, defs):
    """Append every definition in the two-level dict *defs* to the list *all_defs*."""
    for outer_key in defs.keys():
        inner = defs[outer_key]
        for inner_key in inner.keys():
            all_defs.append(inner[inner_key])
763 ###########################
764 # get a list of sam definitions
def getListOfDefinitions(prod=False,wildcard=False,filters=[],get_n_files=False,verbose=False):
    """Return a list of Definition objects for matching SAM definitions.

    prod: keep only definitions starting with "prod_".
    wildcard: keep only definitions containing this substring.
    filters: drop definitions containing any of these substrings.
    get_n_files: query a files summary for uncached definitions (slow;
    falls back to the cached "empty" entry on query failure).
    NOTE(review): the mutable default filters=[] is shared between calls.
    """
    if verbose:
        print "tools: --- getListOfDefinitions"
        print "tools: wildcard: %s"%wildcard
        print "tools: filters: %s"%repr(filters)
    from Definition import Definition
    from CachedSamDefinitionSummary import cached
    import samweb_client
    sam = samweb_client.SAMWebClient(experiment="nova")
    all_definitions = sam.listDefinitions()
    all_definitions.sort()
    if verbose: print "tools: found %i definitions"%len(all_definitions)
    filtered_defs = []
    for i,d in enumerate(all_definitions):
        if verbose: print "tools: [%i/%i]: %s"%(i+1,len(all_definitions),d)
        if prod and d[:5] != "prod_": continue
        if wildcard and (wildcard not in d): continue
        filtered = False
        for f in filters:
            if f in d: filtered =True
        if filtered: continue
        definition = Definition(d)
        definition.description = sam.descDefinitionDict(d)
        # Prefer the local cache over a (slow) SAM summary query.
        if d in cached.keys():
            definition.addInfo(cached[d])
        else:
            if get_n_files:
                try:
                    info = sam.listFilesSummary(defname=d)
                except:
                    # Best effort: on a timed-out query fill zeros instead of failing.
                    print "PMS : Query of dataset: %s timed out, filling zeros"%d
                    info = cached["empty"]
                definition.addInfo(info)
            else:
                definition.addInfo(cached["empty"])
        filtered_defs.append(definition)
        # if len(filtered_defs) > 2: break
    if verbose: print "tools: returning %i definitions"%len(filtered_defs)
    return filtered_defs
804 ###########################
805 # count the number of running processes in a sam status
807  n_processes = 0
808  for key in status.keys():
809  project_status_dict = status[key][0]
810  for project in project_status_dict.keys():
811  for i in project_status_dict[project]["processes"]:
812  n_processes += i
813  return n_processes
814 ###########################
815 # Strip the hex uniquifier from the start of a file name if present
def stripUniqueifier(fname):
    """Remove a leading 8-4-4-4-12 hex uniquifier prefix from a file name, if present."""
    # Too short to carry a 37-character uniquifier prefix plus a name.
    if len(fname) < 38:
        return fname
    tokens = fname.split("-")
    if len(tokens) < 6:
        return fname
    # The uniquifier's dash-separated segment lengths are fixed.
    if [len(t) for t in tokens[:5]] == [8, 4, 4, 4, 12]:
        # Drop the 37-character "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx-" prefix.
        return fname[37:]
    return fname
830 ###########################
831 # convert a string into a purely alphanumeric one
832 # adapted from:
833 # http://stackoverflow.com/questions/1276764/stripping-everything-but-alphanumeric-chars-from-a-string-in-python
def makeAlphanumeric(input):
    """Return *input* with every non-alphanumeric character replaced by "_"."""
    return "".join(c if c.isalnum() else "_" for c in input)
void split(double tt, double *fr)
def samDefinitionSummary(input_def)
Get the sam definition summary.
def countEventsInRootFile(file_name, tree_name="Events", verbose=False)
get number of events in a file
keys
Reco plots.
Definition: caf_analysis.py:46
def listFilesSummary(samweb, dimensions=None, defname=None)
Definition: files.py:80
def countFilesInDirectory(directory, wildcard="*")
count the number of files in a directory
def findMissingFiles(test, nominal, verbose=True, only_nominal=True, cycles=True)
Compare two lists of files by run/subrun and return those missing.
def getConfigFromDef(definition)
from a definition name get an example fcl
def dateFromFolder(folder)
Get date from logical folder name.
def getSAMProjectStatus(monitor_text, skip=False)
determine SAM project status from submission log
def cmdLineCountEventsInRootFile(file_name, tree_name="Events", verbose=False)
get number of events in a file using the command line tool
def isLogicalFolder(folder)
Does this folder follow our logical naming conventions.
def chainNameFromFileName(file_name)
get the name of a chain from the logical file name
def cosmicVetoEfficiencyDefunct(file_name, tree_name="Events", verbose=True)
Detect cosmic veto efficiency. This doesn't work because we can't extract the FilterList info from the...
fvar< T > round(const fvar< T > &x)
Definition: round.hpp:23
def findChainNameFromCommand(command)
find a chain name from a launch jobs command
def getListOfDefinitions(prod=False, wildcard=False, filters=[], get_n_files=False, verbose=False)
get a list of sam definitions
def parseErrorLog(log, verbose=True)
parse an error log to determine its status
def getSAMURL(monitor_text)
get the SAM URL from submission log
def listFiles(samweb, dimensions=None, defname=None, fileinfo=False, stream=False)
Definition: files.py:47
def folderNameToDateTime(name, tagged_release=False, hours=False)
convert a logical folder name to a datetime objects
def getSamDefinitionFiles(input_def)
Get the list of files in a sam definition.
def findWildcardFile(file_name, verbose=False)
make a folder
def fileToString(the_file)
Open a file and write it to a string.
def isFileBeingWrittenInto(filename, max_dt=120)
check if a file is being written to from: http://stackoverflow.com/questions/21126108/ensuring-that-m...
def detectCaching(folder, verbose=True)
detect which chains have metrics
def releaseFromFolder(folder)
Get release from logical folder name.
def fileSizeAna(file_name)
Run Chris Backhouse's file size ana and return the output.
def cronlogLineToDate(line)
convert a script line to a date
def crontabToGantt(crontab, to_skip=[], verbose=False)
Scan a crontab and return a set of Gantt inputs.
def inspectFile(file_name)
Inspect a file.
def convertSize(size)
Parse file sizes into human readable format.
def makeFolder(folder)
make a folder
def dateFromTimeString(time_str)
Get date from logical time string.
def getStationMonitorLink(log)
get the link to a station monitor from a log
procfile open("FD_BRL_v0.txt")
def projectSummary(samweb, projecturl)
Definition: projects.py:174
def findSubRun(file_name, cycles=False)
Find logical subrun name.
def setLimits(memory)
Set memory limits.
def makeAlphanumeric(input)
convert a string into a purely alphanumeric one adapted from: http://stackoverflow.com/questions/1276764/stripping-everything-but-alphanumeric-chars-from-a-string-in-python
def findRelease(file_name)
Find logical release name.
def datetimeFromFolder(folder)
Get datetime from logical folder name.
def getSAMMetaData(file_name)
Get metadata from SAM.
TDirectory * dir
Definition: macro.C:5
def findCycle(file_name)
Find logical cycle.
exit(0)
def printEnvironment()
Print the important environment variables.
def batchLogStatus(log)
Determine that status of a batch log 0 = unknown, 1 = good, 2 = running, 3 = bad. ...
assert(nhit_max >=nhit_nbins)
def flatDirectorySize(dir_path)
Get the size of a FLAT directory in bytes Sauce: http://stackoverflow.com/questions/1392413/calculati...
def stripUniqueifier(fname)
Strip the hex uniquifier from the start of a file name if present.
def getNProcessesFromSamStatus(status)
count the number of running processes in a sam status
def findRun(file_name)
Find logical run name.
def getJobID(log)
Get jobsub client job id from submission log.
def findFHiCL(fcl, verbose=False)
Find a FHiCL file.
def nameToChainAndTier(name)
convert a chain name into chain and tier
def directoryToGantt(wildcard, status, to_skip=[], verbose=False, gantt=False)
Scan a directory and return a set of Gantt inputs.
def addDefs(all_defs, defs)
Add definitions from a dictionary of dictionaries of definitions to a list.