make_associated_cosmic_defs.py
Go to the documentation of this file.
#!/usr/bin/env python
#------------------------------------------------------------#
#
# usage: python make_associated_cosmic_defs.py <definition_name>
#
# note: this script is for prod5 and has the cosmic definitions
#       hard-coded (cosmics_fhc / cosmics_rhc below).
#
# help: lasquith@fnal.gov, scalvez@fnal.gov
#------------------------------------------------------------#
from __future__ import print_function
from builtins import str
from builtins import range

import sys
import shlex
import subprocess
import math
from subprocess import Popen, PIPE
from datetime import datetime

# hard-coded prod5 cosmics definitions, one per horn current
cosmics_fhc = "lasquith_fd_cosmics_p12359_goodruns_prod5"
cosmics_rhc = "scalvez_artdaq_fd_cosmics_p4678_goodruns_prod5"

print("If you are running this script, you will want to also cache the parent definition, e.g. novaproduction/overlays/cache.sh DEFNAME")
21 
#------------------------------------------------------------#
# the number of subruns to let sam handle in a single definition.
# could be optimised further for timing.
#------------------------------------------------------------#
splitLen = 200
# wall-clock start, used for the "time taken" report at the end
startTime = datetime.now()
# names of the fcl files whose run/subrun numbers are matched to cosmics
fcl_list = []
#------------------------------------------------------------#
# if True this will only make a small subset of 3*splitLen cosmic defs
#------------------------------------------------------------#
TEST = False

# if True the command-line definition is ignored and the fcl names are
# read from a hard-coded file list instead
HACK = False

if HACK:
    print("HACK!")
elif TEST:
    print("WARNING! You are running in TEST mode. The output defn will be incomplete, so please delete it.")
41 
#------------------------------------------------------------#
# parse the fcl definition name passed as arg to extract key
# info: DET_HORN_SWAP or DET_HORN_SWAP_SYST
#------------------------------------------------------------#
if not HACK:
    if len(sys.argv) < 2:
        print("usage: python make_associated_cosmic_defs.py <definition_name>")
        sys.exit(1)
    DEFNAME = sys.argv[1]

    # take a snapshot of the fcl def
    cmd0 = 'samweb take-snapshot ' + DEFNAME
    p0 = Popen(shlex.split(cmd0))
    p0.wait()

    DARRAY = DEFNAME.split("_")
    if DARRAY[0] == "prod" and len(DARRAY) > 7:
        BASENAME = DARRAY[3] + "_" + DARRAY[6] + "_" + DARRAY[7]
        # optional trailing fields, e.g. "lldown" and "6k2" in
        # prod_fcl_v03.62_fd_genie_N1810j0211a_fhc_nonswap_fhc_nova_v08_periods12359_batch1_v1_lldown_6k2
        if len(DARRAY) > 14:
            BASENAME += "_" + DARRAY[14]
        if len(DARRAY) > 15:
            BASENAME += "_" + DARRAY[15]
    else:
        print("Your definition is not of the form expected - e.g. prod_fcl_v03.59_fd_genie_N1810j0211a_rhc_nonswap_rhc_nova_v08_full_batch1_v1_cherenkov. Please either hack this script or ask Lily to hack it")
        # a definition we cannot parse is an error, so do not report success
        sys.exit(1)
    print("> Getting associated cosmics (%s)" % (BASENAME))

    if 'rhc' in BASENAME:
        cosmics = cosmics_rhc
    elif 'fhc' in BASENAME:
        cosmics = cosmics_fhc
    else:
        # without this branch 'cosmics' would be undefined further down
        print("Cannot tell from %s whether to use the rhc or fhc cosmics definition" % (BASENAME))
        sys.exit(1)
    print(">> Using hard-coded prod5 cosmics file %s for finding matching runs" % (cosmics))

    #------------------------------------------------------------#
    # issue command to list all files in fcl defname
    # retrieve stdout from command
    # make a list of the fcls
    #------------------------------------------------------------#
    # universal_newlines=True makes communicate() return text rather than
    # bytes, so the output can be split into lines on python 2 and 3 alike
    p1 = Popen(['samweb', 'list-files', 'dataset_def_name_newest_snapshot ', DEFNAME],
               stdout=PIPE, stderr=PIPE, universal_newlines=True)
    out1, err1 = p1.communicate()
    if p1.returncode != 0:
        print("samweb list-files failed for %s: %s" % (DEFNAME, err1.strip()))
        sys.exit(1)
    # drop blank lines so they are not counted as files
    fcl_list = [name.strip() for name in out1.splitlines() if name.strip()]

else:
    cosmics = cosmics_fhc
    BASENAME = "fd_fhc_nonswap_llup"
    file_list = "/nova/app/home/novapro/PROD5_SHIFTERS/mcgen/fcls/fhc_nonswap_llup_fcls_dropbox.txt"
    # DEFNAME is only reported in a later message; define it here so that
    # message does not fail in HACK mode
    DEFNAME = file_list
    with open(file_list, "r") as fcls:
        for f in fcls:
            # the fcl name is taken from the 9th space-separated field
            e = f.split(" ")[8]
            if "fardet" in e:
                fcl_list.append(e)
92 
#------------------------------------------------------------#
# divide the list of fcls into chunks of length splitLen
# if there are <3 files in last chunk, add them to the previous chunk
# make a list of lists, fcl_chunks[ chunk1,...,chunk<nfiles/splitLen>]
#------------------------------------------------------------#
fcl_chunks = []
print("Found %d files in %s" % (len(fcl_list), DEFNAME))
for first in range(0, len(fcl_list), splitLen):
    chunk = fcl_list[first:first + splitLen]
    if len(chunk) > 2:
        fcl_chunks.append(chunk)
    elif fcl_chunks:
        # too few files for a definition of their own: merge into the previous chunk
        fcl_chunks[-1] += chunk
# finished loop over fcls in input def

if not fcl_chunks:
    # reached for an empty list as well as for a list of only 1 or 2 names
    print("There are fewer than 3 fcls, something is wrong")
    sys.exit(1)

nsubdefs = len(fcl_chunks)
if TEST:
    nsubdefs = 2
print(">>> Divided this definition's %d files into %d bitesize chunks each with ~%d fcls" % (len(fcl_list), len(fcl_chunks), splitLen))
122 
#------------------------------------------------------------#
# loop over chunks to construct a command to make definitions
# each chunk has its own temp definition
# each definition is an AND of cosmics def and run,subrun numbers
#------------------------------------------------------------#
atime = datetime.now()
snapshot_ids = []
temp_defs = []

# run_subrun_cycle keys already seen; a set keeps the duplicate check cheap
run_subrub_cycle = set()
duplicates = 0

for cc, chunk in enumerate(fcl_chunks):
    if cc == 1:
        btime = datetime.now()
        print(">>>> first subdef time (s): ", (btime - atime))
    if cc > nsubdefs:
        break  # for test mode
    subset = str(cc).zfill(3)  # 000,001,...
    # the definition name for this subset of cosmics
    cosmics_def = "temp_prod5_cosmics_" + BASENAME + "_s" + subset

    # for each fcl in this chunk, extract the run and subrun number
    run_clauses = []
    for fcl in chunk:
        fields = fcl.split("_")
        # fields 7, 8 and 9 are read below, so at least 10 are required
        if len(fields) < 10:
            print("INFO: skipping malformed fcl name: ", fcl)
            continue
        run, subrun, cycle = fields[7], fields[8], fields[9]
        rsc = run + "_" + subrun + "_" + cycle
        if rsc in run_subrub_cycle:
            duplicates += 1
        else:
            run_subrub_cycle.add(rsc)

        # strip the r/s prefix and zero padding; an all-zero number
        # (e.g. subrun "s00") strips to nothing, so fall back to "0"
        r = run.lstrip("r0") or "0"
        s = subrun.lstrip("s0") or "0"
        run_clauses.append('run_number ' + r + '.' + s)
    # finished loop over list of fcls for this chunk

    if not run_clauses:
        print("WARNING: no usable fcl names in chunk %d, not making %s" % (cc, cosmics_def))
        continue
    temp_defs.append(cosmics_def)
    dims = 'def_snapshot ' + cosmics + ' and (' + ' or '.join(run_clauses) + ')'

    #------------------------------------------------------------#
    # issue command to make chunk definition
    # wait() is needed here because communicate() is not used
    # take a snapshot of the chunk definition
    # append chunk snapshot id to a list of them
    #------------------------------------------------------------#
    # arguments are passed as a list so the query needs no shell-style quoting
    p2 = Popen(['samweb', 'create-definition', cosmics_def, dims])
    p2.wait()
    # universal_newlines=True makes communicate() return text, not bytes
    p3 = Popen(['samweb', 'take-snapshot', cosmics_def],
               stdout=PIPE, stderr=PIPE, universal_newlines=True)
    out3, err3 = p3.communicate()
    snapshot_id = out3.strip()
    if snapshot_id:
        snapshot_ids.append(snapshot_id)
    else:
        print("WARNING: no snapshot id returned for %s: %s" % (cosmics_def, err3.strip()))

# finished loop over all chunks

print("The number of duplicate run-subrun-cycle in the input fcl is: ", duplicates)
199 
# ------------------------------------------------------------#
# loop over chunk snapshot ids to construct full definition
# ------------------------------------------------------------#
# drop the leading "temp" and the trailing "_sNNN" from the last chunk name
FULL_DEF_NAME = '_'.join(cosmics_def.split('_')[1:-1])

# joining the terms closes the quote correctly for any number of ids,
# including exactly one
id_terms = ['snapshot_id ' + sid.strip() for sid in snapshot_ids if sid.strip()]
if not id_terms:
    print("No chunk snapshots were made, so %s cannot be created" % (FULL_DEF_NAME))
    sys.exit(1)
cmd4 = 'samweb create-definition ' + FULL_DEF_NAME + ' "' + ' or '.join(id_terms) + '"'

# finished loop over all snapshot_ids
216 
# ------------------------------------------------------------#
# issue command to construct full definition
# take a snapshot of full definition
# issue command to make snapshot definition
# ------------------------------------------------------------#
print(">>>>> Making full definition from subdefinition snapshots")
p4 = Popen(shlex.split(cmd4))
p4.wait()
print(">>>>>> Taking a snapshot of %s" % (FULL_DEF_NAME))

# universal_newlines=True makes communicate() return text, not bytes
p5 = Popen(['samweb', 'take-snapshot', FULL_DEF_NAME],
           stdout=PIPE, stderr=PIPE, universal_newlines=True)
out5, err5 = p5.communicate()
# the id is used inside a definition name, so the trailing newline must go
full_snapshot_id = out5.strip()

if full_snapshot_id:
    p6 = Popen(['samweb', 'create-definition',
                FULL_DEF_NAME + '_snapshot' + full_snapshot_id,
                'snapshot_id ' + full_snapshot_id])
    p6.wait()
else:
    # do not exit here, so the code that follows still runs
    print("ERROR: no snapshot id returned for %s: %s" % (FULL_DEF_NAME, err5.strip()))
234 
235 
# ------------------------------------------------------------#
# delete the temporary chunk definitions and the full cosmics def
# this is so that rerunning doesn't pick up old ones
# ------------------------------------------------------------#
for td in temp_defs:
    print("deleting temp definition ", td)
    p7 = Popen(['samweb', 'delete-definition', td])
    p7.wait()

print("deleting full definition ", FULL_DEF_NAME)
p8 = Popen(['samweb', 'delete-definition', FULL_DEF_NAME])
p8.wait()

print("FIN.")
print("time taken: ", (datetime.now() - startTime))
255 
if(dump)
bool print
procfile open("FD_BRL_v0.txt")
exit(0)