make_associated_cosmic_defs.py
Go to the documentation of this file.
#!/usr/bin/env python
#------------------------------------------------------------#
#
# usage: python make_associated_cosmic_defs.py <definition_name>
#
# For every fcl file in <definition_name> the run and subrun number are
# extracted from the file name, and a SAM definition is built that selects
# the files of the hard-coded cosmics definition with matching run.subrun.
# The result is left in a definition called
#   prod5_cosmics_<BASENAME>_snapshot<snapshot id>
# All intermediate definitions are deleted again before the script ends.
#
# note: this script is for prod5 and has the cosmic definitions hard-coded:
cosmics_fhc = "lasquith_fd_cosmics_p12359_goodruns_prod5"
cosmics_rhc = "scalvez_artdaq_fd_cosmics_p4678_goodruns_prod5"
#
# help: lasquith@fnal.gov, scalvez@fnal.gov
#------------------------------------------------------------#

import sys, shlex, subprocess, math
from subprocess import Popen, PIPE
from datetime import datetime

#------------------------------------------------------------#
# the number of subruns to let sam handle in a single definition.
# could be optimised further for timing.
#------------------------------------------------------------#
splitLen = 200

# a trailing chunk with fewer files than this is merged into the previous one
MIN_CHUNK_LEN = 3

#------------------------------------------------------------#
# if True this will only make a small subset of 3*splitLen cosmic defs
#------------------------------------------------------------#
TEST = False

#------------------------------------------------------------#
# if True the fcl names are read from a text listing instead of
# from a SAM definition, and no command line argument is needed
#------------------------------------------------------------#
HACK = False
HACK_BASENAME = "fd_fhc_nonswap_llup"
HACK_FILE_LIST = "/nova/app/home/novapro/PROD5_SHIFTERS/mcgen/fcls/fhc_nonswap_llup_fcls_dropbox.txt"

# positions of the run, subrun and cycle fields in an fcl file name
# once it has been split on "_"
RUN_FIELD = 7
SUBRUN_FIELD = 8
CYCLE_FIELD = 9


def fail(message):
    """Print *message* and stop the script with a non-zero exit status."""
    print(message)
    sys.exit(1)


def run_samweb(args, capture=False):
    """Run ``samweb`` with the argument list *args* and wait for it.

    The arguments are handed to the process as a list, so definition names
    and dimension strings need no shell quoting.

    Returns the text written to stdout when *capture* is true; otherwise the
    output goes straight to the terminal and an empty string is returned.
    A non-zero exit status is reported but is not fatal here: callers decide
    whether a missing result is an error.
    """
    cmd = ["samweb"] + list(args)
    if capture:
        # universal_newlines gives text (not bytes) under python 2 and 3
        proc = Popen(cmd, stdout=PIPE, stderr=PIPE, universal_newlines=True)
    else:
        proc = Popen(cmd)
    out, err = proc.communicate()
    if proc.returncode != 0:
        print("WARNING: '%s' exited with status %d" % (" ".join(cmd), proc.returncode))
        if err:
            print(err.strip())
    return out or ""


def take_snapshot(definition):
    """Take a snapshot of *definition* and return its id as a string.

    Stops the script when samweb prints nothing, because an empty id would
    silently produce a broken or incomplete final definition.
    """
    snapshot_id = run_samweb(["take-snapshot", definition], capture=True).strip()
    if not snapshot_id:
        fail("Could not take a snapshot of %s" % definition)
    return snapshot_id


def parse_basename(defname):
    """Extract DET_HORN_SWAP (plus up to two systematic tags) from *defname*.

    For example
    prod_fcl_v03.62_fd_genie_N1810j0211a_fhc_nonswap_fhc_nova_v08_periods12359_batch1_v1_lldown_6k2
    gives fd_fhc_nonswap_lldown_6k2.

    Returns None when the name does not start with "prod" or has too few
    "_"-separated fields.
    """
    fields = defname.split("_")
    if fields[0] != "prod" or len(fields) <= 7:
        return None
    parts = [fields[3], fields[6], fields[7]]
    # fields 14 and 15 hold the optional systematic tags; the slice is
    # simply shorter (or empty) when they are absent
    parts.extend(fields[14:16])
    return "_".join(parts)


def select_cosmics(basename):
    """Return the hard-coded cosmics definition for *basename*.

    "rhc" is tested before "fhc". Returns None when neither is present.
    """
    if "rhc" in basename:
        return cosmics_rhc
    if "fhc" in basename:
        return cosmics_fhc
    return None


def read_fcls_from_listing(path):
    """Return the fardet fcl names found in the text listing *path*.

    The name is taken from the ninth single-space-separated field of each
    line; lines with fewer fields, or whose ninth field does not contain
    "fardet", are ignored.
    """
    names = []
    with open(path, "r") as listing:
        for line in listing:
            fields = line.split(" ")
            if len(fields) > 8 and "fardet" in fields[8]:
                names.append(fields[8].strip())
    return names


def split_into_chunks(fcl_list, chunk_len, min_len=MIN_CHUNK_LEN):
    """Divide *fcl_list* into consecutive lists of at most *chunk_len* items.

    If the last chunk has fewer than *min_len* items and is not the only
    chunk, its items are added to the previous chunk instead.
    Returns a list of lists; an empty input gives an empty list.
    """
    chunks = [fcl_list[start:start + chunk_len]
              for start in range(0, len(fcl_list), chunk_len)]
    if len(chunks) > 1 and len(chunks[-1]) < min_len:
        tail = chunks.pop()
        chunks[-1] = chunks[-1] + tail
    return chunks


def parse_fcl_name(fcl):
    """Extract run, subrun and cycle from the fcl file name *fcl*.

    Returns (key, run_number, subrun_number) where key is the raw
    "run_subrun_cycle" text used to spot duplicates and the two numbers are
    decimal strings without the r/s prefix or leading zeros ("s00" -> "0").
    Returns None when the name has too few fields or the run/subrun fields
    are not of the form r<digits> / s<digits>.
    """
    fields = fcl.split("_")
    if len(fields) <= CYCLE_FIELD:
        return None
    run = fields[RUN_FIELD]
    subrun = fields[SUBRUN_FIELD]
    cycle = fields[CYCLE_FIELD]
    if not (run.startswith("r") and run[1:].isdigit()):
        return None
    if not (subrun.startswith("s") and subrun[1:].isdigit()):
        return None
    # int() drops the leading zeros and still yields "0" for an all-zero field
    return (run + "_" + subrun + "_" + cycle,
            str(int(run[1:])), str(int(subrun[1:])))


def collect_run_subruns(chunk, seen):
    """Parse every fcl name in *chunk*.

    *seen* is a set of run_subrun_cycle keys met so far; it is updated in
    place so duplicates are detected across chunks.

    Returns (run_subruns, duplicates): the list of (run_number,
    subrun_number) string pairs in input order, and how many names repeated
    an already seen key. Repeated names are counted but still included.
    Malformed names are reported and skipped.
    """
    run_subruns = []
    duplicates = 0
    for fcl in chunk:
        parsed = parse_fcl_name(fcl)
        if parsed is None:
            print("INFO: skipping malformed fcl name:  %s" % fcl)
            continue
        key, run_number, subrun_number = parsed
        if key in seen:
            duplicates += 1
        seen.add(key)
        run_subruns.append((run_number, subrun_number))
    return run_subruns, duplicates


def build_chunk_dimensions(cosmics, run_subruns):
    """Return the dimension string selecting *run_subruns* from *cosmics*.

    The result is an AND of the snapshot of the cosmics definition and an
    OR over all run.subrun numbers.
    """
    clauses = " or ".join("run_number %s.%s" % pair for pair in run_subruns)
    return "def_snapshot %s and (%s)" % (cosmics, clauses)


def build_full_dimensions(snapshot_ids):
    """Return the dimension string that ORs together all *snapshot_ids*."""
    return " or ".join("snapshot_id %s" % sid for sid in snapshot_ids)


def main(argv=None):
    """Run the script; *argv* defaults to sys.argv."""
    if argv is None:
        argv = sys.argv
    start_time = datetime.now()

    print("If you are running this script, you will want to also cache the parent definition, e.g. novaproduction/overlays/cache.sh DEFNAME")
    if HACK:
        print("HACK!")
    elif TEST:
        print("WARNING! You are running in TEST mode. The output defn will be incomplete, so please delete it.")

    #------------------------------------------------------------#
    # parse the fcl definition name passed as arg to extract key
    # info: DET_HORN_SWAP or DET_HORN_SWAP_SYST
    # then make the list of fcl names
    #------------------------------------------------------------#
    if HACK:
        cosmics = cosmics_fhc
        basename = HACK_BASENAME
        source = HACK_FILE_LIST
        fcl_list = read_fcls_from_listing(HACK_FILE_LIST)
    else:
        if len(argv) < 2:
            fail("usage: python make_associated_cosmic_defs.py <definition_name>")
        source = argv[1]

        # take a snapshot of the fcl def
        run_samweb(["take-snapshot", source])

        basename = parse_basename(source)
        if basename is None:
            fail("Your definition is not of the form expected - e.g. prod_fcl_v03.59_fd_genie_N1810j0211a_rhc_nonswap_rhc_nova_v08_full_batch1_v1_cherenkov. Please either hack this script or ask Lily to hack it")
        print("> Getting associated cosmics (%s)" % basename)

        cosmics = select_cosmics(basename)
        if cosmics is None:
            fail("Cannot choose a cosmics definition: %s contains neither rhc nor fhc" % basename)
        print(">> Using hard-coded prod5 cosmics file %s for finding matching runs" % cosmics)

        # list all files in the newest snapshot of the fcl definition
        listing = run_samweb(
            ["list-files", "dataset_def_name_newest_snapshot " + source],
            capture=True)
        # drop blank lines so the trailing newline is not counted as a file
        fcl_list = [line.strip() for line in listing.splitlines() if line.strip()]

    #------------------------------------------------------------#
    # divide the list of fcls into chunks of length splitLen
    # if there are <3 files in last chunk, add them to the previous chunk
    #------------------------------------------------------------#
    print("Found %d files in %s" % (len(fcl_list), source))
    fcl_chunks = split_into_chunks(fcl_list, splitLen)
    if not fcl_chunks or len(fcl_chunks[0]) < MIN_CHUNK_LEN:
        fail("There are fewer than %d fcls, something is wrong" % MIN_CHUNK_LEN)
    print(">>> Divided this definition's %d files into %d bitesize chunks each with ~%d fcls" % (len(fcl_list), len(fcl_chunks), splitLen))
    if TEST:
        fcl_chunks = fcl_chunks[:3]

    #------------------------------------------------------------#
    # loop over chunks to make definitions
    # each chunk has its own temp definition
    # each definition is an AND of cosmics def and run,subrun numbers
    #------------------------------------------------------------#
    full_def_name = "prod5_cosmics_" + basename
    to_delete = []  # (label, definition name) pairs, in creation order
    seen = set()
    duplicates = 0
    snapshot_ids = []
    loop_start = datetime.now()

    try:
        for index, chunk in enumerate(fcl_chunks):
            # the definition name for this subset of cosmics: ..._s000, _s001, ...
            cosmics_def = "temp_prod5_cosmics_%s_s%s" % (basename, str(index).zfill(3))

            run_subruns, chunk_duplicates = collect_run_subruns(chunk, seen)
            duplicates += chunk_duplicates
            if not run_subruns:
                print("INFO: no usable fcl names in chunk %d, skipping it" % index)
                continue

            # registered before creation so that a stale definition of the
            # same name left by an earlier run is removed as well
            to_delete.append(("temp", cosmics_def))
            run_samweb(["create-definition", cosmics_def,
                        build_chunk_dimensions(cosmics, run_subruns)])
            snapshot_ids.append(take_snapshot(cosmics_def))

            if index == 0:
                print(">>>> first subdef time (s):  %s" % (datetime.now() - loop_start))

        print("The number of duplicate run-subrun-cycle in the input fcl is:  %d" % duplicates)
        if not snapshot_ids:
            fail("No cosmics sub-definition could be made, something is wrong")

        #------------------------------------------------------------#
        # construct the full definition from the chunk snapshot ids
        # take a snapshot of the full definition
        # make the final definition from that snapshot
        #------------------------------------------------------------#
        print(">>>>> Making full definition from subdefinition snapshots")
        to_delete.append(("full", full_def_name))
        run_samweb(["create-definition", full_def_name,
                    build_full_dimensions(snapshot_ids)])

        print(">>>>>> Taking a snapshot of %s" % full_def_name)
        full_id = take_snapshot(full_def_name)
        run_samweb(["create-definition", full_def_name + "_snapshot" + full_id,
                    "snapshot_id " + full_id])
    finally:
        #------------------------------------------------------------#
        # delete the temporary subdefs and the full cosmics def
        # this is so that rerunning doesn't pick up old ones;
        # done in a finally so a failed run cleans up too
        #------------------------------------------------------------#
        for label, name in to_delete:
            print("deleting %s definition  %s" % (label, name))
            run_samweb(["delete-definition", name])

    print("FIN.")
    print("time taken:  %s" % (datetime.now() - start_time))


if __name__ == "__main__":
    main()
252 
if(dump)
procfile open("FD_BRL_v0.txt")
exit(0)