make_cached_def.py — builds a SAM definition containing only the cached files of an existing definition (listing reconstructed from a documentation-site capture).
1 #!/usr/bin/env python
2 #------------------------------------------------------------#
3 #
4 # usage: python make_cached_def.py DEFNAME MAXFILES_CHECK MAXFILES_CACHED
5 # help: lasquith@fnal.gov, scalvez@fnal.gov
6 #------------------------------------------------------------#
7 
8 import sys, shlex, subprocess, math
9 from subprocess import Popen, PIPE
10 
11 DEFNAME = sys.argv[1]
12 MAXFILES_CHECK = sys.argv[2]
13 MAXFILES_CACHED = sys.argv[3]
14 
15 fc = open("cached_files.txt", "w")
16 
17 print "Checking %s files in definition %s "%(MAXFILES_CHECK, DEFNAME)
18 
# Based on Jeremy's script for checking individual cache state of files in a def and returning cached files list

# Ask samweb for up to MAXFILES_CHECK file names in the input definition.
cmd = 'samweb list-files \" defname: '+DEFNAME+' with limit '+MAXFILES_CHECK+' \" '
p0 = Popen(shlex.split(cmd), stdout=PIPE, stderr=PIPE)
o0, e0 = p0.communicate()
definition_files = o0

# One file name per line. splitlines() discards the empty entry produced
# by a trailing newline WITHOUT deleting a real file name when the output
# happens not to end in a newline (the old split('\n') + del dfs[-1]
# would silently drop the last file in that case).
dfs = definition_files.splitlines()

# Nothing to do for an empty definition.
if( len(dfs)==0 ):
    exit(0)

34 cached_files=[]
35 cached_count = 0
36 for f in dfs:
37 
38  cmd = "samweb locate-file "+f
39  p1 = Popen(shlex.split(cmd), stdout=PIPE,stderr=PIPE)
40  o1,e1 = p1.communicate()
41 
42  spath = o1
43  # strip prefix and suffix from spath
44  # enstore:/pnfs/nova/production/raw2root/S17-10-30/fardet/000280/28043/00(3171@vpn272)
45  # /pnfs/nova/production/raw2root/S17-10-30/fardet/000280/28043/00/fardet_r00028043_s21_t00_S17-10-30_v1_data.artdaq.root
46  apath = spath.split("/")[1:]
47  bpath = '/'.join(apath)
48  cpath = "/"+bpath.split("(")[0]
49  filepath = cpath+"/"+f
50 
51  cmd = "cache_state.py "+filepath
52  p2 = Popen(shlex.split(cmd), stdout=PIPE,stderr=PIPE)
53  o2,e2 = p2.communicate()
54 
55  output=o2.split("\n")
56  #print "---->out: ", output[2]
57 
58  # want to send file list to a text file because the above takes bloody ages and if it now fails we have to start from scratch.
59 
60 
61  if( len(output)<3 ):
62  print "checking cache state of filepath %s did not produce expected output. Skipping to next file."%(f)
63  continue
64  else:
65  fc.write(f+"\n")
66  cached = output[2]
67 
68  if( cached == "CACHED" ):
69  #print "Cached file name: ",f
70  cached_count +=1
71 
72  cached_files.append(f)
73 
74  if( str(cached_count) == MAXFILES_CACHED ):
75  break
76 
77 fc.close()
78 #print "cached_files: ",cached_files
79 
80 #------------------------------------------------------------#
81 # the number of files to let sam handle in a single definition.
82 # could be optimised further for timing.
83 #------------------------------------------------------------#
84 splitLen = 200
85 
86 fcl_list=cached_files
87 
88 #------------------------------------------------------------#
89 # divide the list of fcls into chunks of length splitLen
90 # if there are <3 files in last chunk, add them to the previous chunk
91 # make a list of lists, fcl_chunks[ chunk1,...,chunk<nfiles/splitLen>]
92 #------------------------------------------------------------#
93 fcl_chunks=[]
94 counter=0
95 print "Found %d cached files in %s (from an expected %d)" % (len(fcl_list), DEFNAME,cached_count)
96 
97 for lines in range(0, len(fcl_list), splitLen):
98  chunk = fcl_list[lines:lines+splitLen]
99  # bail if we've over chunked
100  if(len(chunk)==0):
101  break
102 
103  # if <3 files in the last chunk, put them in the previous chunk
104  if(len(chunk)>2):
105  fcl_chunks.append(chunk)
106  elif(counter>0):
107  fcl_chunks[counter-1] += chunk
108  break
109  else:
110  print "There are less than 100 files, something is wrong"
111  exit(0)
112  counter+=1
113 # finished loop over fcls in input def
114 nsubdefs = len(fcl_chunks)
115 
116 print ">>> Divided this list's %d files into %d bitesize chunks each with ~%d fcls"%(len(fcl_list), len(fcl_chunks),splitLen)
117 
118 #------------------------------------------------------------#
119 # loop over chunks to construct a command to make defintions
120 # each chunk has its own temp definition temp_cached_DEFNAME_NNN
121 #------------------------------------------------------------#
122 
123 snapshot_ids=[]
124 temp_defs=[]
125 
126 cc=0
127 
128 for chunk in fcl_chunks:
129 
130  subset=str(cc).zfill(3) #000,001,...
131  subdefname= "temp_cached_"+DEFNAME+"_"+subset
132  temp_defs.append(subdefname)
133 
134  cmd = ""
135  fc=0
136 
137  for fcl in chunk:
138  sd = subdefname+' \"( file_name '+fcl
139 
140  if fc==0:
141  cmd +='samweb create-definition '+sd+' '
142  fc += 1
143  elif fc % (len(chunk)-1) ==0:
144  cmd +=' or file_name '+fcl
145  fc+=1
146  else:
147  cmd +=' or file_name '+fcl+' '
148  fc+=1
149 
150  cmd += ' )\" '
151  cc+=1
152 
153  #------------------------------------------------------------#
154  # issue command to make chunk definition
155  # wait() is needed here because communicate() is not used
156  # take a snapshot of the chunk definition
157  # append chunk snapshot id to a list of them
158  #------------------------------------------------------------#
159  print "issuing command to make chunk definition: "#,cmd
160 
161  p2 = Popen(shlex.split(cmd))
162  p2.wait()
163  cmd = 'samweb take-snapshot '+subdefname
164  print "issuing command to make snapshot: ",cmd
165  p3 = Popen(shlex.split(cmd), stdout=PIPE,stderr=PIPE)
166  out3,err3=p3.communicate()
167  snapshot_ids.append(str(out3))
168 
169 # finished loop over all chunks
170 
171 print "number of snapshot ids: ",len(snapshot_ids)
172 # ------------------------------------------------------------#
173 # loop over chunk snapshot ids to construct full definition
174 # ------------------------------------------------------------#
175 cmd4 = ""
176 count_ids=0
177 FULL_DEF_NAME='temp_cached_'+DEFNAME+'_000_'+subset
178 
179 for sid in snapshot_ids:
180 
181  if( len(snapshot_ids)==1 ):
182  cmd4 = 'samweb create-definition '+FULL_DEF_NAME+' \"snapshot_id '+sid+'\"'
183  break
184 
185  if(count_ids==0):
186  cmd4 += 'samweb create-definition '+FULL_DEF_NAME+' \"snapshot_id '+sid+' or'
187 
188  elif(count_ids==len(snapshot_ids)-1):
189  cmd4 += ' snapshot_id '+sid+'\"'
190 
191  else:
192  cmd4 += ' snapshot_id '+sid+' or'
193  count_ids+=1
194 
195 # finished loop over all snapshot_ids
196 
197 # ------------------------------------------------------------#
198 # issue command to construct full definition
199 # take a snapshot of full definition
200 # issue command to make snapshot definition
201 # ------------------------------------------------------------#
202 
203 print ">>>>> Making full definition from subdefinition snapshots"
204 print "issuing command to make full def: ",cmd4
205 
206 p4 = Popen( shlex.split(cmd4) )
207 p4.wait()
208 print ">>>>>> Taking a snapshot of %s" % (FULL_DEF_NAME)
209 
210 
211 cmd5='samweb take-snapshot '+FULL_DEF_NAME
212 
213 print "issuing command to make snapshot: ",cmd5
214 p5 = Popen( shlex.split(cmd5),stdout=PIPE,stderr=PIPE )
215 out5,err5=p5.communicate()
216 
217 cmd6='samweb create-definition '+FULL_DEF_NAME+'_snapshot'+out5+' \"snapshot_id '+out5+'\"'
218 print "issuing command to make snapshot def: ",cmd6
219 p6 = Popen( shlex.split(cmd6) )
220 p6.wait()
221 
222 # ------------------------------------------------------------#
223 # delete the temporary subdef snapshots and the full cosmics def
224 # this is so that rerunning doesn't pick up old ones
225 # ------------------------------------------------------------#
226 for td in temp_defs:
227  #print "deleting temp definition ",td
228  cmd7='samweb delete-definition '+td
229  p7 = Popen( shlex.split(cmd7) )
230  p7.wait()
231 
232 #print "deleting full definition ",FULL_DEF_NAME
233 cmd8='samweb delete-definition '+FULL_DEF_NAME
234 p8 = Popen( shlex.split(cmd8) )
235 p8.wait()
236 
237 print "FIN."
238 
if(dump)
procfile open("FD_BRL_v0.txt")
exit(0)