find_dupe_fcls.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 #------------------------------------------------------------#
3 #
4 # usage: python find_dupe_fcls.py <definition_name>
5 #
6 # help: lasquith@fnal.gov, scalvez@fnal.gov
7 #------------------------------------------------------------#
8 
9 from __future__ import print_function
10 import sys, shlex, subprocess, math
11 from subprocess import Popen, PIPE
12 
def basename_from_definition(defname):
    """Derive the short tag used in the output file name from a definition name.

    The name is split on "_".  It is accepted only when its first field is
    "prod" and it has more than 7 fields; the tag is then fields 3, 6 and 7
    (0-based) joined with "_", plus field 14 when the name has more than 14
    fields.  For example
    "prod_fcl_v03.59_fd_genie_N1810j0211a_rhc_nonswap_rhc_nova_v08_full_batch1_v1"
    gives "fd_rhc_nonswap".

    Returns the tag, or None when the name is not of the accepted form.
    """
    parts = defname.split("_")
    if parts[0] != "prod" or len(parts) <= 7:
        return None
    chosen = [parts[3], parts[6], parts[7]]
    if len(parts) > 14:
        chosen.append(parts[14])
    return "_".join(chosen)


def find_duplicates(fcl_names):
    """Return the fcl names whose run/subrun/cycle key was already seen.

    The key is built from "_"-separated fields 7, 8 and 9 (0-based) of each
    name.  The first name carrying a given key is kept; every later name with
    the same key is reported, in input order.  Names with fewer than 10
    fields cannot provide a key and are skipped with an INFO message.
    """
    seen_keys = set()  # set membership keeps the scan linear in len(fcl_names)
    duplicates = []
    for name in fcl_names:
        fields = name.split("_")
        # Count fields, not characters: indices 7..9 are read below.
        if len(fields) < 10:
            print("INFO: skipping malformed fcl name: ", name)
            continue
        key = "_".join(fields[7:10])
        if key in seen_keys:
            duplicates.append(name)
        else:
            seen_keys.add(key)
    return duplicates


def main(argv=None):
    """Command-line entry point: python find_dupe_fcls.py <definition_name>

    Runs `samweb take-snapshot` on the definition, lists the files of its
    newest snapshot, and writes every fcl whose run/subrun/cycle repeats an
    earlier one to duplicate_fcls_<tag>.txt in the current directory.

    argv: argument list without the program name; defaults to sys.argv[1:].
    Returns the process exit status (0 on success, 1 on any failure).
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        print("usage: python find_dupe_fcls.py <definition_name>", file=sys.stderr)
        return 1
    defname = args[0]

    # The snapshot is taken before the name is validated, as the script has
    # always done.  A failure here is reported but not fatal.
    snapshot_status = Popen(["samweb", "take-snapshot", defname]).wait()
    if snapshot_status != 0:
        print("WARNING: samweb take-snapshot exited with status %d" % snapshot_status,
              file=sys.stderr)

    basename = basename_from_definition(defname)
    if basename is None:
        print("Your definition is not of the form expected - e.g. prod_fcl_v03.59_fd_genie_N1810j0211a_rhc_nonswap_rhc_nova_v08_full_batch1_v1(_cherenkov. Please either hack this script or ask Lily to hack it")
        return 1

    dupelist = "duplicate_fcls_" + basename + ".txt"

    # universal_newlines=True makes communicate() return text on Python 3 as
    # well as Python 2, so the output can be split on "\n".
    lister = Popen(
        ["samweb", "list-files", "dataset_def_name_newest_snapshot", defname],
        stdout=PIPE, stderr=PIPE, universal_newlines=True)
    out, err = lister.communicate()
    if lister.returncode != 0:
        # Without a listing the result would be an empty, misleading report.
        print("ERROR: samweb list-files exited with status %d: %s"
              % (lister.returncode, err.strip()), file=sys.stderr)
        return 1

    # Drop blank lines (the output ends with a newline) so they are neither
    # counted nor reported as malformed names.
    fcl_list = [line for line in out.split("\n") if line]

    print("Checking %d fcls for duplicates" % (len(fcl_list)))
    duplicates = find_duplicates(fcl_list)
    print("The number of duplicate run-subrun-cycle in the input fcl is: ", len(duplicates))

    with open(dupelist, 'w') as outf:
        for d in duplicates:
            outf.write("%s\n" % d)
    return 0


if __name__ == "__main__":
    sys.exit(main())
71 
if(dump)
bool print
procfile open("FD_BRL_v0.txt")
exit(0)