#!/bin/env python
"""findDuplicateFiles.py -- retire duplicate (run, subrun) files in SAM.

For each data stream in STREAMS the script asks SAM for the files matching
the query built by build_query(), extracts the run and subrun numbers from
every file name, sorts the files by (run, subrun) and compares neighbours.
Whenever two adjacent files share the same run and subrun, the one that
sorts first is retired through the SAM web client and the pair is printed.

Only running the file as a script talks to SAM; importing it just makes the
helper functions available.
"""

import itertools
import re
from operator import itemgetter, attrgetter

# NOTE: samweb_client is imported inside main() so that the pure helpers
# below can be imported and exercised without the SAM client installed.

# Data streams that are scanned for duplicates.
STREAMS = (0, 2)

# Captures the text between "_r" and "_s" (run) and between "_s" and the
# next "_" (subrun).  Compiled once instead of once per stream.
FILE_NAME_PATTERN = re.compile(r"^.*?_r(.*?)_s(.*?)_.*")


def build_query(stream):
    """Return the SAM dimension query selecting the files of *stream*.

    *stream* may be an int or a string; it is converted with str() so that
    integer stream numbers can be concatenated into the query text.
    """
    return (
        "Nova.DetectorID = fd and data_tier = artdaq and data_stream = "
        + str(stream)
        + " and file_type = importedDetector and run_number >= 11496"
        + " and DAQ2RawDigit.base_release = S14-01-20"
    )


def parse_run_subrun(file_name):
    """Return (run, subrun) as ints parsed from *file_name*, or None.

    None is returned when the name does not match FILE_NAME_PATTERN or when
    a captured field is not an integer, so that one odd file name cannot
    abort the whole scan.
    """
    match = FILE_NAME_PATTERN.match(file_name)
    if match is None:
        return None
    run_text, subrun_text = match.groups()
    try:
        return int(run_text), int(subrun_text)
    except ValueError:
        # The lazy groups can capture non-numeric text (e.g. when an
        # earlier "_r" appears in the name); treat that as unparseable.
        return None


def sorted_entries(file_names):
    """Return [(run, subrun, name), ...] sorted by (run, subrun).

    Names whose run/subrun cannot be parsed are reported with
    "None found" and left out.  The sort is stable, so files sharing a
    (run, subrun) keep the order in which they were listed.
    """
    entries = []
    for name in file_names:
        key = parse_run_subrun(name)
        if key is None:
            print("None found")
            continue
        entries.append((key[0], key[1], name))
    entries.sort(key=itemgetter(0, 1))
    return entries


def duplicate_pairs(entries):
    """Return [(current_name, previous_name), ...] for adjacent duplicates.

    *entries* must already be sorted by (run, subrun).  Every adjacent pair
    is compared, including the first two entries.  For a run of N files
    with the same (run, subrun) this yields N-1 pairs, so all but the last
    file of the run appear as ``previous_name``.
    """
    return [
        (current[2], previous[2])
        for previous, current in zip(entries, entries[1:])
        if previous[:2] == current[:2]
    ]


def main():
    """Scan every stream in STREAMS and retire the duplicates found."""
    import samweb_client

    samweb = samweb_client.SAMWebClient(experiment='nova')

    for stream in STREAMS:
        listing = samweb.listFiles(build_query(stream))
        for current, previous in duplicate_pairs(sorted_entries(listing)):
            print("Duplicate found!")
            print("File 1:  %s" % current)
            print("File 2:  %s" % previous)
            # The file that sorts first is the one that gets retired.
            samweb.retireFile(previous)
            print("-----------------------------")

    print("Done!")


if __name__ == "__main__":
    main()