findDuplicateFiles.py
#!/usr/bin/env python
"""Find far-detector raw-data files whose (run, subrun) pair appears more
than once in SAM, and retire the duplicates."""

from __future__ import print_function

import re
from operator import itemgetter

import samweb_client

samweb = samweb_client.SAMWebClient(experiment='nova')

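# For each data stream: list the matching files in SAM, pull run/subrun out
# of each filename, sort by (run, subrun), and retire the previously seen
# file whenever two adjacent entries share the same key.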
for stream in [0, 2]:
    # Filenames are expected to encode run and subrun as ..._r<run>_s<subrun>_...
    pattern = re.compile(r"^.*?_r(.*?)_s(.*?)_.*")
    # data_stream is an integer here, so convert it before concatenating.
    query = ("Nova.DetectorID = fd and data_tier = artdaq"
             " and data_stream = " + str(stream) +
             " and file_type = importedDetector and run_number >= 11496"
             " and DAQ2RawDigit.base_release = S14-01-20")
    listResult = samweb.listFiles(query)

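    # As a sanity check on the pattern: a hypothetical name such as
    # "fardet_r00011496_s12_example.raw" would parse to run 11496, subrun 12.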
    fileList = []

    for filename in listResult:
        match = pattern.match(filename)
        if match is None:
            print("None found")
        else:
            run = int(match.group(1))
            subrun = int(match.group(2))
            fileList.append((run, subrun, filename))

    fileList.sort(key=itemgetter(0, 1))

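    # After sorting, files sharing a (run, subrun) key sit next to each other
    # (list.sort is stable, so ties keep their listing order), which lets a
    # single pass compare each entry with its predecessor.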
    count = 0
    lastRun = 0
    lastSubrun = 0
    lastFile = ""
    for run, subrun, filename in fileList:
        # Once a previous entry exists (count > 0), a matching (run, subrun)
        # marks a duplicate; retire the previously seen file and keep this one.
        if count > 0 and run == lastRun and subrun == lastSubrun:
            print("Duplicate found!")
            print("File 1: ", filename)
            print("File 2: ", lastFile)
            samweb.retireFile(lastFile)
            print("-----------------------------")

        lastRun = run
        lastSubrun = subrun
        lastFile = filename
        count = count + 1

print("Done!")