DefinitionData.py
Go to the documentation of this file.
1 #!/bin/env python
2 import os, sys, shutil
3 import samweb_client
4 import pickle
5 from collections import OrderedDict
6 from time import time, ctime
7 import datetime
8 
# Path templates for the pickled definition data.  The ${NOVAPRODVALID_DATA}
# placeholder is left unexpanded here; definitionPickleName() runs the final
# path through os.path.expandvars().
defDataDir = "${NOVAPRODVALID_DATA}/definitions/current"
defDataCache = "${NOVAPRODVALID_DATA}/definitions/cache"

# Module-wide SAM web client, created once at import time for experiment 'nova'.
sam = samweb_client.SAMWebClient(experiment="nova")
12 
def definitionPickleName(defname, useCache=False):
    """Return the pickle file path for the definition *defname*.

    defname  -- definition name; becomes the file stem ("<defname>.pkl").
    useCache -- when true the path is built under defDataCache, otherwise
                under defDataDir.

    Environment variables in the directory template are expanded with
    os.path.expandvars() before the path is returned.
    """
    baseDir = defDataCache if useCache else defDataDir
    return os.path.expandvars(baseDir + "/" + defname + ".pkl")
20 
def dumpDefinitionData(defData, useCache=False):
    """Pickle *defData* to the file chosen by definitionPickleName().

    defData  -- object to serialise; its ``name`` attribute selects the file.
    useCache -- forwarded to definitionPickleName(); true selects the cache
                directory instead of the current-data directory.

    Returns None.  Errors from open() or pickle.dump() propagate to the
    caller.
    """
    fname = definitionPickleName(defData.name, useCache)

    # Pickle streams are byte streams, so the file is opened in binary mode
    # (text mode only happens to work on Python 2 / POSIX).  The with-block
    # closes the handle even when pickle.dump() raises.
    with open(fname, 'wb') as outFile:
        pickle.dump(defData, outFile)
28 
def loadDefinitionData(defname):
    """Load and return the pickled object stored for *defname*.

    The file is the non-cache path returned by definitionPickleName().
    Errors from open() or pickle.load() (missing file, corrupt pickle)
    propagate to the caller.

    NOTE: unpickling can execute arbitrary code, so this must only be pointed
    at pickle files produced by trusted jobs.
    """
    fname = definitionPickleName(defname)

    # Binary mode matches the byte-stream nature of pickle data; the
    # with-block closes the handle even when pickle.load() raises.
    with open(fname, 'rb') as inFile:
        return pickle.load(inFile)
39 
def cacheDefinitionData(defname):
    """Copy the current pickle file for *defname* into the cache location.

    Source and destination paths both come from definitionPickleName()
    (useCache False and True respectively).  If the source file does not
    exist an error line is printed to stdout and nothing is copied.

    Returns None.  Errors from shutil.copy() propagate to the caller.
    """
    pklName = definitionPickleName(defname)
    if not os.path.isfile(pklName):
        # Single-argument print: same output under the Python 2 print
        # statement and the Python 3 print function.
        print("ERROR couldn't find pkl file for %s" % (defname,))
        return

    shutil.copy(pklName, definitionPickleName(defname, True))
47 
49 
def __init__(self, name):
    """Initialise an empty record for the definition called *name*.

    The record starts in state "noData" with a zeroed summary and zeroed
    query time / query timestamp.

    NOTE(review): instance method; the enclosing class header is not part of
    this listing.
    """
    self.name = name
    self.description = {"defname": name}
    self.summary = {
        "file_count": 0,
        "total_event_count": 0,
        "total_file_size": 0,
    }
    self.state = "noData"
    self.queryTime = self.queryTimeStamp = 0
57 
def queryDate(self):
    """Return self.queryTimeStamp formatted as a string by time.ctime().

    NOTE(review): instance method; the enclosing class header is not part of
    this listing.
    """
    stamp = self.queryTimeStamp
    return ctime(stamp)
60 
def sameData(self, other):
    """Return True when *other* holds the same description and summary.

    other -- object exposing ``description`` and ``summary`` attributes.

    Returns False as soon as either attribute differs, True otherwise.
    Other attributes (state, query time, timestamp) are not compared.

    NOTE(review): instance method; the enclosing class header is not part of
    this listing.
    """
    if self.description != other.description:
        return False

    if self.summary != other.summary:
        return False

    # Previously the function fell off the end here and returned None, so
    # callers testing ``not sameData(...)`` saw a mismatch even for
    # identical data.
    return True
68 
69  def fetchData(self,maxCacheLifetime):
# Fill in this object for self.name, either from a recent cache pickle or by
# querying SAM through the module-level `sam` client.
#
# maxCacheLifetime is compared with (time() - cacheData.queryTimeStamp), i.e.
# it is the maximum accepted age of the cache, in seconds.
#
# Returns None.  The outcome is recorded as a string in self.state
# ("cached", "good", or one of the error strings set by the handlers below).
# KeyboardInterrupt during the query exits the process with status 99.
#
# NOTE(review): this listing has lost its indentation, and its own line
# numbering skips 109, 112 and 115 -- exactly where the `except` headers of
# the first three handlers would sit.  Restore them from the original file
# before changing any logic here.
70  ## we might want to add a fallback in case dataset exists but query
71  ## fails
72  print "Fetching data for", self.name
73  cacheName=definitionPickleName(self.name, useCache=True)
74  cacheData=None
75  if not os.path.isfile(cacheName):
76  pass #no cache so don't even worry about it
77  else:
# NOTE(review): the file object returned by open() is never explicitly closed,
# and the pickle is read in text mode ('r').
78  cacheData=pickle.load(open(cacheName,'r'))
79  # If the cache data is recent, just use it,
80  # and that's the end of the story, but if the cache
81  # data is old, don't rely on it. But we do need to keep
82  # the cache around for comparison purposes
83  if (time() - cacheData.queryTimeStamp) < maxCacheLifetime:
# NOTE(review): this binds self.__dict__ to the very same dict object as
# cacheData.__dict__, so the two assignments that follow also modify cacheData,
# and the queryTimeStamp assignment copies a value onto itself.
84  self.__dict__ = cacheData.__dict__
85  self.state="cached"
86  self.queryTimeStamp = cacheData.queryTimeStamp
87  return
88 
# Cache missing or stale: query SAM.  queryTime measures only the
# listFilesSummary call, not descDefinitionDict.
89  try:
90  self.description = sam.descDefinitionDict(self.name)
91 
92  startTime=time()
93  #try:
94  # self.snapshotID = sam.takeSnapshot(self.name)
95  # snapshotQuery = "dataset_def_name_newest_snapshot %s" % self.name
96  # self.summary = sam.listFilesSummary(dimensions=snapshotQuery)
97  # self.state = "good"
98  #except:
99  # self.summary = sam.listFilesSummary(defname=self.name)
100  # self.state = "snapshot failed"
101  self.summary = sam.listFilesSummary(defname=self.name)
102  self.state = "good"
103 
104  self.queryTime = time() - startTime
# For an empty definition the event-count and size totals are forced to 0
# (presumably SAM does not report usable totals in that case -- confirm).
105  if self.summary["file_count"] == 0:
106  self.summary["total_event_count"]=0
107  self.summary["total_file_size"]=0
108 
# NOTE(review): `except` header missing from the listing (its line 109);
# handler for an unknown definition name.
110  print "ERROR, definition %s not found" % self.name
111  self.state="bad definition name error"
# NOTE(review): `except` header missing from the listing (its line 112);
# handler for a timeout.
113  print "ERROR, Timeout getting data for %s" % self.name
114  self.state="timed out"
# NOTE(review): `except` header missing from the listing (its line 115);
# handler for an HTTP error -- presumably the clause that binds `e`.
116  print "ERROR, HTTP: ", e
117  self.state="http error"
118  except IOError:
# NOTE(review): this clause does not bind `e`, yet the next line prints it.
# Unless `e` was bound earlier this looks like a NameError waiting to
# happen -- confirm and bind the exception here.
119  print "IOError", e
120  self.state="proxy error"
121  except KeyboardInterrupt:
122  print "Stopping because you say so."
123  sys.exit(99)
# Bare except: every remaining exception is swallowed and only recorded in
# self.state.
124  except:
125  print "ERROR, unknown exception fetching data for %s" % self.name
126  self.state="unknown error"
127  # Store the time, even if there is an exception
128  self.queryTimeStamp = time()
129 
130  # So here we have cache data that is stale (else we would have exited the function
131  # already), so we know we've fetched the data from SAM. Check if that matches the cache.
132  # If not, print a warning and delete the cache, as it's clearly no good. If they do match, dump to
133  # the cache as well.
134 
# NOTE(review): the branch below relies on sameData() returning a truthy value
# when the data match -- verify.  With indentation lost it is also unclear
# which `if` the final `else` pairs with; the comment above suggests the inner
# one.
135  if cacheData != None:
136  if not self.sameData(cacheData):
137  os.remove(cacheName)
138  sys.stderr.write("Warning:: cache data and current data do not match for %s\n" %self.name)
139  else:
140  dumpDefinitionData(self,useCache=True)
141 
def printData(self):
    """Print every description entry, then every summary entry, to stdout.

    Each entry is written on its own line as "<key> : <value>".
    Returns None.

    NOTE(review): instance method; the enclosing class header is not part of
    this listing.
    """
    for section in (self.description, self.summary):
        for key, value in section.items():
            # Single-argument print: same text under the Python 2 print
            # statement and the Python 3 print function.
            print("%s : %s" % (key, value))
147 
148 
__dict__
we might want to add a fallback in case dataset exists but query fails
def definitionPickleName(defname, useCache=False)
def cacheDefinitionData(defname)
def loadDefinitionData(defname)
def dumpDefinitionData(defData, useCache=False)
procfile open("FD_BRL_v0.txt")
def fetchData(self, maxCacheLifetime)