cache_state.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 from __future__ import print_function
4 from __future__ import division
5 from past.utils import old_div
6 import argparse
7 import os, os.path
8 import re
9 import shlex
10 import subprocess
11 import sys
12 
13 import samweb_client as swc
14 from progbar import ProgressBar
15 
16 BULK_QUERY_SIZE = 100
17 WEBDAV_HOST = "https://fndca1.fnal.gov:2880"
18 PNFS_DIR_PATTERN = re.compile(r"/pnfs/(?P<area>[^/]+)")
19 
20 
def FilelistCacheCount(files, verbose_flag, METHOD="pnfs"):
    """Count how many of the given /pnfs files are currently cached (ONLINE).

    Parameters:
      files        -- list of /pnfs file paths to check
      verbose_flag -- if True, print a line for each cached file found
      METHOD       -- "pnfs": read dCache's magic ".(get)(<file>)(locality)"
                      files directly from the mounted namespace;
                      "webdav": batch-query the dCache WebDAV door via curl
                      PROPFIND requests (BULK_QUERY_SIZE files per call)

    Returns the number of files whose locality string contains ONLINE.
    """
    bulk_query_list = []
    cached = 0

    # Nothing to check: return immediately (also avoids constructing a
    # zero-length ProgressBar and referencing it before assignment).
    if not files:
        return cached

    if len(files) > 1:
        print("Checking %d files:" % len(files))
    progbar = ProgressBar(len(files))
    n = 0

    for f in files:
        if METHOD == "webdav":
            # The WebDAV door wants the full dCache namespace path,
            # e.g. /pnfs/fnal.gov/usr/<area>/...; collect for batching below.
            f = PNFS_DIR_PATTERN.sub(r"/pnfs/fnal.gov/usr/\1", f)
            bulk_query_list.append(f)
        else:
            # dCache exposes the file locality via a magic ".(get)" file
            # that lives alongside the real file in the namespace.
            path, filename = os.path.split(f)
            stat_file = "%s/.(get)(%s)(locality)" % (path, filename)
            with open(stat_file) as stat_f:
                state = stat_f.readline()
            if 'ONLINE' in state:
                cached += 1
                if verbose_flag:
                    print(filename, "is cached")

            n += 1
            progbar.Update(n)

    if len(bulk_query_list) > 0:
        while len(bulk_query_list) > 0:
            # it's probably possible to actually implement this using urllib2 natively,
            # but I couldn't make it work very quickly
            params = {
                "local_cert": "/tmp/x509up_u%d" % os.getuid(),
                "host": WEBDAV_HOST,
            }

            cmd = """
            curl -L --capath /etc/grid-security/certificates \
            --cert %(local_cert)s \
            --cacert %(local_cert)s \
            --key %(local_cert)s \
            -s -X PROPFIND -H Depth:0 \
            --data '<?xml version="1.0" encoding="utf-8"?>
            <D:propfind xmlns:D="DAV:">
            <D:prop xmlns:R="http://www.dcache.org/2013/webdav"
            xmlns:S="http://srm.lbl.gov/StorageResourceManager">
            <S:FileLocality/>
            </D:prop>
            </D:propfind>' \
            """ % params

            # Append up to BULK_QUERY_SIZE target URLs to this curl invocation.
            for f in bulk_query_list[:BULK_QUERY_SIZE]:
                cmd += " %s/%s" % (WEBDAV_HOST, f)

            # universal_newlines=True decodes the output to str so the
            # substring search below also works on Python 3 (where
            # check_output otherwise returns bytes).
            out = subprocess.check_output(shlex.split(cmd),
                                          universal_newlines=True)
            for l in out.split("\n"):
                if "ONLINE" in l:
                    cached += 1
                    if verbose_flag:
                        print(l)

            # NOTE: *not* n*BULK_QUERY_SIZE since we've already stripped off
            # (n-1)*BULK_QUERY_SIZE in previous iterations
            bulk_query_list = bulk_query_list[BULK_QUERY_SIZE:]
            n += 1
            if len(bulk_query_list) > 0:
                progbar.Update(n * BULK_QUERY_SIZE)

        # All batches drained: close out the progress bar.  (Previously this
        # was guarded by `len(bulk_query_list) > 0`, which is always false
        # here, so Finish() never ran.)
        progbar.Finish()

    return cached
94 
95 
# ---- command-line interface ------------------------------------------------
parser = argparse.ArgumentParser()

# An explicit file list and a SAM dataset name are alternative ways of
# specifying the input, so they live in a mutually exclusive group.
gp = parser.add_mutually_exclusive_group()
gp.add_argument(
    "files",
    nargs="*",
    default=[],
    metavar="FILE",
    help="Files to consider",
)
gp.add_argument(
    "-d", "--dataset",
    metavar="DATASET",
    dest="dataset_name",
    help="Name of the SAM dataset to check cache status of",
)

parser.add_argument(
    "-s", "--sparse",
    dest="sparse",
    default=1,
    help="Sparsification factor. This is used to check only a portion of a list of files",
)
parser.add_argument(
    "-ss", "--snapshot",
    dest="snapshot",
    help="[Also requires -d] Use this snapshot ID for the dataset. Specify 'latest' for the most recent one.",
)
parser.add_argument(
    "-v", "--verbose",
    action="store_true",
    dest="verbose",
    default=False,
    help="Print information about individual files",
)
parser.add_argument(
    "-m", "--method",
    choices=["webdav", "pnfs"],
    default="webdav",
    help="Use this method to look up file status.",
)

args = parser.parse_args()
117 
# gotta make sure you have a valid certificate.
# otherwise the results may lie...
if args.method in ("webdav",):
    try:
        # Discard the tool's output; only its exit status matters.
        # /dev/null must be opened for *writing* (a read-only handle can't
        # accept the child's stdout), and the `with` closes it afterwards.
        with open(os.devnull, "w") as devnull:
            subprocess.check_call(shlex.split("setup_fnal_security --check"),
                                  stdout=devnull,
                                  stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError:
        print("Your proxy is expired or missing. Please run `setup_fnal_security` and then try again.")
        sys.exit(2)

METHOD = args.method

# Positional file list only applies when no dataset was requested;
# in the dataset case the list is filled in from SAM below.
filelist = None if args.dataset_name else args.files

sam = swc.SAMWebClient("nova")
132 
#
# Figure out where we want to get our list of files from

# See if a SAM dataset was specified
if args.dataset_name:
    print("Retrieving file list for SAM dataset definition name: '%s'..." % args.dataset_name, end=' ')
    sys.stdout.flush()
    try:
        # The snapshot option changes which SAM dimensions we query with:
        #   "latest"    -> newest snapshot of the named definition
        #   <id>        -> that explicit snapshot id
        #   (not given) -> the definition itself
        dimensions = None
        if args.snapshot == "latest":
            dimensions = "dataset_def_name_newest_snapshot %s" % args.dataset_name
        elif args.snapshot:
            dimensions = "snapshot_id %s" % args.snapshot
        if dimensions:
            samlist = sam.listFiles(dimensions=dimensions)
        else:
            samlist = sam.listFiles(defname=args.dataset_name)
        # Apply the sparsification factor: keep only every Nth file.
        filelist = [ f for f in samlist[::int(args.sparse)] ]
        print(" done.")
    except Exception as e:
        print(e)
        print()
        print('Unable to retrieve SAM information for dataset: %s' %(args.dataset_name))
        sys.exit(-1)
else:
    # Take the rest of the commandline as the filenames.
    # (This was previously `filelist = args` run unconditionally after the
    # try/except, which clobbered the SAM-derived list with the argparse
    # Namespace and broke the later len(filelist) call.)
    filelist = args.files
159 
cache_count = 0
miss_count = 0

n_files = len(filelist)
if n_files == 0:
    print("Empty definition")
    sys.exit(1)

# some status notes if there are lots of files
announce = n_files > 50
if announce:
    print("Finding locations for %d files:" % n_files)

progbar = ProgressBar(n_files)

files_to_check = []
for idx, fname in enumerate(filelist):
    progbar.Update(idx)

    if os.path.isfile(fname):
        # A visible full path was given directly; guess the storage tier
        # from the directory name.
        dirname = os.path.split(fname)[0]
        if dirname.startswith("/pnfs") and ("/scratch" in dirname or "/persistent" in dirname):
            locations = ["dcache:" + dirname]
        elif dirname.startswith("/pnfs"):
            locations = ["enstore:" + dirname]
        elif dirname.startswith("/nova"):
            locations = ["bluearc:" + dirname]
        else:
            print("Unknown storage tier for file:", fname, file=sys.stderr)
            print("Cannot determine cache state.", file=sys.stderr)
            sys.exit(2)
    else:
        # Not a path we can see locally: ask SAM where the file lives.
        try:
            locations = [l['location'] for l in sam.locateFile(fname)]
        except (swc.exceptions.FileNotFound, swc.exceptions.HTTPNotFound):
            print("File is not known to SAM and is not a full path:", fname, file=sys.stderr)
            sys.exit(2)

    # if it's got a dcache location, our tools will prefer that location anyway,
    # and that's cached by construction.
    # bluearc files are cached for the purposes of this script, I guess...
    if any(l.startswith(("dcache:", "bluearc:")) for l in locations):
        cache_count += 1
        continue

    for l in locations:
        if l.startswith("enstore:"):
            # We now have the enstore location.
            # Strip off the enstore prefix and the tape label.
            enstore_path = l.split(':')[1].split('(')[0]
            files_to_check.append(os.path.join(enstore_path, fname))

progbar.Finish()
print()
218 
# Files already counted as cached via non-enstore locations
# (dcache/bluearc) were tallied above; remember them before the
# enstore-cache check overwrites cache_count.
non_enstore = cache_count

cache_count = FilelistCacheCount(files_to_check, args.verbose, METHOD)
miss_count = len(files_to_check) - cache_count

cache_count += non_enstore

total = float(cache_count + miss_count)
if total > 0:
    cache_frac_str = " (%d%%)" % round(old_div(cache_count, total) * 100)
    miss_frac_str = " (%d%%)" % round(old_div(miss_count, total) * 100)
else:
    cache_frac_str = ""
    miss_frac_str = ""

if total > 1:
    print()
    print("Cached: %d%s\tTape only: %d%s" % (cache_count, cache_frac_str, miss_count, miss_frac_str))
elif total == 1:
    print("CACHED" if cache_count > 0 else "NOT CACHED")

# Exit status tells callers whether everything was cached.
sys.exit(0 if miss_count == 0 else 1)
void split(double tt, double *fr)
fvar< T > round(const fvar< T > &x)
Definition: round.hpp:23
bool print
procfile open("FD_BRL_v0.txt")
exit(0)
def FilelistCacheCount(files, verbose_flag, METHOD="pnfs")
Definition: cache_state.py:21