Public Member Functions | Public Attributes | List of all members
bp_docdb.DocDB Class Reference

Public Member Functions

def __init__ (self)
 
def GetDocument (self, id)
 
def GetDocIDsByCategory (self, cat_id, full_lookback)
 
def DownloadFile (self, doc_id, filename, dest)
 
def DownloadArchive (self, doc_id, filename, dest)
 

Public Attributes

 uri
 

Detailed Description

A class for interfacing with DocDB via HTTP

A connection to DocDB is established over HTTP, and requests are made for
data in XML format.

Attributes:
    uri (str): Base URI with which any desired DocDB query can be formed

Definition at line 65 of file bp_docdb.py.

Constructor & Destructor Documentation

def bp_docdb.DocDB.__init__ (   self)

Definition at line 75 of file bp_docdb.py.

Referenced by PandAna.core.core.spectrum.fill().

75  def __init__(self):
76  # Class variables
77  self.uri = config.DOCDB_URI
78 
79 
def __init__(self)
Definition: bp_docdb.py:75

Member Function Documentation

def bp_docdb.DocDB.DownloadArchive (   self,
  doc_id,
  filename,
  dest 
)
Download a tar.gz archive of the files attached to a given DocDB document.

Args:
    doc_id (int): DocDB document ID
    filename: Name under which the downloaded archive is saved inside dest
    dest: Destination directory

Returns:
    None

Definition at line 230 of file bp_docdb.py.

References str.format() (Python built-in), open(), and bp_docdb.DocDB.uri.

230  def DownloadArchive(self, doc_id, filename, dest):
231  """Download archive of files from a given DocDB document.
232 
233  Args:
234  doc_id (int): DocDB document ID
235  dest: Destination directory
236 
237 
238  Returns:
239  NONE
240 
241  """
242  r = requests.get(self.uri + 'RetrieveArchive?docid={}&type=tar.gz'.format(doc_id), auth=(config.DOCDB_USER, config.DOCDB_PWD))
243  destination = '{}/{}'.format(dest, filename)
244  with open(destination, "wb") as d:
245  d.write(r.content)
246 
def DownloadArchive(self, doc_id, filename, dest)
Definition: bp_docdb.py:230
std::string format(const int32_t &value, const int &ndigits=8)
Definition: HexUtils.cpp:14
procfile open("FD_BRL_v0.txt")
def bp_docdb.DocDB.DownloadFile (   self,
  doc_id,
  filename,
  dest 
)
Download a given file from a given DocDB document.

Args:
    doc_id (int): DocDB document ID
    filename: Full filename to download
    dest: Destination directory


Returns:
    None

Definition at line 212 of file bp_docdb.py.

References str.format() (Python built-in), open(), and bp_docdb.DocDB.uri.

212  def DownloadFile(self, doc_id, filename, dest):
213  """Download a given file from a given DocDB document.
214 
215  Args:
216  doc_id (int): DocDB document ID
217  filename: Full filename to download
218  dest: Destination directory
219 
220 
221  Returns:
222  NONE
223 
224  """
225  r = requests.get(self.uri + 'RetrieveFile?docid={}&filename={}'.format(doc_id, filename), auth=(config.DOCDB_USER, config.DOCDB_PWD))
226  with open(dest + '/{}'.format(filename), "wb") as d:
227  d.write(r.content)
228 
229 
def DownloadFile(self, doc_id, filename, dest)
Definition: bp_docdb.py:212
std::string format(const int32_t &value, const int &ndigits=8)
Definition: HexUtils.cpp:14
procfile open("FD_BRL_v0.txt")
def bp_docdb.DocDB.GetDocIDsByCategory (   self,
  cat_id,
  full_lookback 
)
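Retrieve the list of DocDB documents filed under a given DocDB category ID (subtopics included).

Args:
    cat_id (int): DocDB category ID
    full_lookback: If false, the search is restricted with an after-date filter set to one day before the current time; if true, no date filter is applied

Returns:
    List of dicts, one per document, with keys 'id' (document ID), 'revision' (revision number) and 'modtime' (modified time). The list is empty when the search returns no documents.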

Definition at line 174 of file bp_docdb.py.

References int and str (Python built-ins), and bp_docdb.DocDB.uri.

174  def GetDocIDsByCategory(self, cat_id, full_lookback):
175  """Retrieve list of DocDB IDs for a given DocDB category ID.
176 
177  Args:
178  cat_id (int): DocDB category ID
179 
180  Returns:
181  List(Document ID, revision number, modified time)
182 
183  """
184  # Make HTTP request
185  lookback = datetime.strftime(datetime.now() - timedelta(1), '&afterday=%d&aftermonth=%b&afteryear=%Y')
186  request_url = self.uri + 'Search?topics=' + str(cat_id) + '&includesubtopics=1&outformat=xml'
187  if not full_lookback: request_url += lookback
188  response = requests.get(request_url, auth=(config.DOCDB_USER, config.DOCDB_PWD))
189  data = xmltodict.parse(response.content)['docdb']
190 
191  # Extract document IDs for this category
192  documents = []
193  if data.get('document') == None:
194  return documents
195 
196  if type(data['document']) == list:
197  docs = data['document']
198  else:
199  docs = [data['document']]
200 
201  for doc in docs:
202  documents.append(
203  {
204  'id': int(doc['docrevision']['@docid']),
205  'revision': int(doc['docrevision']['@version']),
206  'modtime': doc['docrevision']['@modified']
207  }
208  )
209 
210  return documents
211 
def GetDocIDsByCategory(self, cat_id, full_lookback)
Definition: bp_docdb.py:174
def bp_docdb.DocDB.GetDocument (   self,
  id 
)
Retrieve metadata for a DocDB document given a DocDB ID.

Args:
    id (int): DocDB ID

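Returns:
    Document: built from the returned revision's metadata (revision number, modification time, authors, first author, submitter, modify groups, categories, files, title, abstract, and the is_blessed / is_dep flags)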

Definition at line 80 of file bp_docdb.py.

References int and str (Python built-ins), and bp_docdb.DocDB.uri.

80  def GetDocument(self, id):
81  """Retrieve metadata for a DocDB document given a DocDB ID.
82 
83  Args:
84  id (int): DocDB ID
85 
86  Returns:
87  Document
88 
89  """
90 
91  # Make HTTP request
92  response = requests.get(self.uri + 'ShowDocument?docid=' + str(id) + '&outformat=xml', auth=(config.DOCDB_USER, config.DOCDB_PWD))
93  data = xmltodict.parse(response.content)['docdb']['document']
94 
95  # HTML parser is used to convert hex characters in the XML response to Unicode
96  parser = HTMLParser.HTMLParser()
97 
98  # Extract document metadata
99  rev = int(data['docrevision']['@version'])
100  modtime = data['docrevision']['@modified']
101  authors = [data['docrevision']['author']] if (type(data['docrevision']['author']) is not list) else data['docrevision']['author']
102  # Use only first author if there are multiple
103  first_author_id = int(authors[0]['@id']) if (type(authors) is list) else int(authors['@id'])
104  first_author = authors[0]['fullname'] if (type(authors) is list) else authors['fullname']
105  submitters = data['docrevision']['submitter']
106  # Use only first submitter if there are multiple
107  submitter_id = int(submitters[0]['@id']) if (type(submitters) is list) else int(submitters['@id'])
108  submitter = submitters[0]['fullname'] if (type(submitters) is list) else submitters['fullname']
109  modifygroups = data['docrevision']['modifygroup']
110  modifygroup = [mod['name'] for mod in modifygroups] if (type(modifygroups) is list) else [modifygroups['name']]
111  categories = []
112  files = []
113  title = parser.unescape(data['docrevision']['title'])
114  abstract = parser.unescape(data['docrevision']['abstract']) if data['docrevision']['abstract'] is not None else ''
115  is_blessed = False
116  is_dep = False
117 
118  # Deal with categories
119  if ('topic' not in data['docrevision']):
120  categories = None
121 
122  elif (type(data['docrevision']['topic']) is list):
123  for topic in data['docrevision']['topic']:
124  category = {}
125  category['id'] = int(topic['@id'])
126  if category['id'] == 370:
127  is_blessed = True
128  if category['id'] == 422:
129  is_dep = True
130  category['name'] = topic['name']
131  category['description'] = topic['description']
132  if ('@parentid' in topic):
133  category['p_id'] = int(topic['@parentid'])
134  else:
135  category['p_id'] = ''
136  categories.append(category)
137 
138  else:
139  category = {}
140  topic = data['docrevision']['topic']
141  category['id'] = int(topic['@id'])
142  if category['id'] == 370:
143  is_blessed = True
144  if category['id'] == 422:
145  is_dep = True
146  category['name'] = topic['name']
147  category['description'] = topic['description']
148  if ('@parentid' in topic):
149  category['p_id'] = int(topic['@parentid'])
150  else:
151  category['p_id'] = ''
152  categories.append(category)
153 
154  # Deal with files
155  if (type(data['docrevision']['file']) is list):
156  for item in data['docrevision']['file']:
157  file = {}
158  file['href'] = item['@href']
159  file['id'] = int(item['@id'])
160  file['name'] = item['name']
161  files.append(file)
162 
163  else:
164  file = {}
165  item = data['docrevision']['file']
166  file['href'] = item['@href']
167  file['id'] = int(item['@id'])
168  file['name'] = item['name']
169  files.append(file)
170 
171  return Document(id, rev, modtime, authors, first_author_id, first_author, submitter_id, submitter, modifygroup, categories, files, title, abstract, is_blessed, is_dep)
172 
173 
def GetDocument(self, id)
Definition: bp_docdb.py:80

Member Data Documentation


The documentation for this class was generated from the following file: