levelDB2LMDB.py
Go to the documentation of this file.
1 import sys
2 import caffe
3 import numpy as np
4 import leveldb
5 import lmdb
6 import math
7 
# Number of records written per LMDB write transaction.
BATCH_SIZE = 10000
# Maximum LMDB map size in bytes; must be an integer, not a float.
MAP_SIZE = int(1e12)


def copy_records(records, env, batch_size=BATCH_SIZE):
    """Copy (key, value) pairs into an LMDB environment in batched commits.

    Each key is parsed as an integer and re-encoded as a zero-padded,
    8-character ASCII string; values are stored unchanged.

    Args:
        records: iterable of (key, value) pairs; ``int(key)`` must succeed.
        env: object exposing ``begin(write=True)`` that returns a transaction
            with ``put``, ``commit`` and ``abort`` (e.g. an lmdb Environment).
        batch_size: number of records written per transaction.

    Returns:
        The total number of records copied.
    """
    count = 0
    txn = env.begin(write=True)
    try:
        for key, value in records:
            str_id = '{:08}'.format(int(key))
            txn.put(str_id.encode('ascii'), value)
            count += 1

            if count % batch_size == 0:
                # Progress line shows the 0-based index of the last record
                # in the batch, matching the historical output.
                print('{} {}'.format(count - 1, key))
                txn.commit()
                txn = env.begin(write=True)
    except BaseException:
        # Do not leave a half-written transaction open on failure.
        txn.abort()
        raise

    # `count` is the number of records written, so a partial batch is pending
    # exactly when it is not a multiple of batch_size. (The previous check
    # used count + 1 and silently dropped the final batch whenever the total
    # was one short of a multiple of batch_size.)
    if count % batch_size != 0:
        txn.commit()
        print('last batch')
    else:
        # Nothing pending: release the empty write transaction.
        txn.abort()
    print(count)
    return count


def main():
    """Convert the 'TestLevelDB' LevelDB database into the 'TestLMDB' LMDB."""
    db = leveldb.LevelDB("TestLevelDB")
    env = lmdb.open('TestLMDB', map_size=MAP_SIZE)
    try:
        copy_records(db.RangeIter(), env, BATCH_SIZE)
    finally:
        env.close()


if __name__ == "__main__":
    main()

# NOTE(review): disabled legacy snippet, previously kept inside a string
# literal and never executed. It references names that are not defined in this
# script (`frequencies`, `nomLog`). The indentation below is reconstructed and
# should be confirmed before reviving it.
#
# datum = caffe.proto.caffe_pb2.Datum()
# datum.ParseFromString(value)
# label = int(datum.label)
# if count%10000 == 0:
#     print count, label
# frequencies[label] += 1
# count += 1
#
# #frequencies[i] = 1 - frequencies[i]
# minVal = 9999
# for i in range(392):
#     frequencies[i] /= count
#     if frequencies[i] <= minVal and frequencies[i] > 0:
#         minVal = frequencies[i]
#
# print frequencies
#
# for i in range(392):
#     if frequencies[i] < minVal:
#         frequencies[i] = minVal
#     frequencies[i] = -math.log(frequencies[i])/nomLog
#
# print frequencies
#
# summatrix = np.zeros((392, 392), dtype = 'f4')
# for i in range(392):
#     summatrix[i][i] = frequencies[i]
#
#     # for j in range(392):
#     #     if frequencies[i] == 0 or frequencies[j] == 0:
#     #         summatrix[i][j] = 0
#     #     else:
#     #         summatrix[i][j] = 0.5*(frequencies[i]*math.log(frequencies[i]/frequencies[j]) + frequencies[j]*math.log(frequencies[j]/frequencies[i]))
#     # #
#
# print summatrix
#
# blob = caffe.io.array_to_blobproto(summatrix.reshape(1,1,392,392))
#
# with open('infoGain.binaryproto', 'wb') as f:
#     f.write(blob.SerializeToString())
77 
std::string format(const int32_t &value, const int &ndigits=8)
Definition: HexUtils.cpp:14