PlotUtilityNew.py
Go to the documentation of this file.
1 #!/usr/bin/python
2 import pandas as pd
3 import numpy as np
4 import time
5 import datetime
6 from datetime import datetime, date, time, timedelta
7 import glob
8 import os
# Spacing, in minutes, between two consecutive data-file time slots.
time_gap = 30

#-------------------------

# Variable names whose CSV files are looked up under 'level0/csv'.
# The three MMxHV1 names appear twice; this is harmless for the membership
# tests performed on this list and is kept so the list content is unchanged.
level0_list = [
    'MM1HV1', 'MM2HV1', 'MM3HV1',
    'MM1XAV', 'MM2XAV', 'MM3XAV',
    'MM1YAV', 'MM2YAV', 'MM3YAV',
    'MGSMPD104', 'MGSMPD105', 'MGSMPD106',
    'TORTGT', 'TRTGTD',
    'MM1GF', 'MM2GF', 'MM3GF',
    'MM1RTD', 'MM2RTD', 'MM3RTD',
    'MM1GPR', 'MM2GPR', 'MM3GPR',
    'MGSMM1', 'MGSMM2', 'MGSMM3',
    'MM1PRC', 'MM2PRC', 'MM3PRC',
    'MM1HV1', 'MM1HV2', 'MM1HV3',
    'MM2HV1', 'MM2HV2', 'MM2HV3',
    'MM3HV1', 'MM3HV2', 'MM3HV3',
    'MM1CNT', 'MM2CNT', 'MM3CNT',
    'MM1INT', 'MM2INT', 'MM3INT',
    'VPTGT', 'HPTGT', 'VP121', 'HP121',
    'NSLINA', 'NSLINB', 'NSLINC', 'NSLIND',
    'TGTT1', 'TGTT2', 'TGTT3', 'TGTT4',
    'MM1COR', 'MM2COR', 'MM3COR',
    'TGTPWR',
    'THPTTW', 'THPTCW', 'THPTBW', 'THPTHS',
    'MTGTVS', 'MTGTHS',
]

# Variable names whose CSV files are looked up under 'level1/csv'.
level1_list = [
    'MM1COR_CAL', 'MM2COR_CAL', 'MM3COR_CAL',
    'MM1INT_CAL', 'MM2INT_CAL', 'MM3INT_CAL',
    'MM1PRC_CAL', 'MM2PRC_CAL', 'MM3PRC_CAL',
    'MM1CNT_CAL', 'MM2CNT_CAL', 'MM3CNT_CAL',
    'MM1_sig_calib', 'MM2_sig_calib', 'MM3_sig_calib',
]
20 
21 #=========================================================
22 
23 #--------------------------------------
def get_time_array(start_date, start_hr, tot_30min):
    """Build the start times of a run of consecutive time slots.

    start_date -- day of the first slot, formatted 'YYYY-MM-DD'
    start_hr   -- offset in hours from midnight of start_date to the first slot
    tot_30min  -- number of slots to generate

    Returns a list of datetime objects spaced time_gap minutes apart; the
    list is empty when tot_30min is zero or negative.
    """
    midnight = datetime.strptime(start_date + ' 00:00:00', '%Y-%m-%d %H:%M:%S')
    first_slot = midnight + timedelta(hours=start_hr)
    return [first_slot + timedelta(minutes=time_gap * step) for step in range(tot_30min)]
36 
def get_time_info(d1, t1, d2, t2):
    """Turn a start/end date-time pair into slot-generation parameters.

    d1, d2 -- start and end dates, formatted 'YYYY-MM-DD'
    t1, t2 -- start and end times of day, formatted 'HH:MM:SS'

    Returns (start_date, start_hr, tot_30min): the start date (d1 unchanged),
    the start hour truncated to a whole hour, and the number of
    time_gap-minute slots from that hour up to one hour past the truncated
    hour of t2 on d2.

    Raises ValueError if any of the strings does not match its format.
    """
    stamp_format = '%Y-%m-%d %H:%M:%S'
    day_start = datetime.strptime(d1 + ' 00:00:00', stamp_format)
    # Parsed only so that a malformed d2 is rejected here; the value is unused.
    datetime.strptime(d2 + ' 23:59:59', stamp_format)
    begin = datetime.strptime(d1 + ' ' + t1, stamp_format)
    finish = datetime.strptime(d2 + ' ' + t2, stamp_format)

    # Both offsets are measured from midnight of the start date.
    to_begin = begin - day_start
    to_finish = finish - day_start

    start_hr = int(to_begin.seconds / (60 * 60))
    whole_days = to_finish.days
    # One extra hour is added after truncating the end time to a whole hour.
    end_hr = int(to_finish.seconds / (60 * 60)) + 1

    slots_from_hours = (end_hr - start_hr) * 60 / time_gap
    slots_from_days = whole_days * 24 * 60 / time_gap
    tot_30min = int(slots_from_hours + slots_from_days)

    return d1, start_hr, tot_30min
59 
def find_files(d, t, l, f_name, base_dir='/pnfs/nova/users/athula/tmp_MMdata/'):
    """Locate the file of one time slot whose name ends with f_name.

    The directory searched is <base_dir>/<d>/<t>/<l>/ and its path is printed
    before the search, as is every match found.

    d        -- name of the date directory
    t        -- name of the time directory
    l        -- level sub-path below the time directory (may contain '/')
    f_name   -- suffix the file name has to end with
    base_dir -- root of the data tree; the default is the location that used
                to be hard-coded here

    Returns the full path of the matching file. When several entries match,
    the alphabetically last one is returned so the result does not depend on
    the order in which the operating system lists the directory. When the
    directory cannot be listed or nothing matches, the placeholder string
    'csv' is returned.
    """
    # The trailing '' keeps the trailing separator in the printed path.
    path = os.path.join(base_dir, str(d), str(t), str(l), '')
    print(path)

    file_out = 'csv'
    try:
        entries = sorted(os.listdir(path))
    except OSError:
        # Missing or unreadable slot directories are expected: not every
        # time slot has data. Anything else is a real error and propagates.
        return file_out

    for entry in entries:
        if entry.endswith(f_name):
            file_out = os.path.join(path, entry)
            print(file_out)
    return file_out
76 
def pull_data(d1, t1, d2, t2, var_name):
    """Read and concatenate the CSV files of one variable over a time range.

    d1, t1   -- start date ('YYYY-MM-DD') and time ('HH:MM:SS')
    d2, t2   -- end date and time, same formats
    var_name -- variable to load; names in level0_list are searched under
                'level0/csv', names in level1_list under 'level1/csv'

    Returns one DataFrame holding the rows of every slot file found, in slot
    order, with a fresh 0..n-1 index.

    Raises ValueError when no file was found for any slot in the range.
    """
    start_date, start_hr, tot_30min = get_time_info(d1, t1, d2, t2)
    time_array = get_time_array(start_date, start_hr, tot_30min)

    # Unknown variables keep the placeholder level/suffix, which matches no
    # existing directory, so they simply yield no files.
    level = 'a'
    csv_file = '_'
    if var_name in level0_list:
        level = 'level0/csv'
        csv_file = '_' + var_name + '.csv'
    elif var_name in level1_list:
        level = 'level1/csv'
        csv_file = '_' + var_name + '.csv'

    frames = []
    for i, slot in enumerate(time_array):
        date_i = slot.strftime('%Y-%m-%d')
        time_i = slot.strftime('%H_%M_%S')
        print(i, date_i, time_i)
        found = find_files(date_i, time_i, level, csv_file)
        # find_files returns the bare string 'csv' when nothing matched; that
        # placeholder must never be opened, even if the working directory
        # happens to contain an entry of that name.
        if found != 'csv' and os.path.isfile(found):
            frames.append(pd.read_csv(found, sep='\t'))

    if not frames:
        raise ValueError(
            'pull_data: no files found for %s between %s %s and %s %s'
            % (var_name, d1, t1, d2, t2)
        )
    return pd.concat(frames, ignore_index=True)
108 
109 #--------------
def file_list(d1, t1, d2, t2, var_name):
    """List the existing CSV files of one variable over a time range.

    d1, t1   -- start date ('YYYY-MM-DD') and time ('HH:MM:SS')
    d2, t2   -- end date and time, same formats
    var_name -- variable to look up; names in level0_list are searched under
                'level0/csv', names in level1_list under 'level1/csv'

    Returns the paths of the files found, in slot order. Slots without a
    file are skipped, so the list can be shorter than the number of slots.
    """
    start_date, start_hr, tot_30min = get_time_info(d1, t1, d2, t2)
    time_array = get_time_array(start_date, start_hr, tot_30min)

    # Unknown variables keep the placeholder level/suffix, which matches no
    # existing directory, so they yield an empty list.
    level = 'a'
    csv_file = '_'
    if var_name in level0_list:
        level = 'level0/csv'
        csv_file = '_' + var_name + '.csv'
    elif var_name in level1_list:
        level = 'level1/csv'
        csv_file = '_' + var_name + '.csv'

    found_paths = []
    for slot in time_array:
        date_i = slot.strftime('%Y-%m-%d')
        time_i = slot.strftime('%H_%M_%S')
        candidate = find_files(date_i, time_i, level, csv_file)
        # 'csv' is the not-found placeholder of find_files; it is excluded
        # explicitly so that a 'csv' entry in the working directory is never
        # reported as a data file.
        if candidate != 'csv' and os.path.isfile(candidate):
            found_paths.append(candidate)

    return found_paths
140 
141 
142 #------------------
def pull_MultiData(d1, t1, d2, t2, var_array):
    """Collect several variables over a time range into one DataFrame.

    d1, t1    -- start date ('YYYY-MM-DD') and time ('HH:MM:SS')
    d2, t2    -- end date and time, same formats
    var_array -- sequence of variable names to load with pull_data

    Returns a DataFrame with the 'date' and 'time' columns of the first
    variable followed by one column per variable, named after the variable.
    The values come from the 'MMCOR_CAL' column for the MMxCOR_CAL variables
    and from 'val' for all others. Variables that cannot be loaded, or whose
    data lacks the expected column, are left out; the result is empty when
    nothing could be loaded. The result is also printed.
    """
    cor_cal_names = ('MM1COR_CAL', 'MM2COR_CAL', 'MM3COR_CAL')
    df_result = pd.DataFrame()

    for position, var_name in enumerate(var_array):
        try:
            df_i = pull_data(d1, t1, d2, t2, var_name)
        except Exception as exc:
            # Best effort: one unavailable variable must not abort the rest,
            # but the reason is reported instead of being silently dropped.
            print('pull_MultiData: no data for', var_name, '-', exc)
            continue

        # The time stamps are taken from the first variable only; its frame
        # is reused here instead of being read from disk a second time.
        if position == 0:
            for stamp_col in ('date', 'time'):
                if stamp_col in df_i.columns:
                    df_result[stamp_col] = df_i[stamp_col]

        value_col = 'MMCOR_CAL' if var_name in cor_cal_names else 'val'
        if value_col in df_i.columns:
            df_result[var_name] = df_i[value_col]

    print(df_result)
    return df_result
168 
169 #------------------------------------------
def _norm_slot_key(path):
    """Return the (date, time) directory names of a path built by find_files."""
    parts = os.path.normpath(path).split(os.sep)
    # Layout is <root>/<date>/<time>/<levelN>/csv/<file>, so the date and time
    # directories are the 5th and 4th components counted from the end.
    return tuple(parts[-5:-3])


def _norm_timestamps(frame):
    """Combine the 'date' and 'time' text columns of frame into datetimes."""
    return pd.to_datetime(frame['date'] + ' ' + frame['time'])


def _norm_trim_longer(longer, shorter):
    """Drop rows from longer until it has as many rows as shorter.

    Rows are compared position by position; the first row of longer whose
    time stamp differs from its partner by more than one second is treated
    as the surplus row and removed. When all compared rows agree, the
    surplus rows are at the end and are cut off.
    """
    longer = longer.reset_index(drop=True)
    shorter = shorter.reset_index(drop=True)
    while len(longer) > len(shorter):
        overlap = len(shorter)
        gap_s = (_norm_timestamps(longer.iloc[:overlap])
                 - _norm_timestamps(shorter)) / np.timedelta64(1, 's')
        misaligned = np.flatnonzero((gap_s.abs() > 1).to_numpy())
        if len(misaligned) > 0:
            bad_row = int(misaligned[0])
            print(bad_row, gap_s.iloc[bad_row])
            longer = longer.drop(index=bad_row).reset_index(drop=True)
        else:
            longer = longer.iloc[:overlap]
    return longer


def pull_NormData(d1, t1, d2, t2, var_name1, var_name2):
    """Load two variables over a time range, aligned row by row.

    d1, t1    -- start date ('YYYY-MM-DD') and time ('HH:MM:SS')
    d2, t2    -- end date and time, same formats
    var_name1 -- first variable; its values are read from 'MMCOR_CAL' for
                 the MMxCOR_CAL variables and from 'val' otherwise
    var_name2 -- second variable; its values are read from 'val'

    Only time slots for which both variables have a file are used. When the
    two files of a slot differ in row count, surplus rows are removed from
    the longer one before the data is combined. A slot whose files cannot
    be read or aligned is reported and skipped.

    Returns a DataFrame with the columns 'time1', 'time2' (time stamps of
    the first and second variable), 'diff[s]' (time2 - time1 in seconds),
    var_name2 and var_name1.

    Raises ValueError when no slot has usable data for both variables.
    """
    list1 = file_list(d1, t1, d2, t2, var_name1)
    list2 = file_list(d1, t1, d2, t2, var_name2)

    # file_list skips slots without a file, so the two lists cannot be paired
    # by position; they are matched on the (date, time) slot instead.
    second_by_slot = {_norm_slot_key(path): path for path in list2}

    df1_add = []
    df2_add = []
    for path1 in list1:
        path2 = second_by_slot.get(_norm_slot_key(path1))
        if path2 is None:
            continue
        try:
            frame1 = pd.read_csv(path1, sep='\t')
            frame2 = pd.read_csv(path2, sep='\t')
            if len(frame1) > len(frame2):
                frame1 = _norm_trim_longer(frame1, frame2)
            elif len(frame2) > len(frame1):
                frame2 = _norm_trim_longer(frame2, frame1)
        except Exception as exc:
            # Best effort per slot: report the problem and go on.
            print('pull_NormData: skipping', path1, 'and', path2, '-', exc)
            continue
        df1_add.append(frame1)
        df2_add.append(frame2)

    if not df1_add:
        raise ValueError(
            'pull_NormData: no common data files for %s and %s between %s %s and %s %s'
            % (var_name1, var_name2, d1, t1, d2, t2)
        )

    df1_result = pd.concat(df1_add, ignore_index=True)
    df2_result = pd.concat(df2_add, ignore_index=True)

    df_result = pd.DataFrame()
    df_result['time1'] = _norm_timestamps(df1_result)
    df_result['time2'] = _norm_timestamps(df2_result)
    df_result['diff[s]'] = (df_result['time2'] - df_result['time1']) / np.timedelta64(1, 's')

    # NOTE(review): var_name2 is always read from 'val', also for the
    # MMxCOR_CAL names - confirm this is intended.
    df_result[var_name2] = df2_result['val']
    if var_name1 in ['MM1COR_CAL', 'MM2COR_CAL', 'MM3COR_CAL']:
        df_result[var_name1] = df1_result['MMCOR_CAL']
    else:
        df_result[var_name1] = df1_result['val']

    return df_result
250 
void split(double tt, double *fr)
def find_files(d, t, l, f_name)
def get_time_array(start_date, start_hr, tot_30min)
void abs(TH1 *hist)
def pull_data(d1, t1, d2, t2, var_name)
def get_time_info(d1, t1, d2, t2)
if(dump)
def pull_NormData(d1, t1, d2, t2, var_name1, var_name2)
bool print
def pull_MultiData(d1, t1, d2, t2, var_array)
def file_list(d1, t1, d2, t2, var_name)