#!/usr/bin/python
"""Aggregate raw station measurement files into daily mean files.

The script watches the raw-data directory named in the configuration file.
Whenever the directory holds ``*data.csv`` files from at least two different
days, the oldest day is considered complete: the column means of each of its
files are appended to ``<data_upload><YYYYMMDD>000000_<origin>_data_mean.csv``
and the processed raw files are moved to ``<data_archive>YYYY/MM/DD/``.

Usage: script CONFIG_FILE

The configuration file must provide the keys ``data_path``, ``data_archive``,
``data_upload`` and ``origin``.  The three path values are concatenated
directly with file names, so they are expected to end with a path separator.
"""

import pandas as pd
import sys
import os
import time
import datetime

# Header line written at the top of every mean file.
CSV_HEADER = "Date;LevelMeter;Temperature1;Conductivity;Salinity;TDSKcl;Temperature2;pH;Redox"
# Idle time in seconds between two scans when there is nothing to process.
SLEEP_TIME = 1000


def parse_day(name):
    """Return the day encoded in the first eight characters of *name*.

    Raises ValueError when the prefix is not a valid ``YYYYMMDD`` date.
    """
    return datetime.datetime.strptime(name[:8], "%Y%m%d")


def list_data_files(data_source):
    """Return the sorted names in *data_source* that end with ``data.csv``."""
    return sorted(name for name in os.listdir(data_source)
                  if name.endswith("data.csv"))


def select_oldest_day(data_files):
    """Return the files of the oldest day if that day is complete.

    *data_files* must be sorted by name.  The oldest day counts as complete
    only when the newest file belongs to a later day; otherwise (or when
    fewer than two files are present) an empty list is returned.

    Raises ValueError when a file name does not start with a valid date.
    """
    if len(data_files) < 2:
        return []
    oldest_day = parse_day(data_files[0])
    if parse_day(data_files[-1]) <= oldest_day:
        return []  # everything is from the same day, which may still grow
    return [name for name in data_files if parse_day(name) == oldest_day]


def column_means(path):
    """Return the mean of every column of the CSV at *path*, rounded to 3.

    The file is read as ';'-separated data without a header row.
    """
    frame = pd.read_csv(path, sep=';', header=None)
    return [round(frame[column].mean(), 3) for column in frame.columns]


def write_mean_rows(path, mean_rows, header=CSV_HEADER):
    """Append *mean_rows* to the mean file at *path*, keeping one header.

    Existing content is preserved.  The header is placed on the first line
    and is not repeated if the file already starts with it.  Everything is
    written in a single pass so a failure cannot leave a half-written file.
    """
    header_line = header.rstrip('\r\n') + '\n'
    previous = ''
    if os.path.exists(path):
        with open(path, 'r') as existing:
            previous = existing.read()
        if previous.startswith(header_line):
            previous = previous[len(header_line):]
    # One frame per row keeps each row's own width, as separate appends would.
    new_rows = ''.join(
        pd.DataFrame([row]).to_csv(sep=';', header=False, index=False)
        for row in mean_rows)
    with open(path, 'w') as out:
        out.write(header_line + previous + new_rows)


def archive_files(data_source, data_archive, names):
    """Move *names* from *data_source* to ``<data_archive>YYYY/MM/DD/``."""
    for name in names:
        directory = (data_archive + name[:4] + "/" + name[4:6] + "/"
                     + name[6:8] + "/")
        if not os.path.exists(directory):
            os.makedirs(directory)
        os.rename(data_source + name, directory + name)


def aggregate_day(data_source, data_archive, data_upload, station_name,
                  day_files):
    """Write the mean file for *day_files* and archive the raw files.

    All means are computed before anything is written, so an unreadable raw
    file (ValueError from the CSV reader) leaves no partial output behind.
    Returns the path of the mean file.
    """
    mean_path = (data_upload + day_files[0][:8] + '000000_' + station_name
                 + '_data_mean.csv')
    mean_rows = [column_means(data_source + name) for name in day_files]
    write_mean_rows(mean_path, mean_rows)
    archive_files(data_source, data_archive, day_files)
    return mean_path


def main(argv=None):
    """Read the configuration and process complete days forever."""
    if argv is None:
        argv = sys.argv
    if len(argv) != 2:
        sys.stderr.write("Invalid number of arguments.\n")
        sys.stderr.write("Usage: %s CONFIG_FILE\n" % (argv[0], ))
        sys.exit(1)

    # Imported here so the helpers above stay importable without mlabutils.
    from mlabutils import ejson

    config = ejson.Parser().parse_file(argv[1])
    data_source = config['data_path']        # raw data
    data_archive = config['data_archive']    # archive for raw data
    data_upload = config['data_upload']      # computed mean values for upload
    station_name = config['origin']

    loop = 1
    while True:
        processed = False
        try:
            print("Start")
            day_files = select_oldest_day(list_data_files(data_source))
            if day_files:
                print("Computing...")
                print(loop)
                loop += 1
                aggregate_day(data_source, data_archive, data_upload,
                              station_name, day_files)
                processed = True
        except ValueError as err:
            # Malformed file name or unreadable CSV: report it and back off
            # instead of retrying in a tight loop.
            print(err)
        if not processed:
            time.sleep(SLEEP_TIME)  # long sleep, nothing to process


if __name__ == "__main__":
    main()