-
Notifications
You must be signed in to change notification settings - Fork 0
/
cmx-anonymiser.py
387 lines (369 loc) · 20.4 KB
/
cmx-anonymiser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
# Author Leigh Jewell
# License https://github.com/leigh-jewell/cmx-anonymiser/blob/master/LICENSE
# Github repository: https://github.com/leigh-jewell/cmx-anonymiser
# Try and load in all the required modules.
try:
import sys
import configparser
import requests
# Ignore HTTPS warnings if they appear
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
from requests.auth import HTTPBasicAuth
from collections import defaultdict
import csv
import hashlib
from datetime import datetime
from datetime import timedelta
import os
import sched, time
from math import ceil
except ImportError:
print('Error: Missing one of the required modules. Check the docs.')
sys.exit()
# Constants
# CMX API URL prefix; could be changed to "https://".
url_prefix = "http://"

# Defaults for every setting. They are assigned BEFORE config.ini is read so
# that the rest of the module (log directory setup, logging(), main()) still
# has usable values when the file is missing or contains an invalid value.
# cmx, username and password have no sensible default; they stay None and
# configError stays True unless config.ini supplies them.
cmx = None
username = None
password = None
timeout = 4
max_retries = 5
sleep_between_retries = 3
url_clients = "/api/location/v1/clients/"
url_aps = "/api/config/v1/aps/"
url_client_count = "/api/location/v2/clients/count"
page_size = 1000
max_pages = 1000
output_dir = os.path.join(os.getcwd(), 'output')
log_dir = os.path.join(os.getcwd(), 'logs')
log_console = False
days = 5
schedule = '9:00,12:00,15:00,18:00'
# NOTE(review): this fallback salt is a fixed value visible in the source;
# presumably each deployment should set its own [privacy] salt - confirm.
salt = 'b1303114888c11e79e6a448500844918'
# Only cleared once the whole configuration has been read successfully.
configError = True

# Read configuration from config.ini into the module-level variables above.
# The file is expected in the current working directory.
config = configparser.ConfigParser()
if os.path.isfile("config.ini"):
    try:
        config.read("config.ini")
        cmx = config.get('cmx', 'cmx_ip')
        username = config.get('cmx', 'username')
        password = config.get('cmx', 'password')
        timeout = config.getint('cmx', 'timeout', fallback=timeout)
        max_retries = config.getint('cmx', 'retry', fallback=max_retries)
        sleep_between_retries = config.getint('cmx', 'retry_sleep', fallback=sleep_between_retries)
        url_clients = config.get('cmx', 'url_clients', fallback=url_clients)
        url_aps = config.get('cmx', 'url_aps', fallback=url_aps)
        url_client_count = config.get('cmx', 'url_client_count', fallback=url_client_count)
        page_size = config.getint('cmx', 'page_size', fallback=page_size)
        # Out-of-range page sizes are replaced with 1000.
        if page_size > 1000 or page_size <= 0:
            page_size = 1000
        max_pages = config.getint('cmx', 'max_pages', fallback=max_pages)
        output_dir = config.get('output', 'output_dir', fallback=output_dir)
        log_dir = config.get('output', 'log_dir', fallback=log_dir)
        log_console = config.getboolean('output', 'log_console', fallback=log_console)
        days = config.getint('schedule', 'days', fallback=days)
        schedule = config.get('schedule', 'hours', fallback=schedule)
        salt = config.get('privacy', 'salt', fallback=salt)
        configError = False
    except configparser.Error as e:
        print("Error with config.ini, missing part of the file: ", e)
        configError = True
    except ValueError as e:
        # getint()/getboolean() raise ValueError for values they cannot convert.
        print("Error with config.ini, invalid value: ", e)
        configError = True
else:
    print("config.ini missing from current directory.")
    configError = True

# Set up the path and filename for the log file.
if not os.path.exists(log_dir):
    try:
        os.makedirs(log_dir)
    except OSError as e:
        print('Error - log directory {} does not exist, and cannot create it {}'.format(log_dir, e))
# Create a unique file name by appending the date and time to the end.
logFile = 'cmx' + datetime.now().strftime('-%d-%m-%y-%H-%M.log')
fulllogFile = os.path.join(log_dir, logFile)
def logging(info):
    """Append a timestamped message to the log file.

    The message is also printed to the console when ``log_console`` is true.
    A failure to open or write the log file is reported on the console and
    otherwise ignored.

    Note: this function has the same name as the standard-library ``logging``
    module, which this file does not import.

    Args:
        info: Message text; it is concatenated to the timestamp string.
    """
    stamped = datetime.now().strftime('%d/%m/%y %H:%M.%S.%f: ') + info
    if log_console:
        print(stamped)
    try:
        # newline='\n' keeps line endings untranslated on every platform.
        with open(fulllogFile, mode='a', newline='\n', encoding='utf-8') as log_handle:
            log_handle.write(stamped + '\n')
    except OSError as err:
        print('Error - tried to open file for writing but something went wront {}'.format(err))
def deidentifyMac(mac):
    """Return a token that stands in for a MAC address.

    The token is the SHA-256 hex digest of the configured ``salt`` followed
    by ``mac``, both encoded with ``str.encode()``.

    Args:
        mac: MAC address string exactly as received; no normalisation is done.

    Returns:
        The hexadecimal digest string.
    """
    hasher = hashlib.sha256()
    hasher.update(salt.encode())
    hasher.update(mac.encode())
    return hasher.hexdigest()
def requestCMX(URL, response_dict):
    """Issue a GET request to CMX, retrying on errors and non-200 replies.

    Up to ``max_retries`` attempts are made. After a non-200 status the
    function sleeps ``sleep_between_retries * attempt`` seconds; after a
    request exception it sleeps ``sleep_between_retries`` seconds.

    Args:
        URL: Full URL to request.
        response_dict: Dictionary to update; its ``'isError'`` key is set to
            False when a 200 reply was received and True otherwise.

    Returns:
        A two-item list ``[response, response_dict]``. ``response`` is the
        last reply received, or None when no attempt produced a reply.
        Callers should only use it when ``response_dict['isError']`` is False.
    """
    # Assume failure until a 200 arrives. This also covers max_retries <= 0,
    # where the loop body never runs and 'isError' would otherwise be unset.
    response_dict['isError'] = True
    response = None
    number_retries = 1
    while number_retries <= max_retries:
        logging("getData: Attempting to request data from cmx. Attempt number {}".format(number_retries))
        try:
            # NOTE: verify=False disables TLS certificate verification.
            response = requests.get(url=URL, auth=HTTPBasicAuth(username, password), verify=False, timeout=timeout)
        except requests.exceptions.ConnectTimeout as e:
            # Must precede ConnectionError: ConnectTimeout is a subclass of it,
            # so the handler would be unreachable if listed afterwards.
            logging("getData: Got connectTimeout from URL requests\n" + str(e))
            time.sleep(sleep_between_retries)
        except requests.exceptions.ConnectionError as e:
            logging("getData: Got connectError from URL requests\n" + str(e))
            time.sleep(sleep_between_retries)
        except requests.exceptions.HTTPError as e:
            logging("getData: Got HTTPError from URL requests\n" + str(e))
            time.sleep(sleep_between_retries)
        except requests.exceptions.RequestException as e:
            logging("getData: Got general error RequestException from URL requests\n" + str(e))
            time.sleep(sleep_between_retries)
        else:
            if response.status_code == 200:
                response_dict['isError'] = False
                return [response, response_dict]
            logging("getData: Got status code {} from CMX, need 200, will retry".format(response.status_code))
            # As the number of retries increases the sleep time increases too.
            time.sleep(sleep_between_retries * number_retries)
        number_retries += 1
    logging('getData: Something went wrong, no data returned.')
    return [response, response_dict]
def getClientCount():
    """Ask CMX how many clients it holds so the caller can work out paging.

    Returns:
        The client count as an int, or 0 when the request failed or the
        reply did not contain a usable integer ``count`` value.
    """
    URL = url_prefix + cmx + url_client_count
    logging('getClientCount: Getting client count for {}'.format(URL))
    # A defaultdict lets keys be referenced without KeyError.
    response_dict = defaultdict(list)
    response, response_dict = requestCMX(URL, response_dict)
    if response_dict['isError']:
        logging('getClientCount: Got error response from API call for client count, setting count to zero')
        return 0
    try:
        # json() raises a ValueError subclass for a non-JSON body; a missing
        # or null 'count' surfaces as KeyError / TypeError.
        client_count = int(response.json()['count'])
    except (ValueError, KeyError, TypeError) as e:
        logging('getClientCount: integer value not returned from client count: {}'.format(e))
        client_count = 0
    logging('getClientCount: Got client count of {:,}'.format(client_count))
    return client_count
# Column names written as the first row of the client CSV.
# NOTE(review): 'band' appears twice - the first is statistics.maxDetectedRssi.band,
# the second is the client's own band. Left unchanged so existing consumers of
# the CSV keep working; confirm before renaming.
_CLIENT_HEADER = (
    'hash',
    'mapHierarchyString',
    'floorRefId',
    'length',
    'width',
    'x',
    'y',
    'unit',
    'currentlyTracked',
    'confidenceFactor',
    'currentServerTime',
    'firstLocatedTime',
    'lastLocatedTime',
    'maxDetectedRssiApMacAddress',
    'band',
    'rssi',
    'lastHeardInSeconds',
    'networkStatus',
    'changedOn',
    'ssId',
    'band',
    'apMacAddress',
    'dot11Status',
    'manufacturer',
    'detectingControllers',
    'bytesSent',
    'bytesReceived',
)


def _clientRow(client):
    """Flatten one client record into _CLIENT_HEADER order, hashing its MAC address."""
    map_info = client['mapInfo']
    floor_dimension = map_info['floorDimension']
    coordinate = client['mapCoordinate']
    statistics = client['statistics']
    max_rssi = statistics['maxDetectedRssi']
    return [
        deidentifyMac(client['macAddress']),
        map_info['mapHierarchyString'],
        map_info['floorRefId'],
        floor_dimension['length'],
        floor_dimension['width'],
        coordinate['x'],
        coordinate['y'],
        coordinate['unit'],
        client['currentlyTracked'],
        client['confidenceFactor'],
        statistics['currentServerTime'],
        statistics['firstLocatedTime'],
        statistics['lastLocatedTime'],
        max_rssi['apMacAddress'],
        max_rssi['band'],
        max_rssi['rssi'],
        max_rssi['lastHeardInSeconds'],
        client['networkStatus'],
        client['changedOn'],
        client['ssId'],
        client['band'],
        client['apMacAddress'],
        client['dot11Status'],
        client['manufacturer'],
        client['detectingControllers'],
        client['bytesSent'],
        client['bytesReceived'],
    ]


def getCMXData():
    """Fetch every page of client data from CMX and flatten it into rows.

    The number of pages is derived from the client count and ``page_size``
    and is capped at ``max_pages``. A client record that lacks an expected
    field is logged and skipped rather than aborting the whole collection.

    Returns:
        A defaultdict whose ``'data'`` key holds the header row followed by
        one row per client, ``'isError'`` holds the outcome of the most
        recent request, and ``'statusCode'`` the most recent status code.
        When there are no clients the dictionary is returned empty.
    """
    # A defaultdict lets keys be referenced without KeyError.
    response_dict = defaultdict(list)
    client_count = getClientCount()
    logging('getCMXData: Get client data for {:,} clients'.format(client_count))
    if client_count <= 0:
        logging('getCMXData: No clients so nothing to do.')
        return response_dict

    # Calculate the number of pages needed to get all the clients.
    pages = ceil(client_count / page_size)
    if pages > max_pages:
        logging('getCMXData: Calculated pages {} > than max pages {}. Will set limit to max pages.'.format(pages, max_pages))
        pages = max_pages
    logging('getCMXData: Calculated {} pages to retrieve.'.format(pages))

    for page in range(1, pages + 1):
        suffix = '/?page={}&pageSize={}'.format(page, page_size)
        URL = url_prefix + cmx + url_clients + suffix
        logging('getCMXData: Getting data for {}'.format(URL))
        # NOTE(review): requestCMX overwrites 'isError' on every call, so the
        # final value reflects only the last page requested.
        response, response_dict = requestCMX(URL, response_dict)
        if response_dict['isError']:
            continue
        logging('getCMXData: Got status code {} from CMX API (200 is good)'.format(response.status_code))
        response_dict['statusCode'] = response.status_code
        if response.status_code != 200:
            continue
        response.encoding = 'utf-8'
        try:
            clients = response.json()
        except ValueError as e:
            logging('getCMXData: Could not decode JSON for page {}: {}'.format(page, e))
            response_dict['isError'] = True
            continue
        # The header goes in once, ahead of the first page of rows.
        if not response_dict['data']:
            response_dict['data'].append(list(_CLIENT_HEADER))
        for client in clients:
            try:
                response_dict['data'].append(_clientRow(client))
            except (KeyError, TypeError) as e:
                logging('getCMXData: Skipping client record with missing or invalid field: {}'.format(e))

    # Subtract 1 for the header row; never report a negative count.
    record_count = max(len(response_dict['data']) - 1, 0)
    logging('getCMXData: Got {:,} total records from CMX, expecting {:,} clients'.format(record_count, client_count))
    return response_dict
# Column names written as the first row of the access point CSV.
# NOTE(review): the B columns are filled from apInterfaces[0] and the A columns
# from apInterfaces[1]; presumably CMX lists the radios in that order - confirm.
_AP_HEADER = (
    'radioMacAddress',
    'name',
    'x',
    'y',
    'unit',
    '802_11_BChannelNumber',
    '802_11_BTxPowerLevel',
    '802_11_AChannelNumber',
    '802_11_ATxPowerLevel',
    'floorId',
)


def _apRow(ap):
    """Flatten one AP record into _AP_HEADER order, or return None unless it has 1 or 2 interfaces."""
    interfaces = ap['apInterfaces']
    if len(interfaces) not in (1, 2):
        return None
    coordinates = ap['mapCoordinates']
    first = interfaces[0]
    if len(interfaces) == 2:
        second_channel = interfaces[1]['channelNumber']
        second_power = interfaces[1]['txPowerLevel']
    else:
        # A single-interface AP gets zeros in the second radio's columns.
        second_channel = 0
        second_power = 0
    return [
        ap['radioMacAddress'],
        ap['name'],
        coordinates['x'],
        coordinates['y'],
        coordinates['unit'],
        first['channelNumber'],
        first['txPowerLevel'],
        second_channel,
        second_power,
        ap['floorIdString'],
    ]


def getCMXAPData():
    """Fetch the access point list from CMX and flatten it into rows.

    Only APs with one or two interfaces are exported. Other APs, and records
    that lack an expected field, are logged and skipped.

    Returns:
        A defaultdict whose ``'data'`` key holds the header row followed by
        one row per exported AP, ``'isError'`` holds the request outcome and
        ``'statusCode'`` the status code of the reply.
    """
    URL = url_prefix + cmx + url_aps
    logging('getCMXAPData: Getting data for API: {}'.format(URL))
    # A defaultdict lets keys be referenced without KeyError.
    response_dict = defaultdict(list)
    response, response_dict = requestCMX(URL, response_dict)
    if response_dict['isError']:
        return response_dict
    logging('getCMXAPData: Got status code {} from CMX API (200 is good)'.format(response.status_code))
    response_dict['statusCode'] = response.status_code
    if response.status_code != 200:
        return response_dict
    response.encoding = 'utf-8'
    try:
        aps = response.json()
    except ValueError as e:
        logging('getCMXAPData: Could not decode JSON reply: {}'.format(e))
        response_dict['isError'] = True
        return response_dict
    response_dict['data'].append(list(_AP_HEADER))
    for ap in aps:
        try:
            row = _apRow(ap)
        except (KeyError, TypeError) as e:
            logging('getCMXAPData: Skipping ap record with missing or invalid field: {}'.format(e))
            continue
        if row is None:
            logging('getCMXAPData: Skipping ap record that does not have 1 or 2 interfaces.')
            continue
        response_dict['data'].append(row)
    # Subtract 1 for the header row.
    logging('getCMXAPData: Got {:,} ap records from CMX.'.format(len(response_dict['data']) - 1))
    return response_dict
def writeFile(data, fileName):
    """Write the collected rows to a new, timestamped CSV file in ``output_dir``.

    The output directory is created when it does not exist. Problems are
    logged; nothing is raised for a directory or file that cannot be created.

    Args:
        data: Mapping whose ``'data'`` entry is the list of rows to write.
        fileName: File name stem; a date/time suffix and ``.csv`` are appended.
    """
    logging('writeFile: Using {} as output directory'.format(os.path.abspath(output_dir)))
    if not os.path.exists(output_dir):
        try:
            os.makedirs(output_dir)
        except OSError as err:
            logging('writeFile: Error - output directory {} does not exist, and cannot create it {}'.format(output_dir, err))

    # Re-check: the creation attempt above may have failed.
    if not os.path.exists(output_dir):
        logging('writeFile: Tried to create output directory and it should have worked, but there is a problem still.')
        return

    # The date/time suffix (down to microseconds) makes the name unique.
    stamped_name = fileName + datetime.now().strftime('-%d-%m-%y-%H-%M-%S-%f.csv')
    target_path = os.path.join(output_dir, stamped_name)
    if os.path.isfile(target_path):
        logging('writeFile: Error - tried to create unique output file name {} but file exists'.format(stamped_name))
        return

    try:
        with open(target_path, mode='w', newline='', encoding='utf-8') as out_handle:
            csv.writer(out_handle).writerows(data['data'])
        logging('writeFile:Finished writing.')
    except IOError as err:
        logging('writeFile: Error - tried to open file for writing but something went wront {}'.format(err))
def getData():
    """Collect AP data and then client data, writing each result to its own CSV.

    This is the job the scheduler runs. A collection whose result has a true
    ``'isError'`` value is logged and not written.
    """
    logging('getData: Schdule woken up.')
    logging('getData: Using CMX: {} and username: {}'.format(cmx, username))
    # (collector, output file stem, message logged when the collector failed)
    jobs = (
        (getCMXAPData, 'ap_data', "getData: getCMXAPData had an error, nothing to write."),
        (getCMXData, 'user_data', "getData: getCMXData had an error, nothing to write."),
    )
    for collect, file_stem, failure_note in jobs:
        result = collect()
        if result['isError']:
            logging(failure_note)
        else:
            writeFile(result, file_stem)
    logging('getData: Process sleeping.')
def _runScheduledJob():
    """Run getData for the scheduler, logging any failure so later runs still happen."""
    try:
        getData()
    except Exception as e:
        # An exception escaping a scheduled action propagates out of
        # scheduler.run() and would abandon every run still queued.
        logging('main: Scheduled getData run failed: {!r}'.format(e))


def main():
    """Run the collection once or on the configured schedule.

    Nothing is done when the configuration failed to load. When the
    ``hours`` setting contains ``now`` the data is collected immediately.
    Otherwise a run is queued for each configured HH:MM time on each of the
    next ``days`` days (today included), skipping times already in the past,
    and the function blocks until every queued run has finished.
    """
    # Make sure the config file was read in OK.
    if configError:
        return
    # If the string 'now' is found in the schedule, run once straight away.
    if 'now' in schedule:
        logging("main: Process started, no scheduling needed, running now.")
        getData()
        return

    logging("main: Process started, scheduling jobs {} days and {} hours".format(days, schedule))
    s = sched.scheduler(time.time, time.sleep)

    # Parse and validate the 24hr times once; a bad entry is logged and ignored
    # instead of raising ValueError.
    run_times = []
    for entry in schedule.split(','):
        try:
            run_times.append(datetime.strptime(entry.strip(), '%H:%M').time())
        except ValueError as e:
            logging('main: Ignoring invalid schedule entry "{}": {}'.format(entry, e))

    # One reference time is used for both the calendar day and the delta.
    today = datetime.now()
    for day in range(days):
        run_day = (today + timedelta(days=day)).date()
        for run_time in run_times:
            future_date = datetime.combine(run_day, run_time)
            secs = (future_date - today).total_seconds()
            # Only times that are still in the future are scheduled.
            if secs > 0:
                logging('main: getData will be run {} total secs {}'.format(future_date, secs))
                # Absolute scheduling: a relative delay would be measured from
                # the moment of the call, not from 'today', and so would drift.
                s.enterabs(future_date.timestamp(), 1, _runScheduledJob)
    # Block here while the scheduler runs the queued jobs.
    s.run()
    logging('main: Finished scheduled runs.')


if __name__ == "__main__":
    main()