-
Notifications
You must be signed in to change notification settings - Fork 0
/
connections.py
443 lines (338 loc) · 15.1 KB
/
connections.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
"""Connection Handlers, largely arcpy driven
Connections have prerequisits:
Excel:https://pro.arcgis.com/en/pro-app/latest/help/data/excel/work-with-excel-in-arcgis-pro.htm
Currently these connection are read only and return datasets as pandas dataframes.
"""
import os.path as _path
import pathlib as _pathlib
from enum import Enum as _Enum
import pandas as _pd
import arcpy as _arcpy
import xlwings as _xlwings
import fuckit as _fuckit
import funclite.iolib as _iolib
from funclite.iolib import fixp as _fixp
import arcproapi.data as _data
class ESRICursorType(_Enum):
SearchCursor = 1
UpdateCursor = 2
InsertCursor = 3
PandasDataframe = 4
class EnumDataSourceType(_Enum):
ESRIShapefile = 1
ESRIGeoDBFeatureClass = 2
ESRIGeoDBTable = 3
MSSQLServerTable = 4
MSSQLServerSpatialTable = 8
SQLiteTable = 5
OracleTable = 6
OracleSpatialTable = 7
class OracleSDE:
""" Define connections using an SDE file.
Assumes that password and username have been persisted to the file (at least for the moment!).
Generates a path to the feature/table that can be used like a fGDB path to a layer/table.
Args:
sde_file (str): The sde file
layer_name (str): The layer name (prepended with dataset, if in dataset)
schema (str): The schema, in Oracle SDE the schema is prepended to datasets and the layer name.
If this is provided, then the schema will be prepended to create a refrence
to the layer as a shortcut to typing out this details in full.
Notes:
normpaths arguments, as always.
Examples:
No schema
>>> SDELyr = OracleSDE('C:/my.sde', 'MYSCHEMA.mydataset/MYSCHEMA.my_layer', schema='')
>>> SDELyr.feature_path
'C:\\my.sde\\MYSCHEMA.mydataset/MYSCHEMA.my_layer'
We cant be arsed writing out the full path with the schama
>>> SDELyr = OracleSDE('C:/my.sde', 'my_layer', schema='MYSCHEMA')
>>> SDELyr.feature_path
'C:\\my.sde\\MYSCHEMA.mydataset/MYSCHEMA.my_layer'
TODO: Integrate into ESRISDE connections
"""
def __init__(self, sde_file: str, layer_name: str, schema: str = ''):
self._sde_file = _path.normpath(sde_file)
self._layer_name = layer_name
self._schema = schema
if schema:
bits = ['%s.%s' % (schema, s) for s in _pathlib.Path(self._layer_name).parts]
self.feature_path = _fixp(self._sde_file, *bits)
else:
self.feature_path = _fixp(self._sde_file, self._layer_name)
class _BaseFileSource:
"""base class for file type sources"""
def __init__(self, fname: str):
self.fname = _path.normpath(fname)
@property
def file_parts(self):
"""()->str,str,str
get file parts of source
>>> _BaseFileSource.file_parts
'c:/temp','myfile.gdb','.gdb'
"""
return _iolib.get_file_parts2(self.fname)
class ConfigTextFile(_BaseFileSource):
"""Text file connections
"""
class ConfigESRIGeoDBTableOrFeatureClass(_BaseFileSource):
"""store for table and feature class database information
Args:
fname: path to the geodatabase, use config.GeoDatabasePaths
"""
def __init__(self, fname):
super(ConfigESRIGeoDBTableOrFeatureClass, self).__init__(fname)
class ConfigESRIShp(_BaseFileSource):
"""Config class for ESRI Shape Files"""
def __init__(self, fname):
super(ConfigESRIShp, self).__init__(fname)
class ConfigExcel(_BaseFileSource):
"""store for table and feature class database information
Parameters
workbook:
path to the workbook, i.e. xlsx file
table:
table name
worksheet, range
worksheet and range name, optionally expand the range (e.g. range='A1', expand_cell_range='table')
expand_cell_range:
accepts values of 'table', 'vertical', 'horizontal', True
visibile:
boolean to show workbook, useful for debugging
"""
def __init__(self, workbook: str, table: str = '', worksheet: str = '', range_: str = '', expand_cell_range: bool = None):
if expand_cell_range == True: # noqa
expand_cell_range = 'table'
self.expand_cell_range = expand_cell_range
self.workbook = workbook
self.table = table
self.worksheet = worksheet
self.range = range_
super(ConfigExcel, self).__init__(self.workbook)
class Excel(_BaseFileSource):
"""Excel connection class
Use xlwings to expose xlwings Workbook, worksheet and table objects
Loads data into a pandas dataframe as Excel().df
Use as a Context Manager.
Instantiated with a connections.ConfigExcel instance.
Additional arguments are passed to the xlwings Book class instantiator.
See https://docs.xlwings.org/en/stable/api.html
Args:
fname: fully qualified filename
table_name: name of table/listobject in spreadsheet
worksheet: name of worksheet use for cell_range, ignored if table_name provided
cell_range: range in worksheet to return
expand_cell_range: try and expand the range specified in cell_range
args: Additional named arguments to pass to the xlwings.Book class instantiator
Notes:
That table_name takes precedent over worksheet and cell_range.
Workbooks need to be saved manually, the App closes without saving
Examples:
>>> Config = ConfigExcel('C:/workbook.xlsx', table_name='myTable')
>>> with Excel(Config, read_only=True) as Obj:
>>> print(Obj.Table.rows.count)
>>> my_pandas_dataframe = Obj.df
or
>>> Ex = Excel(Config, read_only=True)
>>> Ex.df
>>> Ex.close()
"""
def __init__(self, Config: ConfigExcel, visible=False, **args):
self._Config = Config
self._visible = visible
self.Workbook = None # type: [None, _xlwings.main.Book]
self.Worksheet = None
self.App = None # type: [None, _xlwings.main.App]
self.Table = None # type: [None, _xlwings.main.Table]
self.Range = None # type: [None, _xlwings.main.Range]
self._args = args
self.df = None # type: [None, _pd.DataFrame]
self._loaded = False # flag used in refresh, don't refresh the xlwings objs once loaded
super(Excel, self).__init__(fname=Config.workbook)
self.refresh()
def __enter__(self):
"""enter"""
return self
def __exit__(self, exc_type, exc_val, exc_tb):
"""exit"""
self.close()
def refresh(self) -> None:
"""
Refreshes the pandas dataframe.
Notes:
Excel objects are updated live as they are a direct link to the spreadsheet
"""
if not self._loaded:
self.App = _xlwings.App(visible=self._visible)
self.Workbook = _xlwings.Book(self._Config.workbook, **self._args)
if self._Config.table:
if not self._loaded:
self.Table = self._get_table(self._Config.table)
self.Range = self.Table.range
self.df = self.Table.range.options(_pd.DataFrame, header=1, index=False).value
elif self._Config.worksheet:
if not self._loaded: # load range from worksheet
self.Worksheet = self.Workbook.sheets(self._Config.worksheet)
if self._Config.expand_cell_range in ['table', 'horizontal', 'vertical']:
self.Range = self.Workbook.sheets[self._Config.worksheet].range(self._Config.range).expand(self._Config.expand_cell_range)
else:
self.Range = self.Workbook.sheets[self._Config.worksheet].range(self._Config.range)
self.df = self.Workbook.sheets[self._Config.worksheet].range(self._Config.range).options(
_pd.DataFrame, header=1, index=False, expand=self._Config.expand_cell_range).value
else:
raise ValueError('ExcelConfig had a blank table_name and worksheet')
self._loaded = True
def close(self, kill=True):
"""release resources"""
with _fuckit:
self.App.quit()
if kill: self.App.kill() # really close it if close didn't work
def _get_table(self, table_name):
"""(str) -> xlwings:Table
Get an xlwings table object from the table name
"""
for sheet in self.Workbook.sheets:
for tbl in sheet.tables:
if tbl.name.lower() == table_name.lower():
return tbl
return None
class TextFile:
"""TextFile connection class for standard text files
Use as a Context Manager.
reads text file into a pandas dataframe, which is set to TextFile().df
Additional arguments are passed to a pandas.read_csv
See https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
Examples:
>>> with TextFile('C:/data.csv', delim='\t', header=('col1','col2')) as Obj: # noqa
>>> my_pandas_dataframe = Obj.df
>>> print(Obj.df[1])
"""
def __init__(self, fname, delim=',', header=0, **args):
"""(class:ConfigTextFile, args)->None
args are passed to the panda's read_csv func call.
"""
self._header = header
self._delim = delim
self.fname = _path.normpath(fname)
self._args = args
self.df = None # type: [None, _pd.DataFrame]
self.refresh()
def __enter__(self):
"""enter"""
return self
def __exit__(self, exc_type, exc_val, exc_tb):
"""exit"""
self.close()
def refresh(self):
"""refresh data"""
self.df = _pd.read_csv(self.fname, header=self._header, delimiter=self._delim, **self._args)
def close(self):
"""close connection"""
with _fuckit:
del self.df # not really necessary but stops pylint moaning
class _ESRIFile(_BaseFileSource):
"""Base class for ESRI file data sources, including layers in
filesystem geodatabases and shapefiles
Args:
Config (str, ConfigESRIShp, ConfigESRIGeoDBTableOrFeatureClass, ConfigESRISDE) : An instance of a Config class in this module.
Also accepts a string and just tries it!
cursor_type: Type of cursor top open, currently only supports SearchCursor
cols (tuple, list): List of cols to include in dataset
exclude_cols (tuple, list): List of cols to exclude from dataset
where_clause (str): Filter records returned with this where clause
col_rename_func (any): A function to apply to the column names, defaults to str.lower
Raies:
ValueError: If incorrect class type passed to Config. Config should be an instance of [ConfigESRIShp, ConfigESRIGeoDBTableOrFeatureClass or ConfigESRISDE]
Notes:
Converts all column names to lower case. This is because ESRI geodatabases are not case sensitive to field names, but pandas
is, which causes a hell of a lot of niggling issues. Pass col_rename_func = lambda x:x to override thie behaviour
"""
def __init__(self, Config, cursor_type=ESRICursorType.SearchCursor, cols=(), exclude_cols=('Shape',), where_clause='', col_rename_func=str.lower, **kwargs):
self.Config = Config # type: [ConfigESRIShp, ConfigESRIGeoDBTableOrFeatureClass]
if isinstance(Config, (ConfigESRIGeoDBTableOrFeatureClass, ConfigESRIShp)):
self._fqn = Config.fname
elif isinstance(Config, str):
self._fqn = _path.normpath(Config)
else:
raise ValueError('Expected Config class instance of type ConfigESRIShp, ConfigESRIGeoDBTableOrFeatureClass or ConfigESRISDE')
self._col_rename_func = col_rename_func
self._cursor_type = cursor_type # type: ESRICursorType
self._where_clause = where_clause
self._kwargs = kwargs
self._exclude_cols = exclude_cols
self.df = None # type: [None, _pd.DataFrame]
self.SearchCursor = None
self.UpdateCursor = None
self.InsertCursor = None
if isinstance(cols, str):
self._cols = (cols,)
else:
self._cols = cols
super().__init__(self._fqn)
self.refresh()
def __enter__(self):
"""enter"""
return self
def refresh(self) -> None:
"""
retrieve values with search cursor
"""
# TODO Implement sort and support other cursors - See CRUD.py and ORM.py
self.close()
if self._cursor_type == ESRICursorType.SearchCursor:
self.SearchCursor = _arcpy.da.SearchCursor(self._fqn, fields=self._cols, where_clause=self._where_clause, **self._kwargs)
elif self._cursor_type == ESRICursorType.InsertCursor:
raise NotImplementedError
elif self._cursor_type == ESRICursorType.UpdateCursor:
raise NotImplementedError
elif self._cursor_type == ESRICursorType.PandasDataframe:
# table_as_pandas errors on date types with nulls, hence replaced
# self.df = _data.table_as_pandas(self._fqn, cols=self._cols, where=self._where_clause, **self._kwargs)
self.df = _data.table_as_pandas2(self._fqn, cols=self._cols, exclude_cols=self._exclude_cols, where=self._where_clause, **self._kwargs)
self.df.columns = list(map(self._col_rename_func, self.df.columns))
else:
raise NotImplementedError
def __exit__(self, exc_type, exc_val, exc_tb):
"""exit"""
self.close()
def close(self):
"""close connections"""
with _fuckit:
del self.SearchCursor
del self.UpdateCursor
del self.InsertCursor
def delete(self):
"""delete"""
raise NotImplementedError
# Note the inheritance here. Let the base class do the work
# But lets implent as two different classes to make it more
# obvious what being read when see it in code that is working
# with these connection types
class ESRIShp(_ESRIFile): # noqa
"""
Open layer
See https://pro.arcgis.com/en/pro-app/latest/arcpy/functions/searchcursor.htm
Use pypika to generate complex where statements:
https://github.com/kayak/pypika
Args:
cols: tuple/list of columns to include in the conncetion # noqa # noqa
cursor_type: type of datasource to open. See the connections.ESRICursorType enumeration # noqa
args: passed to the relevant connection call, e.g. arcpy.da.SearchCursor # noqa
"""
# See comment against ESRIShp
class ESRIGeoDBFeatureClassOrTable(_ESRIFile): # noqa
"""Open geodatabase feature layer
See https://pro.arcgis.com/en/pro-app/latest/arcpy/functions/searchcursor.htm
Use pypika to generate complex where statements:
https://github.com/kayak/pypika
Args:
Config (connections.ConfigESRIGeoDBTableOrFeatureClass): Instance of ConfigESRIGeoDBTableOrFeatureClass # noqa
cols: tuple/list of columns to include in the connetion # noqa
cursor_type: type of datasource to open. See the connections.ESRICursorType enumeration # noqa
args: passed to the relevant connection call, e.g. arcpy.da.SearchCursor # noqa
"""
class Oracle:
"""Oracle tables.
An oracle connection, independent of ESRI enterprise geodb
"""
def __init__(self):
raise NotImplementedError