Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Nashville etl features #142

Open
wants to merge 24 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
c32bc53
Adding outline for new etl features in script
jmausolf Aug 17, 2016
63845fd
Added etl feature years of service
jmausolf Aug 17, 2016
dc9c1b3
Updated config for new etl officer years of service feature
jmausolf Aug 17, 2016
6627e03
Adding officer transfers to etl features
jmausolf Aug 17, 2016
0872ceb
Updating config for etl transfers feature
jmausolf Aug 17, 2016
1d13ed0
Counts number of arrests that only had resisting or evading arrests
jonkeane Aug 17, 2016
fc70e8f
Update example_officer_config.yaml
linbug Aug 17, 2016
981f7aa
Merge pull request #106 from dssg/etl-disc-arrests
linbug Aug 17, 2016
385f0fd
fixed mixup between transfers and arrestsOnlyResist features
jonkeane Aug 17, 2016
f2297d4
Update officers.py
jonkeane Aug 17, 2016
fd720be
Merge pull request #107 from dssg/etl-fixing-collision
jmausolf Aug 17, 2016
a5eb5c0
Adding new features
jmausolf Aug 17, 2016
4acc54b
Updating config for new features
jmausolf Aug 17, 2016
6fe51d6
Merge pull request #110 from dssg/transfers2
jonkeane Aug 17, 2016
5a1d54b
Added a feature that is the proportion of charges that were dismissed…
jonkeane Aug 17, 2016
3147356
Merge branch 'nashville_etl_features' into etl-charge-dispositions
sumedhjoshi Aug 17, 2016
e8ef2d8
Merge pull request #111 from dssg/etl-charge-dispositions
sumedhjoshi Aug 17, 2016
69aec91
Added not guilty charge proportion feature as well as guilty charge p…
jonkeane Aug 17, 2016
23f5a06
Merge pull request #112 from dssg/etl-charge-dispositions
linbug Aug 17, 2016
2094fe2
Added dispatch feature of source type
jmausolf Aug 17, 2016
e8bf6c4
Fix conflict
jmausolf Aug 17, 2016
74c1a1b
Merge pull request #115 from dssg/etl_dispatch
sumedhjoshi Aug 17, 2016
d370098
Merged develop back in
jonkeane Aug 19, 2016
17953ee
Merging nashville etl features to yaml
Aug 29, 2016
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
358 changes: 358 additions & 0 deletions eis/features/officers.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,364 @@

# strftime pattern (ISO date followed by the %X time representation) used by
# the feature classes below to render ``fake_today`` into their SQL text.
time_format = "%Y-%m-%d %X"

#####################################################################
##### ETL FEATURES #####
#####################################################################

class ETLdummyfeature1(abstract.OfficerFeature):
    """Placeholder feature used to exercise the 2016 schema.

    The query counts, per officer, the rows of ``events_hub`` whose
    ``event_type_code`` equals 4.
    """

    def __init__(self, **kwargs):
        abstract.OfficerFeature.__init__(self, **kwargs)
        self.description = "Dummy feature for testing 2016 schema"
        self.num_features = 1
        self.name_of_features = ["DummyFeature"]
        # The statement is assembled clause by clause; the pieces join into
        # a single SELECT with no placeholders.
        sql_clauses = [
            "SELECT officer_id, COUNT(event_type_code) ",
            "FROM events_hub ",
            "WHERE event_type_code = 4 ",
            "GROUP BY officer_id",
        ]
        self.query = "".join(sql_clauses)

class ETL_ArrestOnlyResist(abstract.TimeGatedOfficerFeature):
    """Time-gated count of arrests whose only charges were resisting/evading.

    The generated UPDATE writes, for the feature row matching ``fake_today``,
    the number of distinct ``arr_nbr`` values per officer for arrests that
    have no charge row whose description lacks "resist"/"evad", and whose
    ``arr_date`` falls within ``DURATION`` before ``fake_today``.

    ``table_name``, ``COLUMN``, ``fake_today`` and ``DURATION`` are read from
    ``self``; they are expected to be set by the base initializer (not
    visible here).
    """

    def __init__(self, **kwargs):
        abstract.TimeGatedOfficerFeature.__init__(self, **kwargs)
        self.description = ("Number of officer arrests where the only charge was resisting or evading, time-gated")
        # The inner LEFT JOIN marks every arrest that has at least one charge
        # NOT matching resist/evade; keeping only the unmarked rows leaves the
        # arrests whose charges are exclusively resist/evade.
        #
        # The aggregation groups by officer only. Grouping by arr_nbr as well
        # would make count(distinct arr_nbr) equal 1 in every group and hand
        # UPDATE ... FROM several candidate rows per officer, of which one is
        # picked arbitrarily.
        self.query = ("UPDATE features.{0} feature_table "
                      "SET {1} = staging_table.count "
                      "FROM (SELECT officers_hub.officer_id, count( distinct arrests.arr_nbr) "
                      "FROM etl.arrests "
                      "LEFT JOIN ( "
                      "SELECT arr_nbr, "
                      "true AS no_resist_evade "
                      "FROM etl.arrests resist_evade_charges "
                      "WHERE NOT lower(w_chgdesc) SIMILAR TO '%%(resist|evad)%%' "
                      " ) AS no_resist_evade_charges "
                      "ON arrests.arr_nbr = no_resist_evade_charges.arr_nbr "
                      "FULL JOIN staging.officers_hub "
                      "ON cast( arrests.anonid as text)=department_defined_officer_id "
                      "WHERE no_resist_evade is null "
                      "AND arr_date <= '{2}'::date "
                      "AND arr_date >= '{2}'::date - interval '{3}' "
                      "GROUP BY officers_hub.officer_id "
                      " ) AS staging_table "
                      "WHERE feature_table.officer_id = staging_table.officer_id "
                      "AND feature_table.fake_today = '{2}'::date"
                      .format(self.table_name,
                              self.COLUMN,
                              self.fake_today.strftime(time_format),
                              self.DURATION))
        # NOTE(review): presumably makes officers with no matching arrests
        # get 0 instead of NULL -- confirm against the abstract base class.
        self.set_null_counts_to_zero = True

class ETL_YearsOfService(abstract.OfficerFeature):
    """Officer feature carrying ``years_service`` from ``etl.officers``.

    The generated UPDATE copies ``years_service`` into the feature column for
    every officer found by joining ``etl.officers`` to
    ``staging.officers_hub`` on the department-defined officer id.
    """

    def __init__(self, **kwargs):
        abstract.OfficerFeature.__init__(self, **kwargs)
        self.description = "Officer's years of service"
        self.num_features = 1
        self.name_of_features = ["ETLYearsOfService"]
        # Two positional slots: the features table, then the target column.
        update_template = (
            "UPDATE features.{} feature_table "
            "SET {} = staging_table.years_service "
            "FROM ( SELECT officer_id, years_service "
            " FROM etl.officers "
            " JOIN staging.officers_hub "
            " ON cast( anonid as text)=department_defined_officer_id "
            " ) AS staging_table "
            "WHERE feature_table.officer_id = staging_table.officer_id "
        )
        self.query = update_template.format(self.table_name, self.feature_name)

class ETL_NumberTransfers(abstract.TimeGatedOfficerFeature):
    """Time-gated count of an officer's rows in ``etl.transfers``.

    Counts transfers whose ``startdate`` lies between
    ``fake_today - DURATION`` and ``fake_today`` and writes the count to the
    feature row for that ``fake_today``.
    """

    def __init__(self, **kwargs):
        abstract.TimeGatedOfficerFeature.__init__(self, **kwargs)
        self.description = "Number of officer transfers, time-gated"
        update_template = (
            "UPDATE features.{0} feature_table "
            "SET {1} = staging_table.count "
            "FROM ( SELECT officer_id, count(officer_id) "
            " FROM etl.transfers "
            " FULL JOIN staging.officers_hub "
            " ON cast( anonid as text)=department_defined_officer_id "
            " WHERE startdate <= '{2}'::date "
            " AND startdate >= '{2}'::date - interval '{3}' "
            " AND officer_id is not null "
            " GROUP BY officer_id "
            " ) AS staging_table "
            "WHERE feature_table.officer_id = staging_table.officer_id "
            "AND feature_table.fake_today = '{2}'::date"
        )
        # Slots: {0} features table, {1} column, {2} as-of date, {3} window.
        target_table = self.table_name
        target_column = self.COLUMN
        as_of = self.fake_today.strftime(time_format)
        window = self.DURATION
        self.query = update_template.format(target_table, target_column, as_of, window)
        # NOTE(review): presumably fills officers without transfers with 0 --
        # confirm against the abstract base class.
        self.set_null_counts_to_zero = True

class ETL_NumberTransfersNotOnNewYears(abstract.TimeGatedOfficerFeature):
    """Time-gated count of transfers that did not start on January 1st.

    Same shape as the plain transfer count, with the extra filter
    ``extract(doy from startdate) != 1``.
    """

    def __init__(self, **kwargs):
        abstract.TimeGatedOfficerFeature.__init__(self, **kwargs)
        self.description = "Number of officer transfers not starting on New Years, YYYY-01-01, time-gated"
        update_template = (
            "UPDATE features.{0} feature_table "
            "SET {1} = staging_table.count "
            "FROM ( SELECT officer_id, count(officer_id) "
            " FROM etl.transfers "
            " FULL JOIN staging.officers_hub "
            " ON cast( anonid as text)=department_defined_officer_id "
            " WHERE extract(doy from startdate) != 1 "
            " AND startdate <= '{2}'::date "
            " AND startdate >= '{2}'::date - interval '{3}' "
            " AND officer_id is not null "
            " GROUP BY officer_id "
            " ) AS staging_table "
            "WHERE feature_table.officer_id = staging_table.officer_id "
            "AND feature_table.fake_today = '{2}'::date"
        )
        # Slots: {0} features table, {1} column, {2} as-of date, {3} window.
        target_table = self.table_name
        target_column = self.COLUMN
        as_of = self.fake_today.strftime(time_format)
        window = self.DURATION
        self.query = update_template.format(target_table, target_column, as_of, window)
        # NOTE(review): presumably fills officers without matches with 0 --
        # confirm against the abstract base class.
        self.set_null_counts_to_zero = True

class ETL_NumberTransfersNotInJanuary(abstract.TimeGatedOfficerFeature):
    """Time-gated count of transfers whose start month is not January.

    Same shape as the plain transfer count, with the extra filter
    ``extract(month from startdate) != 01``.
    """

    def __init__(self, **kwargs):
        abstract.TimeGatedOfficerFeature.__init__(self, **kwargs)
        self.description = "Number of officer transfers not starting in January, time-gated"
        update_template = (
            "UPDATE features.{0} feature_table "
            "SET {1} = staging_table.count "
            "FROM ( SELECT officer_id, count(officer_id) "
            " FROM etl.transfers "
            " FULL JOIN staging.officers_hub "
            " ON cast( anonid as text)=department_defined_officer_id "
            " WHERE extract(month from startdate) != 01 "
            " AND startdate <= '{2}'::date "
            " AND startdate >= '{2}'::date - interval '{3}' "
            " AND officer_id is not null "
            " GROUP BY officer_id "
            " ) AS staging_table "
            "WHERE feature_table.officer_id = staging_table.officer_id "
            "AND feature_table.fake_today = '{2}'::date"
        )
        # Slots: {0} features table, {1} column, {2} as-of date, {3} window.
        target_table = self.table_name
        target_column = self.COLUMN
        as_of = self.fake_today.strftime(time_format)
        window = self.DURATION
        self.query = update_template.format(target_table, target_column, as_of, window)
        # NOTE(review): presumably fills officers without matches with 0 --
        # confirm against the abstract base class.
        self.set_null_counts_to_zero = True

class ETL_NumberTransfersLessThanOneYear(abstract.TimeGatedOfficerFeature):
    """Time-gated count of transfers lasting less than one year.

    A transfer qualifies when the year component of
    ``age(enddate, startdate)`` is below 1 and its ``startdate`` falls within
    ``DURATION`` before ``fake_today``.
    """

    def __init__(self, **kwargs):
        abstract.TimeGatedOfficerFeature.__init__(self, **kwargs)
        self.description = "Number of officer transfers less than one year in length, time-gated"
        update_template = (
            "UPDATE features.{0} feature_table "
            "SET {1} = staging_table.count "
            "FROM ( SELECT officer_id, count(officer_id) "
            " FROM etl.transfers "
            " FULL JOIN staging.officers_hub "
            " ON cast( anonid as text)=department_defined_officer_id "
            " WHERE extract(year from age(enddate, startdate )) < 1 "
            " AND startdate <= '{2}'::date "
            " AND startdate >= '{2}'::date - interval '{3}' "
            " AND officer_id is not null "
            " GROUP BY officer_id "
            " ) AS staging_table "
            "WHERE feature_table.officer_id = staging_table.officer_id "
            "AND feature_table.fake_today = '{2}'::date"
        )
        # Slots: {0} features table, {1} column, {2} as-of date, {3} window.
        target_table = self.table_name
        target_column = self.COLUMN
        as_of = self.fake_today.strftime(time_format)
        window = self.DURATION
        self.query = update_template.format(target_table, target_column, as_of, window)
        # NOTE(review): presumably fills officers without matches with 0 --
        # confirm against the abstract base class.
        self.set_null_counts_to_zero = True

class ETL_NumberTransfersLessThanOneMonth(abstract.TimeGatedOfficerFeature):
    """Time-gated count of transfers lasting less than one month.

    A transfer qualifies when both the year and the month components of
    ``age(enddate, startdate)`` are below 1 and its ``startdate`` falls
    within ``DURATION`` before ``fake_today``.
    """

    def __init__(self, **kwargs):
        abstract.TimeGatedOfficerFeature.__init__(self, **kwargs)
        self.description = "Number of officer transfers less than one month in length, time-gated"
        update_template = (
            "UPDATE features.{0} feature_table "
            "SET {1} = staging_table.count "
            "FROM ( SELECT officer_id, count(officer_id) "
            " FROM etl.transfers "
            " FULL JOIN staging.officers_hub "
            " ON cast( anonid as text)=department_defined_officer_id "
            " WHERE extract(year from age(enddate, startdate )) < 1 "
            " AND extract(month from age(enddate, startdate )) < 1 "
            " AND startdate <= '{2}'::date "
            " AND startdate >= '{2}'::date - interval '{3}' "
            " AND officer_id is not null "
            " GROUP BY officer_id "
            " ) AS staging_table "
            "WHERE feature_table.officer_id = staging_table.officer_id "
            "AND feature_table.fake_today = '{2}'::date"
        )
        # Slots: {0} features table, {1} column, {2} as-of date, {3} window.
        target_table = self.table_name
        target_column = self.COLUMN
        as_of = self.fake_today.strftime(time_format)
        window = self.DURATION
        self.query = update_template.format(target_table, target_column, as_of, window)
        # NOTE(review): presumably fills officers without matches with 0 --
        # confirm against the abstract base class.
        self.set_null_counts_to_zero = True



##### charges that were dismissed.
### Any charges with the dispositions below are considered dismissed
### In other words, these are charges where no trial has taken or will take place.
### The selection of these categories should be verified with MNPD.
### 'DISMISSED ROS',
### 'DISMISSED - COSTS TO PROSECUTOR',
### 'RETIRED ON COSTS',
### 'DISMISSED ON COST',
### 'RETIRED',
### 'NOT GUILTY - REASON OF INSANITY',
### 'NOLLE PROSEQUI',
### 'NO TRUE BILL'
####################################
class ETL_ChargesDismissed(abstract.TimeGatedOfficerFeature):
    """Time-gated proportion of an officer's arrest charges that were dismissed.

    For arrests with ``arr_date`` within ``DURATION`` before ``fake_today``,
    the query divides the number of charges whose ``disposition_desc`` is in
    the dismissed list by the number of non-null ``disposition_desc`` values,
    yielding 0 when there are none.
    """

    def __init__(self, **kwargs):
        abstract.TimeGatedOfficerFeature.__init__(self, **kwargs)
        self.description = "Proportion of arrested charges that were dismissed, time-gated"
        # Written as adjacent literals with explicit newlines; the resulting
        # text is one multi-line SQL statement.
        update_template = (
            "\n"
            "UPDATE features.{0} feature_table\n"
            "SET {1} = staging_table.propdismissed\n"
            "FROM (SELECT officers_hub.officer_id,\n"
            "case when COUNT(disposition_desc) > 0 then\n"
            "SUM(CASE WHEN disposition_desc IN (\n"
            "'DISMISSED ROS',\n"
            "'DISMISSED - COSTS TO PROSECUTOR',\n"
            "'RETIRED ON COSTS',\n"
            "'DISMISSED ON COST',\n"
            "'RETIRED',\n"
            "'NOT GUILTY - REASON OF INSANITY',\n"
            "'NOLLE PROSEQUI',\n"
            "'NO TRUE BILL'\n"
            ") THEN 1 ELSE 0 END) / COUNT(disposition_desc)::float\n"
            "when COUNT(disposition_desc) = 0 then 0\n"
            "end as propDismissed\n"
            "FROM etl.arrests\n"
            "FULL JOIN staging.officers_hub\n"
            "ON cast( arrests.anonid as text)=department_defined_officer_id\n"
            "WHERE arr_date <= '{2}'::date\n"
            "AND arr_date >= '{2}'::date - interval '{3}'\n"
            "group by officer_id\n"
            ") AS staging_table\n"
            "WHERE feature_table.officer_id = staging_table.officer_id\n"
            "AND feature_table.fake_today = '{2}'::date\n"
        )
        # Slots: {0} features table, {1} column, {2} as-of date, {3} window.
        target_table = self.table_name
        target_column = self.COLUMN
        as_of = self.fake_today.strftime(time_format)
        window = self.DURATION
        self.query = update_template.format(target_table, target_column, as_of, window)
        # NOTE(review): presumably fills officers without a value with 0 --
        # confirm against the abstract base class.
        self.set_null_counts_to_zero = True

class ETL_NumberDispatchedInitiatedBy(abstract.TimeGatedCategoricalOfficerFeature):
    """Time-gated count of dispatches per initiation source.

    Counts ``etl.dispatch`` rows per officer whose ``init_source`` contains
    the lookup code and whose ``call_rec`` falls within ``DURATION`` before
    ``fake_today``.
    """

    def __init__(self, **kwargs):
        # The category map is assigned before the base initializer runs.
        # NOTE(review): presumably the base class reads it during __init__ --
        # confirm in abstract.
        self.categories = {
            "No": "None",
            "Schedule": "Schedule",
            "Phone": "Phone",
            "Mobile": "Mobile",
            "Field": "Field",
            "911": "911",
        }
        abstract.TimeGatedCategoricalOfficerFeature.__init__(self, **kwargs)
        self.description = "Number dispatches by initiation source over time gated periods"
        update_template = (
            "UPDATE features.{0} feature_table "
            "SET {1} = staging_table.count "
            "FROM ( SELECT officer_id, count(officer_id) "
            " FROM etl.dispatch "
            " FULL JOIN staging.officers_hub "
            " ON cast( anonid as text)=department_defined_officer_id "
            " WHERE init_source like '%%{4}%%' "
            " AND call_rec <= '{2}'::date "
            " AND call_rec >= '{2}'::date - interval '{3}' "
            " AND officer_id is not null "
            " GROUP BY officer_id "
            " ) AS staging_table "
            "WHERE feature_table.officer_id = staging_table.officer_id "
            "AND feature_table.fake_today = '{2}'::date"
        )
        # Slots: {0} features table, {1} column, {2} as-of date, {3} window,
        # {4} substring looked for in init_source.
        target_table = self.table_name
        target_column = self.COLUMN
        as_of = self.fake_today.strftime(time_format)
        window = self.DURATION
        source_code = self.LOOKUPCODE
        self.query = update_template.format(
            target_table, target_column, as_of, window, source_code
        )
        # NOTE(review): presumably fills officers without matches with 0 --
        # confirm against the abstract base class.
        self.set_null_counts_to_zero = True

##### charges that resulted in some disposition like guilty
### Any charges with the dispositions below are guilty or similar to guilty
### The selection of these categories should be verified with MNPD
# 'GUILTY',
# 'PROBATION',
# 'GUILTY PLEA - LESSER CHARGE`',
# 'NOLO CONTENDERE',
# 'CONCLUDE PROBATION',
# 'NOLO CONTENDERE 40-35-313',
# 'PRE-TRIAL DIVERSION (40-15-105)',
# 'GUILTY AFTER TRIAL',
# 'PROBATION VIOLATION'
####################################
class ETL_ChargesQuasiGuilty(abstract.TimeGatedOfficerFeature):
    """Time-gated proportion of arrest charges with a guilty-like disposition.

    For arrests with ``arr_date`` within ``DURATION`` before ``fake_today``,
    the query divides the number of charges whose ``disposition_desc`` is in
    the guilty-like list by the number of non-null ``disposition_desc``
    values, yielding 0 when there are none.

    ``table_name``, ``COLUMN``, ``fake_today`` and ``DURATION`` are read from
    ``self``; they are expected to be set by the base initializer (not
    visible here).
    """

    def __init__(self, **kwargs):
        abstract.TimeGatedOfficerFeature.__init__(self, **kwargs)
        self.description = ("Proportion of arrested charges that were found quasi-guilty, time-gated")
        # The subquery column is named for what it holds (it was previously
        # aliased "propDismissed", a leftover from the dismissed feature).
        #
        # 'GUILTY PLEA - LESSER CHARGE' is listed both with and without the
        # trailing backtick: the backtick form is kept in case the source data
        # really contains it, and the clean form is added so a correctly
        # spelled disposition is not silently missed.
        # NOTE(review): confirm with the data which spelling actually occurs.
        self.query = """
            UPDATE features.{0} feature_table
            SET {1} = staging_table.prop_quasi_guilty
            FROM (SELECT officers_hub.officer_id,
                         case when COUNT(disposition_desc) > 0 then
                                  SUM(CASE WHEN disposition_desc IN (
                                      'GUILTY',
                                      'PROBATION',
                                      'GUILTY PLEA - LESSER CHARGE`',
                                      'GUILTY PLEA - LESSER CHARGE',
                                      'NOLO CONTENDERE',
                                      'CONCLUDE PROBATION',
                                      'NOLO CONTENDERE 40-35-313',
                                      'PRE-TRIAL DIVERSION (40-15-105)',
                                      'GUILTY AFTER TRIAL',
                                      'PROBATION VIOLATION'
                                  ) THEN 1 ELSE 0 END) / COUNT(disposition_desc)::float
                              when COUNT(disposition_desc) = 0 then 0
                         end as prop_quasi_guilty
                  FROM etl.arrests
                  FULL JOIN staging.officers_hub
                  ON cast( arrests.anonid as text)=department_defined_officer_id
                  WHERE arr_date <= '{2}'::date
                  AND arr_date >= '{2}'::date - interval '{3}'
                  group by officer_id
                 ) AS staging_table
            WHERE feature_table.officer_id = staging_table.officer_id
            AND feature_table.fake_today = '{2}'::date
            """.format(self.table_name,
                       self.COLUMN,
                       self.fake_today.strftime(time_format),
                       self.DURATION)
        # NOTE(review): presumably fills officers without a value with 0 --
        # confirm against the abstract base class.
        self.set_null_counts_to_zero = True

##### charges that resulted in some disposition not guilty
### Any charges with the dispositions below are not guilty
### The selection of these categories should be verified with MNPD
# 'NOT GUILTY',
# 'NOT GUILTY - REASON OF INSANITY'
####################################
class ETL_ChargesNotGuilty(abstract.TimeGatedOfficerFeature):
    """Time-gated proportion of arrest charges with a not-guilty disposition.

    For arrests with ``arr_date`` within ``DURATION`` before ``fake_today``,
    the query divides the number of charges whose ``disposition_desc`` is
    'NOT GUILTY' or 'NOT GUILTY - REASON OF INSANITY' by the number of
    non-null ``disposition_desc`` values, yielding 0 when there are none.

    ``table_name``, ``COLUMN``, ``fake_today`` and ``DURATION`` are read from
    ``self``; they are expected to be set by the base initializer (not
    visible here).
    """

    def __init__(self, **kwargs):
        abstract.TimeGatedOfficerFeature.__init__(self, **kwargs)
        self.description = ("Proportion of arrested charges that were found not guilty, time-gated")
        # The subquery column is named for what it holds (it was previously
        # aliased "propDismissed", a leftover from the dismissed feature).
        self.query = """
            UPDATE features.{0} feature_table
            SET {1} = staging_table.prop_not_guilty
            FROM (SELECT officers_hub.officer_id,
                         case when COUNT(disposition_desc) > 0 then
                                  SUM(CASE WHEN disposition_desc IN (
                                      'NOT GUILTY',
                                      'NOT GUILTY - REASON OF INSANITY'
                                  ) THEN 1 ELSE 0 END) / COUNT(disposition_desc)::float
                              when COUNT(disposition_desc) = 0 then 0
                         end as prop_not_guilty
                  FROM etl.arrests
                  FULL JOIN staging.officers_hub
                  ON cast( arrests.anonid as text)=department_defined_officer_id
                  WHERE arr_date <= '{2}'::date
                  AND arr_date >= '{2}'::date - interval '{3}'
                  group by officer_id
                 ) AS staging_table
            WHERE feature_table.officer_id = staging_table.officer_id
            AND feature_table.fake_today = '{2}'::date
            """.format(self.table_name,
                       self.COLUMN,
                       self.fake_today.strftime(time_format),
                       self.DURATION)
        # NOTE(review): presumably fills officers without a value with 0 --
        # confirm against the abstract base class.
        self.set_null_counts_to_zero = True




#####################################################################
##### STAGING FEATURES #####
#####################################################################


### Officer labels.
class LabelSustained(abstract.OfficerFeature):
def __init__(self, **kwargs):
Expand Down
Loading