#!/usr/bin/env python
"""
Copyright 2017 David Pany (@DavidPany)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Card Data Processor and Organizer (CDPO)
Version 1.0 - 2017-06-23 (SANS DFIR Summit Edition)
CDPO is a tool to validate, de-duplicate, combine, query, and encrypt track data recovered from a breach
Created by David Pany - 2017
twitter: @davidpany
github: https://github.com/davidpany
Encryption implemented by Patrick Charbonneau
"""
from __future__ import print_function #Used for hipster print function
import sqlite3 # used to interact with sqlite DB
import sys # used to quit if user does not make decision about duplicate .sqlite files.
import cmd # used for cli loop
import os # used to import files
import fnmatch # used to match filenames and wildcards for importing
import re # used for track regex
import time # used to track time of imports and to find current MMYY
import getpass # used to get password without echo
import hashlib # used to MD5 password for verification
import base64 # used to convert encrypted PANs to strings for DB storage
import datetime # used to create unique statsCSV and DB filenames
import csv # used to write output files
import json # read json strings from the db and convert to dicts
"""
ToDo
[ ]After sanitizing collection name, check to see if the sanitized name exists already
[ ]Add speedygonzales from Brandan Schondorfer
[ ]Format the available collections when making a super so they look nice
[ ]Change file command to use csv module
[ ]Add tab-complete for import, combine, query, etc.
[ ]Add BINLIST ranger post processor
[ ]Add status updates to query table creations
[ ]Don't make a database table for files that don't have valid track data
[ ]Add support to read in a file of only pan (no full track data)
[ ]requires an additional DB field for P on top of 1, 2, and 12; and maybe EXP date filler of 9999 or null?
[ ]If wildcard used for input, limit display noise or consider using buffer to keep it clean
[ ]Format components part of stats output
[ ]Add option to delete a collection DB table and from the metadata table
"""
SPECIAL_CHARS_LIST = ["`", "~", "!", "@", "#", "$", "%", "^", "&", "*", "(", ")", "-", "=", "+", "\\", "|", ";", ":",
"'", '"', ",", "<", ".", ">", "/", "?", " ", "[", "]", "{", "}"]
################################################### CMD Loop Class ####################################################
class CDPO(cmd.Cmd):
"""\tFramework for counting PCI records"""
def __init__(self):
cmd.Cmd.__init__(self)
self.db_file_name, self.metadata, self.skrtkey = initialize()
# Define settings
prompt = "(CDPO): "
intro = "\n Hello, I am CDPO, PCI track data relations. How might I serve you?.\n\tTry 'help'\n"
def do_import(self, track_file_args):
"""\timport [track_file]
\tImport the specified file. Wildcards and directory traversal are supported.\n\
\tCDPO ignores .py and .sqlite files.\n"""
print()
if track_file_args:
if "\\" in track_file_args:
track_file_args = track_file_args.replace("\\", "/")
if "/" in track_file_args:
target_path = track_file_args.rpartition("/")
else:
target_path = "./", "", track_file_args
            directory = target_path[0]
            file_pattern = target_path[2]
            files_to_import = []
            for root, dir_names, file_names in os.walk(directory):
                # match file names against the user-supplied pattern
                for matched_name in fnmatch.filter(file_names, file_pattern):
                    file_path = os.path.join(root, matched_name).replace("\\", "/")
                    if not file_path.endswith(".py") and not file_path.endswith(".sqlite"):
                        files_to_import.append(file_path)
if files_to_import:
for track_file in files_to_import:
                    #Sanitize the file_name to match the format of the DB table names
temp_file_name = track_file.rpartition("/")[2]
if temp_file_name[0] in ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]:
temp_file_name = "_{}".format(temp_file_name)
if [char for char in SPECIAL_CHARS_LIST if char in temp_file_name]:
for special_char in SPECIAL_CHARS_LIST:
temp_file_name = temp_file_name.replace("{}".format(special_char), "_")
#Check to see if the imported file exists. Import if not, skip if exists
if temp_file_name not in read_all_collection_names(self.db_file_name, "list"):
print(" A while this may take. Patience you must have.")
create_collection(self.db_file_name, track_file, self.skrtkey)
else:
print("\tYou cannot add {} twice!\n".format(track_file))
else:
print("\tPlease speficy a valid file/files to import.\n\t If you are traversing directories on"
"Windows, be sure to use a backslash \\ .\n\t You may also use * wildcards.\n")
else:
print("\tPlease specify a file.\n")
def do_show(self, default=None):
"""\tDisplays all collections loaded into CDPO and basic statistics\n"""
conn = sqlite3.connect(self.db_file_name)
c = conn.cursor()
#Read all collection rows from metadata TABLE
c.execute('SELECT * FROM metadata WHERE collection_type="collection"')
collection_list = c.fetchall()
print_loaded(collection_list, "Collections")
c.execute('SELECT * FROM metadata WHERE collection_type="Super"')
super_collection_list = c.fetchall()
print_loaded(super_collection_list, "Super Collections")
c.execute('SELECT * FROM metadata WHERE collection_type LIKE "Query%"')
super_collection_list = c.fetchall()
print_loaded(super_collection_list, "Query results")
conn.close()
def do_csvstats(self, garbage):
"""\tWrites basic statistics of all collections to a CSV file named show-CSV.csv\n"""
conn = sqlite3.connect(self.db_file_name)
c = conn.cursor()
#Read all collection rows from metadata TABLE
c.execute('SELECT * FROM metadata WHERE collection_type="collection"')
collection_list = c.fetchall()
c.execute('SELECT * FROM metadata WHERE collection_type="Super"')
super_collection_list = c.fetchall()
c.execute('SELECT * FROM metadata WHERE collection_type LIKE "Query%"')
query_collection_list = c.fetchall()
write_stats_to_csv(collection_list, super_collection_list, query_collection_list)
conn.close()
def do_combine(self, combo_name):
"""\tcombine [SuperCollection Name]
\t\tCombine and de-duplicate two or more existing Collections into [SuperCollection Name]\n"""
if combo_name:
if combo_name.upper() != "ALL":
                combo_name = combo_name.replace(" ", "_")  # prevents super collection labels from containing spaces
all_collections_string = read_all_collection_names(self.db_file_name, "string")
if combo_name not in all_collections_string:
combo_name = sanitize_collection_name(combo_name, combo_name)
print(" Please specify two or more existing collections to combine. (collection1 collection2)")
print(" Note that ALL will combine all loaded collections. Your choices are:")
print("\tALL {}\n".format(all_collections_string))
collections_to_add = ask_for_collections(all_collections_string)
if collections_to_add:
print(" A very, very long time this may take. Patience you must have.")
if collections_to_add == "ALL":
conn = sqlite3.connect(self.db_file_name)
c = conn.cursor()
c.execute('SELECT collection_name FROM metadata Where collection_type="collection"')
collection_list = c.fetchall()
collections_to_add = []
for collection_name in collection_list:
collections_to_add.append(str(collection_name[0]))
create_super(self.db_file_name, collections_to_add, combo_name)
else:
print("\tIt looks like this Super Combination name is already used. Please try a different name.")
else:
print("\tYou cannot name your new super collection 'all'."
"Sorry about that but maybe try 'total' instead?\n")
else:
print("\tYou need to specify a new name for a new super collection.\n")
    def do_stat(self, existing_collection):
"""\tstat [collection]
\t\tDisplay brand, track, Expiration Dates, and pan Count Statistics for one [collection]\n"""
if existing_collection and existing_collection in read_all_collection_names(self.db_file_name, "list"):
conn = sqlite3.connect(self.db_file_name)
c = conn.cursor()
#Read all collection rows from metadata TABLE
            c.execute('SELECT * FROM metadata WHERE collection_name=?', (existing_collection,))
collection_list = c.fetchall()
print_loaded(collection_list, "stat")
conn.commit()
conn.close()
elif not existing_collection:
print("\tYou didn't specify a valid collection as an argument."
" Here are your options:\n\t\t{}\n".format(read_all_collection_names(self.db_file_name, "string")))
else:
print("\tIt doesn't look like {} is a valid collection."
" Here are your options:\n\t\t{}\n".format(
existing_collection, read_all_collection_names(self.db_file_name, "string")))
    def do_query(self, existing_collection):
"""\tquery [collection]
\t\tQuery a collection for PANs based on Expiration date, brand, or track attributes\n"""
if existing_collection and existing_collection in read_all_collection_names(self.db_file_name, "list"):
conn = sqlite3.connect(self.db_file_name)
c = conn.cursor()
c.execute('SELECT DISTINCT brand FROM {}'.format(existing_collection))
possible_brands = c.fetchall()
exp_filter = get_exp_filter()
brand_filter = get_brand_filter(possible_brands)
track_filter = get_track_filter()
filter_list = []
if exp_filter:
filter_list.append("({})".format(exp_filter))
if brand_filter:
filter_list.append("({})".format(brand_filter))
if track_filter:
filter_list.append("({})".format(track_filter))
if filter_list:
query_filters = ''' WHERE {}'''.format(" AND ".join(filter_list))
else:
query_filters = ''
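            # Illustrative example: the assembled filters might read something like
            #   WHERE (exp >= 1801) AND (brand = "VISA" OR brand = "MASTERCARD") AND (track = 2 OR track = 12)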
c.execute('SELECT count(pan_exp) FROM {}{}'.format(existing_collection, query_filters))
query_count = c.fetchall()
print("\tYour query returned {} unique pan-exp combinations.\n".format(query_count[0][0]))
if ask_yn_question(" Would you like to add the query results to a collection? (Y/N): ") == "Y":
c.execute('SELECT * FROM {}{}'.format(existing_collection, query_filters))
query_results = c.fetchall()
conn.commit()
conn.close()
combo_name = ask_for_new_table_name("Query", read_all_collection_names(self.db_file_name, "list"))
create_query_table(self.db_file_name, query_results, combo_name,
'SELECT * FROM {}{}'.format(existing_collection, query_filters))
else:
conn.commit()
conn.close()
print()
        elif not existing_collection:
            print("\tYou didn't specify a valid collection as an argument."
                  " Here are your options:\n\t\t{}\n".format(read_all_collection_names(self.db_file_name, "string")))
        else:
            print("\tIt doesn't look like {} is a valid collection."
                  " Here are your options:\n\t\t{}\n".format(
                      existing_collection, read_all_collection_names(self.db_file_name, "string")))
def do_file(self, existing_collection):
"""\tfile [collection]
\t\tWrite [collection] to a CSV file in the working directory\n"""
if existing_collection and existing_collection in read_all_collection_names(self.db_file_name, "list"):
conn = sqlite3.connect(self.db_file_name)
c = conn.cursor()
#Read all collection rows from metadata TABLE
c.execute('SELECT * FROM {}'.format(existing_collection))
collection_records = c.fetchall()
write_collection_to_csv(collection_records, existing_collection, self.skrtkey)
conn.commit()
conn.close()
elif not existing_collection:
print("\tYou didn't specify a valid collection as an argument."
" Here are your options:\n\t\t{}\n".format(read_all_collection_names(self.db_file_name, "string")))
else:
print("\tIt doesn't look like {} is a valid collection."
" Here are your options:\n\t\t{}\n".format(existing_collection,
read_all_collection_names(self.db_file_name, "string")))
def do_quit(self, existing_collection):
"""\tquit CDPO\n"""
cdpo_ascii = (
"\n\n"
"\t _.-| |\\ | / |_\n"
"\t / \\ _>-\"\"\"-._.'|_\n"
"\t >`-.' `./ \\\n"
"\t /`./ \-<\n"
"\t `-| You |_/\n"
"\t /_| Are |_\\\n"
"\t ) | The | |\n"
"\t -<| Best! |\\/\n"
"\t `'_\ /`<\n"
"\t |_/`. .'\\_/\n"
"\t \_/ >-.._..-'\\_|\n"
"\t `-`_| \\_\\|_/\n"
"\t | `' | |\n"
"\t | | |\n"
"\t | | |\n"
"\t | | |\n"
"\t | | |\n"
"\t | /\\ | |\n"
"\t | /| \\ |\\ |\n"
"\t |/ |/ \\| \\|\n")
print(
"\n\tOh my goodness! Shut me down. Machines counting track data."
" How perverse.\n\n\tThanks for using CDPO!\t\t{}".format(cdpo_ascii))
return True # quits the cmd
def do_exit(self, existing_collection):
"""\tquit CDPO\n"""
cdpo_ascii = (
"\n\n"
"\t ,o888888o. 8 888888888o. 8 888888888o ,o888888o. \n"
"\t 8888 `88. 8 8888 `^888. 8 8888 `88. . 8888 `88. \n"
"\t,8 8888 `8. 8 8888 `88. 8 8888 `88 ,8 8888 `8b \n"
"\t88 8888 8 8888 `88 8 8888 ,88 88 8888 `8b\n"
"\t88 8888 8 8888 88 8 8888. ,88' 88 8888 88\n"
"\t88 8888 8 8888 88 8 888888888P' 88 8888 88\n"
"\t88 8888 8 8888 ,88 8 8888 88 8888 ,8P\n"
"\t`8 8888 .8' 8 8888 ,88' 8 8888 `8 8888 ,8P \n"
"\t 8888 ,88' 8 8888 ,o88P' 8 8888 ` 8888 ,88' \n"
"\t `8888888P' 8 888888888P' 8 8888 `8888888P'\n")
print(
"\n\tOh my goodness! Shut me down. Machines counting track data."
" How perverse.\n\n\tThanks for using CDPO!\t\t{}".format(cdpo_ascii))
return True # quits the cmd
########################################## rc4 encrypt and decrypt functions ###########################################
def rc4e(data, key):
"""rc4 encrypt data and base64 encode"""
S, j, out = range(256), 0, []
for i in range(256):
j = (j + S[i] + ord(key[i % len(key)])) % 256
S[i], S[j] = S[j], S[i]
i = j = 0
for ch in data:
i = (i + 1) % 256
j = (j + S[i]) % 256
S[i], S[j] = S[j], S[i]
out.append(chr(ord(ch) ^ S[(S[i] + S[j]) % 256]))
return base64.b64encode("".join(out))
def rc4d(data, key):
"""base64decode value and rc4decrypt to plaintext"""
data2 = base64.b64decode(data)
S, j, out = range(256), 0, []
for i in range(256):
j = (j + S[i] + ord(key[i % len(key)])) % 256
S[i], S[j] = S[j], S[i]
i = j = 0
for ch in data2:
i = (i + 1) % 256
j = (j + S[i]) % 256
S[i], S[j] = S[j], S[i]
out.append(chr(ord(ch) ^ S[(S[i] + S[j]) % 256]))
return "".join(out)
############################################ Functions for adding to the DB ############################################
def initialize():
"""Start the program and make a database"""
print_license()
# This is the name we will use for the db, unless it gets incremented below
if len(sys.argv) > 1:
db_file_name = sys.argv[1]
else:
db_file_name = "CDPO.sqlite"
if os.path.isfile(db_file_name):
db_scenario = ask_delete_or_reuse(db_file_name)
    else:  # no existing DB file, so we will create a fresh one
        db_scenario = "N"  # "N" creates a new database with this name below
if db_scenario == "D": # Delete and make a new db
os.remove(db_file_name)
skrtkey = create_db_file(db_file_name)
elif db_scenario == "I": # Finds next increment for DB name and makes it
raw_time = str(datetime.datetime.utcnow()).replace(":", "").replace("-", "")
text_time = raw_time[0:8] + "_" + raw_time[9:15]
db_file_name = "{}_{}".format(db_file_name, text_time)
skrtkey = create_db_file(db_file_name)
elif db_scenario == "L": # Load the existing CDPO.sqlite so we don't need to change the file name
conn = sqlite3.connect(db_file_name)
c = conn.cursor()
# grab password hash from database
c.execute('SELECT password_hash from password_hash_table')
existing_md5_hash = c.fetchall()[0][0]
conn.close()
# ask for password and validate with hash stored in DB
correct_password = False
while not correct_password:
password_to_test = getpass.getpass("\n\tPassword (masked): ")
if len(password_to_test) == 0:
print("\n\t\t## PASSWORD INVALID, PLEASE TRY AGAIN ##")
else:
md5_password_digest = hashlib.md5()
md5_password_digest.update(password_to_test)
password_md5_hex = md5_password_digest.hexdigest()
if password_md5_hex == existing_md5_hash:
correct_password = True
skrtkey = password_to_test
else:
print("\n\t\t## PASSWORD INVALID, PLEASE TRY AGAIN ##")
elif db_scenario == "N":
skrtkey = create_db_file(db_file_name)
# We now have the correct .sqlite file created or loaded as db_file_name, so lets load the data from metadata
time.sleep(1) # sleep for 1 second for readability
print("\n Using SQLite DB file name: {}\n".format(db_file_name))
time.sleep(1) # sleep for 1 second for readability
metadata_contents = read_metadata_table(db_file_name)
return db_file_name, metadata_contents, skrtkey
def create_db_file(filename="CDPO.sqlite"):
"""Creates database file"""
conn = sqlite3.connect(filename)
c = conn.cursor()
# Create metadata table
c.execute("CREATE TABLE metadata (collection_name VARCHAR(40),brand_stats VARCHAR(100),track_stats VARCHAR(40),"
"exp_date_stats VARCHAR(40),pan_stats VARCHAR(100),collection_type VARCHAR(40),components VARCHAR(100),"
"primary key(collection_name))")
# Create Password hash keeper table
# ask for new passwords and confirm that they match
passwords_match = False
while not passwords_match:
new_password = getpass.getpass("\n\tEncryption password (masked): ")
confirm_password = getpass.getpass("\tConfirm password (masked): ")
if len(new_password) == 0:
print("\n\n !!!!Password cannot be blank. Please use a long and complex password."
" Neither CDPO nor its creators are responsible if your database password is cracked.\n")
elif new_password == confirm_password:
passwords_match = True
else:
print("\n\n !!!!Passwords did not match. Please try again.\n")
# md5 hash password and save that into the DB so we can verify the password next time
skrtkey = new_password
md5_password_digest = hashlib.md5()
md5_password_digest.update(skrtkey)
password_md5_hex = md5_password_digest.hexdigest()
c.execute("CREATE TABLE password_hash_table (password_hash VARCHAR(40),primary key(password_hash))")
c.execute("INSERT INTO password_hash_table (password_hash) VALUES (?)", (str(password_md5_hex),))
# Save (commit) the changes
conn.commit()
conn.close()
return skrtkey
def create_collection(db_file_name, file_name, skrtkey):
"""This function is used when importing track data from a file and creating a new table in the database"""
start_time = time.time() # start keeping track of time to see how long the import takes
collection_name = file_name
# open the file and store contents
if collection_name[0] == "_":
if collection_name[1] in ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]:
try:
handle = open(collection_name[1:], "rb")
except IOError:
print("File not found, please try again")
return
else:
try:
handle = open(collection_name, "rb")
except IOError:
# print("File not found, please try again")
return
print("\t Loading file {} ...".format(file_name))
file_read_start_time = time.time()
# Read all lines of file into set to immediately get rid of duplicate lines.
file_read = set(handle.readlines())
handle.close()
file_read_elapsed_time = int(time.time() - file_read_start_time)
print("\t File loaded in {} seconds. Processing track data...".format(file_read_elapsed_time))
track_processing_start_time = time.time()
records = {}
luhn_fail = 0
not_track = 0
exp_fail = 0
luhn_substring = 0
track_mo = re.compile("([1-9][0-9]{11,18})(\^.{,30}\^|\=)([0-9]{4})")
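    # Illustrative matches for the regex above (hypothetical test PANs):
    #   "4111111111111111^DOE/JOHN^20121010000000" -> PAN, "^DOE/JOHN^" separator, exp "2012" (track 1)
    #   "4111111111111111=20121010000000"          -> PAN, "=" separator, exp "2012" (track 2)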
for file_line in file_read:
# Match all track regex instances in the line
track_matches = re.findall(track_mo, file_line)
if track_matches:
# Loop through each match in the line
for track_match in track_matches:
# Assign matched values to variables for ease of understanding
pan = track_match[0]
field_separator = track_match[1]
exp = track_match[2]
pan_exp = "{}-{}".format(pan, exp)
# See if we are testing a substring
substrings_to_check = True
substrings_checked = False
# substring testing is performed in case malware regex grabs extra characters before the PAN
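                # e.g. a scraped "94111111111111111" fails Luhn, but dropping the stray
                # leading "9" leaves a Luhn-valid 16-digit PAN (counted in luhn_substring)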
while substrings_to_check:
if luhn(pan):
# If the pan is a valid Luhn number, continue processing
# if the pan matches, no need to check substring
substrings_to_check = False
if substrings_checked:
luhn_substring += 1
# If Expiration date Month in [1,2,3,4,5,6,7,8,9,10,11,12], continue processing
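                        # e.g. exp "2012" (YY=20, MM=12) passes; "2013" (MM=13) is rejected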
if int(exp[2:4]) in range(1, 13):
# In here, we are left with only records that match the track data regex,
# have Luhn valid PANs, and have Expiration dates with valid months.
# We must now place them into the records dictionary
# Determine track format by checking field_separator
if field_separator == "=":
track_format = 2
else:
track_format = 1
# Find if pan_exp is already in records and update track_format if necessary
if pan_exp in records:
if records[pan_exp]["track"] in [track_format, 12]:
continue
else:
records[pan_exp]["track"] = 12
# If pan_exp is not already in records, we need to add it with all the necessary fields
# We wait to calculate the brand until here because it can be resource intensive
else:
brand = wiki_brand_compare(pan)
records[pan_exp] = {"pan": pan, "exp": exp, "track": track_format, "brand": brand}
else:
exp_fail += 1
else:
                        if len(pan) >= 16:  # keep checking substrings down to a PAN length of 15
                            substrings_to_check = True
                            pan = pan[1:]  # drop the leading digit; the >= 16 test keeps the remainder at least 15 digits
substrings_checked = True
else:
luhn_fail += 1
substrings_to_check = False
else:
not_track += 1
# Calculate and print the time it took to process the track data
track_processing_elapsed_time = int(time.time() - track_processing_start_time)
print("\t track data processed in {} seconds. Adding data to DB...".format(track_processing_elapsed_time))
db_add_start_time = time.time()
# Sanitize special characters from CollectionNames and first characters from being numbers,
# also gets rid of directory paths for collection names
collection_name_orig = collection_name.rpartition("/")[2]
collection_name = collection_name_orig
collection_name = sanitize_collection_name(collection_name, collection_name_orig)
# Write the records dict to SQLite DB
write_collection_to_db(records, db_file_name, collection_name, skrtkey)
db_add_elapsed_time = int(time.time() - db_add_start_time)
print("\t Data added to DB in {} seconds. Calculating stats...".format(db_add_elapsed_time))
# Calculate All Stats with this function
brand_stats, track_stats, exp_date_stats, pan_stats = calculate_all_stats(db_file_name, collection_name)
add_collection_to_metadata(
db_file_name,
collection_name,
brand_stats,
track_stats,
exp_date_stats,
pan_stats,
"collection",
"")
elapsed_time = time.time() - start_time
print("\t{} unique pan+exp combinations imported from {} in {} seconds!".format(pan_stats["UniquePANExp"],
collection_name, int(elapsed_time)))
if not_track:
print("\t\t{} unique lines did not contain track data.".format(not_track))
if luhn_fail:
print("\t\t{} records did not pass the Luhn check.".format(luhn_fail))
if exp_fail:
print("\t\t{} valid PANs did not have valid Expiration Dates.".format(exp_fail))
if luhn_substring:
print("\t\t{} invalid PANs contained valid PANs as a substring, likely due to an error with the scraper."
.format(luhn_substring))
print()
def write_collection_to_db(records, db_file_name, label, skrtkey):
"""This function is called by the create_collection function during import, combine, and query commands"""
conn = sqlite3.connect(db_file_name)
c = conn.cursor()
try:
        c.execute('''CREATE TABLE {} (pan_exp text,track INTEGER,brand text,exp text,pan text,primary key(pan_exp))'''.format(label))
    except sqlite3.OperationalError:
print(label)
raise
# pan + PANEXP is encrypted as it is written to the database. the value will be base64 encoded cipher text
# rc4e(plaintext,key) is called for pan and pan+exp
for key, value in records.iteritems():
c.execute('''INSERT INTO {} VALUES (?,?,?,?,?)'''.format(label), (
rc4e(key, skrtkey),
value["track"],
value["brand"],
value["exp"],
rc4e(value["pan"], skrtkey)))
# Save (commit) the changes
conn.commit()
conn.close()
def add_collection_to_metadata(
db_file_name,
collection_name,
brand_stats,
track_stats,
exp_date_stats,
pan_stats,
collection_type,
components):
"""This function adds information of a new collection the database metadata table after import, combine, or query"""
conn = sqlite3.connect(db_file_name)
c = conn.cursor()
c.execute("INSERT INTO metadata (collection_name,brand_stats,track_stats,exp_date_stats,pan_stats,"
"collection_type,components) VALUES (?,?,?,?,?,?,?)", (
collection_name,
str(brand_stats),
str(track_stats),
str(exp_date_stats),
str(pan_stats),
collection_type,
components))
# Save (commit) the changes
conn.commit()
conn.close()
def create_query_table(db_file_name, records, table_name, query_string):
"""Create new table and collection after query command"""
start_time = time.time()
print("\t Writing {} to the database. Patience you must have.".format(table_name))
conn = sqlite3.connect(db_file_name)
c = conn.cursor()
c.execute('''CREATE TABLE {} (pan_exp text,track INTEGER,brand text,exp Text,pan text,primary key(pan_exp))'''
.format(table_name))
for record in records:
try:
c.execute('''INSERT INTO {} VALUES (?,?,?,?,?)'''.format(table_name),
(record[0], record[1], record[2], record[3], record[4]))
        except sqlite3.IntegrityError:
            # pan_exp already present; merge track formats (12 means both track 1 and 2 were seen)
            c.execute('SELECT track FROM {} WHERE pan_exp=?'.format(table_name), (record[0],))
            track = c.fetchall()[0][0]
            if track not in (12, record[1]):
                c.execute('UPDATE {} SET track=12 WHERE pan_exp=?'.format(table_name), (record[0],))
conn.commit()
conn.close()
table_add_elapsed_time = int(time.time() - start_time)
print("\t {} added to the database in {} seconds. Calculating stats now...".format(
table_name, table_add_elapsed_time))
# Calculate All Stats with this function
brand_stats, track_stats, exp_date_stats, pan_stats = calculate_all_stats(db_file_name, table_name)
add_collection_to_metadata(db_file_name, table_name, brand_stats, track_stats, exp_date_stats, pan_stats,
"Query: {}".format(query_string), "")
query_add_elapsed_time = int(time.time() - start_time)
print("\t{} unique pan+exp combinations added to {} in {} seconds!".format(pan_stats["UniquePANExp"], table_name,
query_add_elapsed_time))
def create_super(db_file_name, collections_to_add, combo_name):
"""Create a combination collection"""
start_time = time.time()
conn = sqlite3.connect(db_file_name)
c = conn.cursor()
c.execute('''CREATE TABLE {} (pan_exp text,track INTEGER,brand text,exp Text,pan text,primary key(pan_exp))'''
.format(combo_name))
for collection in collections_to_add:
collection_add_start_time = time.time()
print("\t Adding {} to {} combination. Patience you must have.".format(collection, combo_name))
c.execute("SELECT * FROM {}".format(collection))
old_records = c.fetchall()
for record in old_records:
# print(record)
try:
c.execute('''INSERT INTO {} VALUES (?,?,?,?,?)'''.format(combo_name),
(record[0], record[1], record[2], record[3], record[4]))
            except sqlite3.IntegrityError:
                # pan_exp already present; merge track formats (12 means both track 1 and 2 were seen)
                c.execute('SELECT track FROM {} WHERE pan_exp=?'.format(combo_name), (record[0],))
                track = c.fetchall()[0][0]
                if track not in (12, record[1]):
                    c.execute('UPDATE {} SET track=12 WHERE pan_exp=?'.format(combo_name), (record[0],))
collection_add_elapsed_time = int(time.time() - collection_add_start_time)
print("\t {} processed in {} seconds.".format(collection, collection_add_elapsed_time))
print("\t All collections have been added to the {} super collection."
" Saving to .sqlite file...".format(combo_name))
db_saves_start_time = time.time()
conn.commit()
conn.close()
db_save_elapsed_time = int(time.time() - db_saves_start_time)
print("\t The super collection has been saved in {} seconds. Calculating stats now..."
.format(db_save_elapsed_time))
# Calculate All Stats with this function
brand_stats, track_stats, exp_date_stats, pan_stats = calculate_all_stats(db_file_name, combo_name)
add_collection_to_metadata(db_file_name, combo_name, brand_stats, track_stats, exp_date_stats, pan_stats, "Super",
str(collections_to_add))
elapsed_time = time.time() - start_time
print("\t{} unique pan+exp combinations added to {} in {} seconds!\n".format(pan_stats["UniquePANExp"], combo_name,
int(elapsed_time)))
def sanitize_collection_name(collection_name, collection_name_orig):
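    """Sanitize a collection name so it can be used as an SQLite table name"""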
# First char cannot be a number but other characters can, this catches that
if collection_name[0] in ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]:
print(
"\tThe name of your {} collection will have an underscore _ added"
" to the beginning for SQLite compliance".format(collection_name_orig))
collection_name = "_{}".format(collection_name)
# No special characters allowed anywhere
if [char for char in SPECIAL_CHARS_LIST if char in collection_name_orig]:
for special_char in SPECIAL_CHARS_LIST:
collection_name = collection_name.replace("{}".format(special_char), "_")
print(
"\tThe name of your {} collection has been renamed to {}"
" for SQLite compliance.".format(collection_name_orig, collection_name))
if collection_name.upper() == "ALL":
print("\tThe name of your {} collection will be renamed to _{}".format(collection_name, collection_name))
collection_name = "_{}".format(collection_name)
return collection_name
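# Illustrative sanitizations (hypothetical input names):
#   "9track-data.txt" -> "_9track_data_txt"
#   "all"             -> "_all"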
################################################ File Output Functions #################################################
def write_collection_to_csv(records, file_name, skrtkey):
"""Write decrypted track data to a csv file"""
outfile = open("{}.csv".format(file_name), "w")
outfile.write('"PAN-EXP","Track Format","Brand","ExpYYMM","PAN"\n')
for record in records:
#record[0] and record[4] are decrypted and returned here
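        # the ="..." wrapper makes spreadsheet software treat the PAN as text rather than a number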
record_line = '"{}","{}","{}","{}",="{}"\n'.format(
rc4d(record[0], skrtkey),
record[1],
record[2],
record[3],
rc4d(record[4], skrtkey))
outfile.write(record_line)
outfile.close()
def write_stats_to_csv(collection_list, super_collection_list, query_collection_list):
"""Write stats to a csv file"""
output_dictionary = {}
brands_set = set()
all_collections = []
for collection_tuple in collection_list:
all_collections.append(collection_tuple)
for collection_tuple in super_collection_list:
all_collections.append(collection_tuple)
for collection_tuple in query_collection_list:
all_collections.append(collection_tuple)
for collection_tuple in all_collections:
collection_name = collection_tuple[0]
collection_brand_dict = json.loads(collection_tuple[1].replace("'", '"'))
collection_tracks_dict = json.loads(collection_tuple[2].replace("'", '"'))
collection_exp_dict = json.loads(collection_tuple[3].replace("'", '"'))
collection_pan_stats_dict = json.loads(collection_tuple[4].replace("'", '"'))
collection_type = collection_tuple[5]
collection_components = collection_tuple[6][1:-1].split(",")
output_dictionary[collection_name] = {"brands":collection_brand_dict,
"track1":collection_tracks_dict.get("1", 0),
"track2":collection_tracks_dict.get("2", 0),
"track12":collection_tracks_dict.get("12", 0),
"exp_dates":collection_exp_dict,
"unique_pans":collection_pan_stats_dict["UniquePAN"],
"unique_pan_exps":collection_pan_stats_dict["UniquePANExp"],
"type":collection_type,
"components":collection_components
}
        for brand in collection_brand_dict:
brands_set.add(brand)
brands_list = list(brands_set)
raw_time = str(datetime.datetime.utcnow()).replace(":", "").replace("-", "")
text_time = raw_time[0:8]+"_"+raw_time[9:15]
with open("CDPO_stats_{}_UTC.csv".format(text_time), 'wb') as stats_csvfile:
stats_writer = csv.writer(stats_csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
header_row = ["Collection Name",
"Unique PANs",
"Unique pan+exp Dates",
"Track 1 Only",
"Track 2 Only",
"Track 1 and 2",
"Expiration date (JSON)"]
for brand in brands_list:
header_row.append(brand)
stats_writer.writerow(header_row)
for collection_name, details in output_dictionary.iteritems():
csv_row = [collection_name,
details["unique_pans"],
details["unique_pan_exps"],
details["track1"],
details["track2"],
details["track12"],
details["exp_dates"]]
for brand in brands_list:
csv_row.append(details["brands"].get(brand, 0))
stats_writer.writerow(csv_row)
############################################### Query Command Functions ################################################
def get_track_filter():
""" Ask user for which tracks they want to query on"""
answered = False
while not answered:
print(" ")
print(""" Please choose the track you would like to filter by. Available choices are:\n\
(A) track 1 OR both tracks\n\
(B) track 1 ONLY\n\
(C) track 2 OR both tracks\n\
(D) track 2 ONLY\n\
(E) Only Both Tracks\n\
(N) Skip [default]\n""")
filter_choice = raw_input(" You're choice: ")
if filter_choice == "":
return None
else:
filter_choice = filter_choice[:1].upper()
if filter_choice in ["A", "B", "C", "D", "E", "N"]:
if filter_choice == "A":
return "track = 1 OR track = 12"
elif filter_choice == "B":
return "track = 1"
elif filter_choice == "C":
return "track = 2 OR track = 12"
elif filter_choice == "D":
return "track = 2"
elif filter_choice == "E":
return "track = 12"
else:
print("\t\tNo track filter will be used.")
return None
def get_brand_filter(unicode_brands_list):
"""Ask user for which brands they want to filter on in query"""
available_brand_string = "ALL "
selected_brands_list = []
# BrandList = []
for brand in unicode_brands_list:
available_brand_string += "{} ".format(str(brand)[3:-3])
answered = False
while not answered:
print(" ")
print(
" Please choose brands (separated by space) that you would like to filter by. Default is ALL.")
print("\tAvailable brands are:\n\t {}".format(available_brand_string))
filter_choice = raw_input("\n Your choice(s) (Press Enter to skip this filter): ").upper()
if filter_choice == "":
print("\tNo brand filter will be used")
return None
elif filter_choice.strip() == "ALL":
print("\tNo brand filter will be used")
return None
else:
for brand in filter_choice.split(" "):
if brand in available_brand_string:
selected_brands_list.append('brand = "{}"'.format(brand))
else:
print("\t\t{} does not appear to be valid. It was skipped".format(brand))
            if len(selected_brands_list) == 0:
                print("\tYou did not enter any valid brands. Please try again\n")
                continue
            else:
                return " OR ".join(selected_brands_list)
def get_exp_filter():
"""Ask user which exp dates they want to filter on for query"""
answered = False
while not answered:
filter_choice = raw_input(" You can filter on Expiration date. Here are your options:\n"
"\t(G)reater than OR equal to YYMM\n"
"\t(L)ess than OR equal to YYMM\n"
"\t(E)qual to YYMM\n"
"\t(B)etween AND including YYMM and YYMM\n"
"\t(N)o Expiration date filter [default]"
"\n\n Your Choice: ")
if filter_choice == "":
answered = True
return None
else:
filter_choice = filter_choice.upper()[:1]
if filter_choice == "G":
answered = True
filter_year = ask_for_exp_date(
"What year must all Expiration Dates be greater than OR equal to? YYMM:")
return "exp >= {}".format(filter_year)
elif filter_choice == "L":
answered = True
filter_year = ask_for_exp_date(
"What year must all Expiration Dates be less than OR equal to? YYMM:")
return "exp <= {}".format(filter_year)
elif filter_choice == "E":