-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathinvoice_file_processor.py
90 lines (63 loc) · 4.02 KB
/
invoice_file_processor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import os
import shutil
import json
import converters
from azure.core.credentials import AzureKeyCredential
from azure.ai.documentintelligence import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import AnalyzeDocumentRequest
# Connection settings for the Document Intelligence client, read from the
# environment at import time. A missing variable raises KeyError immediately,
# before any invoice is processed.
endpoint = os.environ["DOCUMENT_INTELLIGENCE_ENDPOINT"]
key = os.environ["DOCUMENT_INTELLIGENCE_SUBSCRIPTION_KEY"]
def process_field(field_name, new_file_name, all_conditions_met, invoice, convert_function=None):
    """Append one extracted invoice field to the file name being built.

    Args:
        field_name: Key to look up in ``invoice.fields``.
        new_file_name: File name accumulated so far; the field value is
            appended to it after an underscore.
        all_conditions_met: Running flag that stays truthy only while every
            requested field has been found.
        invoice: Analyzed document exposing a mapping-like ``fields``
            attribute whose values support ``.get('content')`` and
            ``.get('confidence')``.
        convert_function: Optional callable applied to the raw field content
            before it is appended.

    Returns:
        A ``(new_file_name, all_conditions_met)`` tuple. When the field is
        missing, has no content, or converts to ``None``, the name is returned
        unchanged and the flag is ``False``.
    """
    # ``fields`` may be absent on a document; treat that like "no fields".
    fields = invoice.fields or {}
    field = fields.get(field_name)
    if not field:
        return new_file_name, False

    raw_content = field.get('content')
    if raw_content is None:
        # A field without text cannot contribute to the name; flag it rather
        # than letting the concatenation below raise TypeError.
        return new_file_name, False

    field_content = convert_function(raw_content) if convert_function else raw_content
    if field_content is None:
        return new_file_name, False

    # f-string formatting also tolerates converters that return non-strings.
    new_file_name = f"{new_file_name}_{field_content}"
    print(f"{field_name}: {raw_content} has confidence: {field.get('confidence')}")
    return new_file_name, all_conditions_met
def analyze_invoices():
    """Analyze every file in ``in/`` and file the results under ``out/`` or ``error/``.

    Each regular file in the ``in`` directory is sent to the
    ``prebuilt-invoice`` model. A new file name is built from the extracted
    fields listed below; the analysis result is written as ``<name>.json`` and
    the original file is copied next to it as ``<name><original extension>``.
    The pair goes to ``out`` when every field was found on every detected
    document, and to ``error`` otherwise (including when no document was
    detected at all).

    Per-file failures are printed and do not stop the run.
    """
    # Fields that make up the output file name, in order, with the converter
    # applied to each value.
    name_fields = (
        ("InvoiceDate", converters.convert_date),
        ("VendorName", converters.remove_special_characters),
        ("VendorTaxId", converters.remove_special_characters),
        ("InvoiceId", converters.remove_special_characters),
        ("CustomerName", converters.remove_special_characters),
        ("CustomerTaxId", converters.remove_special_characters),
    )

    # The client does not depend on the file, so build it once for the run.
    document_intelligence_client = DocumentIntelligenceClient(
        endpoint=endpoint, credential=AzureKeyCredential(key)
    )

    for filename in os.listdir("in"):
        filepath = os.path.join("in", filename)
        if not os.path.isfile(filepath):
            continue
        print(filepath)

        try:
            # Read and close the file before the (slow) service call.
            with open(filepath, "rb") as f:
                file_content = f.read()
            if not file_content:
                print("Error: The file is empty.")
                continue

            poller = document_intelligence_client.begin_analyze_document(
                "prebuilt-invoice", analyze_request=file_content, content_type="application/octet-stream"
            )
            invoices = poller.result()

            documents = invoices.documents or []
            new_file_name = ""
            # No detected document means nothing was verified: route to "error".
            all_conditions_met = bool(documents)
            for invoice in documents:
                print("--------Analyzing invoice --------")
                for field_name, convert_function in name_fields:
                    new_file_name, all_conditions_met = process_field(
                        field_name, new_file_name, all_conditions_met, invoice, convert_function
                    )
                print("----------------------------------------")

            stem, file_extension = os.path.splitext(filename)
            # Fall back to the input file's own stem if no usable name could
            # be built, so results never land in a nameless ".json" file.
            file_name = converters.create_safe_filename(new_file_name) or stem
            print(file_name)

            out_directory = "out" if all_conditions_met else "error"
            # Create the target directory on demand; previously a missing
            # directory surfaced as a misleading "File not found" for the input.
            os.makedirs(out_directory, exist_ok=True)
            with open(os.path.join(out_directory, file_name + '.json'), 'w') as json_file:
                json.dump(invoices.as_dict(), json_file)
            shutil.copy(filepath, os.path.join(out_directory, file_name + file_extension))
        except FileNotFoundError:
            print(f"Error: File not found at {filepath}")
        except Exception as e:
            # Top-level boundary for one file: report and move on to the next.
            print(f"An error occurred: {e}")
# Run the batch only when executed as a script, not when imported.
if __name__ == "__main__":
    analyze_invoices()