When the bank delivers a bunch of CAMT files, it is tedious to import every single file manually (one per day). I therefore asked ChatGPT, which produced the script below. It takes several CAMT files bundled in a ZIP archive and merges them into one single CAMT file. It also strips the “.0” at the end of some timestamp entries, which Tryton cannot deal with.
What do you think? Would the best approach be to
- create a separate module,
- integrate such code into the account-sepa-statement module,
- or is there yet another, better solution?
Greetings,
Michael
Here is the script generated by ChatGPT:
#! /usr/bin/env python3
import os
import zipfile
import xml.dom.minidom as minidom
from xml.dom.minidom import Document
from datetime import datetime
import re
def extract_zip_files(input_folder):
    """Extract every ZIP archive found directly in *input_folder* into it.

    The folder is scanned non-recursively; the members of each archive are
    extracted into *input_folder* itself.

    Args:
        input_folder: Directory that holds the archives and receives the
            extracted members.

    Raises:
        zipfile.BadZipFile: If a file with a ``.zip`` suffix is not a valid
            archive.
    """
    # os.listdir() returns names in arbitrary order; sorting makes the result
    # reproducible when two archives contain a member with the same name.
    for filename in sorted(os.listdir(input_folder)):
        # Compare case-insensitively so that "STATEMENTS.ZIP" is picked up too.
        if not filename.lower().endswith(".zip"):
            continue
        zip_path = os.path.join(input_folder, filename)
        if not os.path.isfile(zip_path):
            # A directory that merely happens to be named "*.zip".
            continue
        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            zip_ref.extractall(input_folder)
def merge_camt053_files(input_folder, output_file):
    """Merge the CAMT.053 XML files of *input_folder* into *output_file*.

    A fresh ``Document``/``BkToCstmrStmt`` skeleton (camt.053.001.02
    namespace) is created, every ``*.xml`` file found directly in
    *input_folder* is handed to ``parse_and_merge`` and the resulting tree is
    written to *output_file* as pretty-printed UTF-8 text.

    Args:
        input_folder: Directory scanned (non-recursively) for XML files.
        output_file: Path of the merged file; it is overwritten if it exists.
    """
    doc = Document()
    root = doc.createElement("Document")
    root.setAttribute("xmlns", "urn:iso:std:iso:20022:tech:xsd:camt.053.001.02")
    doc.appendChild(root)
    bk_to_cstmr_stmt = doc.createElement("BkToCstmrStmt")
    root.appendChild(bk_to_cstmr_stmt)

    output_path = os.path.abspath(output_file)
    # os.listdir() order is arbitrary; sort so the statements always end up in
    # the same (file name) order in the merged document.
    for filename in sorted(os.listdir(input_folder)):
        if not filename.lower().endswith(".xml"):
            continue
        filepath = os.path.join(input_folder, filename)
        if os.path.abspath(filepath) == output_path:
            # A previous result stored inside the input folder must not be
            # merged into itself on the next run.
            continue
        parse_and_merge(filepath, bk_to_cstmr_stmt, doc)

    with open(output_file, "w", encoding="utf-8") as f:
        f.write(doc.toprettyxml(indent=" "))
def parse_and_merge(filepath, target_node, doc):
    """Copy the statements of one CAMT file into the merged document.

    Every element child of each ``BkToCstmrStmt`` found in *filepath* is
    passed through ``clean_time_elements`` and then deep-copied into
    *target_node*.  Only one ``GrpHdr`` is kept in *target_node*: the
    camt.053 schema allows exactly one group header followed by the ``Stmt``
    elements, so the headers of all further files are dropped.  The retained
    header therefore describes the first merged message only.

    Args:
        filepath: Path of the XML file to read.
        target_node: The ``BkToCstmrStmt`` element of the merged document.
        doc: The merged document that owns *target_node*.

    Raises:
        xml.parsers.expat.ExpatError: If *filepath* is not well-formed XML.
    """
    dom = minidom.parse(filepath)

    # Was a group header already merged by an earlier call?
    has_group_header = any(
        existing.nodeType == existing.ELEMENT_NODE
        and existing.tagName == "GrpHdr"
        for existing in target_node.childNodes
    )

    for stmt in dom.getElementsByTagName("BkToCstmrStmt"):
        for child in stmt.childNodes:
            if child.nodeType != child.ELEMENT_NODE:
                # Skip whitespace text nodes and comments between elements.
                continue
            if child.tagName == "GrpHdr":
                if has_group_header:
                    continue
                has_group_header = True
            clean_time_elements(child)
            target_node.appendChild(doc.importNode(child, True))
def clean_time_elements(node):
    """Run ``clean_time_format`` on the timestamp elements below *node*.

    The descendants named ``FrDtTm``, ``ToDtTm`` and ``CreDtTm`` are
    processed, in that order; *node* is modified in place.
    """
    for tag_name in ("FrDtTm", "ToDtTm", "CreDtTm"):
        for timestamp_element in node.getElementsByTagName(tag_name):
            clean_time_format(timestamp_element)
def clean_time_format(element):
    """Strip fractional seconds from the ISO 8601 timestamp in *element*.

    ``2024-01-31T10:15:30.0+01:00`` becomes ``2024-01-31T10:15:30+01:00``.
    A trailing zone designator (``Z`` or ``+hh:mm`` / ``-hh:mm``) is kept as
    is, so the instant the timestamp denotes does not change.  Values that do
    not look like a date-time, or that name an impossible date or time, are
    left untouched, as are elements whose first child is not a text node.

    Args:
        element: DOM element whose first child holds the timestamp text; it
            is modified in place.
    """
    text_node = element.firstChild
    if text_node is None or text_node.nodeType != text_node.TEXT_NODE:
        return

    match = re.fullmatch(
        r"(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})(?:\.\d+)?(Z|[+-]\d{2}:\d{2})?",
        text_node.nodeValue.strip(),
        flags=re.ASCII,
    )
    if match is None:
        return

    seconds_part = match.group(1)
    zone_part = match.group(2) or ""
    try:
        # Validation only: rejects e.g. month 13 or minute 61.
        datetime.strptime(seconds_part, "%Y-%m-%dT%H:%M:%S")
    except ValueError:
        return
    text_node.nodeValue = seconds_part + zone_part
if __name__ == "__main__":
    # Script entry point.  Both paths may be given on the command line; when
    # omitted, the previous hard-coded defaults are used.
    import argparse

    parser = argparse.ArgumentParser(
        description="Merge the CAMT.053 files of a folder into a single file; "
        "ZIP archives in the folder are extracted first."
    )
    parser.add_argument(
        "input_folder",
        nargs="?",
        default="./input_files",
        help="folder holding the ZIP/XML files (default: ./input_files)",
    )
    parser.add_argument(
        "output_file",
        nargs="?",
        default="merged_camt053.xml",
        help="path of the merged file (default: merged_camt053.xml)",
    )
    args = parser.parse_args()
    input_folder = args.input_folder
    output_file = args.output_file

    # isdir() rather than exists(): a regular file with that name would pass
    # exists() and then fail inside os.listdir().
    if not os.path.isdir(input_folder):
        print(f"Folder {input_folder} not found.")
    else:
        # Extract ZIP files before processing XML files
        extract_zip_files(input_folder)
        merge_camt053_files(input_folder, output_file)
        print(f"Finished merging. Result saved in {output_file}")