I'm not sure anyone else needs this, but I wrote a small script that sequentially numbers each node in a ranking system. I use it when I have a lot of nodes under testing.
file_path = "C:/UxxxxKE.txt"

# Load the whole file up front; it is rewritten in place further down.
with open(file_path, 'r', encoding='utf-8') as source:
    original_lines = source.readlines()

marker = 'Name="'
max_name_length = 45
next_number = 1
renumbered = []

for text in original_lines:
    # Pass through untouched any line that has no Name=" attribute, and any
    # line that contains 'Boolean Name="'.
    if marker not in text or 'Boolean Name="' in text:
        renumbered.append(text)
        continue

    # Locate the value of the first Name="..." attribute on the line.
    value_start = text.index(marker) + len(marker)
    value_end = text.index('"', value_start)

    # Prefix the running number, then cap the result at 45 characters.
    numbered = f'{next_number}.{text[value_start:value_end]}'[:max_name_length]

    renumbered.append(text[:value_start] + numbered + text[value_end:])
    next_number += 1

# Overwrite the original file with the renumbered lines.
with open(file_path, 'w', encoding='utf-8') as target:
    target.writelines(renumbered)

print("Numbering with periods added, and names limited to a maximum of 45 characters, except for 'Boolean Name=' in the file.")
2 Likes
Here is another one. This sorts all the nodes from largest to smallest:
import re
from lxml import etree
def escape_special_chars_in_formula(xml_text):
    """
    Escapes special characters (<, >, &) within <Formula> tags.

    Only the text between a literal ``<Formula>`` and the next
    ``</Formula>`` is touched; everything outside those tags is returned
    unchanged. Ampersands that already start a predefined XML entity
    (``&amp;``, ``&lt;``, ``&gt;``, ``&quot;``, ``&apos;``) or a numeric
    character reference are left alone, so running the function on text
    that has already been escaped does not escape it a second time.

    Args:
        xml_text: The raw XML document as a string.

    Returns:
        A copy of ``xml_text`` with the formula bodies escaped.
    """
    # Pattern to match content within <Formula> tags (DOTALL is applied
    # below so that multi-line formulas are matched as well).
    pattern = r'(<Formula>)(.*?)(</Formula>)'
    # A '&' that is NOT already the start of an entity / character reference.
    bare_ampersand = re.compile(
        r'&(?!(?:amp|lt|gt|quot|apos|#[0-9]+|#x[0-9A-Fa-f]+);)'
    )

    def replacer(match):
        start, content, end = match.groups()
        # '&' must be handled first: the '<' and '>' replacements below
        # introduce new ampersands that must not be escaped again.
        content = bare_ampersand.sub('&amp;', content)
        content = content.replace('<', '&lt;').replace('>', '&gt;')
        return f"{start}{content}{end}"

    # Apply the replacement for all <Formula> tags
    return re.sub(pattern, replacer, xml_text, flags=re.DOTALL)
def _stock_node_weight(node):
    """
    Returns the numeric value of a node's 'Weight' attribute for sorting.

    A missing or blank attribute counts as 0.0, and a trailing '%' is
    ignored. Any other non-numeric text still raises ValueError.
    """
    raw = node.get('Weight')
    if raw is None:
        return 0.0
    text = raw.strip()
    if text.endswith('%'):
        text = text[:-1].strip()
    return float(text) if text else 0.0


def sort_stock_nodes(element):
    """
    Recursively sorts <StockFormula> and <StockFactor> child nodes
    based on their Weight attribute in descending order.

    The tree is modified in place and nothing is returned. Within each
    parent, the sorted nodes are removed and re-appended, so they end up
    after any sibling that is neither a <StockFormula> nor a <StockFactor>.
    Nodes with equal weights keep their original relative order.

    Args:
        element: The element whose subtree should be sorted.

    Raises:
        ValueError: If a Weight attribute holds text that is not a number.
    """
    for child in element:
        # Sort the deeper levels first.
        sort_stock_nodes(child)

    # Identify all <StockFormula> and <StockFactor> nodes under the current element
    stock_nodes = [
        child for child in element
        if child.tag in ('StockFormula', 'StockFactor')
    ]
    if not stock_nodes:
        return

    # Largest weight first.
    sorted_nodes = sorted(stock_nodes, key=_stock_node_weight, reverse=True)

    # Detach the nodes, then re-attach them in sorted order.
    for node in stock_nodes:
        element.remove(node)
    for node in sorted_nodes:
        element.append(node)
def pretty_print_xml(tree, output_file):
    """
    Serialises the XML tree to the output file.

    The tree's own write() method is called with pretty printing enabled,
    an XML declaration requested, and UTF-8 as the encoding.
    """
    write_options = {
        'pretty_print': True,
        'xml_declaration': True,
        'encoding': 'utf-8',
    }
    tree.write(output_file, **write_options)
def main():
    """
    Escapes the formulas in the input file, then sorts its stock nodes.

    Steps:
      1. Read the input file as text.
      2. Escape special characters inside <Formula> tags and write the
         result to the "fixed" file.
      3. Parse the fixed file, sort the <StockFormula>/<StockFactor> nodes
         by weight, and write the pretty-printed tree to the "sorted" file.

    Errors are reported on stdout rather than raised.
    """
    # Define the file paths
    input_file = r"C:/Users/xxxxG.txt"
    # NOTE(review): both output paths are identical as written here, so the
    # sorted output overwrites the intermediate fixed file. Confirm whether
    # they are meant to differ.
    output_fixed_file = r"C:/Users/xxxxd.txt"
    output_sorted_file = r"C:/Users/xxxxd.txt"
    try:
        # Read the original XML file as text
        with open(input_file, 'r', encoding='utf-8') as file:
            xml_text = file.read()

        # Escape special characters within <Formula> tags
        xml_text = escape_special_chars_in_formula(xml_text)

        # Write the corrected XML to a new fixed file
        with open(output_fixed_file, 'w', encoding='utf-8') as file:
            file.write(xml_text)
        print(f"Corrected XML has been written to '{output_fixed_file}'.")

        # Parse the corrected XML file; blank text is dropped so that the
        # pretty printer can re-indent the document.
        parser = etree.XMLParser(remove_blank_text=True)
        tree = etree.parse(output_fixed_file, parser)
        root = tree.getroot()

        # Sort the <StockFormula> and <StockFactor> nodes
        sort_stock_nodes(root)

        # Write the sorted XML to the output .txt file
        pretty_print_xml(tree, output_sorted_file)
        print(f"Sorted XML has been written to '{output_sorted_file}' successfully.")
    except etree.XMLSyntaxError as e:
        print(f"XML Syntax Error: {e}")
    except FileNotFoundError as e:
        # Opening the output file can fail with this error too (for example
        # when its directory is missing), so name the path that actually
        # failed instead of always naming the input file.
        print(f"Error: File '{e.filename or input_file}' not found.")
    except Exception as e:
        # Top-level boundary of the script: report the error and stop.
        print(f"An unexpected error occurred: {e}")


if __name__ == "__main__":
    main()
1 Like