Skip to content

Commit 01b511f

Browse files
committed
switch py-libxml2 to py-lxml (#84)
tested ok.
1 parent e8c41d4 commit 01b511f

File tree

2 files changed

+75
-110
lines changed

2 files changed

+75
-110
lines changed

.github/workflows/main.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ jobs:
6767
run: |
6868
sudo apt-get -y update
6969
sudo apt-get -y install texlive help2man libconvert-binary-c-perl shellcheck \
70-
swig python3-dev python3-libxml2 libxml2-dev libpcre2-dev libpcre2-32-0 jq \
70+
swig python3-dev python3-lxml libxml2-dev libpcre2-dev libpcre2-32-0 jq \
7171
pkg-config shellcheck parallel rpmlint shfmt doxygen jing valgrind cppcheck \
7272
gperf intltool latexml
7373
build-aux/install-libxslt
@@ -241,6 +241,7 @@ jobs:
241241
mingw-w64-x86_64-libtool
242242
mingw-w64-x86_64-libiconv
243243
mingw-w64-x86_64-libxml2
244+
mingw-w64-x86_64-libxslt
244245
mingw-w64-x86_64-swig
245246
mingw-w64-x86_64-python
246247
libtool
@@ -250,6 +251,7 @@ jobs:
250251
gperf
251252
mingw-w64-x86_64-perl
252253
mingw-w64-x86_64-pcre2
254+
python-lxml
253255
- run: reg add "HKLM\Software\Microsoft\Windows\Windows Error Reporting" /f /v DontShowUI /d 1
254256
- name: checkout
255257
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

test/xmlsuite/helper.py

Lines changed: 72 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,12 @@
11
# -*- indent-tabs-mode:1 tab-width:4 mode:python minor-mode:whitespace -*-
2-
import libxml2
3-
import re
42
import os
53
import glob
6-
7-
8-
"""
9-
This class has all the colors to be used with colored output
10-
terminal.
11-
"""
4+
import re
5+
from lxml import etree
126

137

148
class bcolors:
9+
"""This class has all the colors to be used with colored output terminal."""
1510
HEADER = "\033[95m"
1611
OKBLUE = "\033[94m"
1712
OKGREEN = "\033[92m"
@@ -20,24 +15,20 @@ class bcolors:
2015
ENDC = "\033[0m"
2116

2217

23-
"""
24-
This functions uses the script to generate xml which can be used for
25-
comparison later.
26-
27-
@param string dwgdir The path to DWG dir
28-
"""
29-
30-
3118
def generatexml(dwgdir):
32-
# This beats ‘sys.argv[0]’, which is not guaranteed to be set.
19+
"""
20+
This function uses the script to generate xml which can be used for
21+
comparison later.
22+
23+
@param str dwgdir: The path to DWG dir
24+
"""
3325
me = os.getenv("PYTHON")
3426
if not me:
3527
me = "python"
3628
srcdir = os.path.dirname(__file__)
3729
current_dir = os.getcwd()
3830
os.chdir(dwgdir)
3931
for filename in glob.glob("*/*.txt"):
40-
# maybe add double-quotes for the script?
4132
os.system(
4233
me
4334
+ " "
@@ -51,84 +42,64 @@ def generatexml(dwgdir):
5142
os.chdir(current_dir)
5243

5344

54-
"""
55-
This functions main aim is to process special types of attributes
56-
which are difficult to equate to each other. Currently this only
57-
handles 2D and 3D point. It converts these string in certain format
58-
so that they can be equated
59-
60-
@param string attr the attribute to be processed
61-
@return string The processed attribute
62-
"""
63-
64-
6545
def processattr(attr):
66-
pattern = re.compile(r"(\d+\.\d{1,})\s{0,1}")
46+
"""
47+
This function's main aim is to process special types of attributes
48+
which are difficult to equate to each other. Currently this only
49+
handles 2D and 3D points. It converts these strings into a certain format
50+
so that they can be equated.
51+
52+
@param str attr: the attribute to be processed
53+
@return str: The processed attribute
54+
"""
55+
pattern = re.compile(r"(\d+\.\d+)\s?")
6756
if re.search(pattern, attr):
68-
# extract the numbers and adjust them
69-
extract_pattern = re.compile(r"(\d+\.\d{1,})\s{0,1}")
70-
result = extract_pattern.findall(attr)
71-
for no in range(len(result)):
72-
result[no] = float(result[no])
57+
result = [float(x) for x in pattern.findall(attr)]
7358

74-
# if its a 3d point
7559
if len(result) == 3:
7660
return "(%.2f %.2f %.2f)" % (
7761
round(result[0], 2),
7862
round(result[1], 2),
7963
round(result[2], 2),
8064
)
8165
elif len(result) == 2:
82-
return "(%.2f %.2f)" % round(round(result[0], 2), round(result[1], 2))
83-
else:
84-
return attr
85-
86-
87-
"""
88-
This function takes handle to both ideal file which came from AutoCAD and
89-
practical file which came from LibreDWG and compares them to emit out the
90-
result
91-
92-
@param ideal Name of the ideal file
93-
@param practical Name of the practical file
94-
95-
return array[2]
96-
[0] = The percentage of entity that matched
97-
[1] = The unmatched attributes with following format
98-
{attrname, original, duplicate}
99-
attrname = Name of the attribute
100-
original = Value came from AutoCAD
101-
duplicate = Value that came from LibreDWG.
102-
"""
66+
return "(%.2f %.2f)" % (
67+
round(result[0], 2),
68+
round(result[1], 2)
69+
)
70+
return attr
10371

10472

10573
def xmlprocess(ideal, practical):
106-
doc = libxml2.parseFile(ideal)
107-
108-
root = doc.getRootElement()
109-
child = root.children
110-
111-
# Let's first collect all the entities present in the file
112-
original_entities = []
113-
114-
while child is not None:
115-
if child.type == "element":
116-
original_entities.insert(len(original_entities), child)
117-
child = child.next
118-
119-
doc2 = libxml2.parseFile(practical)
120-
121-
root2 = doc2.getRootElement()
122-
child2 = root2.children
123-
124-
duplicate_entities = []
125-
126-
while child2 is not None:
127-
if child2.type == "element":
128-
duplicate_entities.insert(len(duplicate_entities), child2)
129-
child2 = child2.next
74+
"""
75+
This function takes the names of both the ideal file, which came from AutoCAD,
76+
and the practical file, which came from LibreDWG, and compares them to produce
77+
the result.
78+
79+
@param str ideal: Name of the ideal file
80+
@param str practical: Name of the practical file
81+
@return list: array[2]
82+
[0] = The percentage of entity attributes that matched
83+
[1] = The unmatched attributes with following format
84+
{attrname, original, duplicate}
85+
attrname = Name of the attribute
86+
original = Value came from AutoCAD
87+
duplicate = Value that came from LibreDWG.
88+
"""
89+
doc = etree.parse(ideal)
90+
root = doc.getroot()
91+
92+
# Collect all entity elements (direct children of root)
93+
original_entities = [child for child in root if isinstance(child.tag, str)]
94+
95+
doc2 = etree.parse(practical)
96+
root2 = doc2.getroot()
97+
98+
# Collect all entity elements (direct children of root)
99+
duplicate_entities = [child for child in root2 if isinstance(child.tag, str)]
130100

131101
match = 0
102+
total_unmatched = []
132103

133104
# Now it's time for the comparison. For each DWG entity
134105
for original, duplicate in zip(original_entities, duplicate_entities):
@@ -144,62 +115,54 @@ def xmlprocess(ideal, practical):
144115
"Hyperlinks",
145116
]
146117

147-
# collect original attributes. Removing the attributes here, so the
148-
# total length is also set
118+
# Collect original attributes
149119
try:
150-
# print (ideal + " original.properties")
151-
for attr in original.properties:
152-
if attr.name not in excluded_attributes:
153-
original_attributes[attr.name] = processattr(attr.content)
154-
except TypeError:
120+
for attr_name, attr_value in original.attrib.items():
121+
if attr_name not in excluded_attributes:
122+
original_attributes[attr_name] = processattr(attr_value)
123+
except AttributeError:
155124
print("Need python3 compatible libxml2 with __next__ iterator")
156125

126+
# Collect duplicate attributes
157127
try:
158-
for attr in duplicate.properties:
159-
duplicate_attributes[attr.name] = processattr(attr.content)
160-
except TypeError:
128+
for attr_name, attr_value in duplicate.attrib.items():
129+
if attr_name not in excluded_attributes:
130+
duplicate_attributes[attr_name] = processattr(attr_value)
131+
except AttributeError:
161132
pass
162133

163134
unmatched_attr = []
164-
# collect duplicate attributes and check if it matches with
165-
# original ones
135+
136+
# Compare attributes
166137
for key, value in original_attributes.items():
167138
try:
168139
if value == duplicate_attributes[key]:
169140
match += 1
170141
else:
171-
# The attributes didn't match.
172-
# Report the unmatched attribute
173142
unmatched_attr.append(
174143
{
175144
"attrname": key,
176145
"original": value,
177146
"duplicate": duplicate_attributes[key],
178147
}
179148
)
180-
181-
except Exception:
182-
# This exception would occur when
183-
# We can't find the given attribute
184-
149+
except KeyError:
150+
# Attribute not found in duplicate
185151
unmatched_attr.append(
186152
{"attrname": key, "original": value, "duplicate": ""}
187153
)
188-
continue
189154

190-
# What are the total number of attributes
155+
total_unmatched.extend(unmatched_attr)
156+
157+
# Calculate percentage
191158
try:
192159
total_attr = len(original_attributes)
193160
if total_attr == 0:
194-
percent_each = 0
195-
else:
196-
percent_each = 100 / total_attr
161+
return [0, []]
162+
percent_each = 100 / total_attr
197163
except NameError:
198164
return [0, []]
199-
raise
200165

201166
res_percent = percent_each * match
202-
doc.freeDoc()
203-
doc2.freeDoc()
204167

205-
return [res_percent, unmatched_attr]
168+
return [res_percent, total_unmatched]

0 commit comments

Comments
 (0)