Skip to content

Commit 6238d8a

Browse files
authored
Merge pull request #944 from Mingun/flexible-read-text
Change `read_text()` return value to `BytesText`
2 parents dbf8a24 + c5506c3 commit 6238d8a

File tree

8 files changed

+58
-35
lines changed

8 files changed

+58
-35
lines changed

Changelog.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,13 @@
4444
of `NsReader`. Use `.resolver().<...>` methods instead.
4545
- [#938]: Now `BytesText::xml_content`, `BytesCData::xml_content` and `BytesRef::xml_content`
4646
accept an `XmlVersion` parameter to apply the correct EOL normalization rules.
47+
- [#944]: `read_text()` now returns `BytesText`, which allows you to get the content with
48+
properly normalized EOLs. To get the previous behavior, use `.read_text(end)?.decode()?`.
4749

4850
[#371]: https://github.com/tafia/quick-xml/issues/371
4951
[#914]: https://github.com/tafia/quick-xml/pull/914
5052
[#938]: https://github.com/tafia/quick-xml/pull/938
53+
[#944]: https://github.com/tafia/quick-xml/pull/944
5154

5255

5356
## 0.39.2 -- 2026-02-20

examples/read_nodes.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// Note: for this specific data set using serde feature would simplify
33
// this simple data is purely to make it easier to understand the code
44

5+
use quick_xml::encoding::EncodingError;
56
use quick_xml::events::attributes::AttrError;
67
use quick_xml::events::{BytesStart, Event};
78
use quick_xml::name::QName;
@@ -53,6 +54,12 @@ impl From<AttrError> for AppError {
5354
}
5455
}
5556

57+
impl From<EncodingError> for AppError {
58+
fn from(error: EncodingError) -> Self {
59+
Self::Xml(quick_xml::Error::Encoding(error))
60+
}
61+
}
62+
5663
#[derive(Debug)]
5764
struct Translation {
5865
tag: String,
@@ -91,7 +98,7 @@ impl Translation {
9198
Ok(Translation {
9299
tag: tag.into(),
93100
lang: lang.into(),
94-
text: text_content.into(),
101+
text: text_content.decode()?.into(),
95102
})
96103
} else {
97104
dbg!("Expected Event::Start for Text, got: {:?}", &event);

src/reader/ns_reader.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
//! [qualified names]: https://www.w3.org/TR/xml-names11/#dt-qualname
55
//! [expanded names]: https://www.w3.org/TR/xml-names11/#dt-expname
66
7-
use std::borrow::Cow;
87
use std::fs::File;
98
use std::io::{BufRead, BufReader};
109
use std::ops::Deref;
@@ -725,11 +724,13 @@ impl<'i> NsReader<&'i [u8]> {
725724
/// // ...then, we could read text content until close tag.
726725
/// // This call will correctly handle nested <html> elements.
727726
/// let text = reader.read_text(end.name()).unwrap();
728-
/// assert_eq!(text, Cow::Borrowed(r#"
727+
/// let text = text.decode().unwrap();
728+
/// assert_eq!(text, r#"
729729
/// <title>This is a HTML text</title>
730730
/// <p>Usual XML rules does not apply inside it
731731
/// <p>For example, elements not needed to be &quot;closed&quot;
732-
/// "#));
732+
/// "#);
733+
/// assert!(matches!(text, Cow::Borrowed(_)));
733734
///
734735
/// // Now we can enable checks again
735736
/// reader.config_mut().check_end_names = true;
@@ -741,7 +742,7 @@ impl<'i> NsReader<&'i [u8]> {
741742
/// [`Start`]: Event::Start
742743
/// [`decoder()`]: Reader::decoder()
743744
#[inline]
744-
pub fn read_text(&mut self, end: QName) -> Result<Cow<'i, str>> {
745+
pub fn read_text(&mut self, end: QName) -> Result<BytesText<'i>> {
745746
// According to the https://www.w3.org/TR/xml11/#dt-etag, end name should
746747
// match literally the start name. See `Self::check_end_names` documentation
747748
let result = self.reader.read_text(end)?;

src/reader/slice_reader.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
//! underlying byte stream. This implementation supports not using an
33
//! intermediate buffer as the byte slice itself can be used to borrow from.
44
5-
use std::borrow::Cow;
65
use std::io;
76

87
#[cfg(feature = "encoding")]
@@ -11,7 +10,7 @@ use crate::reader::EncodingRef;
1110
use encoding_rs::{Encoding, UTF_8};
1211

1312
use crate::errors::{Error, Result};
14-
use crate::events::Event;
13+
use crate::events::{BytesText, Event};
1514
use crate::name::QName;
1615
use crate::parser::Parser;
1716
use crate::reader::{BangType, ReadRefResult, ReadTextResult, Reader, Span, XmlSource};
@@ -209,11 +208,12 @@ impl<'a> Reader<&'a [u8]> {
209208
/// // ...then, we could read text content until close tag.
210209
/// // This call will correctly handle nested <html> elements.
211210
/// let text = reader.read_text(end.name()).unwrap();
212-
/// assert_eq!(text, Cow::Borrowed(r#"
211+
/// let text = text.decode().unwrap();
212+
/// assert_eq!(text, r#"
213213
/// <title>This is a HTML text</title>
214214
/// <p>Usual XML rules does not apply inside it
215215
/// <p>For example, elements not needed to be &quot;closed&quot;
216-
/// "#));
216+
/// "#);
217217
/// assert!(matches!(text, Cow::Borrowed(_)));
218218
///
219219
/// // Now we can enable checks again
@@ -225,15 +225,15 @@ impl<'a> Reader<&'a [u8]> {
225225
///
226226
/// [`Start`]: Event::Start
227227
/// [`decoder()`]: Self::decoder()
228-
pub fn read_text(&mut self, end: QName) -> Result<Cow<'a, str>> {
228+
pub fn read_text(&mut self, end: QName) -> Result<BytesText<'a>> {
229229
// self.reader will be changed, so store original reference
230230
let buffer = self.reader;
231231
let span = self.read_to_end(end)?;
232232

233233
let len = span.end - span.start;
234234
// SAFETY: `span` can only contain indexes up to usize::MAX because it
235235
// was created from offsets from a single &[u8] slice
236-
Ok(self.decoder().decode(&buffer[0..len as usize])?)
236+
Ok(BytesText::wrap(&buffer[0..len as usize], self.decoder()))
237237
}
238238
}
239239

tests/issues.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,10 @@ mod issue514 {
130130

131131
reader.config_mut().check_end_names = false;
132132

133-
assert_eq!(reader.read_text(html_end.name()).unwrap(), "...");
133+
assert_eq!(
134+
reader.read_text(html_end.name()).unwrap(),
135+
BytesText::from_escaped("...")
136+
);
134137

135138
reader.config_mut().check_end_names = true;
136139

@@ -153,7 +156,10 @@ mod issue514 {
153156

154157
reader.config_mut().check_end_names = false;
155158

156-
assert_eq!(reader.read_text(html_end.name()).unwrap(), "...");
159+
assert_eq!(
160+
reader.read_text(html_end.name()).unwrap(),
161+
BytesText::from_escaped("...")
162+
);
157163

158164
reader.config_mut().check_end_names = true;
159165

tests/reader-namespaces.rs

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1235,7 +1235,7 @@ mod read_text {
12351235
);
12361236
assert_eq!(
12371237
reader.read_text(QName(b"root")).unwrap(),
1238-
"<root/><root></root>"
1238+
BytesText::from_escaped("<root/><root></root>")
12391239
);
12401240
assert_eq!(
12411241
reader.read_resolved_event().unwrap(),
@@ -1267,7 +1267,7 @@ mod read_text {
12671267
assert_eq!(reader.read_event().unwrap(), DocType(BytesText::new("dtd")));
12681268
assert_eq!(
12691269
reader.read_text(QName(b"root")).unwrap(),
1270-
"<root/><root></root>"
1270+
BytesText::from_escaped("<root/><root></root>")
12711271
);
12721272
assert_eq!(
12731273
reader.read_resolved_event().unwrap(),
@@ -1297,7 +1297,7 @@ mod read_text {
12971297
assert_eq!(reader.read_event().unwrap(), PI(BytesPI::new("pi")));
12981298
assert_eq!(
12991299
reader.read_text(QName(b"root")).unwrap(),
1300-
"<root/><root></root>"
1300+
BytesText::from_escaped("<root/><root></root>")
13011301
);
13021302
assert_eq!(
13031303
reader.read_resolved_event().unwrap(),
@@ -1330,7 +1330,7 @@ mod read_text {
13301330
);
13311331
assert_eq!(
13321332
reader.read_text(QName(b"root")).unwrap(),
1333-
"<root/><root></root>"
1333+
BytesText::from_escaped("<root/><root></root>")
13341334
);
13351335
assert_eq!(
13361336
reader.read_resolved_event().unwrap(),
@@ -1367,7 +1367,7 @@ mod read_text {
13671367
);
13681368
assert_eq!(
13691369
reader.read_text(QName(b"root")).unwrap(),
1370-
"<root/><root></root>"
1370+
BytesText::from_escaped("<root/><root></root>")
13711371
);
13721372
// NOTE: due to unbalanced XML namespace still not closed
13731373
assert_eq!(
@@ -1406,7 +1406,7 @@ mod read_text {
14061406
);
14071407
assert_eq!(
14081408
reader.read_text(QName(b"root")).unwrap(),
1409-
"<root/><root></root>"
1409+
BytesText::from_escaped("<root/><root></root>")
14101410
);
14111411
assert_eq!(
14121412
reader.read_resolved_event().unwrap(),
@@ -1442,7 +1442,7 @@ mod read_text {
14421442
);
14431443
assert_eq!(
14441444
reader.read_text(QName(b"root")).unwrap(),
1445-
"<root/><root></root>"
1445+
BytesText::from_escaped("<root/><root></root>")
14461446
);
14471447
assert_eq!(
14481448
reader.read_resolved_event().unwrap(),
@@ -1472,7 +1472,7 @@ mod read_text {
14721472
assert_eq!(reader.read_event().unwrap(), Text(BytesText::new("text")));
14731473
assert_eq!(
14741474
reader.read_text(QName(b"root")).unwrap(),
1475-
"<root/><root></root>"
1475+
BytesText::from_escaped("<root/><root></root>")
14761476
);
14771477
assert_eq!(
14781478
reader.read_resolved_event().unwrap(),
@@ -1505,7 +1505,7 @@ mod read_text {
15051505
);
15061506
assert_eq!(
15071507
reader.read_text(QName(b"root")).unwrap(),
1508-
"<root/><root></root>"
1508+
BytesText::from_escaped("<root/><root></root>")
15091509
);
15101510
assert_eq!(
15111511
reader.read_resolved_event().unwrap(),
@@ -1538,7 +1538,7 @@ mod read_text {
15381538
);
15391539
assert_eq!(
15401540
reader.read_text(QName(b"root")).unwrap(),
1541-
"<root/><root></root>"
1541+
BytesText::from_escaped("<root/><root></root>")
15421542
);
15431543
assert_eq!(
15441544
reader.read_resolved_event().unwrap(),

tests/reader-read-text.rs

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ mod borrowed {
2929
);
3030
assert_eq!(
3131
reader.read_text(QName(b"root")).unwrap(),
32-
"<root/><root></root>"
32+
BytesText::from_escaped("<root/><root></root>")
3333
);
3434
assert_eq!(
3535
reader.read_event().unwrap(),
@@ -61,7 +61,7 @@ mod borrowed {
6161
);
6262
assert_eq!(
6363
reader.read_text(QName(b"root")).unwrap(),
64-
"<root/><root></root>"
64+
BytesText::from_escaped("<root/><root></root>")
6565
);
6666
assert_eq!(
6767
reader.read_event().unwrap(),
@@ -88,7 +88,7 @@ mod borrowed {
8888
assert_eq!(reader.read_event().unwrap(), Event::PI(BytesPI::new("pi")));
8989
assert_eq!(
9090
reader.read_text(QName(b"root")).unwrap(),
91-
"<root/><root></root>"
91+
BytesText::from_escaped("<root/><root></root>")
9292
);
9393
assert_eq!(
9494
reader.read_event().unwrap(),
@@ -118,7 +118,7 @@ mod borrowed {
118118
);
119119
assert_eq!(
120120
reader.read_text(QName(b"root")).unwrap(),
121-
"<root/><root></root>"
121+
BytesText::from_escaped("<root/><root></root>")
122122
);
123123
assert_eq!(
124124
reader.read_event().unwrap(),
@@ -149,7 +149,7 @@ mod borrowed {
149149
);
150150
assert_eq!(
151151
reader.read_text(QName(b"root")).unwrap(),
152-
"<root/><root></root>"
152+
BytesText::from_escaped("<root/><root></root>")
153153
);
154154
assert_eq!(
155155
reader.read_event().unwrap(),
@@ -181,7 +181,7 @@ mod borrowed {
181181
);
182182
assert_eq!(
183183
reader.read_text(QName(b"root")).unwrap(),
184-
"<root/><root></root>"
184+
BytesText::from_escaped("<root/><root></root>")
185185
);
186186
assert_eq!(
187187
reader.read_event().unwrap(),
@@ -211,7 +211,7 @@ mod borrowed {
211211
);
212212
assert_eq!(
213213
reader.read_text(QName(b"root")).unwrap(),
214-
"<root/><root></root>"
214+
BytesText::from_escaped("<root/><root></root>")
215215
);
216216
assert_eq!(
217217
reader.read_event().unwrap(),
@@ -241,7 +241,7 @@ mod borrowed {
241241
);
242242
assert_eq!(
243243
reader.read_text(QName(b"root")).unwrap(),
244-
"<root/><root></root>"
244+
BytesText::from_escaped("<root/><root></root>")
245245
);
246246
assert_eq!(
247247
reader.read_event().unwrap(),
@@ -271,7 +271,7 @@ mod borrowed {
271271
);
272272
assert_eq!(
273273
reader.read_text(QName(b"root")).unwrap(),
274-
"<root/><root></root>"
274+
BytesText::from_escaped("<root/><root></root>")
275275
);
276276
assert_eq!(
277277
reader.read_event().unwrap(),
@@ -302,7 +302,7 @@ mod borrowed {
302302
);
303303
assert_eq!(
304304
reader.read_text(QName(b"root")).unwrap(),
305-
"<root/><root></root>"
305+
BytesText::from_escaped("<root/><root></root>")
306306
);
307307
assert_eq!(
308308
reader.read_event().unwrap(),

tests/reader.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -349,7 +349,10 @@ mod read_text {
349349
r.config_mut().trim_text(true);
350350

351351
assert_eq!(r.read_event().unwrap(), Start(BytesStart::new("tag")));
352-
assert_eq!(r.read_text(QName(b"tag")).unwrap(), " text ");
352+
assert_eq!(
353+
r.read_text(QName(b"tag")).unwrap(),
354+
BytesText::from_escaped(" text ")
355+
);
353356
assert_eq!(r.read_event().unwrap(), Eof);
354357
}
355358

@@ -359,7 +362,10 @@ mod read_text {
359362
r.config_mut().trim_text(true);
360363

361364
assert_eq!(r.read_event().unwrap(), Start(BytesStart::new("tag")));
362-
assert_eq!(r.read_text(QName(b"tag")).unwrap(), " <nested/> ");
365+
assert_eq!(
366+
r.read_text(QName(b"tag")).unwrap(),
367+
BytesText::from_escaped(" <nested/> ")
368+
);
363369
assert_eq!(r.read_event().unwrap(), Eof);
364370
}
365371
}

0 commit comments

Comments
 (0)