Skip to content

Commit b236d3a

Browse files
authored
Merge pull request #214 from yanyiwu/copilot/fix-fullsegment-cut-issue
Fix FullSegment skipping the rune after a null dict match
2 parents 348ac91 + 2ba11de commit b236d3a

File tree

2 files changed

+9
-0
lines changed

2 files changed

+9
-0
lines changed

include/cppjieba/FullSegment.hpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -68,6 +68,7 @@ class FullSegment: public SegmentBase {
68 68
assert(nextoffset < dags.size());
69 69
const DictUnit* du = dags[i].nexts[j].second;
70 70
if (du == NULL) {
71+
wordLen = 1;
71 72
if (dags[i].nexts.size() == 1 && maxIdx <= uIdx) {
72 73
WordRange wr(begin + i, begin + nextoffset);
73 74
res.push_back(wr);

test/unittest/segments_test.cpp

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -257,6 +257,14 @@ TEST(FullSegment, Test1) {
257 257
ASSERT_EQ(s, "[\"上市\", \"公司\", \"C\", \"E\", \"O\"]");
258 258
}
259 259

260+
TEST(FullSegment, NullDictUnitDoesNotSkipFollowingRune) {
261+
FullSegment segment(DICT_DIR "/jieba.dict.utf8");
262+
vector<string> words;
263+
264+
segment.Cut("崎岖的牙齿", words);
265+
ASSERT_EQ("崎岖/的/牙齿", Join(words.begin(), words.end(), "/"));
266+
}
267+
260 268
TEST(QuerySegment, Test1) {
261 269
QuerySegment segment(DICT_DIR "/jieba.dict.utf8", DICT_DIR "/hmm_model.utf8", "");
262 270
vector<string> words;

0 commit comments

Comments
 (0)