-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathseg_test.go
114 lines (108 loc) · 2.3 KB
/
seg_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
package jiagu
import (
"testing"
"github.com/bububa/jiagu/segment"
)
// TestSegDefault checks that Seg splits a plain sentence into the expected
// sequence of words.
func TestSegDefault(t *testing.T) {
	want := []string{"厦门", "明天", "会不会", "下雨"}
	sentence := "厦门明天会不会下雨"
	got := Seg(sentence)

	// A differing word count is reported on its own; the per-word comparison
	// below would otherwise index out of range.
	if len(got) != len(want) {
		t.Errorf("result: %+v, expect: %+v\n", got, want)
		return
	}

	for i := range want {
		if got[i] == want[i] {
			continue
		}
		// Report only the first differing position; the message already
		// contains both complete slices.
		t.Errorf("result: %+v, expect: %+v\n", got, want)
		return
	}
}
// TestSegNumber checks how Seg handles text that mixes Latin letters,
// numeric tokens (a decimal, a percentage, a number followed by a Chinese
// unit) and Chinese words.
func TestSegNumber(t *testing.T) {
	txt := "abc 103.25明天100%会不会100万下雨"
	words := Seg(txt)
	expects := []string{
		"abc",
		"103.25",
		"明天",
		"100%",
		"会不会",
		"100万",
		"下雨",
	}
	if len(words) != len(expects) {
		// The actual word count is printed as well to make a length
		// mismatch easier to spot in the output.
		t.Errorf("result: %+v, %d, expect: %+v\n", words, len(words), expects)
		return
	}
	for idx, w := range words {
		if w != expects[idx] {
			t.Errorf("result: %+v, expect: %+v\n", words, expects)
			// The message already shows both complete slices, so stop at the
			// first difference instead of repeating the same report for
			// every mismatching index (matches the other tests in this file).
			break
		}
	}
}
// TestSegProbe checks the output of the segmenter returned by Segment when
// it is run with segment.Probe_SegMode on a street address.
func TestSegProbe(t *testing.T) {
	want := []string{
		"黑龙江省",
		"双鸭山市",
		"宝",
		"清",
		"县",
		"宝清镇",
		"通达街",
		"341号",
	}

	address := "黑龙江省双鸭山市宝清县宝清镇通达街341号"
	segmenter := Segment()
	got := segmenter.Seg(address, segment.Probe_SegMode)

	// Either a different word count or the first differing word counts as a
	// mismatch; both cases produce the same single report.
	mismatch := len(got) != len(want)
	for i := 0; !mismatch && i < len(want); i++ {
		mismatch = got[i] != want[i]
	}
	if mismatch {
		t.Errorf("result: %+v, expect: %+v\n", got, want)
	}
}
// TestUserDict checks segmentation before and after a user-supplied word is
// added: Seg is first expected to split the phrase into separate words, and
// after AddVocabs is called with the whole phrase, to return it as one word.
func TestUserDict(t *testing.T) {
	txt := "汉服和服装、维基图谱"

	// sameLength reports any mismatch between got and want through t. It
	// returns false only when the lengths differ, which is the one case in
	// which the test stops early; a differing word is reported once and the
	// test carries on.
	sameLength := func(got, want []string) bool {
		if len(got) != len(want) {
			t.Errorf("result: %+v, expect: %+v\n", got, want)
			return false
		}
		for i := range got {
			if got[i] != want[i] {
				t.Errorf("result: %+v, expect: %+v\n", got, want)
				break
			}
		}
		return true
	}

	// Phase 1: segmentation before the custom word is added.
	before := []string{"汉服", "和", "服装", "、", "维基", "图谱"}
	if !sameLength(Seg(txt), before) {
		return
	}

	// Phase 2: add the custom word and segment the same text again.
	AddVocabs([]string{"汉服和服装"})
	after := []string{"汉服和服装", "、", "维基", "图谱"}
	sameLength(Seg(txt), after)
}