|
官方网站说明：http://www.elasticsearch.org/guide/reference/index-modules/analysis/pattern-tokenizer.html
elasticsearch.yml 配置：
- index :
- analysis :
- analyzer :
- pattern_analyzer:
- type: custom
- tokenizer: pattern_tokenizer
- tokenizer:
- pattern_tokenizer:
- type: pattern
- pattern: \'([^\']+)\'
- group: -1
测试
- curl -XGET "http://localhost:9200/index/_analyze?text=aaa%20'bbb'%20'ccc'&analyzer=pattern_analyzer"
group=-1 时（默认值：把 pattern 当作分隔符切分文本，匹配到的部分被丢弃，相当于 split）：
- {
- tokens: [
- {
- token: "aaa ",
- start_offset: 0,
- end_offset: 4,
- type: "word",
- position: 1
- },
- {
- token: " ",
- start_offset: 9,
- end_offset: 10,
- type: "word",
- position: 2
- }
- ]
- }
group=0 时（把 pattern 的整个匹配内容作为 token）：
- {
- tokens: [
- {
- token: "'bbb'",
- start_offset: 4,
- end_offset: 9,
- type: "word",
- position: 1
- },
- {
- token: "'ccc'",
- start_offset: 10,
- end_offset: 15,
- type: "word",
- position: 2
- }
- ]
- }
group=1 时（把第 1 个捕获组，即引号内的内容，作为 token）：
- {
- tokens: [
- {
- token: "bbb",
- start_offset: 5,
- end_offset: 8,
- type: "word",
- position: 1
- },
- {
- token: "ccc",
- start_offset: 11,
- end_offset: 14,
- type: "word",
- position: 2
- }
- ]
- }
group=2 时（该正则只有 1 个捕获组，第 2 组不存在，因此报错）：
- {
- error: "IndexOutOfBoundsException[No group 2]",
- status: 500
- }
|
|
|