wangluo010 发表于 2019-1-29 07:17:51

elasticsearch学习之Pattern Tokenizer

  官方文档说明：http://www.elasticsearch.org/guide/reference/index-modules/analysis/pattern-tokenizer.html
  配置文件 elasticsearch.yml：


index:
  analysis:
    analyzer:
      pattern_analyzer:
        type: custom
        tokenizer: pattern_tokenizer
    tokenizer:
      pattern_tokenizer:
        type: pattern
        pattern: \'([^\']+)\'
        group: -1

  测试


curl -XGET "http://localhost:9200/index/_analyze?text=aaa%20'bbb'%20'ccc'&analyzer=pattern_analyzer"

  group=-1 时（把正则匹配到的内容当作分隔符进行切分，相当于 split）：


{
  tokens: [
    {
      token: "aaa ",
      start_offset: 0,
      end_offset: 4,
      type: "word",
      position: 1
    },
    {
      token: " ",
      start_offset: 9,
      end_offset: 10,
      type: "word",
      position: 2
    }
  ]
}

  group=0 时（取整个匹配内容作为 token，包含两侧的单引号）：


{
  tokens: [
    {
      token: "'bbb'",
      start_offset: 4,
      end_offset: 9,
      type: "word",
      position: 1
    },
    {
      token: "'ccc'",
      start_offset: 10,
      end_offset: 15,
      type: "word",
      position: 2
    }
  ]
}

  group=1 时（取第 1 个捕获组的内容作为 token，即单引号之间的部分）：


{
  tokens: [
    {
      token: "bbb",
      start_offset: 5,
      end_offset: 8,
      type: "word",
      position: 1
    },
    {
      token: "ccc",
      start_offset: 11,
      end_offset: 14,
      type: "word",
      position: 2
    }
  ]
}

  group=2 时（正则中只有 1 个捕获组，第 2 组不存在，因此报错）：


{
  error: "IndexOutOfBoundsException",
  status: 500
}




页: [1]
查看完整版本: elasticsearch学习之Pattern Tokenizer