Elasticsearch:在嵌套对象中查找单词的一部分

我无法在嵌套对象中按单词的一部分进行匹配,只能匹配到完整的单词。我的分析器配置如下:

{
  "settings": {
    "number_of_shards": 1,
    "analysis": {
      "filter": {
        "word_part_filter": {
          "type": "ngram",
          "min_gram": 3,
          "max_gram": 15
        },
        "word_part_front_filter": {
          "type": "edgeNGram",
          "min_gram": 2,
          "max_gram": 15
        },
        "codeid_filter": {
          "type": "pattern_replace",
          "pattern": "[-/.:]",
          "replacement": "",
          "preserve_original": true
        }
      },
      "char_filter": {
        "umlaut_char_filter": {
          "type": "mapping",
          "mappings": [
            "ö=>oe",
            "ä=>ae",
            "ü=>ue",
            "ß=>ss",
            "Ö=>Oe",
            "Ä=>Ae",
            "Ü=>Ue"
          ]
        }
      },
      "analyzer": {
        "description_analyser_query": {
          "type": "custom",
          "char_filter": [
            "html_strip"
          ],
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "stop",
            "asciifolding"
          ]
        },
        "description_analyser_idx": {
          "type": "custom",
          "char_filter": [
            "html_strip"
          ],
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "stop",
            "asciifolding",
            "word_part_filter"
          ]
        },
        "name_analyser_query": {
          "type": "custom",
          "char_filter": [
            "umlaut_char_filter"
          ],
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "asciifolding"
          ]
        },
        "name_analyser_idx": {
          "type": "custom",
          "char_filter": [
            "umlaut_char_filter"
          ],
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "asciifolding",
            "word_part_filter"
          ]
        },
        "codeid_analyser_query": {
          "type": "custom",
          "tokenizer": "keyword",
          "filter": [
            "lowercase",
            "codeid_filter"
          ]
        },
        "codeid_analyser_idx_front": {
          "type": "custom",
          "tokenizer": "keyword",
          "filter": [
            "lowercase",
            "codeid_filter",
            "word_part_front_filter"
          ]
        },
        "codeid_analyser_idx_any": {
          "type": "custom",
          "tokenizer": "keyword",
          "filter": [
            "lowercase",
            "codeid_filter",
            "word_part_filter"
          ]
        }
      }
    }
  }
}

这是嵌套对象的映射(节选):

{
  "properties": {    
    "aid": {
      "type": "nested",
      "properties": {
        "tpid": {
          "type": "string",
          "analyzer": "codeid_analyser_idx_any"
        },
        "aid": {
          "type": "string",
          "analyzer": "codeid_analyser_idx_any"
        }
      }    
    }
  }
}

我使用下面这个查询(节选)进行搜索,其中只有 nested 部分是关键:

{
  "query": {
    "bool": {
      "must": [
        {
          "bool": {
            "should": [
              {
                "nested": {
                  "path": "aid",
                  "query": {
                    "bool": {
                      "must": {
                        "match": {
                          "aid.aid": {
                            "query": "1200",
                            "analyzer": "codeid_analyser_query"
                          }
                        }
                      },
                      "filter": {
                        "or": [
                          {
                            "match": {
                              "aid.tpid": "buyer_specific"
                            }
                          },
                          {
                            "match": {
                              "aid.tpid": "mytpid"
                            }
                          }
                        ]
                      }
                    }
                  }
                }
              }
            ],
            "minimum_should_match": 1
          }
        }
      ]
    }
  }
}

索引中有一个元素,其 aid = 120000008。

在字段上使用分析器时,什么都找不到。在嵌套对象映射和查询中完全不使用分析器时,只能找到完整的单词(如“120000008”),而找不到“1200”。有什么想法吗?


实际上,使用 Elasticsearch 5.2,在名为 test 的索引上,将该映射应用于名为“product”的类型(只重写了 filter 部分,以适应查询语言的演变),我可以得到正确的结果。查询:

GET test/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "bool": {
            "should": [
              {
                "nested": {
                  "path": "aid",
                  "query": {
                    "bool": {
                      "must": {
                        "match": {
                          "aid.aid": {
                            "query": "1200",
                            "analyzer": "codeid_analyser_query"
                          }
                        }
                      },
                      "filter": {
                        "terms": {
                          "aid.tpid": [
                            "mytpid",
                            "buyer_specific"
                          ]
                        }
                      }
                    }
                  }
                }
              }
            ],
            "minimum_should_match": 1
          }
        }
      ]
    }
  }
}

索引内容:

GET test/_search

{
  "took": 8,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 1,
    "max_score": 1,
    "hits": [
      {
        "_index": "test",
        "_type": "product",
        "_id": "AVrJ1CSd-NyeQ4r64kP6",
        "_score": 1,
        "_source": {
          "aid": {
            "aid": "120000008",
            "tpid": "mytpid"
          }
        }
      }
    ]
  }
}

分析器(我删除了 umlaut 字符过滤器,因为它在我的计算机上无法正常显示;由于它没有被用到,删除它不会改变测试结果):

PUT test
{
  "settings": {
     "analysis": {
      "filter": {
        "word_part_filter": {
          "type": "ngram",
          "min_gram": 3,
          "max_gram": 15
        },
        "word_part_front_filter": {
          "type": "edgeNGram",
          "min_gram": 2,
          "max_gram": 15
        },
        "codeid_filter": {
          "type": "pattern_replace",
          "pattern": "[-/.:]",
          "replacement": "",
          "preserve_original": true
        }
      },

      "analyzer": {
        "description_analyser_query": {
          "type": "custom",
          "char_filter": [
            "html_strip"
          ],
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "stop",
            "asciifolding"
          ]
        },
        "description_analyser_idx": {
          "type": "custom",
          "char_filter": [
            "html_strip"
          ],
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "stop",
            "asciifolding",
            "word_part_filter"
          ]
        },

        "codeid_analyser_query": {
          "type": "custom",
          "tokenizer": "keyword",
          "filter": [
            "lowercase",
            "codeid_filter"
          ]
        },
        "codeid_analyser_idx_front": {
          "type": "custom",
          "tokenizer": "keyword",
          "filter": [
            "lowercase",
            "codeid_filter",
            "word_part_front_filter"
          ]
        },
        "codeid_analyser_idx_any": {
          "type": "custom",
          "tokenizer": "keyword",
          "filter": [
            "lowercase",
            "codeid_filter",
            "word_part_filter"
          ]
        }
      }
     }
  }
}

product 类型的映射:

PUT test/_mapping/product
{

  "properties": {    
    "aid": {
      "type": "nested",
      "properties": {
        "tpid": {
          "type": "string",
          "analyzer": "codeid_analyser_idx_any"
        },
        "aid": {
          "type": "string",
          "analyzer": "codeid_analyser_idx_any"
        }
      }    
    }
  }
}
链接地址: http://www.djcxy.com/p/95349.html

上一篇: Elastic search: find word parts in nested object

下一篇: is it possible to modify service worker cache response headers?