我正在使用最新版本的elasticsearch.js,在为一些帖子建立索引并创建映射时,尝试创建一个自定义的路径分析器。
目标是把路径中的每一段都提取成关键字。不过作为起步,我只是想先让分析器能正常工作。
这是基于elasticsearch.js的create_mapped_index.js,您可以在文件顶部附近看到自定义分析器:
// Creates the "wcm-posts" index in a single request: the "settings" section
// declares a custom analyzer and the "mappings" section declares the "post"
// type. The callback only logs whatever it receives.
var client = require('./connection.js');
client.indices.create({
index: "wcm-posts",
body: {
"settings": {
"analysis": {
"analyzer": {
// Custom analyzer that delegates to the tokenizer declared below.
"wcm_path_analyzer": {
"tokenizer": "wcm_path_tokenizer",
"type": "custom"
}
},
"tokenizer": {
// Pattern tokenizer configured with "/" as its pattern.
"wcm_path_tokenizer": {
"type": "pattern",
"pattern": "/"
}
}
}
},
"mappings": {
"post": {
"properties": {
"id": { "type": "string", "index": "not_analyzed" },
"titles": {
"type": "object",
"properties": {
"main": { "type": "string" },
"subtitle": { "type": "string" },
"alternate": { "type": "string" },
"concise": { "type": "string" },
"seo": { "type": "string" }
}
},
"tags": {
"properties": {
"id": { "type": "string", "index": "not_analyzed" },
"name": { "type": "string", "index": "not_analyzed" },
"slug": { "type": "string" }
},
},
"main_taxonomies": {
"properties": {
"id": { "type": "string", "index": "not_analyzed" },
"name": { "type": "string", "index": "not_analyzed" },
"slug": { "type": "string", "index": "not_analyzed" },
// NOTE(review): the analyzer name is passed under "index" here. This is the
// value the mapper_parsing_exception quoted later in this post rejects for
// field [path]; the working mappings in the answers use the "analyzer" key.
"path": { "type": "string", "index": "wcm_path_analyzer" }
},
},
"categories": {
"properties": {
"id": { "type": "string", "index": "not_analyzed" },
"name": { "type": "string", "index": "not_analyzed" },
"slug": { "type": "string", "index": "not_analyzed" },
// NOTE(review): same "index" usage as main_taxonomies.path above.
"path": { "type": "string", "index": "wcm_path_analyzer" }
},
},
// Nested type with dynamic mapping enabled; only "content" is declared explicitly.
"content_elements": {
"dynamic": "true",
"type": "nested",
"properties": {
"content": { "type": "string" }
}
}
}
}
}
}
}, function (err, resp, respcode) {
// Print the three callback arguments as-is.
console.log(err, resp, respcode);
});如果对wcm_path_analyzer的调用被设置为"non_analyzed“或省略了索引,那么索引、映射和插入posts就会起作用。
当我像上面的json那样,尝试对main_taxonomies和categories的path字段使用自定义分析器时,我得到这个错误:
response: '{"error":{"root_cause":[{"type":"mapper_parsing_exception","reason":"wrong value for index [wcm_path_analyzer] for field [path]"}],"type":"mapper_parsing_exception","reason":"Failed to parse mapping [post]: wrong value for index [wcm_path_analyzer] for field [path]","caused_by":{"type":"mapper_parsing_exception","reason":"wrong value for index [wcm_path_analyzer] for field [path]"}},"status":400}',
toString: [Function],
toJSON: [Function] } { error:
{ root_cause: [ [Object] ],
type: 'mapper_parsing_exception',
reason: 'Failed to parse mapping [post]: wrong value for index [wcm_path_analyzer] for field [path]',
caused_by:
{ type: 'mapper_parsing_exception',
reason: 'wrong value for index [wcm_path_analyzer] for field [path]' } },
status: 400 } 400
下面是两个在path字段上需要自定义分析器的对象示例。这个示例是在不使用自定义分析器的情况下,向elasticsearch索引中插入15个帖子之后取出的:
"main_taxonomies": [
{
"id": "123",
"type": "category",
"name": "News",
"slug": "news",
"path": "/News/"
}
],
"categories": [
{
"id": "157",
"name": "Local News",
"slug": "local-news",
"path": "/News/Local News/",
"main": true
},
到这一步,我在谷歌上搜索了类似的问题,大多数回答说,提问者要么漏掉了把分析器放进settings,要么没有把参数放进body。我认为我这两点都做对了。
我还查看了elasticsearch.js文档,并尝试创建:
client.indices.putSettings({}) 但是要使用这种方法,索引必须与映射一起存在,否则它会抛出错误“no index found”。
不知道从这里往哪里走?感谢您的建议。
发布于 2017-02-13 02:38:33
所以最终的分析器是:
var client = require('./connection.js');
client.indices.create({
index: "wcm-posts",
body: {
"settings": {
"analysis": {
"analyzer": {
"wcm_path_analyzer": {
"type" : "pattern",
"lowercase": true,
"pattern": "/"
}
}
}
},
"mappings": {
"post": {
"properties": {
"id": { "type": "string", "index": "not_analyzed" },
"client_id": { "type": "string", "index": "not_analyzed" },
"license_id": { "type": "string", "index": "not_analyzed" },
"origin_id": { "type": "string" },
...
...
"origin_slug": { "type": "string" },
"main_taxonomies_path": { "type": "string", "analyzer": "wcm_path_analyzer", "search_analyzer": "standard" },
"categories_paths": { "type": "string", "analyzer": "wcm_path_analyzer", "search_analyzer": "standard" },
"search_tags": { "type": "string" },
// See the custom analyzer set here --------------------------^
我确实确定,至少对于路径或模式分析器来说,不能使用复杂的嵌套或对象。将扁平化的字段设置为"type": "string"是使其工作的唯一方法。
我最终不再需要一个自定义的标记器,因为模式分析器功能齐全,并且已经包含了一个标记器。
我选择使用模式分析器,因为它按模式进行切分,留下单独的词项(term);而路径分词器(path tokenizer)以不同的方式切分路径,但不会产生单独的词项(我希望我这样说是正确的,我是以文档为依据的)。
希望这对其他人有帮助!
史蒂夫
发布于 2017-02-12 07:30:50
所以我把它修好了。我认为json对象太复杂了,或者是将分析器添加到字段映射中的更改起到了作用。
首先,我把body的结构扁平化:
改为:
"main_taxonomies_path": "/News/",
"categories_paths": [ "/News/Local/", "/Business/Local/" ],
"search_tags": [ "montreal-3","laval-4" ],然后我将分析器更新为:
"settings": {
"analysis": {
"analyzer": {
"wcm_path_analyzer": {
"tokenizer": "wcm_path_tokenizer",
"type": "custom"
}
},
"tokenizer": {
"wcm_path_tokenizer": {
"type": "pattern",
"pattern": "/",
"replacement": ","
}
}
}
},
请注意,分析器的"type"被设置为"custom"。
然后,在映射这些展平的字段时:
"main_taxonomies_path": { "type": "string", "analyzer": "wcm_path_analyzer" },
"categories_paths": { "type": "string", "analyzer": "wcm_path_analyzer" },
"search_tags": { "type": "string" },在搜索这些字段时会产生以下结果:
"main_taxonomies_path": "/News/",
"categories_paths": [ "/News/Local News/", "/Business/Local Business/" ],
"search_tags": [ "montreal-2", "laval-3" ],因此,自定义分析器在这种情况下执行设置的操作。
我不确定是否可以将object类型应用到main_taxonomies_path和categories_paths上,所以我会试一试,看看结果如何。
我将改进模式搜索,以不同的格式得到不同的结果,但我很高兴能这样做。
为了完整性,一旦我完成了这项工作,我将把我最终的自定义模式分析器、映射和结果放入其中。
致敬,史蒂夫
https://stackoverflow.com/questions/42179874
复制相似问题