Skip to content

Instantly share code, notes, and snippets.

@alexbrasetvik
Created December 17, 2013 15:42
Show Gist options
  • Save alexbrasetvik/8006939 to your computer and use it in GitHub Desktop.
Save alexbrasetvik/8006939 to your computer and use it in GitHub Desktop.
# Analysis settings: two custom analyzers plus the token filter one of them uses.

# Sample names.
# NOTE(review): presumably sample input for the Play tool rather than an
# Elasticsearch analysis setting — confirm.
text:
  - Michael
  - Heaney
  - Heavey
analyzer:
  # Standard tokenizer followed by the phonetic filter defined under `filter`.
  metaphone:
    type: custom
    tokenizer: standard
    filter:
      - my_metaphone
  # Standard tokenizer, then lowercasing and Porter stemming.
  porter:
    type: custom
    tokenizer: standard
    filter:
      - lowercase
      - porter_stem
filter:
  # Phonetic token filter using the metaphone encoder.
  # NOTE(review): `replace: false` presumably keeps the original token next to
  # the encoded one — confirm against the phonetic filter documentation.
  my_metaphone:
    encoder: metaphone
    replace: false
    type: phonetic
# Sample documents: two YAML documents separated by `---`, each declaring
# `_type: jr` and a single `pty_surname` value.
_type: jr
pty_surname: Heaney
---
_type: jr
pty_surname: Heavey
# Mapping for type `jr`: `pty_surname` is a multi_field so the same value is
# indexed through three analyzers. The field name has to be spelled exactly
# like the key used in the documents (`pty_surname`), otherwise the sub-fields
# are never populated.
jr:
  properties:
    pty_surname:
      type: multi_field
      fields:
        # Default sub-field (same name as the parent field).
        pty_surname:
          # Using the simple analyzer, we can boost perfect spellings.
          # Don't use standard, as that one removes stopwords.
          type: string
          analyzer: simple
        # Addressed in queries as `pty_surname.metaphone`.
        metaphone:
          type: string
          analyzer: metaphone
        # Addressed in queries as `pty_surname.porter`.
        porter:
          type: string
          analyzer: porter
#!/bin/bash
#
# Fuzzy and metaphone surname matching with a multi_field mapping.
#
# Steps:
#   1. Create the index "play" with two custom analyzers (metaphone, porter)
#      and a multi_field mapping on "pty_surname" for type "jr".
#   2. Bulk-index two sample documents (Heaney, Heavey).
#   3. Search for "heavey" with exact, fuzzy, metaphone and porter clauses.
#
# Environment:
#   ELASTICSEARCH_ENDPOINT  Base URL of the cluster
#                           (default: http://localhost:9200).

# Stop at the first failing command, e.g. when curl cannot connect.
set -euo pipefail

# Keep a value supplied by the caller; otherwise fall back to a local node.
export ELASTICSEARCH_ENDPOINT="${ELASTICSEARCH_ENDPOINT:-http://localhost:9200}"

# Create indexes
#
# The mapped field must be spelled exactly like the key used in the indexed
# documents ("pty_surname"); otherwise the "metaphone" and "porter" sub-fields
# are never populated and the sub-field queries below cannot match.
#
# NOTE(review): the "text" array under "analysis" is not an analyzer or filter
# definition; it looks like sample input carried over from the Play tool.
# Confirm that the target Elasticsearch version ignores it.
# NOTE(review): the "phonetic" filter type presumably needs the phonetic
# analysis plugin to be installed — confirm.
curl -XPUT "$ELASTICSEARCH_ENDPOINT/play" -d '{
  "settings": {
    "analysis": {
      "text": [
        "Michael",
        "Heaney",
        "Heavey"
      ],
      "analyzer": {
        "metaphone": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "my_metaphone"
          ]
        },
        "porter": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "porter_stem"
          ]
        }
      },
      "filter": {
        "my_metaphone": {
          "encoder": "metaphone",
          "replace": false,
          "type": "phonetic"
        }
      }
    }
  },
  "mappings": {
    "jr": {
      "properties": {
        "pty_surname": {
          "type": "multi_field",
          "fields": {
            "pty_surname": {
              "type": "string",
              "analyzer": "simple"
            },
            "metaphone": {
              "type": "string",
              "analyzer": "metaphone"
            },
            "porter": {
              "type": "string",
              "analyzer": "porter"
            }
          }
        }
      }
    }
  }
}'

# Index documents
#
# The bulk body is newline-delimited: one action line followed by one source
# line per document, so these lines must not be re-wrapped or indented.
# refresh=true is passed so the search below runs against the new documents.
curl -XPOST "$ELASTICSEARCH_ENDPOINT/_bulk?refresh=true" -d '
{"index":{"_index":"play","_type":"jr"}}
{"pty_surname":"Heaney"}
{"index":{"_index":"play","_type":"jr"}}
{"pty_surname":"Heavey"}
'

# Do searches
#
# Any one of the four clauses is enough for a hit: exact match on the default
# field, fuzzy match (fuzziness 1), phonetic match and stemmed match. The last
# two address the multi_field sub-fields as "pty_surname.<sub-field>".
curl -XPOST "$ELASTICSEARCH_ENDPOINT/_search?pretty" -d '
{
  "query": {
    "bool": {
      "should": [
        {
          "bool": {
            "should": [
              {
                "match": {
                  "pty_surname": {
                    "query": "heavey"
                  }
                }
              },
              {
                "match": {
                  "pty_surname": {
                    "query": "heavey",
                    "fuzziness": 1
                  }
                }
              },
              {
                "match": {
                  "pty_surname.metaphone": {
                    "query": "heavey"
                  }
                }
              },
              {
                "match": {
                  "pty_surname.porter": {
                    "query": "heavey"
                  }
                }
              }
            ]
          }
        }
      ]
    }
  }
}
'
# Auto generated by Found's Play-tool at 2013-12-17T16:42:37+01:00
version: 0
title: Fuzzy and metaphone with multi_field
description: ""
# Related to: http://stackoverflow.com/questions/20632042/elasticsearch-searching-for-human-names
# See also: https://www.found.no/play/gist/867785a709b4869c5543
query:
  bool:
    should:
      - bool:
          # It's sufficient that any of these match
          should:
            # Now perfect spellings will be preferred.
            - match:
                pty_surname:
                  query: heavey
            # Same field again, this time with fuzziness 1.
            - match:
                pty_surname:
                  query: heavey
                  fuzziness: 1
            # Sub-fields are addressed as <field>.<sub-field>; the field part
            # must be spelled like the key in the documents (pty_surname).
            - match:
                pty_surname.metaphone:
                  query: heavey
            - match:
                pty_surname.porter:
                  query: heavey
      # ... and similarly for first_name and so on.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment