Created
December 17, 2013 15:42
-
-
Save alexbrasetvik/8006939 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Analysis settings for the example index.
# NOTE(review): `text` looks like sample input for the Play tool's analyzer
# preview rather than an Elasticsearch setting - confirm before reusing.
text:
  - Michael
  - Heaney
  - Heavey
analyzer:
  # Phonetic analyzer: standard tokenizer followed by the metaphone filter
  # defined under `filter` below.
  metaphone:
    type: custom
    tokenizer: standard
    filter:
      - my_metaphone
  # Stemming analyzer: lowercases tokens, then applies the Porter stemmer.
  porter:
    type: custom
    tokenizer: standard
    filter:
      - lowercase
      - porter_stem
filter:
  my_metaphone:
    encoder: metaphone
    # Per the phonetic token filter documentation, `replace: false` keeps the
    # original token in addition to the phonetic encoding.
    replace: false
    type: phonetic
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sample documents: two near-identical surnames of type `jr`, one YAML
# document per record.
_type: jr
pty_surname: Heaney
---
_type: jr
pty_surname: Heavey
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Mapping for documents of type `jr`.
# The field name must be spelled exactly like the key used in the indexed
# documents (`pty_surname`); a differently spelled name maps a field that no
# document ever populates, leaving the sub-fields below empty.
jr:
  properties:
    pty_surname:
      type: multi_field
      fields:
        pty_surname:
          # Using the simple analyzer, we can boost perfect spellings.
          # Don't use standard, as that one removes stopwords.
          type: string
          analyzer: simple
        # Phonetic variant, queried as `pty_surname.metaphone`.
        metaphone:
          type: string
          analyzer: metaphone
        # Stemmed variant, queried as `pty_surname.porter`.
        porter:
          type: string
          analyzer: porter
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Runs the "Fuzzy and metaphone with multi_field" example against an
# Elasticsearch node:
#   1. creates the `play` index with the metaphone/porter analyzers and a
#      multi_field mapping for `pty_surname`,
#   2. bulk-indexes two sample surnames,
#   3. searches for "heavey" using exact, fuzzy, metaphone and porter clauses.
#
# Environment:
#   ELASTICSEARCH_ENDPOINT  Base URL of the node
#                           (default: http://localhost:9200).
#
# The mapped field and the sub-field queries use `pty_surname`, the same key
# the documents are indexed under; otherwise the metaphone and porter
# sub-fields are never populated and those clauses cannot match.

export ELASTICSEARCH_ENDPOINT="${ELASTICSEARCH_ENDPOINT:-http://localhost:9200}"

# Create indexes
curl -XPUT "$ELASTICSEARCH_ENDPOINT/play" -d '{
  "settings": {
    "analysis": {
      "text": [
        "Michael",
        "Heaney",
        "Heavey"
      ],
      "analyzer": {
        "metaphone": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "my_metaphone"
          ]
        },
        "porter": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "porter_stem"
          ]
        }
      },
      "filter": {
        "my_metaphone": {
          "encoder": "metaphone",
          "replace": false,
          "type": "phonetic"
        }
      }
    }
  },
  "mappings": {
    "jr": {
      "properties": {
        "pty_surname": {
          "type": "multi_field",
          "fields": {
            "pty_surname": {
              "type": "string",
              "analyzer": "simple"
            },
            "metaphone": {
              "type": "string",
              "analyzer": "metaphone"
            },
            "porter": {
              "type": "string",
              "analyzer": "porter"
            }
          }
        }
      }
    }
  }
}'

# Index documents
# refresh=true is passed so the documents are searchable by the query below.
curl -XPOST "$ELASTICSEARCH_ENDPOINT/_bulk?refresh=true" -d '
{"index":{"_index":"play","_type":"jr"}}
{"pty_surname":"Heaney"}
{"index":{"_index":"play","_type":"jr"}}
{"pty_surname":"Heavey"}
'

# Do searches
# Any one of the inner clauses is sufficient for a hit.
curl -XPOST "$ELASTICSEARCH_ENDPOINT/_search?pretty" -d '
{
  "query": {
    "bool": {
      "should": [
        {
          "bool": {
            "should": [
              {
                "match": {
                  "pty_surname": {
                    "query": "heavey"
                  }
                }
              },
              {
                "match": {
                  "pty_surname": {
                    "query": "heavey",
                    "fuzziness": 1
                  }
                }
              },
              {
                "match": {
                  "pty_surname.metaphone": {
                    "query": "heavey"
                  }
                }
              },
              {
                "match": {
                  "pty_surname.porter": {
                    "query": "heavey"
                  }
                }
              }
            ]
          }
        }
      ]
    }
  }
}
'
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Auto generated by Found's Play-tool at 2013-12-17T16:42:37+01:00 | |
version: 0 | |
title: Fuzzy and metaphone with multi_field | |
description: "" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Related to: http://stackoverflow.com/questions/20632042/elasticsearch-searching-for-human-names
# See also: https://www.found.no/play/gist/867785a709b4869c5543
#
# Searches the surname with four strategies at once. The sub-field clauses
# address `pty_surname.<sub-field>`, i.e. the same field name the documents
# are indexed under.
query:
  bool:
    should:
      - bool:
          # It's sufficient that any of these match
          should:
            # Now perfect spellings will be preferred.
            - match:
                pty_surname:
                  query: heavey
            # Tolerates one edit, so "heaney" is also matched.
            - match:
                pty_surname:
                  query: heavey
                  fuzziness: 1
            # Phonetic match via the metaphone sub-field.
            - match:
                pty_surname.metaphone:
                  query: heavey
            # Stemmed match via the porter sub-field.
            - match:
                pty_surname.porter:
                  query: heavey
      # ... and similarly for first_name and so on.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment