Created
June 14, 2017 17:00
-
-
Save PtkFerraro/018ac787cfd10a977def552a1bdfeebe to your computer and use it in GitHub Desktop.
Elasticsearch setup for PT-BR
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
PUT idxsearch/
{
  "settings": {
    "analysis": {
      "analyzer": {
        "default_us_analyzer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["lowercase", "us_stop", "stemmer_us_filter", "asciifolding"]
        },
        "snowball_us_analyzer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["lowercase", "us_stop", "snowball_us_filter", "asciifolding"]
        },
        "shingle_us_analyzer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["lowercase", "us_stop", "asciifolding", "shingle_filter"]
        },
        "edgengram_us_analyzer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["lowercase", "us_stop", "stemmer_us_filter", "asciifolding", "edgengram_filter"]
        },
        "default_ptbr_analyzer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["lowercase", "ptbr_stop", "stemmer_ptbr_filter", "asciifolding"]
        },
        "default_ptbr2_analyzer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["lowercase", "ptbr_stop", "stemmer_ptbr2_filter", "asciifolding"]
        },
        "snowball_ptbr_analyzer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["lowercase", "ptbr_stop", "snowball_ptbr_filter", "asciifolding"]
        },
        "shingle_ptbr_analyzer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["lowercase", "ptbr_stop", "asciifolding", "shingle_filter"]
        },
        "edgengram_ptbr_analyzer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["lowercase", "ptbr_stop", "stemmer_ptbr_filter", "asciifolding", "edgengram_filter"]
        }
      },
      "filter": {
        "stemmer_ptbr_filter": {
          "type": "stemmer",
          "name": "light_portuguese"
        },
        "stemmer_ptbr2_filter": {
          "type": "stemmer",
          "name": "portuguese_rslp"
        },
        "stemmer_us_filter": {
          "type": "stemmer",
          "name": "english"
        },
        "us_stop": {
          "type": "stop",
          "stopwords": "_english_"
        },
        "ptbr_stop": {
          "type": "stop",
          "stopwords": "_brazilian_"
        },
        "snowball_ptbr_filter": {
          "type": "snowball",
          "language": "Portuguese"
        },
        "snowball_us_filter": {
          "type": "snowball",
          "language": "English"
        },
        "shingle_filter": {
          "type": "shingle",
          "min_shingle_size": 3,
          "max_shingle_size": 5,
          "token_separator": " ",
          "filler_token": ""
        },
        "edgengram_filter": {
          "type": "edge_ngram",
          "min_gram": 3,
          "max_gram": 100
        }
      }
    }
  },
  "mappings": {
    "entersport": {
      "_all": { "enabled": false },
      "properties": {
        "is_adult": { "type": "boolean" },
        "match_start": { "type": "date" },
        "championship_name": { "type": "text", "analyzer": "default_ptbr_analyzer" },
        "match_title": {
          "type": "text",
          "fields": {
            "default": { "type": "text", "analyzer": "default_ptbr_analyzer" },
            "snowball": { "type": "text", "analyzer": "snowball_ptbr_analyzer" },
            "shingles": { "type": "text", "analyzer": "shingle_ptbr_analyzer" },
            "ngrams": { "type": "text", "analyzer": "edgengram_ptbr_analyzer", "search_analyzer": "default_ptbr_analyzer" }
          }
        }
      }
    },
    "tv": {
      "_all": { "enabled": false },
      "properties": {
        "is_adult": { "type": "boolean" },
        "program_start": { "type": "date" },
        "channel_name": { "type": "text", "analyzer": "default_ptbr_analyzer" },
        "program_title": {
          "type": "text",
          "fields": {
            "default": { "type": "text", "analyzer": "default_ptbr_analyzer" },
            "snowball": { "type": "text", "analyzer": "snowball_ptbr_analyzer" },
            "shingles": { "type": "text", "analyzer": "shingle_ptbr_analyzer" },
            "ngrams": { "type": "text", "analyzer": "edgengram_ptbr_analyzer", "search_analyzer": "default_ptbr_analyzer" }
          }
        }
      }
    },
    "actors": {
      "_all": { "enabled": false },
      "properties": {
        "is_adult": { "type": "boolean" },
        "actor_name": {
          "type": "text",
          "fields": {
            "default": { "type": "text", "analyzer": "default_us_analyzer" },
            "snowball": { "type": "text", "analyzer": "snowball_us_analyzer" },
            "shingles": { "type": "text", "analyzer": "shingle_us_analyzer" },
            "ngrams": { "type": "text", "analyzer": "edgengram_us_analyzer", "search_analyzer": "default_us_analyzer" }
          }
        }
      }
    },
    "radios": {
      "_all": { "enabled": false },
      "properties": {
        "is_adult": { "type": "boolean" },
        "station_name": {
          "type": "text",
          "fields": {
            "default": { "type": "text", "analyzer": "default_ptbr_analyzer" },
            "snowball": { "type": "text", "analyzer": "snowball_ptbr_analyzer" },
            "shingles": { "type": "text", "analyzer": "shingle_ptbr_analyzer" },
            "ngrams": { "type": "text", "analyzer": "edgengram_ptbr_analyzer", "search_analyzer": "default_ptbr_analyzer" }
          }
        }
      }
    },
    "movies": {
      "_all": { "enabled": false },
      "properties": {
        "genre.name": { "type": "keyword" },
        "vote_average": { "type": "half_float" },
        "tag.name": { "type": "keyword" },
        "is_adult": { "type": "boolean" },
        "cast.real_name": { "type": "text", "analyzer": "default_us_analyzer" },
        "movie_original_title": { "type": "text", "analyzer": "default_us_analyzer" },
        "movie_title": {
          "type": "text",
          "fields": {
            "default": { "type": "text", "analyzer": "default_ptbr_analyzer" },
            "snowball": { "type": "text", "analyzer": "snowball_ptbr_analyzer" },
            "shingles": { "type": "text", "analyzer": "shingle_ptbr_analyzer" },
            "ngrams": { "type": "text", "analyzer": "edgengram_ptbr_analyzer", "search_analyzer": "default_ptbr_analyzer" }
          }
        }
      }
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment