Training Elasticsearch

Elasticsearch Presentation

Indexing a Document and Retrieving It

POST accounts/_doc
{
  "id": 1,
  "name": "AHMET",
  "surname": "MERT EKİNCİOĞLU",
  "iban": "TR910006200000100001234567",
  "balance": 15
}

# Use the auto-generated _id returned by the index response
GET accounts/_doc/LXajCnQBbcLQN9cdCmHu

Indexing with an ID and Retrieving It

POST accounts/_doc/1
{
  "id": 1,
  "name": "AHMET",
  "surname": "MERT EKİNCİOĞLU",
  "iban": "TR910006200000100001234567",
  "balance": 15
}

GET accounts/_doc/1
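Since document 1 now exists, the _create endpoint can be used to guard against accidental overwrites; it only succeeds when the ID is new and returns a 409 conflict otherwise:

PUT accounts/_create/1
{
  "id": 1,
  "name": "AHMET",
  "surname": "MERT EKİNCİOĞLU",
  "iban": "TR910006200000100001234567",
  "balance": 15
}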

Updating Document

PUT accounts/_doc/1
{
  "id": 1,
  "name": "AHMET2",
  "surname": "MERT EKİNCİOĞLU",
  "iban": "TR910006200000100001234567",
  "balance": 15
}


# This overwrites the whole document; fields not sent here are lost
PUT accounts/_doc/1
{
  "id": 1,
  "name": "AHMET2"
}


GET accounts/_doc/1

Partial Update Document

# Create Document again
POST accounts/_doc/1
{
  "id": 1,
  "name": "AHMET",
  "surname": "MERT EKİNCİOĞLU",
  "iban": "TR910006200000100001234567",
  "balance": 15
}

# Partial update
POST accounts/_update/1
{
  "doc": {
    "name": "AHMET SALİM"
  }
}
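If the document might not exist yet, the same endpoint supports doc_as_upsert, which indexes the partial doc as a new document when the ID is missing:

POST accounts/_update/1
{
  "doc": {
    "name": "AHMET SALİM"
  },
  "doc_as_upsert": true
}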

Update By Query

# Create another document for testing
POST accounts/_doc/2
{
  "id": 2,
  "name": "GÜLNAME",
  "surname": "KINDIR",
  "iban": "TR910006200000100001234568",
  "balance": 27
}

# Update by Query with Script
POST accounts/_update_by_query
{
  "script": {
    "source": "ctx._source.balance+=10",
    "lang": "painless"
  },
  "query": {
    "term": {
      "name.keyword": "GÜLNAME"
    }
  }
}

# Check the balance
GET accounts/_doc/2



# Update by Query with an Ingest Pipeline (useful for bulk updates; unlike a one-off script, the same pipeline can also be applied to newly inserted documents)
PUT _ingest/pipeline/make-balance-default-value
{
  "description": "will set to balance 10",
  "processors": [
    {
      "set": {
        "field": "balance",
        "value": "10"
      }
    }
  ]
}

# Another example for ingest
PUT _ingest/pipeline/set-id-as-doc-identifier
{
  "description": "will set id field as doc identifier",
  "processors": [
    {
      "set": {
        "field": "_id",
        "value": "{{id}}"
      }
    }
  ]
}

# Insert doc with an ingest
POST accounts/_doc?pipeline=set-id-as-doc-identifier
{
  "id": 18,
  "name": "ÖMER",
  "surname": "PIRTI",
  "iban": "TR910006200000100001234584",
  "balance": 85,
  "created_at": "2020-08-19T08:08:00Z"
}

# Now we can reach data with id directly
GET accounts/_doc/18



# Check the balance first
GET accounts/_doc/2

# Update the data
POST accounts/_update_by_query?pipeline=make-balance-default-value
{
  "query": {
    "term": {
      "name.keyword": {
        "value": "GÜLNAME"
      }
    }
  }
}

# Check the balance again
GET accounts/_doc/2

Delete Document

DELETE accounts/_doc/1

Delete By Query

POST accounts/_delete_by_query
{
  "query": {
    "term": {
      "name": "GÜLNAME"
    }
  }
}
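Delete by query (and update by query) can also run asynchronously with wait_for_completion=false; the response then returns a task ID that can be followed with the tasks API shown in the next section:

POST accounts/_delete_by_query?wait_for_completion=false
{
  "query": {
    "term": {
      "name.keyword": "GÜLNAME"
    }
  }
}

# The response contains a "task" value; check its progress with:
GET _tasks/<task_id_from_response>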

Checking the tasks

GET _tasks?detailed=true&actions=*/delete/byquery

GET _tasks?detailed=true&actions=*/update/byquery

Bulk Indexing

POST _bulk
{"index": {"_index": "accounts", "_id": "1"}}
{"id": 1, "name": "AHMET", "surname": "MERT EKİNCİOĞLU", "iban": "TR910006200000100001234567", "balance": 15, "created_at": "2020-08-19T00:00:00Z"}
{"index": {"_index": "accounts", "_id": "2"}}
{"id": 2, "name": "GÜLNAME", "surname": "KINDIR", "iban": "TR910006200000100001234568", "balance": 27, "created_at": "2020-08-19T00:00:00Z"}
{"index": {"_index": "accounts", "_id": "3"}}
{"id": 3, "name": "SALİM", "surname": "İLTEMİZ", "iban": "TR910006200000100001234569", "balance": 54, "created_at": "2020-08-19T00:00:00Z"}
{"index": {"_index": "accounts", "_id": "4"}}
{"id": 4, "name": "MÜBERRA", "surname": "GÜVEN MEŞE", "iban": "TR910006200000100001234570", "balance": 81, "created_at": "2020-08-19T00:00:00Z"}

Testing Analyzers

POST _analyze
{
  "analyzer": "whitespace",
  "text":     "Haydar KÜLEKCİ haydar.kulekci@motivolog.com"
}

POST _analyze
{
  "analyzer": "standard",
  "text":     "Haydar KÜLEKCİ haydar.kulekci@motivolog.com"
}

POST _analyze
{
  "analyzer": "simple",
  "text":     "Haydar KÜLEKCİ haydar.kulekci@motivolog.com"
}

POST _analyze
{
  "analyzer": "keyword",
  "text":     "Haydar KÜLEKCİ haydar.kulekci@motivolog.com"
}

Testing Character Filter

POST _analyze
{
  "char_filter": ["html_strip"], 
  "text":     "Haydar KÜLEKCİ <a href=\"http://google.com\">haydar.kulekci@motivolog.com</a>"
}


POST _analyze
{
  "char_filter": [
    {
      "type": "pattern_replace",
      "pattern": "([A-Z])",
      "replacement": " $1"
    }
  ], 
  "tokenizer": "standard",
  "text": "BirEğitimSırasındaEğitim"
}

POST _analyze
{
  "tokenizer": "whitespace",
  "char_filter": [
    {
      "type": "pattern_replace",
      "pattern": "@(.*)$",
      "replacement": "(at)$1"
    }
  ],
  "text": "haydarkulekci@nic.tr"
}

Testing Tokenizers

POST _analyze
{
  "tokenizer": "standard",
  "text":     "Haydar KÜLEKCİ haydar.kulekci@motivolog.com"
}

POST _analyze
{
  "tokenizer": "letter",
  "text":     "Haydar KÜLEKCİ haydar.kulekci@motivolog.com"
}

POST _analyze
{
  "tokenizer": "lowercase",
  "text":     "Haydar KÜLEKCİ haydar.kulekci@motivolog.com"
}

POST _analyze
{
  "tokenizer": "classic",
  "text":     "Haydar KÜLEKCİ haydar.kulekci@motivolog.com"
}

POST _analyze
{
  "tokenizer": "uax_url_email",
  "text": "Bana e-posta gönderebilirsini haydar.kulekci@motivolog.com"
}

POST _analyze
{
  "tokenizer": {
    "type": "edge_ngram",
    "min_gram": 2,
    "max_gram": 100
  },
  "text":     "Haydar KÜLEKCİ"
}

POST _analyze
{
  "tokenizer": {
    "type": "pattern",
    "pattern": "[\\W]"
  },
  "text":     "Haydar Kulekci"
}

POST _analyze
{
  "tokenizer": "path_hierarchy",
  "text":     "/var/www/html"
}

Testing Token Filter

POST _analyze
{
  "tokenizer": "standard",
  "filter":    ["apostrophe"], 
  "text":      "Haydar'ın e-posta adresi haydar.kulekci@motivolog.com"
}

POST _analyze
{
  "tokenizer": "standard",
  "filter": ["lowercase"], 
  "text":     "Haydar'ın e-posta adresi haydar.kulekci@motivolog.com"
}

POST _analyze
{
  "tokenizer": "standard",
  "filter": ["lowercase", "asciifolding"], 
  "text":     "Haydar'ın e-posta adresi haydar.kulekci@motivolog.com"
}

POST _analyze
{
  "tokenizer": "standard",
  "filter": ["apostrophe", "lowercase", "asciifolding"], 
  "text":     "Haydar'ın e-posta adresi haydar.kulekci@motivolog.com"
}

POST _analyze
{
  "tokenizer": "standard",
  "filter": [
    {
      "type": "stemmer",
      "language": "turkish"
    }
  ], 
  "text": "Ankaralılar"
}

POST _analyze
{
  "tokenizer": "standard",
  "filter": [
    {
      "type": "stemmer",
      "language": "turkish"
    }
  ], 
  "text": "Niğdeli"
}

Create Index with Analyzer

DELETE test_analyzer_index


PUT test_analyzer_index
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_custom_analyzer": { 
          "type": "custom",
          "tokenizer": "punctuation"
        }
      },
      "tokenizer": {
        "punctuation": { 
          "type": "pattern",
          "pattern": "[ .,!?]"
        }
      }
    }
  }
}

POST test_analyzer_index/_analyze
{
  "analyzer": "my_custom_analyzer",
  "text": "Ben :) Haydar! senin adın ne?"
}

A bit more Complex Example

DELETE test_analyzer_index

PUT test_analyzer_index
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_custom_analyzer": { 
          "type": "custom",
          "char_filter": [
            "emoticons"
          ],
          "tokenizer": "punctuation",
          "filter": [
            "lowercase",
            "english_stop"
          ]
        }
      },
      "tokenizer": {
        "punctuation": { 
          "type": "pattern",
          "pattern": "[ .,!?]"
        }
      },
      "char_filter": {
        "emoticons": { 
          "type": "mapping",
          "mappings": [
            ":) => _happy_",
            ":( => _sad_"
          ]
        }
      },
      "filter": {
        "english_stop": { 
          "type": "stop",
          "stopwords": "_english_"
        }
      }
    }
  }
}

POST test_analyzer_index/_analyze
{
  "analyzer": "my_custom_analyzer",
  "text": "Ben :) Haydar! senin adın ne?"
}
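To see what each stage of the chain contributes, the _analyze API accepts an explain flag that breaks the output down per char filter, tokenizer, and token filter:

POST test_analyzer_index/_analyze
{
  "analyzer": "my_custom_analyzer",
  "explain": true,
  "text": "Ben :) Haydar! senin adın ne?"
}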

Mapping

# Check the mapping first
GET accounts/_mapping

# Result 
{
  "accounts" : {
    "mappings" : {
      "properties" : {
        "balance" : {
          "type" : "long"
        },
        "iban" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "id" : {
          "type" : "long"
        },
        "name" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "surname" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        }
      }
    }
  }
}

Create an index with Mapping

DELETE accounts

PUT accounts
{
  "aliases": {
    "alias-name": {}
  }, 
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 1,
    "analysis": {}
  },
  "mappings": {
    "properties": {
      "id": {
        "type": "keyword"
      },
      "name": {
        "type": "text"
      },
      "surname": {
        "type": "text"
      },
      "iban": {
        "type": "keyword"
      },
      "balance": {
        "type": "long"
      },
      "created_at": {
        "type": "date"
      }
    }
  }
}
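Note that the type of an existing field cannot be changed in place; the usual approach is to create a new index with the desired mapping and copy the documents over with the reindex API. A minimal sketch, assuming accounts_v2 was already created with the new mapping:

POST _reindex
{
  "source": {
    "index": "accounts"
  },
  "dest": {
    "index": "accounts_v2"
  }
}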

Dynamic Field Mapping

DELETE accounts

PUT accounts
{
  "aliases": {
    "alias-name": {}
  }, 
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 1,
    "analysis": {}
  },
  "mappings": {
    "dynamic": false, 
    "properties": {
      "id": {
        "type": "keyword"
      },
      "name": {
        "type": "text"
      },
      "surname": {
        "type": "text"
      },
      "iban": {
        "type": "keyword"
      },
      "balance": {
        "type": "long"
      },
      "created_at": {
        "type": "date"
      },
      "details": {
        "dynamic": true,
        "type": "object"
      }
    }
  }
}

# Check mapping first to see difference
GET accounts/_mapping

# To test this mapping, index a document with unmapped fields:

POST accounts/_doc/10000
{
  "id": 1,
  "tag": "cutomer1",
  "details": {
    "category": "category1"
  }
}

# After indexing the data, check the mapping:
GET accounts/_mapping

# The result will be something like this:
{
  "accounts" : {
    "mappings" : {
      "dynamic" : "false",
      "properties" : {
        "balance" : {
          "type" : "long"
        },
        "created_at" : {
          "type" : "date"
        },
        "details" : {
          "dynamic" : "true",
          "properties" : {
            "name" : {
              "type" : "text",
              "fields" : {
                "keyword" : {
                  "type" : "keyword",
                  "ignore_above" : 256
                }
              }
            }
          }
        },
        "iban" : {
          "type" : "keyword"
        },
        "id" : {
          "type" : "keyword"
        },
        "name" : {
          "type" : "text"
        },
        "surname" : {
          "type" : "text"
        }
      }
    }
  }
}

Index Template

# Clear the index
DELETE accounts

# Create the template
PUT _template/accounts_index_template
{
  "index_patterns": ["accounts*"],
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 0
  },
  "mappings": {
    "properties": {
      "id": {
        "type": "keyword"
      },
      "name": {
        "type": "text"
      },
      "surname": {
        "type": "text"
      },
      "iban": {
        "type": "keyword"
      },
      "balance": {
        "type": "long"
      },
      "created_at": {
        "type": "date"
      }
    }
  }
}


# Insert data to index
POST _bulk
{"index": {"_index": "accounts", "_id": "1"}}
{"id": 1, "name": "AHMET", "surname": "MERT EKİNCİOĞLU", "iban": "TR910006200000100001234567", "balance": 15, "created_at": "2020-08-19T00:00:00Z"}
{"index": {"_index": "accounts", "_id": "2"}}
{"id": 2, "name": "GÜLNAME", "surname": "KINDIR", "iban": "TR910006200000100001234568", "balance": 27, "created_at": "2020-08-19T00:00:00Z"}
{"index": {"_index": "accounts", "_id": "3"}}
{"id": 3, "name": "SALİM", "surname": "İLTEMİZ", "iban": "TR910006200000100001234569", "balance": 54, "created_at": "2020-08-19T00:00:00Z"}
{"index": {"_index": "accounts", "_id": "4"}}
{"id": 4, "name": "MÜBERRA", "surname": "GÜVEN MEŞE", "iban": "TR910006200000100001234570", "balance": 81, "created_at": "2020-08-19T00:00:00Z"}
{"index": {"_index": "accounts", "_id": "309"}}
{"id": 309, "name": "MERT", "surname": "METİN ARIKAN", "iban": "TR910006200000100001234875", "balance": 43, "created_at": "2020-08-19T24:04:00Z"}


# Just check the mapping to see your template mapping
GET accounts/_mapping

Let's apply analyzers to our model

DELETE accounts

PUT accounts
{
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 0,
    "analysis": {
      "analyzer": {
        "my_analyzer": {
          "type": "custom",
          "tokenizer": "standard",
          "char_filter": ["html_strip"],
          "filter": ["lowercase", "asciifolding"]
        }
      }
    }
  },
  "mappings": {
    "dynamic": false, 
    "properties": {
      "id": {
        "type": "keyword"
      },
      "name": {
        "type": "text",
        "analyze": "my_analyzer"
      },
      "surname": {
        "type": "text"
      },
      "iban": {
        "type": "keyword"
      },
      "balance": {
        "type": "long"
      },
      "created_at": {
        "type": "date"
      },
      "details": {
        "dynamic": true,
        "type": "object"
      }
    }
  }
}


# Test it
POST accounts/_analyze
{
  "text": ["<a href=''>KÜLEKCİ</a>"], 
  "field": "name"
}

A Little Bit More Extended

PUT accounts2
{
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 0,
    "analysis": {
      "analyzer": {
        "my_analyzer": {
          "type": "custom",
          "tokenizer": "standard",
          "char_filter": ["html_strip"],
          "filter": ["lowercase", "asciifolding"]
        }
      }
    }
  },
  "mappings": {
    "dynamic": false, 
    "properties": {
      "id": {
        "type": "keyword"
      },
      "name": {
        "type": "text", 
        "analyzer": "my_analyzer",
        "fields": {
          "not_analyzed": {
            "type": "keyword"
          }
        }
      },
      "surname": {
        "type": "text"
      },
      "iban": {
        "type": "keyword"
      },
      "balance": {
        "type": "long"
      },
      "created_at": {
        "type": "date"
      },
      "type": {
        "type": "keyword"
      },
      "tags": {
        "type": "keyword"
      },
      "details": {
        "dynamic": true,
        "type": "object"
      }
    }
  }
}

# Test it
POST accounts2/_analyze
{
  "text": ["<a href=''>KÜLEKCİ</a>"], 
  "field": "name.not_analyzed"
}

We can just search

# Check the _score field
GET accounts/_search
{
  "query": {
    "query_string": {
      "default_field": "name",
      "query": "Ahmet"
    }
  }
}

GET accounts/_search
{
  "query": {
    "query_string": {
      "fields": ["name^3", "surname"],
      "query": "Ahmet"
    }
  }
}
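When relevance scoring is not needed, a bool query with a filter clause is cheaper and cacheable. A sketch combining an exact term on the keyword iban field with a range on balance:

GET accounts/_search
{
  "query": {
    "bool": {
      "filter": [
        { "term": { "iban": "TR910006200000100001234567" } },
        { "range": { "balance": { "gte": 10, "lte": 100 } } }
      ]
    }
  }
}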

Aggregations

Range Aggregation

GET accounts/_search
{
  "size": 0, 
  "aggs": {
    "range": {
      "range": {
        "field": "balance",
        "ranges": [
          {
            "from": 50,
            "to": 100
          },
          {
            "from": 0,
            "to": 30
          },
          {
            "from": 30,
            "to": 50
          }
        ]
      }
    }
  }
}

# Note that this aggregation includes the from value and excludes the to value for each range.

Date Histogram

GET accounts/_search
{
  "size": 0, 
  "aggs": {
    "date": {
      "date_histogram": {
        "field": "created_at",
        "calendar_interval": "day"
      }
    }
  }
}

Terms Aggregation

GET accounts/_search
{
  "size": 0,
  "aggs": {
    "account_types": {
      "terms": {
        "field": "type.keyword",
        "size": 10
      }
    }
  }
}

Max/Min Aggregation

GET accounts/_search
{
  "size": 0, 
  "aggs": {
    "date": {
      "date_histogram": {
        "field": "created_at",
        "calendar_interval": "day"
      },
      "aggs": {
        "max_balance": {
          "max": {
            "field": "balance"
          }
        },
        "min_balance": {
          "min": {
            "field": "balance"
          }
        }
      }
    }
  }
}

Avg and (Extended) Stats Aggregation


GET accounts/_search
{
  "size": 0, 
  "aggs": {
    "date": {
      "date_histogram": {
        "field": "created_at",
        "calendar_interval": "day"
      },
      "aggs": {
        "average_balance": {
          "avg": {
            "field": "balance"
          }
        }
      }
    }
  }
}

GET accounts/_search
{
  "size": 0, 
  "aggs": {
    "date": {
      "date_histogram": {
        "field": "created_at",
        "calendar_interval": "day"
      },
      "aggs": {
        "average_balance": {
          "stats": {
            "field": "balance"
          }
        }
      }
    }
  }
}

GET accounts/_search
{
  "size": 0, 
  "aggs": {
    "date": {
      "date_histogram": {
        "field": "created_at",
        "calendar_interval": "day"
      },
      "aggs": {
        "average_balance": {
          "extended_stats": {
            "field": "balance"
          }
        }
      }
    }
  }
}

Cardinality Aggregation

A single-value metrics aggregation that calculates an "approximate" count of distinct values.

GET accounts/_search
{
  "size": 0,
  "aggs": {
    "account_types": {
      "cardinality": {
        "field": "type.keyword"
      }
    }
  }
}

Query Time Boosting

GET /_search
{
  "query": {
    "bool": {
      "should": [
        {
          "match": {
            "title": {
              "query": "quick brown fox",
              "boost": 2 
            }
          }
        },
        {
          "match": { 
            "content": "quick brown fox"
          }
        }
      ]
    }
  }
}

Manipulating the Score

GET index/_search
{
  "query": {
    "script_score": {
      "query": {
        "match": { "body": "elasticsearch" }
      },
      "script": {
        "source": "_score * saturation(doc['pagerank'].value, 10)" 
      }
    }
  }
}

Dynamic Templates

DELETE dynamic-template-sample

PUT dynamic-template-sample
{
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 0
  }, 
  "mappings": {
    "dynamic_templates": [
      {
        "strings_as_keywords": {
          "match_mapping_type": "string",
          "mapping": {
            "type": "keyword"
          }
        }
      }
    ]
  }
}


POST dynamic-template-sample/_doc
{
  "test": "string",
  
}


GET dynamic-template-sample/_mapping
DELETE dynamic-template-sample

PUT dynamic-template-sample
{
  "mappings": {
    "dynamic_templates": [
      {
        "full_name": {
          "path_match":   "name.*",
          "path_unmatch": "*.middle",
          "mapping": {
            "type":       "text",
            "copy_to":    "full_name"
          }
        }
      }
    ]
  }
}

POST dynamic-template-sample/_doc
{
  "name": {
    "first":  "John",
    "middle": "Winston",
    "last":   "Lennon"
  }
}

GET dynamic-template-sample/_mapping

GET dynamic-template-sample/_search
{
  "query": {
    "match": {
      "full_name": "John"
    }
  }
}

Ingest Pipeline

PUT _ingest/pipeline/accounts-pipeline
{
  "description": "Balance casting pipeline",
  "processors": [
    {
      "lowercase": {
        "field": "surname"
      }
    },
    {
      "convert": {
        "field": "balance",
        "type": "integer"
      }
    },
    {
      "set": {
        "field": "indexed_at",
        "value": "{{_ingest.timestamp}}"
      }
    }
  ]
}
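Before indexing through the pipeline, we can dry-run it with the simulate API and inspect the transformed documents:

POST _ingest/pipeline/accounts-pipeline/_simulate
{
  "docs": [
    {
      "_source": {
        "name": "HAYDAR",
        "surname": "KÜLEKCİ",
        "balance": "110"
      }
    }
  ]
}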

POST accounts-with-pipeline/_doc/1?pipeline=accounts-pipeline
{
  "name": "HAYDAR",
  "surname": "KÜLEKCİ",
  "balance": "110"
}

GET accounts-with-pipeline/_doc/1

{
  "_index" : "accounts-with-pipeline",
  "_type" : "_doc",
  "_id" : "1",
  "_version" : 1,
  "_seq_no" : 1,
  "_primary_term" : 1,
  "found" : true,
  "_source" : {
    "balance" : 110,
    "surname" : "külekci̇",
    "name" : "HAYDAR"
  }
}


Performance Notes

This setting controls the memory buffer for indexing operations. During heavy bulk indexing we can increase it:

indices.memory.index_buffer_size: 40%

Resolving the read-only index block

If you are getting the error below while indexing:

{
  "error": {
    "root_cause": [
      {
        "type": "cluster_block_exception",
        "reason": "blocked by: [FORBIDDEN/12/index read-only / allow delete (api)];"
      }
    ],
    "type": "cluster_block_exception",
    "reason": "blocked by: [FORBIDDEN/12/index read-only / allow delete (api)];"
  },
  "status": 403
}

This is usually caused by low disk space (Elasticsearch sets the block when the flood-stage disk watermark is exceeded). After freeing space, clear the block:

PUT index-name/_settings
{
  "index": {
    "blocks": {
      "read_only_allow_delete": "false"
    }
  }
}

Checking File Descriptors

Check the /etc/init.d/elasticsearch file to change the following configurations:

.....
# The following variables can be overwritten in $DEFAULT

# Directory where the Elasticsearch binary distribution resides
ES_HOME=/usr/share/$NAME

# Additional Java OPTS
#ES_JAVA_OPTS=

# Maximum number of open files
MAX_OPEN_FILES=65536

# Maximum amount of locked memory
#MAX_LOCKED_MEMORY=

# Elasticsearch configuration directory
ES_PATH_CONF=/etc/$NAME

# Maximum number of VMA (Virtual Memory Areas) a process can own
MAX_MAP_COUNT=262144

.....

Or change /usr/lib/systemd/system/elasticsearch.service file:

[Unit]
Description=Elasticsearch
Documentation=http://www.elastic.co
Wants=network-online.target
After=network-online.target

[Service]
RuntimeDirectory=elasticsearch
Environment=ES_HOME=/usr/share/elasticsearch
Environment=ES_PATH_CONF=/etc/elasticsearch
Environment=PID_DIR=/var/run/elasticsearch
EnvironmentFile=-/etc/default/elasticsearch

WorkingDirectory=/usr/share/elasticsearch

User=elasticsearch
Group=elasticsearch

ExecStart=/usr/share/elasticsearch/bin/elasticsearch -p ${PID_DIR}/elasticsearch.pid --quiet

# StandardOutput is configured to redirect to journalctl since
# some error messages may be logged in standard output before
# elasticsearch logging system is initialized. Elasticsearch
# stores its logs in /var/log/elasticsearch and does not use
# journalctl by default. If you also want to enable journalctl
# logging, you can simply remove the "quiet" option from ExecStart.
StandardOutput=journal
StandardError=inherit

# Specifies the maximum file descriptor number that can be opened by this process
LimitNOFILE=1048576

# Specifies the maximum number of processes
LimitNPROC=4096

# Specifies the maximum size of virtual memory
LimitAS=infinity

# Specifies the maximum file size
LimitFSIZE=infinity

# Disable timeout logic and wait until process is stopped
TimeoutStopSec=0

# SIGTERM signal is used to stop the Java process
KillSignal=SIGTERM

# Send the signal only to the JVM rather than its control group
KillMode=process

# Java process is never killed
SendSIGKILL=no

# When a JVM receives a SIGTERM signal it exits with code 143
SuccessExitStatus=143

[Install]
WantedBy=multi-user.target

# Built for distribution-6.2.4 (distribution)

Then run the command below:

systemctl daemon-reload
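After the reload (and a restart of the service), you can verify the limits took effect from the node stats:

GET _nodes/stats/process?filter_path=**.max_file_descriptors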

Beat Notes

Exporting index template

filebeat export template
metricbeat export template
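Both commands print the template JSON to stdout, so it can be redirected to a file and loaded manually. A sketch, assuming the template is named after the Beat version:

filebeat export template > filebeat.template.json
curl -XPUT -H 'Content-Type: application/json' http://localhost:9200/_template/filebeat-7.9.0 -d @filebeat.template.json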

Snapshot and Backup

Snapshot Repository

PUT /_snapshot/account_backup
{
  "type": "fs",
  "settings": {
    "location": "/data/backup"
    "compress": true
  }
}


PUT /_snapshot/account_backup/snapshot_20200819_12_00_00?wait_for_completion=true
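We can check the snapshot's state and the indices it contains afterwards:

GET /_snapshot/account_backup/snapshot_20200819_12_00_00

# Or list every snapshot in the repository
GET /_snapshot/account_backup/_all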

Restore Snapshot

POST /_snapshot/account_backup/snapshot_20200819_12_00_00/_restore
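A restore fails if an open index with the same name already exists, so either close or delete the index first, or restore under a new name. A sketch using rename_pattern (the restored index becomes restored_accounts):

POST /_snapshot/account_backup/snapshot_20200819_12_00_00/_restore
{
  "indices": "accounts",
  "rename_pattern": "(.+)",
  "rename_replacement": "restored_$1"
}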

Using Curator for snapshots

We need to create the repository again as we did above. Then we need to install Curator:

# install pip first
apt-get install python3-pip
ln -s /usr/bin/pip3 /usr/bin/pip
# then install curator
pip install elasticsearch-curator

After installing Curator, we need to add our backup path (path.repo) to the elasticsearch.yml file. Then we need to create a repository:

PUT /_snapshot/account_backup
{
  "type": "fs",
  "settings": {
    "location": "/data/backup",
    "compress": true
  }
}

Alternatively, snapshots can be stored in S3, which requires the repository-s3 plugin:

bin/elasticsearch-plugin install repository-s3

PUT /_snapshot/accounts_backup
{
  "type": "s3",
  "settings": {
    "bucket": "feeds-es-backup",
    "region": "eu-west-1",
    "compress": true
  }
}

After this, we need to create a config file for curator:

# ~/.curator/curator.yml
# Remember, leave a key empty if there is no value.  None will be a string,
# not a Python "NoneType"
client:
  hosts:
    - 127.0.0.1
  port: 9200
  url_prefix:
  use_ssl: False
  certificate:
  client_cert:
  client_key:
  ssl_no_validate: False
  http_auth:
  timeout: 30
  master_only: True
logging:
  loglevel: INFO
  logfile:
  logformat: default
  blacklist: ['elasticsearch', 'urllib3']

Once this file is in place, you can verify it with the command below:

curator_cli show_indices

Now we can initialize an action for backup. Create a backup_action.yml file:

# ~/.curator/backup_action.yml
actions:
  1:
    action: snapshot
    description: >-
      Snapshot the accounts
    options:
      repository: account_backup
      # Leaving name blank will result in the default 'curator-%Y%m%d%H%M%S'
      name: accounts-%Y%m%d%H%M%S
      ignore_unavailable: False
      include_global_state: True
      partial: False
      wait_for_completion: True
      skip_repo_fs_check: False
      disable_action: False
    filters:
    - filtertype: pattern
      kind: regex
      value: '^account.*$'

Finally, we can run Curator:

curator ~/.curator/backup_action.yml

We can also schedule this command as a backup cronjob:

0 20 * * * /home/ubuntu/.local/bin/curator --config /home/ubuntu/.curator/curator.yml /home/ubuntu/.curator/backup_action.yml >> /var/log/curator/curator.log

Logstash Notes

For Ubuntu, you need to go into the /usr/share/logstash directory. You can also use the JDK bundled with Elasticsearch as your Java by running the command below:

export JAVA_HOME=/usr/share/elasticsearch/jdk

After setting the JDK, we need to install some plugins to work through the examples:

bin/logstash-plugin install logstash-input-stdin

Running with Docker

If you want to test it locally, you can use Docker. Here is a Dockerfile for local testing:

FROM docker.elastic.co/logstash/logstash:7.9.0

WORKDIR /usr/share/logstash
RUN bin/logstash-plugin install logstash-input-beats
RUN bin/logstash-plugin install logstash-input-stdin
RUN bin/logstash-plugin install logstash-output-stdout
RUN bin/logstash-plugin install logstash-output-elasticsearch
RUN bin/logstash-plugin install logstash-filter-json
RUN bin/logstash-plugin install logstash-filter-mutate
RUN bin/logstash-plugin install logstash-filter-geoip


# https://github.com/elastic/logstash-docker/issues/45
RUN sed -i '/xpack/d' /usr/share/logstash/config/logstash.yml

EXPOSE 5044
CMD ["/bin/bash", "-c", "--" , "while true; do sleep 30; done;"]

After that, you need to create a docker-compose.yml file:

version: '2'

services:
  logstash:
    build: ./
    ports:
        - "5044:5044"


After creating the docker-compose.yml file, you can run the command below:

docker-compose up -d

Now you can open a shell inside the container for testing:

docker-compose exec logstash bash

Testing Docker

Inside the container we can run the command below:

bin/logstash -e 'input { stdin { } } output { stdout {} }'

If you type something into stdin, you will see a result like this:

> test message
{
      "@version" => "1",
          "host" => "0b2f3e18edc7",
    "@timestamp" => 2020-08-23T17:05:39.345Z,
       "message" => "test message"
}

Now we can change the config:

bin/logstash -e '
input { stdin { } }
filter {
    grok {
        match => { "message" => "%{COMBINEDAPACHELOG}"}
    }
    geoip {
        source => "clientip"
        target => "geoip"
    }
}
output { stdout {} }'

After changing the configuration, you will see the following result:

8.8.8.8 AWAY - [01/Feb/1998:01:09:14 -0800] "GET /bannerad/click.htm HTTP/1.0" 200 207 "http://www.referrer.com/bannerad/menu.htm" "Mozilla/4.01 (Macintosh; I; PPC)"
{
          "ident" => "AWAY",
           "verb" => "GET",
        "message" => "8.8.8.8 AWAY - [01/Feb/1998:01:09:14 -0800] \"GET /bannerad/click.htm HTTP/1.0\" 200 207 \"http://www.referrer.com/bannerad/menu.htm\" \"Mozilla/4.01 (Macintosh; I; PPC)\"",
          "agent" => "\"Mozilla/4.01 (Macintosh; I; PPC)\"",
       "@version" => "1",
          "geoip" => {
             "longitude" => -97.822,
                    "ip" => "8.8.8.8",
              "timezone" => "America/Chicago",
              "latitude" => 37.751,
         "country_code2" => "US",
          "country_name" => "United States",
        "continent_code" => "NA",
              "location" => {
            "lon" => -97.822,
            "lat" => 37.751
        },
         "country_code3" => "US"
    },
       "clientip" => "8.8.8.8",
           "auth" => "-",
    "httpversion" => "1.0",
           "host" => "0b2f3e18edc7",
        "request" => "/bannerad/click.htm",
       "response" => "200",
          "bytes" => "207",
       "referrer" => "\"http://www.referrer.com/bannerad/menu.htm\"",
      "timestamp" => "01/Feb/1998:01:09:14 -0800",
     "@timestamp" => 2020-08-23T17:10:08.390Z
}

You can enrich events with further filters; the example below uses a ruby filter to derive a kilobytes field from bytes (save the configuration as test.conf and run it):

input { stdin { } }
filter {
    grok {
        match => { "message" => "%{COMBINEDAPACHELOG}"}
    }
    ruby { 
      "code" => "event.set('kilobytes', Float(event.get('bytes')) / 1000)"
    }
    geoip {
        source => "clientip"
    }
}
output { stdout {} }

To run it, use the command below:

bin/logstash -f test.conf

The result will be something like this:

8.8.8.8 AWAY - [01/Feb/1998:01:09:14 -0800] "GET /bannerad/click.htm HTTP/1.0" 200 207 "http://www.referrer.com/bannerad/menu.htm" "Mozilla/4.01 (Macintosh; I; PPC)"
{
           "auth" => "-",
      "kilobytes" => 0.207,
        "message" => "8.8.8.8 AWAY - [01/Feb/1998:01:09:14 -0800] \"GET /bannerad/click.htm HTTP/1.0\" 200 207 \"http://www.referrer.com/bannerad/menu.htm\" \"Mozilla/4.01 (Macintosh; I; PPC)\"",
      "timestamp" => "01/Feb/1998:01:09:14 -0800",
       "referrer" => "\"http://www.referrer.com/bannerad/menu.htm\"",
          "agent" => "\"Mozilla/4.01 (Macintosh; I; PPC)\"",
           "host" => "0b2f3e18edc7",
          "ident" => "AWAY",
    "httpversion" => "1.0",
       "@version" => "1",
     "@timestamp" => 2020-08-23T21:12:36.729Z,
          "geoip" => {
              "latitude" => 37.751,
              "timezone" => "America/Chicago",
              "location" => {
            "lat" => 37.751,
            "lon" => -97.822
        },
         "country_code2" => "US",
        "continent_code" => "NA",
             "longitude" => -97.822,
          "country_name" => "United States",
                    "ip" => "8.8.8.8",
         "country_code3" => "US"
    },
       "response" => "200",
       "clientip" => "8.8.8.8",
        "request" => "/bannerad/click.htm",
           "verb" => "GET",
          "bytes" => "207"
}
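Since we installed logstash-output-elasticsearch earlier, we can ship the parsed events into Elasticsearch instead of stdout by swapping the output block. A minimal sketch, assuming Elasticsearch runs at localhost:9200 (the weblogs index name is just an example):

output {
  elasticsearch {
    hosts => ["http://localhost:9200"]
    index => "weblogs-%{+YYYY.MM.dd}"
  }
}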

DevOps Notes

Elasticsearch Configurations

To check which nodes have bootstrap.memory_lock set to true or false:

GET _nodes?filter_path=**.mlockall
GET accounts/_recovery

GET _nodes
GET _nodes/usage
GET _nodes/stats/transport?human=true
{
...
  "transport": {
    "server_open": 26,
    "rx_count": 1565123,              // Network Received Packet Count
    "rx_size": "71.2gb",              // Network Received Size
    "rx_size_in_bytes": 76453582042,  // Network Received Size in Byte
    "tx_count": 1565121,              // Network Transmitted Packed Count
    "tx_size": "78.1gb",              // Network Transmitted Size
    "tx_size_in_bytes": 83886776180   // Network Transmitted Size in Byte
  }
...
}

GET _stats
GET _tasks


GET _sql
{
  "query": """
  SELECT agent.version, agent.hostname FROM "filebeat-7.9.0"
  """
}
GET filebeat-7.9.0/_search


GET _alias



GET _cat/indices
GET _cat/fielddata
GET _cat/segments
GET _cat/shards


GET _cluster/health
GET _cluster/pending_tasks
GET _cluster/settings
GET _cluster/state
GET _cluster/stats
GET _cluster/