send nginx logs to opensearch with fluent-bit

requirements

prepare an index template and possibly a dedicated user for that purpose – make sure you’ve got a template for logs-dev-*

# elastic
http://localhost:5601/app/management/data/index_management/templates

# osearch
http://localhost:5602/app/opensearch_index_management_dashboards#/templates

make sure the ELK / osearch service is reachable

# check that the port is open
nmap -p 9200 10.1.0.30
# check that the API answers (-k skips TLS certificate verification)
# the URL is quoted so the shell never treats '?' as a glob character
curl -k 'https://10.1.0.30:9200/?pretty=true'
# same endpoint, this time with basic authentication
curl -k -u kibanaserver:kibanaserver 'https://10.1.0.30:9200/'

setup

vi /etc/fluent-bit/custom_parsers.conf
# plain JSON parser without any time_key setting:
# @timestamp is enough - no time_key
# nginx's time_local is a pain to handle anyhow
# https://docs.fluentbit.io/manual/pipeline/parsers/configuring-parser
[PARSER]
    name   json_no_time
    format json

# split up the request field ("METHOD PATH HTTP/VERSION")
# into the named captures method, path and http_version
[PARSER]
    name split_request
    format regex
    regex ^(?<method>[^ ]*) (?<path>[^ ]*) HTTP/(?<http_version>[^ ]*)

# help differentiate web pages - strip out the query string:
# page captures everything before the first '?'
[PARSER]
    name strip_querystr
    format regex
    regex ^(?<page>[^?]*)
vi /etc/fluent-bit/fluent-bit.conf
# INDENTATION WITH SPACES NOT TABS

# global settings: flush interval of 1, stay in the foreground (daemon off),
# load both the stock parsers.conf and our custom_parsers.conf,
# and write fluent-bit's own log to /var/log/fluent-bit.log
[SERVICE]
    flush 1
    daemon off
    parsers_file parsers.conf
    parsers_file custom_parsers.conf
    plugins_file plugins.conf
    log_file /var/log/fluent-bit.log

# nginx error logs: tailed without a parser, tagged nginx.error
[INPUT]
    name tail
    path /var/log/nginx/*error*log
    tag nginx.error

# nginx access logs: parsed as JSON (json_no_time), tagged nginx.access
# this expects nginx to write JSON lines, see
# https://nginx.org/en/docs/http/ngx_http_log_module.html --> log_format escape=json
[INPUT]
    name tail
    path /var/log/nginx/*access*log
    tag nginx.access
    parser json_no_time

# @timestamp is enough - drop the time_local field from access records
[FILTER]
    name modify
    match nginx.access
    remove time_local

# GeoIP enrichment: look up remote_addr in the GeoLite2 City database and add
# the keys geo.city_name, geo.country_name, lat and lon to the record
# city and country names are taken from the "ru" entry of the names map
# NOTE(review): pick another language code (e.g. en) if that is not intended
# https://elastic.co/guide/en/ecs/1.12/ecs-geo.html
[FILTER]
    name geoip2
    match nginx.access
    database /etc/fluent-bit/GeoLite2-City.mmdb
    lookup_key              remote_addr
    record geo.city_name    remote_addr %{city.names.ru}
    record geo.country_name remote_addr %{country.names.ru}
    record lat              remote_addr %{location.latitude}
    record lon              remote_addr %{location.longitude}
    log_level error

# commonly used field for display
# the geoip2 filter writes the key geo.country_name (without a source. prefix),
# so that is the key to copy from - copying from a key that does not exist
# leaves the record unchanged
# NOTE(review): the destination keeps its source. prefix while the other geo
# keys in this file are unprefixed - align them if the index template requires it
[FILTER]
    name modify
    match nginx.access
    copy geo.country_name source.geo.name

# catch lat lon and nest them under a single geo.location key
# NOTE(review): the wildcard l* matches every top-level key starting with "l",
# not only lat and lon - confirm the access log has no other such field
[FILTER]
    name nest
    match nginx.access
    operation nest
    wildcard l*
    nest_under geo.location

# run the split_request parser on the request field;
# reserve_data keeps the other fields of the record
# https://docs.fluentbit.io/manual/pipeline/filters/parser
[FILTER]
    name parser
    match nginx.access
    key_name request
    parser split_request
    reserve_data true

# run the strip_querystr parser on the path field produced above;
# preserve_key also keeps the original path next to the new page field
[FILTER]
    name parser
    match nginx.access
    key_name path
    parser strip_querystr
    reserve_data true
    preserve_key true

# tag every access record with a sensor field
# NOTE(review): HOSTNAME is a literal here, so the value is the string
# "nginx@HOSTNAME" - presumably a placeholder to replace with the real host name
[FILTER]
    name modify
    match nginx.access
    add sensor nginx@HOSTNAME

#[OUTPUT]
#    name file
#    match nginx.error
#    match nginx.access
#    match nginx.*
#    path /var/log
#    file fluent-bit.nginx.log
# restrict the configuration to its owner
# absolute path, so this works from any working directory
chmod 600 /etc/fluent-bit/fluent-bit.conf

then separate access and error logs into two different indices

ready to go

# follow fluent-bit's own log file(s); tail -F does not return,
# so keep it running in a separate terminal
tail -F /var/log/fluent-bit*log

# restart the service, then check its state
systemctl restart fluent-bit
systemctl status fluent-bit

acceptance

easy-peasy error log generation as we did not enable cgi scripts

# write the test script through a here-document: '<<' is required, a single
# '<' would try to read from a file named EOF instead
# the quoted delimiter keeps the script body literal (no expansion)
cat > /var/www/html/gen-error.sh <<'EOF'
#!/bin/bash

echo 'Content-Type: text/html'
echo
echo '<p>this message should NOT show up and generate an nginx error log instead'
EOF

generate some logs

# HEAD requests only - one per kind of log entry we expect
curl --head localhost                # 200 access log
curl --head localhost/NO-EXIST       # 404 access log
curl --head localhost/gen-error.sh   # error log

and check that the data stream got created

troubleshooting

[ warn] [engine] failed to flush chunk '51468-1698409873.302155692.flb', retry in 9 seconds: task_id=0, input=tail.0 > output=opensearch.0 (out_id=0)
[error] [output:opensearch:opensearch.0] HTTP status=401 URI=/_bulk
[error] [engine] chunk '51468-1698409873.302155692.flb' cannot be retried: task_id=0, input=tail.0 > output=opensearch.0

resources

https://opensearch.org/blog/getting-started-with-fluent-bit-and-opensearch/

https://docs.fluentbit.io/manual/pipeline/outputs/opensearch

acceptance

https://opensearch.org/docs/latest/api-reference/index-apis/get-index/

https://opensearch.org/docs/latest/dashboards/discover/index-discover/

https://stackoverflow.com/questions/69617608/elasticsearch-8-errors-with-action-metadata-line-1-contains-an-unknown-paramet ==> nice example

https://docs.oracle.com/en-us/iaas/Content/search-opensearch/Tasks/ingestingfluentbit.htm

troubles

https://stackoverflow.com/questions/73867417/opensearch-401-for-bulk ==> don’t forget to authenticate

moar

https://docs.fluentbit.io/manual/pipeline/inputs/nginx


HOME | GUIDES | LECTURES | LAB | SMTP HEALTH | HTML5 | CONTACT
Copyright © 2024 Pierre-Philipp Braun