Basic Usage of Logstash 7.0.0

山小杰 · Oct 01, 2019

This post is a detailed walkthrough of a sync job configuration for Logstash 7.0.0.
It focuses on custom mappings and on configuring multiple data sources and targets, so I will simply paste the contents of the sync pipeline configuration file.
Two things to keep in mind:

  • Never filter out the type field that Logstash attaches to each event; without it the output conditionals cannot match and the data will not be synced.
  • The type value tested in each output block must correspond to the type set in the matching jdbc input (see the sketch below).
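
Stripped of the JDBC details, the pattern the two notes describe looks like this (a minimal sketch with placeholder values — "my_table" and the localhost address — not a complete pipeline):

input {
    jdbc {
        # every event produced by this input carries type = "my_table"
        type => "my_table"
        # ... connection, statement and tracking settings ...
    }
}
output {
    # only events whose type matches are written to this index
    if [type] == "my_table" {
        elasticsearch {
            hosts => ["localhost:9200"]
            index => "my_table"
        }
    }
}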

Example sync pipeline configuration file

input {
    stdin {}
    jdbc {
        type => "audit_law"
        # Database connection URL
        jdbc_connection_string => "jdbc:mysql://192.168.0.196:3306/auditbase?characterEncoding=UTF-8&autoReconnect=true&useSSL=false"
        # Database user name and password
        jdbc_user => "aaaa"
        jdbc_password => "123456"
        # Path to the MySQL JDBC driver jar
        jdbc_driver_library => "lib/jars/mysql-connector-java-5.1.41-bin.jar"
        # The name of the driver class for MySQL
        jdbc_driver_class => "com.mysql.jdbc.Driver"
        # Force UTF-8 to avoid garbled Chinese characters
        codec => plain {charset => "UTF-8"}
        # Number of connection retry attempts
        connection_retry_attempts => "3"
        # Validate the database connection before use (default: false)
        jdbc_validate_connection => "true"
        # Connection validation timeout, default 3600 seconds
        jdbc_validation_timeout => "3600"
        # Enable paged queries (default: false)
        jdbc_paging_enabled => "true"
        # Rows per page (default 100000; lower it if rows are wide or change frequently)
        jdbc_page_size => "5000"
        # statement holds the query SQL; for complex SQL it is better to keep it in a separate file and reference it with statement_filepath
        # sql_last_value is a built-in variable holding the tracking_column value of the last row returned by the previous run, here PUBLISH_TIME
        # statement_filepath => "mysql/jdbc.sql"
        statement => "SELECT a.ID,'01' AS TYPE,a.NAME,a.DOCUMENT_NO,a.INDUSTRY_NAME,a.REGION_NAME,b.CONTENT,a.PUBLISH_TIME FROM audit_law a LEFT JOIN tab_file_content b ON a.CONTENT_ID = b.ID WHERE a.STATE_CODE='03' AND PUBLISH_TIME > :sql_last_value ORDER BY PUBLISH_TIME ASC"
        # Whether to lowercase column names, default true (set to false if you need the original names for serialization/deserialization)
        lowercase_column_names => false
        # Value can be any of: fatal, error, warn, info, debug; default is info
        sql_log_level => warn
        # Whether to record state between runs; when true, the last tracking_column value is saved to the file given by last_run_metadata_path
        record_last_run => true
        # Set to true to track a column from the query results; when false, the timestamp of the last run is tracked instead
        use_column_value => true
        # Column to track for incremental sync; it must be a column returned by the query
        tracking_column => "PUBLISH_TIME"
        # Value can be any of: numeric, timestamp; default is "numeric"
        tracking_column_type => timestamp
        # File where record_last_run stores the last value
        last_run_metadata_path => "mysql/audit_law_last_id.txt"
        # Whether to wipe the last_run_metadata_path record; must be false for incremental sync
        clean_run => false
        # Sync schedule in cron format (minute hour day-of-month month day-of-week); "* * * * *" runs every minute
        schedule => "* * * * *"
    }
    jdbc {
        type => "audit_basis"
        # Database connection URL
        jdbc_connection_string => "jdbc:mysql://192.168.0.196:3306/auditbase?characterEncoding=UTF-8&autoReconnect=true&useSSL=false"
        # Database user name and password
        jdbc_user => "aaaa"
        jdbc_password => "123456"
        # Path to the MySQL JDBC driver jar
        jdbc_driver_library => "lib/jars/mysql-connector-java-5.1.41-bin.jar"
        # The name of the driver class for MySQL
        jdbc_driver_class => "com.mysql.jdbc.Driver"
        # Force UTF-8 to avoid garbled Chinese characters
        codec => plain {charset => "UTF-8"}
        # Number of connection retry attempts
        connection_retry_attempts => "3"
        # Validate the database connection before use (default: false)
        jdbc_validate_connection => "true"
        # Connection validation timeout, default 3600 seconds
        jdbc_validation_timeout => "3600"
        # Enable paged queries (default: false)
        jdbc_paging_enabled => "true"
        # Rows per page (default 100000; lower it if rows are wide or change frequently)
        jdbc_page_size => "5000"
        # statement holds the query SQL; for complex SQL it is better to keep it in a separate file and reference it with statement_filepath
        # sql_last_value is a built-in variable holding the tracking_column value of the last row returned by the previous run, here PUBLISH_TIME
        # statement_filepath => "mysql/jdbc.sql"
        statement => "SELECT * FROM (SELECT a.ID,'02' as TYPE,a.QUESTION_NAME AS NAME,a.AUDIT_ITEM_NAME,b.AUDIT_LAW_NAME,GROUP_CONCAT(b.LAW_ITEM_TEXT SEPARATOR '\n') AS LAW_TEXT,MAX(b.PUBLISH_TIME) AS PUBLISH_TIME FROM audit_basis a LEFT JOIN audit_basis_detail b ON a.ID = b.AUDIT_BASIS_ID WHERE b.STATE_CODE = '02' GROUP BY a.ID) c WHERE PUBLISH_TIME > :sql_last_value ORDER BY PUBLISH_TIME ASC"
        # Whether to lowercase column names, default true (set to false if you need the original names for serialization/deserialization)
        lowercase_column_names => false
        # Value can be any of: fatal, error, warn, info, debug; default is info
        sql_log_level => warn
        # Whether to record state between runs; when true, the last tracking_column value is saved to the file given by last_run_metadata_path
        record_last_run => true
        # Set to true to track a column from the query results; when false, the timestamp of the last run is tracked instead
        use_column_value => true
        # Column to track for incremental sync; it must be a column returned by the query
        tracking_column => "PUBLISH_TIME"
        # Value can be any of: numeric, timestamp; default is "numeric"
        tracking_column_type => timestamp
        # File where record_last_run stores the last value
        last_run_metadata_path => "mysql/audit_basis_last_id.txt"
        # Whether to wipe the last_run_metadata_path record; must be false for incremental sync
        clean_run => false
        # Sync schedule in cron format (minute hour day-of-month month day-of-week); "* * * * *" runs every minute
        schedule => "* * * * *"
    }
}
filter {
    json {
        source => "message"
        remove_field => ["message"]
    }
    # Remove the automatically added "@timestamp" and "@version" fields
    # Never remove the "type" field; the output conditionals below depend on it and sync will fail without it
    mutate {
        remove_field => ["@timestamp", "@version"]
    }
}
output {
    # The type value here must match the type set in the corresponding jdbc input
    if [type] == "audit_law" {
        elasticsearch {
            # Elasticsearch cluster address(es)
            hosts => ["192.168.0.62:9200"]
            # Index name; must be lowercase
            index => "audit_law"
            # Unique document ID (use the database primary key)
            document_id => "%{ID}"
            template_overwrite => true
            template => "mysql/audit_law_mapping.json"
        }
    }
    if [type] == "audit_basis" {
        elasticsearch {
            # Elasticsearch cluster address(es)
            hosts => ["192.168.0.62:9200"]
            # Index name; must be lowercase
            index => "audit_basis"
            # Unique document ID (use the database primary key)
            document_id => "%{ID}"
            template_overwrite => true
            template => "mysql/audit_basis_mapping.json"
        }
    }
    stdout {
        codec => json_lines
    }
}
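
To run the job, save the pipeline above to a file and start Logstash with it; the file name here is only an example:

bin/logstash -f mysql/jdbc_sync.conf

On each scheduled run the jdbc inputs save the newest PUBLISH_TIME they have seen to their last_run_metadata_path files, and the next run resumes from that value through :sql_last_value, which is what makes the sync incremental.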

Example custom mapping file (mysql/audit_law_mapping.json)

{
  "settings": {
    "number_of_shards" : 1,
    "number_of_replicas" : 0,
    "analysis.char_filter":["html_strip"]
  },
  "mappings": {
    "properties": {
      "ID":{
        "type":"keyword"
      },
      "TYPE":{
        "type":"keyword"
      },
      "NAME":{
        "type": "text",
        "analyzer": "ik_max_word",
        "search_analyzer": "ik_smart"
      },
      "DOCUMENT_NO":{
        "type":"keyword"
      },
      "INDUSTRY_NAME":{
        "type": "text",
        "analyzer": "ik_max_word",
        "search_analyzer": "ik_smart"
      },
      "REGION_NAME":{
        "type": "text",
        "analyzer": "ik_max_word",
        "search_analyzer": "ik_smart"
      },
      "CONTENT":{
        "type": "text",
        "analyzer": "ik_max_word",
        "search_analyzer": "ik_smart"
      },
      "PUBLISH_TIME":{
        "type":"date"
      }
    }
  }
}
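
The audit_basis output references a second mapping file, mysql/audit_basis_mapping.json. It follows the same pattern; here is a sketch built from the field names returned by the audit_basis query above, simply mirroring the analyzer choices of the audit_law mapping (treat it as an assumption, not the exact file used):

{
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 0
  },
  "mappings": {
    "properties": {
      "ID":              { "type": "keyword" },
      "TYPE":            { "type": "keyword" },
      "NAME":            { "type": "text", "analyzer": "ik_max_word", "search_analyzer": "ik_smart" },
      "AUDIT_ITEM_NAME": { "type": "text", "analyzer": "ik_max_word", "search_analyzer": "ik_smart" },
      "AUDIT_LAW_NAME":  { "type": "text", "analyzer": "ik_max_word", "search_analyzer": "ik_smart" },
      "LAW_TEXT":        { "type": "text", "analyzer": "ik_max_word", "search_analyzer": "ik_smart" },
      "PUBLISH_TIME":    { "type": "date" }
    }
  }
}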