AthenaでALBのアクセスログをクエリすると結果が空


直近のAthenaのALBアクセスログ見ようと思ってクエリしたら結果が空っぽになっていた。S3にログは随時上がってきているのに??


問題発生時期を遡っていくと2024/5/20あたりから、と思ったら2024/5/20にALBのバージョンアップでアクセスログにtraceability_idが追加となりカラム数の不一致が原因であった。
Querying Application Load Balancer logs - Amazon Athena


AWS公式のCREATE TABLE スニペットの通りtraceability_idを追加したテーブルを再構成すると無事にクエリは成功した。

新しいテーブル定義


CREATE EXTERNAL TABLE `alb-access-log-rev20240520`(
  `type` string COMMENT '', 
  `time` string COMMENT '', 
  `elb` string COMMENT '', 
  `client_ip` string COMMENT '', 
  `client_port` int COMMENT '', 
  `target_ip` string COMMENT '', 
  `target_port` int COMMENT '', 
  `request_processing_time` double COMMENT '', 
  `target_processing_time` double COMMENT '', 
  `response_processing_time` double COMMENT '', 
  `elb_status_code` string COMMENT '', 
  `target_status_code` string COMMENT '', 
  `received_bytes` bigint COMMENT '', 
  `sent_bytes` bigint COMMENT '', 
  `request_verb` string COMMENT '', 
  `request_url` string COMMENT '', 
  `request_proto` string COMMENT '', 
  `user_agent` string COMMENT '', 
  `ssl_cipher` string COMMENT '', 
  `ssl_protocol` string COMMENT '', 
  `target_group_arn` string COMMENT '', 
  `trace_id` string COMMENT '', 
  `domain_name` string COMMENT '', 
  `chosen_cert_arn` string COMMENT '', 
  `matched_rule_priority` string COMMENT '', 
  `request_creation_time` string COMMENT '', 
  `actions_executed` string COMMENT '', 
  `redirect_url` string COMMENT '', 
  `lambda_error_reason` string COMMENT '', 
  `target_port_list` string COMMENT '', 
  `target_status_code_list` string COMMENT '', 
  `classification` string COMMENT '', 
  `classification_reason` string COMMENT '', 
  -- 追加↓
  `traceability_id` string COMMENT '')
PARTITIONED BY ( 
  `log_date` string)
ROW FORMAT SERDE 
  'org.apache.hadoop.hive.serde2.RegexSerDe' 
WITH SERDEPROPERTIES ( 
  -- 変更↓
  'input.regex'='([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*):([0-9]*) ([^ ]*)[:-]([0-9]*) ([-.0-9]*) ([-.0-9]*) ([-.0-9]*) (|[-0-9]*) (-|[-0-9]*) ([-0-9]*) ([-0-9]*) \"([^ ]*) (.*) (- |[^ ]*)\" \"([^\"]*)\" ([A-Z0-9-_]+) ([A-Za-z0-9.-]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\" \"([^\"]*)\" ([-.0-9]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\" \"([^ ]*)\" \"([^s]+?)\" \"([^s]+)\" \"([^ ]*)\" \"([^ ]*)\" ?([^ ]*)?( .*)?') 
STORED AS INPUTFORMAT 
  'org.apache.hadoop.mapred.TextInputFormat' 
OUTPUTFORMAT 
  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
  's3://alb-access-log/AWSLogs/************/elasticloadbalancing/ap-northeast-1'
TBLPROPERTIES (
  'projection.enabled'='true', 
  'projection.log_date.format'='yyyy/MM/dd', 
  'projection.log_date.interval'='1', 
  'projection.log_date.interval.unit'='DAYS', 
  'projection.log_date.range'='NOW-1YEARS,NOW',
  'projection.log_date.type'='date', 
  'storage.location.template'='s3://alb-access-log/AWSLogs/************/elasticloadbalancing/ap-northeast-1/${log_date}', 
  'transient_lastDdlTime'='1717717328')

以前のテーブル定義


CREATE EXTERNAL TABLE `alb-access-log`(
  `type` string COMMENT '', 
  `time` string COMMENT '', 
  `elb` string COMMENT '', 
  `client_ip` string COMMENT '', 
  `client_port` int COMMENT '', 
  `target_ip` string COMMENT '', 
  `target_port` int COMMENT '', 
  `request_processing_time` double COMMENT '', 
  `target_processing_time` double COMMENT '', 
  `response_processing_time` double COMMENT '', 
  `elb_status_code` string COMMENT '', 
  `target_status_code` string COMMENT '', 
  `received_bytes` bigint COMMENT '', 
  `sent_bytes` bigint COMMENT '', 
  `request_verb` string COMMENT '', 
  `request_url` string COMMENT '', 
  `request_proto` string COMMENT '', 
  `user_agent` string COMMENT '', 
  `ssl_cipher` string COMMENT '', 
  `ssl_protocol` string COMMENT '', 
  `target_group_arn` string COMMENT '', 
  `trace_id` string COMMENT '', 
  `domain_name` string COMMENT '', 
  `chosen_cert_arn` string COMMENT '', 
  `matched_rule_priority` string COMMENT '', 
  `request_creation_time` string COMMENT '', 
  `actions_executed` string COMMENT '', 
  `redirect_url` string COMMENT '', 
  `lambda_error_reason` string COMMENT '', 
  `target_port_list` string COMMENT '', 
  `target_status_code_list` string COMMENT '', 
  `classification` string COMMENT '', 
  `classification_reason` string COMMENT '')
PARTITIONED BY ( 
  `log_date` string)
ROW FORMAT SERDE 
  'org.apache.hadoop.hive.serde2.RegexSerDe' 
WITH SERDEPROPERTIES ( 
  'input.regex'='([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*):([0-9]*) ([^ ]*)[:-]([0-9]*) ([-.0-9]*) ([-.0-9]*) ([-.0-9]*) (|[-0-9]*) (-|[-0-9]*) ([-0-9]*) ([-0-9]*) \"([^ ]*) ([^ ]*) (- |[^ ]*)\" \"([^\"]*)\" ([A-Z0-9-]+) ([A-Za-z0-9.-]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\" \"([^\"]*)\" ([-.0-9]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\" \"([^ ]*)\" \"([^s]+?)\" \"([^s]+)\" \"([^ ]*)\" \"([^ ]*)\"') 
STORED AS INPUTFORMAT 
  'org.apache.hadoop.mapred.TextInputFormat' 
OUTPUTFORMAT 
  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
  's3://alb-access-log/AWSLogs/************/elasticloadbalancing/ap-northeast-1'
TBLPROPERTIES (
  'projection.enabled'='true', 
  'projection.log_date.format'='yyyy/MM/dd', 
  'projection.log_date.interval'='1', 
  'projection.log_date.interval.unit'='DAYS', 
  'projection.log_date.range'='NOW-1YEARS,NOW',
  'projection.log_date.type'='date', 
  'storage.location.template'='s3://alb-access-log/AWSLogs/************/elasticloadbalancing/ap-northeast-1/${log_date}', 
  'transient_lastDdlTime'='1645361779')