Skip to content

Instantly share code, notes, and snippets.

@ryanschwartz
Created September 10, 2018 22:41
Show Gist options
  • Save ryanschwartz/3e0ae5a5ac416a72e56ed5830ade9e35 to your computer and use it in GitHub Desktop.
Save ryanschwartz/3e0ae5a5ac416a72e56ed5830ade9e35 to your computer and use it in GitHub Desktop.
UDF for a Pub/Sub to BigQuery pipeline
/**
 * Normalizes a single JSON-encoded log event for loading into BigQuery.
 *
 * Steps performed, in order:
 *   1. Events whose "request" matches a known noise request (health checks,
 *      robots.txt) are filtered: the function returns undefined for them.
 *   2. Unwanted top-level keys (websocket/pubsub/fling bookkeeping) are removed.
 *   3. "@timestamp" is renamed to "timestamp", "status" to "http_status" and
 *      "fling.source" to "fling_source".
 *   4. A top-level "message" is moved to "json.message" (the "json" object is
 *      created when the event does not have one).
 *   5. "json.docker_log_auxiliary_field" is removed when it holds the
 *      placeholder string "<no value>".
 *
 * NOTE(review): presumably this runs as a UDF of a Pub/Sub-to-BigQuery
 * pipeline where an undefined return drops the element, and where the JS
 * engine may be ES5-only; the code therefore sticks to ES5 constructs.
 * Confirm against the pipeline's runtime.
 *
 * @param {string} inJson JSON text of one log event (an object).
 * @return {string|undefined} outJson JSON text of the normalized event, or
 *     undefined when the event is filtered out.
 * @throws {SyntaxError} if inJson is not valid JSON (raised by JSON.parse).
 *
 * Sample event: {"@timestamp":"2018-09-07T00:44:27+00:00","appname":"reporter-dev","body_bytes_sent":"5","client_ip":"10.142.0.19","fling.source":"/webapp/log/nginx/access.jlog","forwarded_for_ip":"-","generator":"nginx","http_host":"10.142.0.19","http_referrer":"-","http_user_agent":"GoogleHC/1.0","json":{"docker_log_auxiliary_field":"\u003cno value\u003e"},"remote_user":"-","request":"GET /healthz HTTP/1.1","request_method":"GET","request_time":"0.000","srchost":"reporter-dev-app-7cbb779c7-9dmmx","status":"200"}
 * Sample event escaped: "{\"@timestamp\":\"2018-09-07T00:44:27+00:00\",\"appname\":\"reporter-dev\",\"body_bytes_sent\":\"5\",\"client_ip\":\"10.142.0.19\",\"fling.source\":\"/webapp/log/nginx/access.jlog\",\"forwarded_for_ip\":\"-\",\"generator\":\"nginx\",\"http_host\":\"10.142.0.19\",\"http_referrer\":\"-\",\"http_user_agent\":\"GoogleHC/1.0\",\"json\":{\"docker_log_auxiliary_field\":\"\u003cno value\u003e\"},\"remote_user\":\"-\",\"request\":\"GET /healthz HTTP/1.1\",\"request_method\":\"GET\",\"request_time\":\"0.000\",\"srchost\":\"reporter-dev-app-7cbb779c7-9dmmx\",\"status\":\"200\"}"
 */
function transform(inJson) {
  // Requests that are pure noise; events carrying one of these are dropped.
  var bad_requests = ["GET /healthz HTTP/1.1", "GET /robots.txt HTTP/1.1"];
  // Top-level keys that are removed from every event.
  var bad_fields = ["websocket_version", "websocket_key", "http_upgrade", "pubsub_topic", "fling_version"];
  // Borrowed so the check still works if an event has its own "hasOwnProperty" key.
  var hasOwn = Object.prototype.hasOwnProperty;
  var i;

  var obj = JSON.parse(inJson);

  // Filter first so no work is spent on events that are discarded anyway.
  // indexOf (not Array.prototype.includes) keeps this runnable on ES5 engines.
  if (bad_requests.indexOf(obj.request) !== -1) {
    return undefined;
  }

  // Drop the unwanted bookkeeping keys.
  for (i = 0; i < bad_fields.length; i++) {
    delete obj[bad_fields[i]];
  }

  // Get rid of that pesky '@'. Only rename when the source key exists so an
  // already-normalized "timestamp" is not overwritten with undefined.
  if (hasOwn.call(obj, '@timestamp')) {
    obj.timestamp = obj['@timestamp'];
    delete obj['@timestamp'];
  }

  // If there's a message key, move it under the "json" field, creating that
  // field when the event does not carry one.
  if (hasOwn.call(obj, 'message')) {
    if (obj.json === undefined || obj.json === null) {
      obj.json = {};
    }
    obj.json.message = obj.message;
    delete obj.message;
  }

  // "status" really is the HTTP status.
  if (hasOwn.call(obj, 'status')) {
    obj.http_status = obj.status;
    delete obj.status;
  }

  // Rename "fling.source" to "fling_source" (no dot in the key).
  if (hasOwn.call(obj, 'fling.source')) {
    obj.fling_source = obj['fling.source'];
    delete obj['fling.source'];
  }

  // Remove json.docker_log_auxiliary_field when it only holds the
  // "<no value>" placeholder. The null/type guard avoids a TypeError when
  // "json" is present but not an object.
  if (obj.json !== null && typeof obj.json === 'object' &&
      hasOwn.call(obj.json, 'docker_log_auxiliary_field') &&
      obj.json.docker_log_auxiliary_field === "\u003cno value\u003e") {
    delete obj.json.docker_log_auxiliary_field;
  }

  return JSON.stringify(obj);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment