Last active
February 1, 2022 13:59
-
-
Save zwjzxh520/4444e276db0db5423dfc3dd0e437408d to your computer and use it in GitHub Desktop.
php写的nginx 日志分析
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Nginx log analyser.
 *
 * Parses access-log text according to an nginx `log_format` string, parses
 * nginx error-log entries (including entries that span several physical
 * lines, such as PHP fatal-error stack traces), and offers a chunked reader
 * for large log files.
 */
class NginxLog
{
    /** @var string Line separator used when splitting log text into lines. */
    protected static $br = "\n";

    /**
     * Parse nginx access-log text using the given log format.
     *
     * Format example:
     * "$remote_addr" $request_time - $remote_user d [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent" "$http_x_forwarded_for"
     *
     * @param string|array $log    Access-log text (lines separated by "\n") or an array of lines
     * @param string       $format Access-log format as written in the nginx configuration
     * @return array One entry per non-empty line (keyed by line index); each entry maps
     *               the variable name (including the leading `$`) to the logged value
     */
    public function analysisAccess($log, $format)
    {
        $logVars = $this->parseFormat($format);
        $lines = is_array($log) ? $log : explode(self::$br, (string) $log);
        $logArr = [];

        foreach ($lines as $lineNum => $line) {
            $line = trim($line);
            if ($line === '') {
                continue;
            }

            $lineLen = strlen($line);
            // Current offset inside the log line.
            $linePos = 0;
            foreach ($logVars as $var => $info) {
                // Skip the literal characters that precede this variable.
                $linePos += $info['prevLen'];
                if ($linePos >= $lineLen) {
                    // The line is shorter than the format expects.
                    $logArr[$lineNum][$var] = '';
                    continue;
                }

                // The value ends where the character following the variable in the format appears.
                $varLogEnd = $info['endChar'] === ''
                    ? false
                    : strpos($line, $info['endChar'], $linePos);

                if ($varLogEnd === false) {
                    // Last variable of the format (or terminator missing): take the rest of the line.
                    $logArr[$lineNum][$var] = (string) substr($line, $linePos);
                    $linePos = $lineLen;
                    continue;
                }

                $logArr[$lineNum][$var] = (string) substr($line, $linePos, $varLogEnd - $linePos);
                // The next variable's prevLen is counted from the terminator onwards.
                $linePos = $varLogEnd;
            }
        }

        return $logArr;
    }

    /**
     * Parse nginx error-log lines.
     *
     * Some error messages are split over several physical lines (for example the
     * stack trace of a PHP fatal error). A line is treated as the end of an entry
     * when it contains at least two double quotes; lines that do not are buffered
     * in a static variable and prepended to the next completing line. Because the
     * buffer is static it survives between calls (and is shared by all instances),
     * which lets an entry span two consecutive chunks of a file. Incomplete entries
     * are not analysed. The buffer is cleared once a complete entry is assembled,
     * or explicitly by passing $reset = true.
     *
     * @param array   $logLines Error-log lines
     * @param boolean $reset    Whether to discard the incomplete entry kept from a previous call
     * @return array List of parsed entries (see parseLineError() for the entry layout)
     */
    public function analysisError($logLines, $reset = false)
    {
        static $mergeLog = '';

        // Minimum number of double quotes that marks a line as completing an entry.
        $yinhaoCount = 2;
        $result = [];

        if ($reset) {
            $mergeLog = '';
        }

        foreach ($logLines as $log) {
            if (substr_count($log, '"') < $yinhaoCount) {
                // Not the end of an entry yet: keep it for later.
                $mergeLog .= $log."\n";
                continue;
            }

            if ($mergeLog !== '') {
                $log = $mergeLog.$log;
                $mergeLog = '';
            }

            $logResult = $this->parseLineError($log);
            if (!empty($logResult)) {
                $result[] = $logResult;
            }
        }

        return $result;
    }

    /**
     * Parse one complete error-log entry.
     *
     * A complete entry starts with a timestamp (yyyy/mm/dd hh:mm:ss) and contains a
     * double-quoted description; anything else yields an empty array.
     *
     * Result layout:
     * [
     *   'datetime' => Unix timestamp of the error (strtotime() of the leading date)
     *   'type'     => nginx error level, e.g. "error"
     *   'fd'       => the two tokens following the level, e.g. "17890#0: *1424957037"
     *   'msg'      => text between those tokens and the first double quote
     *   'desc'     => text inside the first pair of double quotes
     *   'http'     => array of HTTP request details (see parseErrorHTTP())
     *   'phperr'   => PHP error type, or '' when the entry is not a PHP error
     * ]
     *
     * @param string $log One complete error-log entry
     * @return array Parsed entry, or [] when the entry cannot be parsed
     */
    protected function parseLineError($log)
    {
        $datetimeLen = 19;           // length of "yyyy/mm/dd hh:mm:ss"
        $metaPos = $datetimeLen + 1; // the meta section starts after the date and one separator
        $result = [];

        $datetime = (string) substr($log, 0, $datetimeLen);
        if (!$this->isDateTimeFormat($datetime) || strlen($log) <= $metaPos) {
            return $result;
        }

        // The description is the first double-quoted section after the meta part.
        $metaEndPos = strpos($log, '"', $metaPos);
        if ($metaEndPos === false) {
            return $result;
        }
        $errorDescEndPos = strpos($log, '"', $metaEndPos + 1);
        if ($errorDescEndPos === false) {
            return $result;
        }

        $metaStr = trim(substr($log, $metaPos, $metaEndPos - $metaPos));
        // Pad so that a short meta section cannot trigger undefined-offset errors.
        $metaArr = array_pad(explode(' ', $metaStr), 3, '');

        $result['datetime'] = strtotime($datetime);
        // Error level with the surrounding brackets removed.
        $result['type'] = (string) substr($metaArr[0], 1, -1);
        $result['fd'] = trim($metaArr[1].' '.$metaArr[2]);
        $result['msg'] = rtrim(implode(' ', array_slice($metaArr, 3)), ':');
        $result['desc'] = (string) substr($log, $metaEndPos + 1, $errorDescEndPos - $metaEndPos - 1);
        $result['http'] = $this->parseErrorHTTP(trim((string) substr($log, $errorDescEndPos + 1)));
        $result['phperr'] = $this->getPHPErrorType($result['desc']);

        return $result;
    }

    /**
     * Extract the PHP error type from an error description.
     *
     * Only descriptions of the form "PHP message: PHP <type>: ..." are recognised;
     * for these the text between "PHP message: PHP " and the next colon is returned
     * (e.g. "Fatal error").
     *
     * @param string $str Error description
     * @return string PHP error type, or '' when the description is not a PHP error
     */
    protected function getPHPErrorType($str)
    {
        $prefix = 'PHP message: PHP ';
        $prefixLen = strlen($prefix); // 17

        if (strncmp($str, $prefix, $prefixLen) !== 0) {
            return '';
        }

        $colonPos = strpos($str, ':', $prefixLen);
        if ($colonPos === false) {
            return '';
        }

        return substr($str, $prefixLen, $colonPos - $prefixLen);
    }

    /**
     * Parse the HTTP request details of an error-log entry, i.e. the text that
     * follows the quoted description, usually running from "client:" to the end.
     *
     * Result layout (which keys are present depends on the log entry; keys are
     * taken verbatim from the log):
     * [
     *   0          => text between the description and the "client:" keyword
     *   'client'   => client IP
     *   'server'   => server name
     *   'method'   => request method
     *   'uri'      => request URI
     *   'httpver'  => HTTP version
     *   'upstream' => upstream address
     *   'host'     => Host header
     *   'referrer' => referring URL
     * ]
     *
     * @param string $http
     * @return array
     */
    protected function parseErrorHTTP($http)
    {
        $result = [];

        $split = strpos($http, 'client:');
        if ($split === false) {
            // No request details at all: keep the text as the free-form part.
            $result[0] = $http;
            return $result;
        }

        // Drop the ", " separator that precedes "client:".
        $result[0] = rtrim((string) substr($http, 0, $split), ', ');

        // Split only on commas that introduce a new "key: " pair, so commas inside
        // values (URLs, query strings) stay intact.
        $pairs = preg_split('/,\s*(?=[A-Za-z_]+: )/', substr($http, $split));
        if ($pairs === false) {
            $pairs = explode(',', substr($http, $split));
        }

        foreach ($pairs as $pair) {
            $pos = strpos($pair, ': ');
            if ($pos === false) {
                continue;
            }
            $key = trim(substr($pair, 0, $pos));
            $val = trim((string) substr($pair, $pos + 2), '"');
            if ('request' === $key) {
                $result = array_merge($result, $this->parseRequest($val));
            } else {
                $result[$key] = $val;
            }
        }

        return $result;
    }

    /**
     * Parse a request line such as "GET http:/www.baidu.com HTTP/1.1".
     *
     * Result layout:
     * [
     *   'method'  => request method
     *   'uri'     => request URI
     *   'httpver' => HTTP version
     * ]
     *
     * @param string $request Example: "GET http:/www.baidu.com HTTP/1.1"
     * @return array
     */
    protected function parseRequest($request)
    {
        $split = ' ';
        $firstPos = strpos($request, $split);

        if ($firstPos === false) {
            // No separator at all: the text cannot be split into method and URI.
            return [
                'method' => '',
                'uri' => '',
                'httpver' => trim($request),
            ];
        }

        $lastPos = strrpos($request, $split);
        if ($lastPos === $firstPos) {
            // Only two tokens ("GET /"): there is no version part.
            return [
                'method' => substr($request, 0, $firstPos),
                'uri' => trim((string) substr($request, $firstPos)),
                'httpver' => '',
            ];
        }

        return [
            'method' => substr($request, 0, $firstPos),
            'uri' => trim(substr($request, $firstPos, $lastPos - $firstPos)),
            'httpver' => trim(substr($request, $lastPos)),
        ];
    }

    /**
     * Check whether a string is a timestamp in the fixed format yyyy/mm/dd hh:mm:ss
     * (for example "2016/09/06 00:32:19"). Only the shape is checked, not whether
     * the values form a valid calendar date.
     *
     * @param string $str String to check
     * @return boolean
     */
    protected function isDateTimeFormat($str)
    {
        return preg_match('#\A\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}\z#', (string) $str) === 1;
    }

    /**
     * Analyse an access-log format: collect the variable names and, for each one,
     * what is needed to locate its value inside a log line.
     *
     * A variable starts at `$` and continues over letters, digits, `_` and `$`.
     *
     * @param string $format Log format from the nginx configuration
     * @return array Map of variable name (with leading `$`) to
     *               ['endChar' => character right after the variable in the format
     *                             ('' for a variable at the very end),
     *                'prevLen' => number of literal characters to skip before the value,
     *                             counted from the previous variable's terminator]
     */
    public function parseFormat($format)
    {
        $varCharList = 'abcdefghijklmnopqrstuvwxyz_$ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789';
        $return = [];
        $formatLen = strlen($format);
        $logVar = '';
        $prevStrLen = 0;

        for ($i = 0; $i < $formatLen; $i++) {
            $char = $format[$i];
            $isVarChar = strpos($varCharList, $char) !== false;

            // A variable only begins with `$`; other name characters outside a variable are literals.
            if ($isVarChar && ($logVar !== '' || $char === '$')) {
                $logVar .= $char;
                continue;
            }

            if ($logVar === '') {
                $prevStrLen++;
                continue;
            }

            // This literal terminates the variable collected so far; its position in a
            // log line marks where the variable's value ends.
            $return[$logVar]['endChar'] = $char;
            $return[$logVar]['prevLen'] = $prevStrLen;
            // The terminator itself is the first literal in front of the next variable.
            $prevStrLen = 1;
            $logVar = '';
        }

        if ($logVar !== '') {
            // Variable at the very end of the format: its value runs to the end of the line.
            $return[$logVar]['endChar'] = '';
            $return[$logVar]['prevLen'] = $prevStrLen;
        }

        return $return;
    }

    /**
     * Read a large file in chunks of complete lines.
     *
     * Set only one of $pos and $startLine, otherwise the result may not be what
     * you expect. When $pos is given, reading starts at the beginning of the line
     * that contains that byte; if the byte at $pos is itself a line break, reading
     * starts right after it.
     *
     * Nothing is yielded when the file cannot be opened.
     *
     * @param string  $filepath  Absolute path of the file
     * @param integer $pos       Byte offset to start reading from
     * @param integer $startLine First line to return (1-based)
     * @return Generator Yields strings that each hold one or more complete lines
     *                   separated by "\n"
     */
    public function readBigFile($filepath, $pos = 0, $startLine = 1)
    {
        $handle = fopen($filepath, 'rb');
        if (!$handle) {
            return;
        }

        $br = self::$br;
        $brLen = strlen($br);
        $blockLen = 20480; // bytes read per block
        $carry = '';       // incomplete line left over from the previous block
        $skip = max(0, (int) $startLine - 1); // lines still to be skipped

        try {
            if ($pos > 0) {
                $this->seekToLineStart($handle, $pos);
            }

            while (true) {
                $data = fread($handle, $blockLen);
                if ($data === false || $data === '') {
                    break;
                }

                $data = $carry.$data;
                $carry = '';

                if (feof($handle)) {
                    // Last block: everything that is left belongs to the result.
                    $chunk = $data;
                } else {
                    $lastBrPos = strrpos($data, $br);
                    if ($lastBrPos === false) {
                        // No complete line yet (line longer than a block): keep collecting.
                        $carry = $data;
                        continue;
                    }
                    $carry = (string) substr($data, $lastBrPos + $brLen);
                    $chunk = substr($data, 0, $lastBrPos);
                }

                if ($skip > 0) {
                    list($chunk, $skip) = $this->dropLeadingLines($chunk, $skip);
                    if ($chunk === null) {
                        continue;
                    }
                }

                yield $chunk;
            }

            // End of file was reached exactly on a block boundary: flush the remainder.
            if ($carry !== '') {
                if ($skip > 0) {
                    list($carry, $skip) = $this->dropLeadingLines($carry, $skip);
                }
                if ($carry !== null) {
                    yield $carry;
                }
            }
        } finally {
            // Also runs when the consumer abandons the generator early.
            fclose($handle);
        }
    }

    /**
     * Move the file pointer to the start of the line containing byte $pos by
     * scanning backwards for a line break; rewinds when none is found.
     *
     * @param resource $handle Open file handle
     * @param integer  $pos    Byte offset inside the file
     * @return void
     */
    protected function seekToLineStart($handle, $pos)
    {
        for ($offset = $pos; $offset >= 0; $offset--) {
            fseek($handle, $offset);
            $c = fread($handle, 1);
            if ($c === "\n" || $c === "\r") {
                // Reading the line break left the pointer on the first byte after it.
                return;
            }
        }

        rewind($handle);
    }

    /**
     * Remove up to $skip leading lines from a chunk of lines.
     *
     * @param string  $chunk Lines separated by the class line separator
     * @param integer $skip  Number of lines to remove
     * @return array [remaining text, or null when nothing is left; lines still to skip]
     */
    protected function dropLeadingLines($chunk, $skip)
    {
        $brLen = strlen(self::$br);
        $offset = 0;

        while ($skip > 0) {
            $brPos = strpos($chunk, self::$br, $offset);
            if ($brPos === false) {
                // The rest of the chunk is one unterminated line, which is skipped as well.
                return [null, $skip - 1];
            }
            $offset = $brPos + $brLen;
            $skip--;
        }

        if ($offset >= strlen($chunk)) {
            return [null, 0];
        }

        return [substr($chunk, $offset), 0];
    }
}
// ---------------------------------------------------------------------------
// Demo: access-log parsing
// ---------------------------------------------------------------------------
$format = '"$remote_addr" $request_time - $remote_user d [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent" "$http_x_forwarded_for"';
// Nowdoc: the sample is raw log data and must never be interpolated.
$log = <<<'LOG'
"192.168.110.1" 0.040 - - d [05/Aug/2016:08:22:54 +0800] "GET /uc_server/data/avatar/006/40/73/36_real_avatar_small.jpg HTTP/1.0" 404 6664 "-" "-" "-"
"36.149.107.72" 3.016 - - d [05/Aug/2016:08:22:54 +0800] "GET /cardniu/api/api_splashinterface.php?udid=deviceId-866968026335845-generate-cardniu&systemName=android+OS&systemVersion=4.4.4&productName=Cardniu&productVersion=4&position=KNSQTZXQB&positionList=1&chanelSys=shequ HTTP/1.1" 200 5 "http://www.baidu.com/cardniu/detail.php?tid=552221&utm_source=552221&utm_medium=ribao&utm_campaign=xiaoxi" "Mozilla/5.0 (Linux; Android 4.4.4; A31 Build/KTU84P) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/33.0.0.0 Mobile Safari/537.36 feideeAndroid-V4 MymoneySms7.3.2-oppo" "-"
"192.168.110.1" 0.040 - - d [05/Aug/2016:08:22:54 +0800] "GET //uc_server/data/avatar/005/86/04/56_real_avatar_small.jpg HTTP/1.0" 404 6664 "-" "-" "-"
LOG;
// $log = file_get_contents('test.txt');

$startTime = microtime(true);
// var_export( (new NginxLog) -> parseFormat($format) );
var_export((new NginxLog)->analysisAccess($log, $format));
echo microtime(true) - $startTime;

// ---------------------------------------------------------------------------
// Demo: error-log parsing
// ---------------------------------------------------------------------------
// $logFile = __DIR__.'/../testdata/nginx_access.log';
$errorLogFile = __DIR__.'/../testdata/www.baidu.com_error_2016-09-05.log';
// Built-in sample, used when the log file above is not available.
$sampleErrorLog = <<<'LOG'
2016/09/05 14:52:52 [error] 17890#0: *1424957037 FastCGI sent in stderr: "PHP message: PHP Fatal error: Uncaught exception 'DbException' with message 'Duplicate entry '240319164' for key 'sid'' in /var/www/html/bbs/source/class/db/db_driver_mysql.php:218
Stack trace:
#0 /var/www/html/bbs/source/class/db/db_driver_mysql.php(151): db_driver_mysql->halt('Duplicate entry...', 1062, 'UPDATE pre_com...')
#1 /var/www/html/bbs/source/class/db/db_driver_mysql_slave.php(62): db_driver_mysql->query('UPDATE pre_com...', false, false)
#2 /var/www/html/bbs/source/class/discuz/discuz_database.php(179): db_driver_mysql_slave->query('UPDATE pre_com...', false, false)
#3 /var/www/html/bbs/source/class/discuz/discuz_database.php(102): discuz_database::query('UPDATE pre_com...', '')
#4 /var/www/html/bbs/source/class/discuz/discuz_table.php(52): discuz_database::update('common_member', Array, '`uid`='10929277...', false, false)
#5 /var/www/html/bbs/m/register_by_email.php(72): discuz_table->update(10929277, Array)
#6 /var/www/html/bbs/m/function/ssj_function.php(788): register_by_ssjuser('', '" while reading response header from upstream, client: 192.168.241.97, server: www.baidu.com, request: "POST /m/api/credit.php HTTP/1.1", upstream: "fastcgi://127.0.0.1:9000", host: "www.baidu.com"
2016/09/06 05:11:54 [error] 10527#0: *915961403 open() "/var/www/html/cardniu/thread-544114" failed (2: No such file or directory), client: 180.153.205.253, server: www.baidu.com, request: "GET /thread-544114?10000skip=true-1-1.html HTTP/1.1", host: "www.baidu.com", referrer: "http://www.baidu.com/cardniu/detail.php?tid=544114?10000skip=true"
2016/09/06 06:22:15 [error] 10535#0: *916177977 FastCGI sent in stderr: "Primary script unknown" while reading response header from upstream, client: 111.127.121.50, server: www.baidu.com, request: "GET /cardniu/api/home.php?mod=spacecp&ac=usergroup&do=expiry HTTP/1.1", upstream: "fastcgi://127.0.0.1:9000", host: "www.baidu.com", referrer: "http://www.baidu.com/cardniu/kn_daily.php?stamptime=2016-09-05&udid=deviceId-868979027115530-generate-cardniu&bankcode=SPD,CMB,CEB"
LOG;

$startTime = microtime(true);
$nginxLog = new NginxLog;
$logNum = 0;
$phperrs = [];

// Read the real log file when it exists; otherwise analyse the sample as a single chunk.
$chunks = is_readable($errorLogFile)
    ? $nginxLog->readBigFile($errorLogFile)
    : [$sampleErrorLog];

foreach ($chunks as $chunk) {
    $logLines = explode("\n", trim($chunk));
    // Counts physical lines, not merged log entries.
    $logNum += count($logLines);
    foreach ($nginxLog->analysisError($logLines) as $entry) {
        if (!empty($entry['phperr'])) {
            $phperrs[] = $entry;
        }
    }
}

echo 'finish. cost time:'.(microtime(true) - $startTime)."\n";
echo '日志数量:'.$logNum."\n";
echo 'php错误日志数量:'.count($phperrs)."\n";
var_export($phperrs);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment