重写慢日志解析程序,实现打印慢SQL信息及其所属数据库
分组自研的审计平台最近推出慢SQL优化的功能,topN慢SQL可以通过mysqldumpslow拿到,但由于mysqldumpslow输出的信息不包含数据库,这让程序后续的自动优化变得有些棘手。在观察了MySQL慢日志结构后,决定自己写一个python解析程序,返回的结果比mysqldumpslow的解析结果多出数据库名称这一列:
成都创新互联公司坚持“要么做到,要么别承诺”的工作理念,服务领域包括:成都网站设计、成都网站建设、企业官网、英文网站、手机端网站、网站推广等服务,满足客户于互联网时代的麟游网站设计、移动媒体设计的需求,帮助企业找到有效的互联网解决方案。努力成为您成熟可靠的网络建设合作伙伴!
Count: 15 Time=0.002961s (0.034505s) Lock=8.8e-05s (0.000767s) Rows=446 (6690), dbmgr[dbmgr]@10.33.46.179 mysql --SQL所属数据库
SHOW GLOBAL VARIABLES;
python版本:2.7
文件名称:slowParse.py --目前仅支持按query time取topN
代码内容:
import os
import sys
def _is_use_statement(stripped):
    """Return True when *stripped* is a standalone ``use <db>;`` statement."""
    if not stripped.endswith(";"):
        return False
    tokens = stripped.rstrip(";").split()
    return len(tokens) == 2 and tokens[0].lower() == "use"


def _parse_slow_log(path):
    """Group the entries of the slow log at *path* by statement text.

    Returns a dict mapping each statement text to a five-item list
    ``[query_times, lock_times, rows_sent, logons, db]``. The first three
    items are lists of numbers (one per occurrence), ``logons`` is a list of
    ``user[user]@host`` strings, and ``db`` is the database that was current
    when the statement was first seen ("None" if no ``use`` was seen yet).
    """
    statements = {}
    db = "None"
    in_entry = False   # True between a "# User@Host" header and the closing ";"
    sqltext = ""
    logon = None
    exec_info = None

    with open(path, "r") as log_file:
        for line in log_file:
            stripped = line.strip()

            # Only treat "use <db>;" as a database switch before any statement
            # text has been collected, so SQL fragments that merely contain
            # the word "use" are not mistaken for it.
            if not sqltext.strip() and _is_use_statement(stripped):
                db = getDB(stripped)
            elif "# User@Host" in line:
                # A header always starts a fresh entry; drop anything left
                # over from a previous entry that was never terminated.
                user, host = getUserAndHost(stripped)
                logon = user + "[" + user + "]@" + host
                exec_info = None
                sqltext = ""
                in_entry = True
            elif not in_entry:
                continue
            elif "# Query_time" in line:
                exec_info = getExecInfo(stripped)
            elif "# Time" in line or "SET timestamp" in line:
                continue
            elif stripped.endswith(";"):
                sqltext += stripped
                # Entries without a "# Query_time" line carry no statistics.
                if exec_info is not None:
                    samples = statements.setdefault(
                        sqltext, [[], [], [], [], db])
                    # Convert once here so max()/sum() below are numeric.
                    samples[0].append(float(exec_info[0]))
                    samples[1].append(float(exec_info[1]))
                    samples[2].append(int(exec_info[2]))
                    samples[3].append(logon)
                in_entry = False
                sqltext = ""
                exec_info = None
            else:
                sqltext += stripped + " "
    return statements


def _summarize(samples):
    """Collapse the per-occurrence samples of one statement into a tuple.

    The tuple is ``(count, max_query_time, total_query_time, max_lock_time,
    total_lock_time, max_rows, total_rows, first_logon, db)``.
    """
    query_times, lock_times, rows_sent, logons, db = samples
    return (
        len(query_times),
        max(query_times), sum(query_times),
        max(lock_times), sum(lock_times),
        max(rows_sent), sum(rows_sent),
        logons[0], db,
    )


def get_sql(slowlog, topN, log_dir="/var/mysql/data3306/"):
    """Write the top-N slowest statements of a MySQL slow log to a report.

    Entries are grouped by statement text and ranked by their maximum query
    time. The report ``<slowlog without extension>-top<topN>.txt`` is written
    into *log_dir*; each statement produces a summary line
    ``Count: c Time=max (total) Lock=max (total) Rows=max (total),
    user[user]@host db`` followed by the statement text on the next line.

    Args:
        slowlog: File name of the slow log, relative to *log_dir* (an
            absolute path bypasses *log_dir*, per ``os.path.join``).
        topN: Number of statements to report.
        log_dir: Directory holding the slow log and receiving the report.
            Defaults to the directory the script originally hard-coded.
    """
    statements = _parse_slow_log(os.path.join(log_dir, slowlog))
    summaries = dict(
        (sqltext, _summarize(samples))
        for sqltext, samples in statements.items()
    )
    # Rank by maximum query time (index 1 of the summary tuple), slowest first.
    ranked = sorted(
        summaries.items(), key=lambda item: item[1][1], reverse=True)[:topN]

    # splitext instead of slicing four characters, so names without a
    # three-letter extension are not truncated.
    report_name = os.path.splitext(slowlog)[0] + "-top" + str(topN) + ".txt"
    with open(os.path.join(log_dir, report_name), "w") as report:
        for sqltext, summary in ranked:
            (count, max_query, total_query, max_lock, total_lock,
             max_rows, total_rows, logon, db_name) = summary
            count_str = "Count: " + str(count)
            query_str = "Time=" + str(max_query) + "s (" + str(total_query) + "s)"
            lock_str = "Lock=" + str(max_lock) + "s (" + str(total_lock) + "s)"
            rows_str = "Rows=" + str(max_rows) + " (" + str(total_rows) + "),"
            report.write(
                count_str + " " + query_str + " " + lock_str + " " + rows_str
                + " " + logon + " " + db_name + "\n " + sqltext + "\n")
def getDB(line):
    """Return the database name from a ``use <db>;`` slow-log line.

    Args:
        line: The stripped ``use`` statement, e.g. ``"use mysql;"``.

    Returns:
        The database name without the trailing semicolon.

    Raises:
        IndexError: If the line has no token after ``use``.
    """
    # split() tolerates repeated whitespace; rstrip(";") removes the
    # terminator only when it is actually present, instead of blindly
    # dropping the last character.
    tokens = line.split()
    return tokens[1].rstrip(";")
def getUserAndHost(line):
    """Extract the user and client address from a ``# User@Host`` line.

    The line is split on single spaces. The user is the text before ``[`` in
    the third token; the address is the bracketed token two positions after
    the standalone ``@`` token. An empty ``[]`` is reported as "localhost".

    Args:
        line: The stripped header line, e.g.
            ``"# User@Host: root[root] @ localhost []"``.

    Returns:
        A ``(user, host)`` tuple of strings.
    """
    tokens = line.split(" ")
    user = tokens[2].split("[")[0]
    bracketed = tokens[tokens.index("@") + 2]
    host = "localhost" if bracketed == "[]" else bracketed[1:-1]
    return user, host
def getExecInfo(line):
    """Extract timing and row statistics from a ``# Query_time`` line.

    Values are looked up by their ``Name:`` label rather than by position,
    so the result does not depend on how many spaces separate the fields.

    Args:
        line: The stripped statistics line, e.g.
            ``"# Query_time: 0.1  Lock_time: 0.0 Rows_sent: 1  Rows_examined: 2"``.

    Returns:
        A ``(query_time, lock_time, rows_sent)`` tuple of strings.

    Raises:
        ValueError: If any of the three fields is missing from the line.
    """
    tokens = line.split()
    # Pair every "Name:" label with the token that follows it.
    fields = {}
    for label, value in zip(tokens, tokens[1:]):
        if label.endswith(":"):
            fields[label[:-1]] = value
    try:
        return fields["Query_time"], fields["Lock_time"], fields["Rows_sent"]
    except KeyError as missing:
        raise ValueError("slow log line lacks field %s: %r" % (missing, line))
if __name__ == '__main__':
    # Command line: slowParse.py <slowlog file name> <topN>
    # Exit with a short message instead of a traceback on bad arguments.
    if len(sys.argv) < 3:
        sys.exit("usage: python slowParse.py <slowlog> <topN>")
    try:
        topN = int(sys.argv[2])
    except ValueError:
        sys.exit("topN must be an integer, got: " + sys.argv[2])
    filename = sys.argv[1]
    get_sql(filename, topN)
使用:
python slowParse.py slow.log 5 --取top 5
文章标题:重写慢日志解析程序,实现打印慢SQL信息及其所属数据库
标题链接:http://scyanting.com/article/gsdged.html