🚀 Python腳本實現數據處理與可視化
本腳本的核心目標是通過一系列數據庫查詢操作,獲取服務端設備的相關信息,計算區域分佈百分比和變異係數,並使用pyecharts
庫進行數據可視化展示。
🚀 快速開始
功能概述
腳本主要完成以下幾個步驟:
- 從
db_alpha
庫中查詢服務端設備的ID和所屬組號。
- 針對每臺服務端設備,在
db_beta
庫中查詢其區域分佈情況。
- 處理區域為空的情況,從
db_alpha
庫中補充缺失的區域信息。
- 合併兩部分結果,計算區域分佈百分比和變異係數。
- 使用
pyecharts
庫繪製單臺服務端設備和每個組的區域分佈百分比圖。
- 計算每個組的平均變異係數,並繪製柱狀圖。
代碼實現
import pymysql
import pandas as pd
from scipy.stats import variation
from pyecharts.charts import Pie, Bar
from pyecharts import options as opts
def connect_db(host, user, password, database):
return pymysql.connect(host=host, user=user, password=password, database=database)
def execute_query(connection, query):
with connection.cursor() as cursor:
cursor.execute(query)
return cursor.fetchall()
alpha_conn = connect_db('host_alpha', 'user_alpha', 'password_alpha', 'db_alpha')
server_query = """
select ex.device_id as id, ex.group_no
from service_config_alpha ex
where ex.is_active = 1
and ex.status = 1
union all
select dv.id, dv.group_no
from device_config_beta dv
where dv.is_active = 1
and dv.status = 1
"""
servers = execute_query(alpha_conn, server_query)
alpha_conn.close()
for server in servers:
server_id = server[0]
beta_conn = connect_db('host_beta', 'user_beta', 'password_beta', 'db_beta')
beta_query = f"""
select loc.region,
count(1)
from connection_log cnt
left join device_location loc on loc.id = cnt.client_id
where server_id = {server_id}
group by loc.region
"""
beta_result = execute_query(beta_conn, beta_query)
null_client_ids = [row[0] for row in beta_result if row[0] is None]
alpha_conn = connect_db('host_alpha', 'user_alpha', 'password_alpha', 'db_alpha')
alpha_query = f"""
select loc.region,
count(1)
from device_location loc
where loc.id in ({','.join(map(str, null_client_ids))})
group by loc.region
"""
alpha_result = execute_query(alpha_conn, alpha_query)
alpha_conn.close()
combined_result = beta_result + alpha_result
total_count = sum([row[1] for row in combined_result])
percentages = [(row[0], row[1] / total_count) for row in combined_result]
counts = [row[1] for row in combined_result]
cv = variation(counts)
pie = (
Pie()
.add(
"區域分佈",
[list(z) for z in zip([row[0] for row in percentages], [row[1] for row in percentages])],
radius=["30%", "75%"],
)
.set_global_opts(
title_opts=opts.TitleOpts(title=f"Server {server_id} 區域分佈百分比"),
legend_opts=opts.LegendOpts(orient="vertical", pos_top="15%", pos_left="2%"),
toolbox_opts=opts.ToolboxOpts(is_show=True)
)
.set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {d}%"))
)
pie.render(f"server_{server_id}_region_distribution.html")
beta_conn.close()
group_data = {}
for server in servers:
server_id = server[0]
group_no = server[1]
if group_no not in group_data:
group_data[group_no] = []
group_data[group_no].extend(combined_result)
group_cvs = {}
for group_no, data in group_data.items():
counts = [row[1] for row in data]
cv = variation(counts)
group_cvs[group_no] = cv
total_count = sum([row[1] for row in data])
percentages = [(row[0], row[1] / total_count) for row in data]
pie = (
Pie()
.add(
"區域分佈",
[list(z) for z in zip([row[0] for row in percentages], [row[1] for row in percentages])],
radius=["30%", "75%"],
)
.set_global_opts(
title_opts=opts.TitleOpts(title=f"Group {group_no} 區域分佈百分比"),
legend_opts=opts.LegendOpts(orient="vertical", pos_top="15%", pos_left="2%"),
toolbox_opts=opts.ToolboxOpts(is_show=True)
)
.set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {d}%"))
)
pie.render(f"group_{group_no}_region_distribution.html")
bar = (
Bar()
.add_xaxis(list(group_cvs.keys()))
.add_yaxis("平均變異係數", list(group_cvs.values()))
.set_global_opts(
title_opts=opts.TitleOpts(title="每個組的平均變異係數"),
xaxis_opts=opts.AxisOpts(name="組號"),
yaxis_opts=opts.AxisOpts(name="平均變異係數"),
toolbox_opts=opts.ToolboxOpts(is_show=True)
)
)
bar.render("group_avg_cv.html")
注意事項
- 請根據實際情況修改數據庫連接信息(主機名、用戶名、密碼等)。
- 確保
pymysql
和pyecharts
庫已正確安裝。
- 代碼中的SQL語句可能需要根據實際數據庫表結構進行調整。