math-modeling-competition-C.../main.py

319 lines
15 KiB
Python
Raw Normal View History

2022-07-06 06:40:41 +00:00
"""
团队成员: 王岩琪 袁欣萍 郭禹含 龚智勋
"""
import pandas as pd
import numpy as np
# Default location of the competition workbook; pass another path to main()
# to run the analysis on a different copy of the file.
_DEFAULT_DATA_PATH = "/home/bobmaster/Downloads/数学建模/附件1化学成分及力学性能.xlsx"

# (key, plot label) of the three mechanical properties, in the order of their
# columns (2, 3, 4) in the mechanical-property sheet: yield, tensile, elongation.
_PROPERTIES = (("qufu", "屈服"), ("kangla", "抗拉"), ("yanshen", "延伸率"))

# Number of chemical elements (E1..E6) read from the chemical-composition sheet.
_ELEMENT_COUNT = 6


def _summarise_physics(physics):
    """Return ``{melt number: (means, stds)}`` for the mechanical properties.

    ``physics`` is the cleaned mechanical-property sheet: column 0 holds the
    melt number and columns 2-4 hold the three properties.  Rows are read by
    position and pooled per melt number, so the result depends neither on a
    fixed row count nor on the index labels left behind by ``dropna``, and
    samples of one melt number are pooled even if their rows are not adjacent.

    ``means`` and ``stds`` are three-element lists ordered like
    ``_PROPERTIES``.  The standard deviation is NumPy's default population
    form (``ddof=0``).
    """
    melt_numbers = physics.iloc[:, 0].astype("int64")
    columns = [physics.iloc[:, position] for position in (2, 3, 4)]

    samples = {}
    for melt, *values in zip(melt_numbers, *columns):
        buckets = samples.setdefault(melt, ([], [], []))
        for bucket, value in zip(buckets, values):
            bucket.append(value)

    summary = {}
    for melt, buckets in samples.items():
        arrays = [np.array(bucket) for bucket in buckets]
        summary[melt] = (
            [array.mean() for array in arrays],
            [array.std() for array in arrays],
        )
    return summary


def _average_chemistry_pairs(chemical):
    """Return ``{melt number: array of the six averaged element contents}``.

    Rows ``2k`` and ``2k + 1`` of ``chemical`` are averaged and assigned to the
    ``k``-th distinct melt number (column 0, in order of first appearance).
    The number of pairs is derived from the data instead of being hard-coded.

    NOTE(review): this positional pairing assumes every melt number occupies
    exactly two consecutive rows -- confirm against the workbook; if a melt
    number can have a different number of rows, group by melt number instead.
    """
    melt_numbers = chemical.iloc[:, 0].astype("int64").drop_duplicates().tolist()
    pair_count = min(len(melt_numbers), len(chemical) // 2)
    contents = chemical.iloc[: 2 * pair_count, [1, 2, 3, 4, 5, 6]].to_numpy(dtype=float)
    averages = (contents[0::2] + contents[1::2]) / 2
    return dict(zip(melt_numbers[:pair_count], averages))


def _fit_models(model_cls, elements, means, stds):
    """Fit one ``model_cls(x, y)`` per property mean and per property std.

    ``x`` has one row per melt number and one column per element.  The result
    maps ``"<property>_mean"`` / ``"<property>_std"`` (e.g. ``"qufu_mean"``)
    to the fitted model; the three mean models are built before the three
    standard-deviation models.
    """
    x = np.array(elements).transpose()
    models = {}
    for suffix, targets in (("mean", means), ("std", stds)):
        for (key, _), target in zip(_PROPERTIES, targets):
            models["{}_{}".format(key, suffix)] = model_cls(x, np.array(target))
    return models


def main(data_path=_DEFAULT_DATA_PATH):
    """Clean the workbook, plot element/property relations and fit regressions.

    Steps:
      1. Read the "化学成分" (chemical composition) and "力学性能" (mechanical
         properties) sheets from ``data_path``.
      2. Reduce the mechanical data to a mean and standard deviation per melt
         number, and the chemical data to averaged element contents per melt
         number.
      3. Keep the melt numbers present in both tables.
      4. Draw 2-D histograms and a heat map, and fit OLS and MLR models of
         every property mean / standard deviation against E1..E6.

    Args:
        data_path: Path of the Excel workbook.  Defaults to the location used
            by the original authors.

    Returns:
        None.  The function is run for its plots and fitted models.
    """
    # ---- Data cleaning -------------------------------------------------
    with pd.ExcelFile(data_path) as workbook:
        chemicals_raw = pd.read_excel(workbook, "化学成分", usecols=[0, 2, 3, 4, 5, 6, 7])
        physics_raw = pd.read_excel(workbook, "力学性能")
    chemical = chemicals_raw.iloc[1:, :]  # the first data row is skipped, as before
    physics = physics_raw.dropna(how="any")

    # ---- Data reduction ------------------------------------------------
    physics_stats = _summarise_physics(physics)
    chemistry = _average_chemistry_pairs(chemical)

    # Keep only melt numbers known to both tables so that the element columns
    # and the property columns line up row by row.
    matched = [melt for melt in chemistry if melt in physics_stats]
    elements = [
        np.array([chemistry[melt][index] for melt in matched])
        for index in range(_ELEMENT_COUNT)
    ]
    means = [
        np.array([physics_stats[melt][0][index] for melt in matched])
        for index in range(len(_PROPERTIES))
    ]
    stds = [
        np.array([physics_stats[melt][1][index] for melt in matched])
        for index in range(len(_PROPERTIES))
    ]

    # ---- 2-D histograms: every element against every property mean -------
    from hist2d import create_hist2d
    for number, element in enumerate(elements, start=1):
        # Only the E5 plots pass an explicit ``fig=(10, 10)``, as in the
        # original calls; its meaning is defined by hist2d.create_hist2d.
        extra = {"fig": (10, 10)} if number == 5 else {}
        for (_, label), target in zip(_PROPERTIES, means):
            create_hist2d(
                element,
                target,
                title="化学成分E{}与{}特性的关系".format(number, label),
                xlabel="E{} %".format(number),
                ylabel="{}特性均值".format(label),
                **extra
            )

    # ---- OLS regression models -----------------------------------------
    from linear_regression import OlsModel
    # Imported for the optional prediction sketch below.
    from linear_regression import ols_calcutate_all
    ols_models = _fit_models(OlsModel, elements, means, stds)
    # To print a report: print(ols_models["qufu_mean"].results.summary())
    #
    # Prediction sketch for one melt number, carried over from the authors'
    # commented-out notes (not executed here):
    #     x_new = np.array([1, *chemistry[90624]])
    #     ols_calcutate_all(x_new,
    #                       ols_models["qufu_mean"], ols_models["qufu_std"],
    #                       ols_models["kangla_mean"], ols_models["kangla_std"],
    #                       ols_models["yanshen_mean"], ols_models["yanshen_std"])
    # Output recorded in the original source (presumably for melt 90624):
    #     yield mean       [281.04367017]    yield std       [4.04484533]
    #     tensile mean     [302.12712467]    tensile std     [3.60625011]
    #     elongation mean  [11.72968023]     elongation std  [0.68357895]

    # ---- MLR (multiple linear regression) models -------------------------
    from linear_regression import MlrModel
    # Imported for the optional prediction step (see the note below).
    from linear_regression import mlr_calcutate_all
    mlr_models = _fit_models(MlrModel, elements, means, stds)
    # Per the original notes, coefficients are at
    # mlr_models["qufu_mean"].results.coef_ and the intercept at
    # mlr_models["qufu_mean"].results.intercept_.
    #
    # NOTE(review): the original commented-out MLR prediction reshaped a
    # 7-element vector with reshape(-1, 6) and called ols_calcutate_all with
    # the OLS models; presumably mlr_calcutate_all with the six element
    # contents and the MLR models was intended -- confirm before enabling.
    # Output recorded in the original source for the MLR run:
    #     yield mean       [281.04919773]    yield std       [4.06391763]
    #     tensile mean     [302.13923671]    tensile std     [3.6079243]
    #     elongation mean  [11.75333675]     elongation std  [0.68167218]

    # ---- Heat map ---------------------------------------------------------
    from heatmap import create_heatmap
    columns = {label: target for (_, label), target in zip(_PROPERTIES, means)}
    for number, element in enumerate(elements, start=1):
        columns["E{}".format(number)] = element
    dataset = pd.DataFrame(columns)
    create_heatmap(dataset)

    # Optional line plot (x is one of "E1".."E6", y one of the property labels):
    #     from lineplot import create_lineplot
    #     create_lineplot(dataset, x, y)
# Run the full analysis only when this file is executed as a script.
if __name__ == "__main__":
    main()