334 lines
13 KiB
Python
334 lines
13 KiB
Python
import sys

import matplotlib
import pandas as pd
from PyQt5.QtCore import Qt, QThread, pyqtSignal
from PyQt5.QtGui import QColor, QFont
from PyQt5.QtWidgets import (QApplication, QMainWindow, QWidget, QVBoxLayout,
                             QHBoxLayout, QPushButton, QLabel, QFileDialog,
                             QTableWidget, QTableWidgetItem, QComboBox, QProgressBar,
                             QStatusBar, QGroupBox, QFormLayout, QMessageBox)
from thefuzz import fuzz

# Make sure Chinese text is rendered correctly by matplotlib.
matplotlib.rcParams["font.family"] = ["SimHei", "WenQuanYi Micro Hei", "Heiti TC"]
|
|
|
|
|
|
class CalculationThread(QThread):
    """Worker thread that scores name/address similarity for every row.

    The comparison runs off the GUI thread so the window does not freeze.
    All results are reported through the signals declared below.
    """

    # Emitted with the percentage of rows handled so far (0-99 while running).
    progress_updated = pyqtSignal(int)
    # Emitted with the scored copy of the input frame once every row is done.
    calculation_finished = pyqtSignal(pd.DataFrame)
    # Emitted with ``str(exc)`` when anything inside ``run`` raises.
    error_occurred = pyqtSignal(str)

    def __init__(self, df, source_name_col, source_loc_col, target_name_col, target_loc_col):
        """Store a private copy of ``df`` and the four column labels to compare.

        Args:
            df: DataFrame holding the rows to score; it is copied, so the
                caller's frame is never modified by this thread.
            source_name_col: Label of the source-side name column.
            source_loc_col: Label of the source-side address column.
            target_name_col: Label of the target-side name column.
            target_loc_col: Label of the target-side address column.
        """
        super().__init__()
        self.df = df.copy()
        self.source_name_col = source_name_col
        self.source_loc_col = source_loc_col
        self.target_name_col = target_name_col
        self.target_loc_col = target_loc_col

    def run(self):
        """Score every row and emit the frame with three score columns added.

        Adds/overwrites the columns '名称相似度' (name score), '地址相似度'
        (address score) and '综合相似度' (their mean) on the private copy and
        emits it via ``calculation_finished``. Any exception is converted to
        an ``error_occurred`` emission instead of propagating.
        """
        try:
            total_rows = len(self.df)

            # Iterate the four columns in lockstep; this keeps each cell's own
            # type and is independent of the frame's index labels.
            rows = zip(
                self.df[self.source_name_col],
                self.df[self.source_loc_col],
                self.df[self.target_name_col],
                self.df[self.target_loc_col],
            )

            name_scores = []
            loc_scores = []
            combined_scores = []
            last_percent = -1

            for position, (name_src, loc_src, name_tgt, loc_tgt) in enumerate(rows):
                # Progress is based on row position (not the index label) and
                # is only emitted when the integer percentage changes, so the
                # GUI is not flooded with one signal per row.
                percent = position * 100 // total_rows
                if percent != last_percent:
                    self.progress_updated.emit(percent)
                    last_percent = percent

                # NOTE: cells are stringified as-is, so a missing value is
                # compared as the text produced by str() (e.g. 'nan').
                name_score = fuzz.ratio(str(name_src), str(name_tgt))
                loc_score = fuzz.ratio(str(loc_src), str(loc_tgt))

                name_scores.append(name_score)
                loc_scores.append(loc_score)
                combined_scores.append((name_score + loc_score) / 2)

            # Assign plain lists positionally. Building a DataFrame from a
            # list of Series with ``columns=`` aligns on the Series index and
            # yields all-NaN columns; concat(axis=1) would additionally
            # misalign on non-default indexes and duplicate the columns when
            # the calculation is run a second time.
            self.df['名称相似度'] = name_scores
            self.df['地址相似度'] = loc_scores
            self.df['综合相似度'] = combined_scores

            self.calculation_finished.emit(self.df)

        except Exception as e:
            # Thread boundary: report the failure to the GUI instead of
            # letting the exception escape the worker thread.
            self.error_occurred.emit(str(e))
|
|
|
|
|
|
class SimilarityCalculator(QMainWindow):
    """Main window: load an Excel sheet, pick four columns, score, and save.

    The loaded (and later scored) data lives in ``self.df``; the heavy
    computation is delegated to ``CalculationThread``.
    """

    # Columns whose cells receive a red-to-green background in the table.
    _SCORE_COLUMNS = ('名称相似度', '地址相似度', '综合相似度')
    # Only this many leading rows are rendered, to keep the table responsive.
    _MAX_DISPLAY_ROWS = 1000

    def __init__(self):
        """Create the window with no data loaded and build the widgets."""
        super().__init__()
        self.df = None
        # Original column labels in combo-box order. The combos only show
        # str(label), so the real label is recovered by index.
        self._columns = []
        self.init_ui()

    def init_ui(self):
        """Build all widgets and layouts and wire up the button signals."""
        # Window title and geometry
        self.setWindowTitle('地址名称模糊匹配相似度计算工具')
        self.setGeometry(100, 100, 1200, 800)

        # Central widget and main vertical layout
        central_widget = QWidget()
        self.setCentralWidget(central_widget)
        main_layout = QVBoxLayout(central_widget)

        # File selection row
        file_layout = QHBoxLayout()
        self.file_path_label = QLabel('未选择文件')
        self.file_path_label.setWordWrap(True)
        self.select_file_btn = QPushButton('选择Excel文件')
        self.select_file_btn.clicked.connect(self.select_file)

        file_layout.addWidget(self.select_file_btn)
        file_layout.addWidget(self.file_path_label, 1)
        main_layout.addLayout(file_layout)

        # Column configuration group
        self.column_group = QGroupBox('列配置')
        column_layout = QFormLayout()

        self.source_name_combo = QComboBox()
        self.source_loc_combo = QComboBox()
        self.target_name_combo = QComboBox()
        self.target_loc_combo = QComboBox()

        column_layout.addRow('源名称列:', self.source_name_combo)
        column_layout.addRow('源位置列:', self.source_loc_combo)
        column_layout.addRow('目标名称列:', self.target_name_combo)
        column_layout.addRow('目标位置列:', self.target_loc_combo)

        self.column_group.setLayout(column_layout)
        # Disabled until a file has been loaded.
        self.column_group.setEnabled(False)
        main_layout.addWidget(self.column_group)

        # Action buttons
        btn_layout = QHBoxLayout()
        self.calculate_btn = QPushButton('开始计算相似度')
        self.calculate_btn.clicked.connect(self.start_calculation)
        self.calculate_btn.setEnabled(False)

        self.save_btn = QPushButton('保存结果')
        self.save_btn.clicked.connect(self.save_results)
        self.save_btn.setEnabled(False)

        btn_layout.addWidget(self.calculate_btn)
        btn_layout.addWidget(self.save_btn)
        main_layout.addLayout(btn_layout)

        # Progress bar (hidden until a calculation starts)
        self.progress_bar = QProgressBar()
        self.progress_bar.setVisible(False)
        main_layout.addWidget(self.progress_bar)

        # Result table
        self.result_table = QTableWidget()
        self.result_table.horizontalHeader().setStretchLastSection(True)
        main_layout.addWidget(self.result_table)

        # Status bar
        self.setStatusBar(QStatusBar())
        self.statusBar().showMessage('就绪')

    def _column_combos(self):
        """Return the four column combo boxes in source/target, name/loc order."""
        return (
            self.source_name_combo,
            self.source_loc_combo,
            self.target_name_combo,
            self.target_loc_combo,
        )

    def select_file(self):
        """Ask for an Excel file, load it into ``self.df`` and show it.

        Any failure while reading or displaying the file is reported in a
        message box and the status bar.
        """
        file_path, _ = QFileDialog.getOpenFileName(
            self, '选择Excel文件', '', 'Excel Files (*.xlsx *.xls)'
        )
        if not file_path:
            return

        try:
            self.df = pd.read_excel(file_path)
            self.file_path_label.setText(file_path)
            self.statusBar().showMessage(f'已加载文件,共 {len(self.df)} 行数据')

            # Fill the combo boxes and pre-select the default columns.
            self.populate_column_combos()

            # A file is loaded: allow column selection and calculation.
            self.column_group.setEnabled(True)
            self.calculate_btn.setEnabled(True)

            self.display_data(self.df)

        except Exception as e:
            QMessageBox.critical(self, '错误', f'无法读取文件: {str(e)}')
            self.statusBar().showMessage('文件读取失败')

    def populate_column_combos(self):
        """Fill the four combo boxes with the frame's columns and apply defaults.

        Labels are shown as ``str(label)`` so non-string headers (e.g. numeric
        ones) do not break the Qt call; the original labels are kept in
        ``self._columns`` in the same order for lookup by index.
        """
        columns = self.df.columns.tolist()
        self._columns = columns
        labels = [str(col) for col in columns]

        for combo in self._column_combos():
            combo.clear()
            combo.addItems(labels)

        # Preferred default column per combo; selected only if present,
        # otherwise the combo keeps its first entry.
        defaults = (
            (self.source_name_combo, "源文件门店店名"),
            (self.source_loc_combo, "源文件地址"),
            (self.target_name_combo, "name"),
            (self.target_loc_combo, "address"),
        )
        for combo, default_col in defaults:
            if default_col in columns:
                combo.setCurrentText(default_col)

    def display_data(self, df):
        """Render the first ``_MAX_DISPLAY_ROWS`` rows of ``df`` in the table.

        Cells in the similarity score columns get a red (0) to green (100)
        background.

        Args:
            df: DataFrame to display.
        """
        display_df = df.head(self._MAX_DISPLAY_ROWS)

        # Header labels must be str for Qt even if the frame's labels are not.
        headers = [str(col) for col in display_df.columns]
        self.result_table.setRowCount(len(display_df))
        self.result_table.setColumnCount(len(headers))
        self.result_table.setHorizontalHeaderLabels(headers)

        # Resolve once which column positions hold similarity scores.
        score_col_indexes = {
            idx for idx, col in enumerate(display_df.columns)
            if col in self._SCORE_COLUMNS
        }

        rows = display_df.itertuples(index=False, name=None)
        for row_idx, row in enumerate(rows):
            for col_idx, value in enumerate(row):
                item = QTableWidgetItem(str(value))
                item.setTextAlignment(Qt.AlignCenter)
                if col_idx in score_col_indexes:
                    self._apply_score_colors(item, value)
                self.result_table.setItem(row_idx, col_idx, item)

        self.result_table.resizeColumnsToContents()

    @staticmethod
    def _apply_score_colors(item, value):
        """Colour ``item`` on a red-to-green scale for a 0-100 score.

        Values that are not numeric (or are NaN) leave the item unstyled.
        """
        try:
            score = float(value)
        except (TypeError, ValueError):
            return
        if pd.isna(score):
            return

        # Clamp so out-of-range values cannot produce invalid colour channels.
        score = min(max(score, 0.0), 100.0)
        green = int(score * 2.55)
        # Qt needs a QColor/QBrush here; a CSS-like "rgb(...)" string raises
        # TypeError, which previously got swallowed and left cells uncoloured.
        item.setBackground(QColor(255 - green, green, 100))
        item.setForeground(Qt.white if score < 50 else Qt.black)

    def start_calculation(self):
        """Validate the column selection and start the worker thread."""
        indexes = [combo.currentIndex() for combo in self._column_combos()]

        # currentIndex() is -1 when a combo has no selection.
        if self.df is None or any(
            idx < 0 or idx >= len(self._columns) for idx in indexes
        ):
            QMessageBox.warning(self, '警告', '请选择所有列')
            return

        # Map combo positions back to the frame's real column labels.
        source_name_col, source_loc_col, target_name_col, target_loc_col = (
            self._columns[idx] for idx in indexes
        )

        # Lock the UI while the calculation is running.
        self.calculate_btn.setEnabled(False)
        self.select_file_btn.setEnabled(False)
        self.save_btn.setEnabled(False)

        self.progress_bar.setVisible(True)
        self.progress_bar.setValue(0)
        self.statusBar().showMessage('正在计算相似度...')

        # Keep a reference on self so the thread object outlives this method.
        self.calc_thread = CalculationThread(
            self.df, source_name_col, source_loc_col, target_name_col, target_loc_col
        )
        self.calc_thread.progress_updated.connect(self.update_progress)
        self.calc_thread.calculation_finished.connect(self.on_calculation_finished)
        self.calc_thread.error_occurred.connect(self.on_calculation_error)
        self.calc_thread.start()

    def update_progress(self, value):
        """Show ``value`` (a percentage) in the progress bar and status bar."""
        self.progress_bar.setValue(value)
        self.statusBar().showMessage(f'正在计算相似度... {value}%')

    def on_calculation_finished(self, result_df):
        """Adopt the scored frame, refresh the table and unlock the UI.

        Args:
            result_df: DataFrame emitted by the worker thread.
        """
        self.df = result_df
        self.display_data(self.df)
        self.progress_bar.setValue(100)
        self.statusBar().showMessage('相似度计算完成')

        self.calculate_btn.setEnabled(True)
        self.select_file_btn.setEnabled(True)
        self.save_btn.setEnabled(True)

        QMessageBox.information(self, '完成', '相似度计算已完成')

    def on_calculation_error(self, error_msg):
        """Report a worker failure and re-enable the calculate/select buttons.

        Args:
            error_msg: Text emitted by the worker's ``error_occurred`` signal.
        """
        self.statusBar().showMessage('计算出错')
        QMessageBox.critical(self, '计算错误', f'计算过程中发生错误: {error_msg}')

        self.calculate_btn.setEnabled(True)
        self.select_file_btn.setEnabled(True)

    def save_results(self):
        """Ask for a target path and write ``self.df`` to it as .xlsx.

        Permission, missing-path and other errors are each reported with
        their own message box; every failure also updates the status bar.
        """
        if self.df is None:
            QMessageBox.warning(self, '警告', '没有可保存的数据')
            return

        file_path, _ = QFileDialog.getSaveFileName(
            self, '保存结果', '', 'Excel Files (*.xlsx)'
        )
        if not file_path:
            return

        # Ensure the extension; compare case-insensitively so "x.XLSX" is
        # not turned into "x.XLSX.xlsx".
        if not file_path.lower().endswith('.xlsx'):
            file_path += '.xlsx'

        try:
            self.df.to_excel(file_path, index=False)
        except PermissionError:
            QMessageBox.critical(self, '权限错误',
                                 '保存失败:没有写入权限,请检查文件是否被占用,或选择其他路径/文件名。')
        except FileNotFoundError:
            QMessageBox.critical(self, '路径错误',
                                 '保存失败:目标路径不存在,请选择有效的保存位置。')
        except Exception as e:
            QMessageBox.critical(self, '未知错误', f'保存文件失败: {str(e)}')
        else:
            self.statusBar().showMessage(f'结果已保存到 {file_path}')
            QMessageBox.information(self, '成功', f'结果已成功保存到 {file_path}')
            return

        # Reached only when one of the handlers above ran.
        self.statusBar().showMessage('保存文件失败')
|
|
|
|
|
|
if __name__ == '__main__':
    qt_app = QApplication(sys.argv)

    # Application-wide font so that Chinese text is displayed correctly.
    default_font = QFont()
    default_font.setFamily("SimHei")
    qt_app.setFont(default_font)

    main_window = SimilarityCalculator()
    main_window.show()

    # Run the Qt event loop and hand its return code to the interpreter.
    exit_code = qt_app.exec_()
    sys.exit(exit_code)