fix: 为文件读取添加utf-8编码以避免潜在编码问题

This commit is contained in:
KMnO4-zx
2025-07-25 09:03:43 +08:00
parent a37078138e
commit f505e8e52c
2 changed files with 2 additions and 2 deletions

View File

@@ -41,7 +41,7 @@ def convert_message(data):
return message
with open(output_sft_data, 'a', encoding='utf-8') as sft:
-with open(sft_data, 'r') as f:
+with open(sft_data, 'r', encoding='utf-8') as f:
data = f.readlines()
for item in tqdm(data, desc="Processing", unit="lines"):
item = json.loads(item)

View File

@@ -1044,7 +1044,7 @@ def convert_message(data):
return message
with open('BelleGroup_sft.jsonl', 'a', encoding='utf-8') as sft:
-with open('BelleGroup/train_3.5M_CN.json', 'r') as f:
+with open('BelleGroup/train_3.5M_CN.json', 'r', encoding='utf-8') as f:
data = f.readlines()
for item in tqdm(data, desc="Processing", unit="lines"):
item = json.loads(item)