fix: 为文件读取添加 UTF-8 编码以避免潜在编码问题
This commit is contained in:
@@ -41,7 +41,7 @@ def convert_message(data):
|
||||
return message
|
||||
|
||||
with open(output_sft_data, 'a', encoding='utf-8') as sft:
|
||||
-with open(sft_data, 'r') as f:
|
||||
+with open(sft_data, 'r', encoding='utf-8') as f:
|
||||
data = f.readlines()
|
||||
for item in tqdm(data, desc="Processing", unit="lines"):
|
||||
item = json.loads(item)
|
||||
|
||||
@@ -1044,7 +1044,7 @@ def convert_message(data):
|
||||
return message
|
||||
|
||||
with open('BelleGroup_sft.jsonl', 'a', encoding='utf-8') as sft:
|
||||
-with open('BelleGroup/train_3.5M_CN.json', 'r') as f:
|
||||
+with open('BelleGroup/train_3.5M_CN.json', 'r', encoding='utf-8') as f:
|
||||
data = f.readlines()
|
||||
for item in tqdm(data, desc="Processing", unit="lines"):
|
||||
item = json.loads(item)
|
||||
|
||||
Reference in New Issue
Block a user