summary | refs | log | tree | commit | diff | stats | homepage
diff options
context:
space:
mode:
author: HsiangNianian <i@jyunko.cn> 2026-01-05 14:39:42 +0800
committer: HsiangNianian <i@jyunko.cn> 2026-01-05 14:40:15 +0800
commit: 65f48da74e446df81b17d0cc9bf203b75947fff1 (patch)
tree: a6abfb6a0ce49f4499d93e7d9aab7988ca892f58
parent: 1a6d6edbe14f00df021a1e826d8d30555f301e30 (diff)
download: base-model-65f48da74e446df81b17d0cc9bf203b75947fff1.tar.gz
base-model-65f48da74e446df81b17d0cc9bf203b75947fff1.zip
refactor: improve code formatting and readability in utils/__init__.py
-rw-r--r-- src/base_model_trpgner/utils/__init__.py 36
1 files changed, 21 insertions, 15 deletions
diff --git a/src/base_model_trpgner/utils/__init__.py b/src/base_model_trpgner/utils/__init__.py
index 12a3ef4..ff65c01 100644
--- a/src/base_model_trpgner/utils/__init__.py
+++ b/src/base_model_trpgner/utils/__init__.py
@@ -25,7 +25,7 @@ def word_to_char_labels(text: str, word_labels: List[Tuple[str, str]]) -> List[s
if pos >= len(text):
break
- if text[pos: pos + len(token)] == token:
+ if text[pos : pos + len(token)] == token:
for i in range(len(token)):
idx = pos + i
if idx < len(char_labels):
@@ -86,19 +86,23 @@ def parse_conll_file(filepath: str) -> List[Dict[str, Any]]:
for line in lines:
if line.startswith("-DOCSTART-"):
if current_text:
- samples.append({
- "text": "".join(current_text),
- "char_labels": current_labels.copy(),
- })
+ samples.append(
+ {
+ "text": "".join(current_text),
+ "char_labels": current_labels.copy(),
+ }
+ )
current_text, current_labels = [], []
continue
if not line:
if current_text:
- samples.append({
- "text": "".join(current_text),
- "char_labels": current_labels.copy(),
- })
+ samples.append(
+ {
+ "text": "".join(current_text),
+ "char_labels": current_labels.copy(),
+ }
+ )
current_text, current_labels = [], []
continue
@@ -110,10 +114,12 @@ def parse_conll_file(filepath: str) -> List[Dict[str, Any]]:
current_labels.append(label)
if current_text:
- samples.append({
- "text": "".join(current_text),
- "char_labels": current_labels.copy(),
- })
+ samples.append(
+ {
+ "text": "".join(current_text),
+ "char_labels": current_labels.copy(),
+ }
+ )
return samples
@@ -152,9 +158,9 @@ def load_conll_dataset(conll_dir_or_files: str) -> Tuple[Dataset, List[str]]:
i_label = "I" + label[1:]
if i_label not in label_list:
label_list.append(i_label)
- print(f"⚠️ Added missing {i_label} for {label}")
+ print(f"Added missing {i_label} for {label}")
- print(f"✅ Loaded {len(all_samples)} samples, {len(label_list)} labels: {label_list}")
+ print(f"Loaded {len(all_samples)} samples, {len(label_list)} labels: {label_list}")
return Dataset.from_list(all_samples), label_list