refactor: improve code formatting and readability in utils/__init__.py

author: HsiangNianian <i@jyunko.cn> 2026-01-05 14:39:42 +0800
committer: HsiangNianian <i@jyunko.cn> 2026-01-05 14:40:15 +0800
commit: 65f48da74e446df81b17d0cc9bf203b75947fff1 (patch)
tree: a6abfb6a0ce49f4499d93e7d9aab7988ca892f58 /src
parent: 1a6d6edbe14f00df021a1e826d8d30555f301e30 (diff)
download: base-model-65f48da74e446df81b17d0cc9bf203b75947fff1.tar.gz base-model-65f48da74e446df81b17d0cc9bf203b75947fff1.zip
1 files changed, 21 insertions, 15 deletions
diff --git a/src/base_model_trpgner/utils/__init__.py b/src/base_model_trpgner/utils/__init__.py
index 12a3ef4..ff65c01 100644
--- a/src/base_model_trpgner/utils/__init__.py
+++ b/src/base_model_trpgner/utils/__init__.py
@@ -25,7 +25,7 @@ def word_to_char_labels(text: str, word_labels: List[Tuple[str, str]]) -> List[s
         if pos >= len(text):
             break
 
-        if text[pos: pos + len(token)] == token:
+        if text[pos : pos + len(token)] == token:
             for i in range(len(token)):
                 idx = pos + i
                 if idx < len(char_labels):
@@ -86,19 +86,23 @@ def parse_conll_file(filepath: str) -> List[Dict[str, Any]]:
         for line in lines:
             if line.startswith("-DOCSTART-"):
                 if current_text:
-                    samples.append({
-                        "text": "".join(current_text),
-                        "char_labels": current_labels.copy(),
-                    })
+                    samples.append(
+                        {
+                            "text": "".join(current_text),
+                            "char_labels": current_labels.copy(),
+                        }
+                    )
                     current_text, current_labels = [], []
                 continue
 
             if not line:
                 if current_text:
-                    samples.append({
-                        "text": "".join(current_text),
-                        "char_labels": current_labels.copy(),
-                    })
+                    samples.append(
+                        {
+                            "text": "".join(current_text),
+                            "char_labels": current_labels.copy(),
+                        }
+                    )
                     current_text, current_labels = [], []
                 continue
 
@@ -110,10 +114,12 @@ def parse_conll_file(filepath: str) -> List[Dict[str, Any]]:
                 current_labels.append(label)
 
         if current_text:
-            samples.append({
-                "text": "".join(current_text),
-                "char_labels": current_labels.copy(),
-            })
+            samples.append(
+                {
+                    "text": "".join(current_text),
+                    "char_labels": current_labels.copy(),
+                }
+            )
 
     return samples
 
@@ -152,9 +158,9 @@ def load_conll_dataset(conll_dir_or_files: str) -> Tuple[Dataset, List[str]]:
             i_label = "I" + label[1:]
             if i_label not in label_list:
                 label_list.append(i_label)
-                print(f"⚠️  Added missing {i_label} for {label}")
+                print(f"Added missing {i_label} for {label}")
 
-    print(f"✅ Loaded {len(all_samples)} samples, {len(label_list)} labels: {label_list}")
+    print(f"Loaded {len(all_samples)} samples, {len(label_list)} labels: {label_list}")
     return Dataset.from_list(all_samples), label_list
author	HsiangNianian <i@jyunko.cn>	2026-01-05 14:39:42 +0800
committer	HsiangNianian <i@jyunko.cn>	2026-01-05 14:40:15 +0800
commit	65f48da74e446df81b17d0cc9bf203b75947fff1 (patch)
tree	a6abfb6a0ce49f4499d93e7d9aab7988ca892f58 /src
parent	1a6d6edbe14f00df021a1e826d8d30555f301e30 (diff)
download	base-model-65f48da74e446df81b17d0cc9bf203b75947fff1.tar.gz base-model-65f48da74e446df81b17d0cc9bf203b75947fff1.zip