summary | refs | log | tree | commit | diff | stats | homepage
diff options
context:
space:
mode:
author: HsiangNianian <i@jyunko.cn> 2025-12-30 20:16:05 +0800
committer: HsiangNianian <i@jyunko.cn> 2025-12-30 20:16:05 +0800
commit: 5dd166366b8a2f4699c1841ebd7fceabcd9868a4 (patch)
tree: 85d78772054529579176547c00aee9559cffff37
parent: dd55c70225367dec9e8d88821b4d65fcd24edd65 (diff)
download: base-model-5dd166366b8a2f4699c1841ebd7fceabcd9868a4.tar.gz
base-model-5dd166366b8a2f4699c1841ebd7fceabcd9868a4.zip
refactor: Refactor TRPG NER model SDK: restructure codebase into base_model_trpgner package, implement training and inference modules, and add model download functionality. Remove legacy training and utils modules. Enhance documentation and examples for better usability.
-rw-r--r-- .github/workflows/publish.yml 96
-rw-r--r-- pyproject.toml 77
-rw-r--r-- src/base_model_trpgner/__init__.py (renamed from src/basemodel/__init__.py) 14
-rw-r--r-- src/base_model_trpgner/download_model.py (renamed from src/basemodel/download_model.py) 0
-rw-r--r-- src/base_model_trpgner/inference/__init__.py (renamed from src/basemodel/inference/__init__.py) 0
-rw-r--r-- src/base_model_trpgner/training/__init__.py (renamed from src/basemodel/training/__init__.py) 4
-rw-r--r-- src/base_model_trpgner/utils/__init__.py (renamed from src/basemodel/utils/__init__.py) 0
-rw-r--r-- utils/word_conll_to_char_conll.py 3
8 files changed, 130 insertions, 64 deletions
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index 2004132..c11cd0e 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -4,6 +4,8 @@ on:
push:
tags:
- 'v*.*.*'
+ branches:
+ - main
workflow_dispatch:
inputs:
create_test:
@@ -11,6 +13,10 @@ on:
required: false
default: false
type: boolean
+ tag_name:
+ description: 'Tag name to release (e.g., v1.0.0). Use this to re-run release with fixed code.'
+ required: true
+ default: 'v1.0.0'
permissions:
contents: write
@@ -25,14 +31,25 @@ jobs:
version: ${{ steps.version.outputs.version }}
steps:
- - uses: actions/checkout@v6
+ - name: Checkout code
+ uses: actions/checkout@v6
with:
fetch-depth: 0
+ # 最佳实践: Tag 触发时使用 main 分支代码,而不是 Tag 快照
+ # 这样修复代码后可以手动重新触发 workflow 而无需重新打 tag
+ ref: main
- name: Extract version from tag
id: version
run: |
- VERSION=${{ github.ref_name }}
+ # Tag 触发时使用 github.ref_name, 手动触发时使用 inputs.tag_name
+ if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
+ VERSION="${{ github.event.inputs.tag_name }}"
+ echo "🔧 Manual trigger mode: Using tag $VERSION from input"
+ else
+ VERSION="${{ github.ref_name }}"
+ echo "🏷️ Tag trigger mode: Using tag $VERSION from GitHub"
+ fi
echo "version=${VERSION#v}" >> $GITHUB_OUTPUT
echo "Tag: $VERSION"
echo "Version: ${VERSION#v}"
@@ -44,6 +61,7 @@ jobs:
- name: Build with uv
run: |
+ uv sync
uv build
- name: Check distribution
@@ -76,7 +94,7 @@ jobs:
environment:
name: test-pypi
- url: https://test.pypi.org/p/base-model
+ url: https://test.pypi.org/p/base-model-trpgner
permissions:
id-token: write
@@ -95,11 +113,12 @@ jobs:
name: Publish to PyPI
needs: build
runs-on: ubuntu-latest
- if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
+ # Tag 推送时自动发布, 或手动触发且未指定测试模式时发布
+ if: (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')) || (github.event_name == 'workflow_dispatch' && inputs.create_test != true)
environment:
name: pypi
- url: https://pypi.org/p/base-model
+ url: https://pypi.org/p/base-model-trpgner
permissions:
id-token: write
@@ -118,22 +137,27 @@ jobs:
name: Create GitHub Release with ONNX
needs: [build, publish-pypi]
runs-on: ubuntu-latest
- if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
+ # Tag 推送时自动创建, 或手动触发且未指定测试模式时创建
+ if: (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')) || (github.event_name == 'workflow_dispatch' && inputs.create_test != true)
permissions:
contents: write
steps:
- - uses: actions/checkout@v6
+ - name: Checkout code
+ uses: actions/checkout@v6
with:
fetch-depth: 0
+ # 使用 main 分支代码生成 CHANGELOG
+ ref: main
- name: Generate CHANGELOG
id: changelog
uses: requarks/changelog-action@v1
with:
token: ${{ github.token }}
- tag: ${{ github.ref_name }}
+ # 根据触发类型选择正确的 tag
+ tag: ${{ github.event_name == 'workflow_dispatch' && inputs.tag_name || github.ref_name }}
includeInvalidCommits: true
changelogFilePath: CHANGELOG.md
writeToFile: true
@@ -150,29 +174,34 @@ jobs:
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
- VERSION="${{ github.ref_name }}"
+ # 根据触发类型选择正确的 tag
+ if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
+ VERSION="${{ github.event.inputs.tag_name }}"
+ else
+ VERSION="${{ github.ref_name }}"
+ fi
cat > release_notes.md << 'EOF'
## 📦 安装
### pip 安装
```bash
- pip install base-model
+ pip install base-model-trpgner
```
### 使用 uv(推荐)
```bash
- uv pip install base-model
+ uv pip install base-model-trpgner
```
### 训练模式
```bash
- pip install base-model[train]
+ pip install base-model-trpgner[train]
```
## 🚀 快速开始
```python
- from basemodel import TRPGParser
+ from base_model_trpgner import TRPGParser
parser = TRPGParser()
result = parser.parse("风雨 2024-06-08 21:44:59 剧烈的疼痛...")
@@ -184,18 +213,43 @@ jobs:
${{ steps.changelog.outputs.changes }}
EOF
- # 上传 Python 包和 ONNX 模型
- gh release create "${VERSION}" \
- --repo "${{ github.repository }}" \
- --notes-file release_notes.md \
- --title "🚀 ${VERSION}" \
- artifacts/dist/* \
- artifacts/onnx-artifact/* || true
+ # 检查 release 是否已存在
+ if gh release view "${VERSION}" --repo "${{ github.repository }}" >/dev/null 2>&1; then
+ echo "📝 Release ${VERSION} 已存在,更新 release 资源..."
+
+ # 删除旧的 assets 以便上传新的
+ ASSETS=$(gh release view "${VERSION}" --repo "${{ github.repository }}" --json assets -q '.assets[].name')
+ for asset in $ASSETS; do
+ echo " 删除旧资源: $asset"
+ gh release delete-asset "${VERSION}" "$asset" --repo "${{ github.repository }}" || true
+ done
+
+ # 更新 release notes 和 assets
+ gh release edit "${VERSION}" \
+ --repo "${{ github.repository }}" \
+ --notes-file release_notes.md \
+ --title "🚀 ${VERSION}"
+
+ # 上传新的资源
+ gh release upload "${VERSION}" \
+ artifacts/dist/* \
+ artifacts/onnx-artifact/* \
+ --repo "${{ github.repository }}" --clobber
+ else
+ echo "✨ 创建新 release ${VERSION}..."
+ gh release create "${VERSION}" \
+ --repo "${{ github.repository }}" \
+ --notes-file release_notes.md \
+ --title "🚀 ${VERSION}" \
+ artifacts/dist/* \
+ artifacts/onnx-artifact/*
+ fi
- name: Commit CHANGELOG.md
if: hashFiles('CHANGELOG.md') != ''
uses: stefanzweifel/git-auto-commit-action@v7
with:
branch: main
- commit_message: "docs: update CHANGELOG.md for ${{ github.ref_name }} [skip ci]"
+ # 根据触发类型选择正确的 tag
+ commit_message: "docs: update CHANGELOG.md for ${{ github.event_name == 'workflow_dispatch' && inputs.tag_name || github.ref_name }} [skip ci]"
file_pattern: CHANGELOG.md
diff --git a/pyproject.toml b/pyproject.toml
index d9b6d38..a1dcc20 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,13 +6,19 @@ build-backend = "hatchling.build"
name = "base-model-trpgner"
version = "0.1.1"
description = "HydroRoll TRPG NER 模型 - 桌上角色扮演游戏日志命名实体识别"
-authors = [
- { name = "HsiangNianian", email = "leader@hydroroll.team" }
-]
+authors = [{ name = "HsiangNianian", email = "leader@hydroroll.team" }]
readme = "README.md"
requires-python = ">=3.12"
license = { text = "AFL-3.0" }
-keywords = ["hydroroll", "trpg", "nlp", "ner", "chinese", "onnx", "robot framework"]
+keywords = [
+ "hydroroll",
+ "trpg",
+ "nlp",
+ "ner",
+ "chinese",
+ "onnx",
+ "robot framework",
+]
classifiers = [
"Development Status :: 3 - Alpha",
"Intended Audience :: Developers",
@@ -24,11 +30,7 @@ classifiers = [
"Topic :: Text Processing :: Linguistic",
]
-dependencies = [
- "numpy>=1.24.0",
- "onnxruntime>=1.23.2",
- "transformers>=4.57.3",
-]
+dependencies = ["numpy>=1.24.0", "onnxruntime>=1.23.2", "transformers>=4.57.3"]
[project.optional-dependencies]
train = [
@@ -43,14 +45,8 @@ dev = [
"black>=24.0.0",
"ruff>=0.1.0",
]
-webui = [
- "base-model-trpgner[train]",
- "gradio>=6.2.0",
- "scikit-learn>=1.4.0",
-]
-all = [
- "base-model-trpgner[train,webui,dev]",
-]
+webui = ["base-model-trpgner[train]", "gradio>=6.2.0", "scikit-learn>=1.4.0"]
+all = ["base-model-trpgner[train,webui,dev]"]
[project.urls]
Homepage = "https://ailab.hydroroll.team/"
@@ -59,10 +55,9 @@ Documentation = "https://ailab.hydroroll.team/"
Issues = "https://github.com/HydroRoll-Team/base-model/issues"
[tool.hatch.build.targets.wheel]
-packages = ["src/basemodel"]
-# 只包含 ONNX 推理需要的文件(约 41MB)
+packages = ["src/base_model_trpgner"]
artifacts = [
- "src/basemodel/**/*.py",
+ "src/base_model_trpgner/**/*.py",
"models/trpg-final/model.onnx",
"models/trpg-final/model.onnx.data",
"models/trpg-final/config.json",
@@ -73,23 +68,39 @@ artifacts = [
]
[tool.hatch.build.targets.wheel.shared-data]
-"models/trpg-final" = "basemodel/models/trpg-final"
+"models/trpg-final" = "base_model_trpgner/models/trpg-final"
[tool.hatch.build.targets.sdist]
-include = [
- "/src",
- "/models/trpg-final",
- "/README.md",
- "/COPYING",
-]
+include = ["/src", "/models/trpg-final", "/README.md", "/COPYING"]
[tool.ruff]
exclude = [
- ".bzr", ".direnv", ".eggs", ".git", ".git-rewrite", ".hg",
- ".ipynb_checkpoints", ".mypy_cache", ".nox", ".pants.d",
- ".pyenv", ".pytest_cache", ".pytype", ".ruff_cache", ".svn",
- ".tox", ".venv", ".vscode", "__pypackages__", "_build",
- "buck-out", "build", "dist", "node_modules", "site-packages", "venv",
+ ".bzr",
+ ".direnv",
+ ".eggs",
+ ".git",
+ ".git-rewrite",
+ ".hg",
+ ".ipynb_checkpoints",
+ ".mypy_cache",
+ ".nox",
+ ".pants.d",
+ ".pyenv",
+ ".pytest_cache",
+ ".pytype",
+ ".ruff_cache",
+ ".svn",
+ ".tox",
+ ".venv",
+ ".vscode",
+ "__pypackages__",
+ "_build",
+ "buck-out",
+ "build",
+ "dist",
+ "node_modules",
+ "site-packages",
+ "venv",
]
line-length = 100
indent-width = 4
@@ -112,4 +123,4 @@ line-length = 100
target-version = ["py312"]
[tool.uv]
-dev-dependencies = [] \ No newline at end of file
+dev-dependencies = []
diff --git a/src/basemodel/__init__.py b/src/base_model_trpgner/__init__.py
index 7287df4..9796c83 100644
--- a/src/basemodel/__init__.py
+++ b/src/base_model_trpgner/__init__.py
@@ -1,28 +1,28 @@
"""
-base-model - HydroRoll TRPG NER 模型 SDK
+base-model-trpgner - HydroRoll TRPG NER 模型 SDK
这是一个用于 TRPG(桌上角色扮演游戏)日志命名实体识别的 Python SDK。
基本用法:
- >>> from basemodel import TRPGParser
+ >>> from base_model_trpgner import TRPGParser
>>> parser = TRPGParser()
>>> result = parser.parse("风雨 2024-06-08 21:44:59 剧烈的疼痛...")
>>> print(result)
{'metadata': {'speaker': '风雨', 'timestamp': '2024-06-08 21:44:59'}, 'content': [...]}
训练功能(需要额外安装):
- >>> pip install base-model[train]
- >>> from basemodel.training import train_ner_model
+ >>> pip install base-model-trpgner[train]
+ >>> from base_model_trpgner.training import train_ner_model
>>> train_ner_model(conll_data="./data", output_dir="./model")
"""
-from basemodel.inference import TRPGParser, parse_line, parse_lines
+from base_model_trpgner.inference import TRPGParser, parse_line, parse_lines
try:
from importlib.metadata import version
- __version__ = version("base-model")
+ __version__ = version("base_model_trpgner")
except Exception:
- __version__ = "0.1.0.dev"
+ __version__ = "0.1.1.dev"
__all__ = [
"__version__",
diff --git a/src/basemodel/download_model.py b/src/base_model_trpgner/download_model.py
index 2d65099..2d65099 100644
--- a/src/basemodel/download_model.py
+++ b/src/base_model_trpgner/download_model.py
diff --git a/src/basemodel/inference/__init__.py b/src/base_model_trpgner/inference/__init__.py
index 93a185f..93a185f 100644
--- a/src/basemodel/inference/__init__.py
+++ b/src/base_model_trpgner/inference/__init__.py
diff --git a/src/basemodel/training/__init__.py b/src/base_model_trpgner/training/__init__.py
index 5671c42..ccf3c03 100644
--- a/src/basemodel/training/__init__.py
+++ b/src/base_model_trpgner/training/__init__.py
@@ -36,7 +36,7 @@ def train_ner_model(
resume_from_checkpoint: 恢复检查点路径
Examples:
- >>> from basemodel.training import train_ner_model
+ >>> from base_model_trpgner.training import train_ner_model
>>> train_ner_model(
... conll_data="./data",
... output_dir="./my_model",
@@ -59,7 +59,7 @@ def train_ner_model(
) from e
# 导入数据处理函数
- from basemodel.utils.conll import load_conll_dataset, tokenize_and_align_labels
+ from base_model_trpgner.utils.conll import load_conll_dataset, tokenize_and_align_labels
print(f"🚀 Starting training...")
diff --git a/src/basemodel/utils/__init__.py b/src/base_model_trpgner/utils/__init__.py
index 12a3ef4..12a3ef4 100644
--- a/src/basemodel/utils/__init__.py
+++ b/src/base_model_trpgner/utils/__init__.py
diff --git a/utils/word_conll_to_char_conll.py b/utils/word_conll_to_char_conll.py
index e52405f..9f4217a 100644
--- a/utils/word_conll_to_char_conll.py
+++ b/utils/word_conll_to_char_conll.py
@@ -52,4 +52,5 @@ if __name__ == "__main__":
with open(output_fp, "w", encoding="utf-8") as f:
f.write("\n".join(char_conll_lines) + "\n")
- print(f"Converted {input_fp} to character-level CoNLL format at {output_fp}") \ No newline at end of file
+ print(f"Converted {input_fp} to character-level CoNLL format at {output_fp}")
+