diff options
| -rw-r--r-- | .github/workflows/publish.yml | 96 | ||||
| -rw-r--r-- | pyproject.toml | 77 | ||||
| -rw-r--r-- | src/base_model_trpgner/__init__.py (renamed from src/basemodel/__init__.py) | 14 | ||||
| -rw-r--r-- | src/base_model_trpgner/download_model.py (renamed from src/basemodel/download_model.py) | 0 | ||||
| -rw-r--r-- | src/base_model_trpgner/inference/__init__.py (renamed from src/basemodel/inference/__init__.py) | 0 | ||||
| -rw-r--r-- | src/base_model_trpgner/training/__init__.py (renamed from src/basemodel/training/__init__.py) | 4 | ||||
| -rw-r--r-- | src/base_model_trpgner/utils/__init__.py (renamed from src/basemodel/utils/__init__.py) | 0 | ||||
| -rw-r--r-- | utils/word_conll_to_char_conll.py | 3 |
8 files changed, 130 insertions, 64 deletions
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 2004132..c11cd0e 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -4,6 +4,8 @@ on: push: tags: - 'v*.*.*' + branches: + - main workflow_dispatch: inputs: create_test: @@ -11,6 +13,10 @@ on: required: false default: false type: boolean + tag_name: + description: 'Tag name to release (e.g., v1.0.0). Use this to re-run release with fixed code.' + required: true + default: 'v1.0.0' permissions: contents: write @@ -25,14 +31,25 @@ jobs: version: ${{ steps.version.outputs.version }} steps: - - uses: actions/checkout@v6 + - name: Checkout code + uses: actions/checkout@v6 with: fetch-depth: 0 + # 最佳实践: Tag 触发时使用 main 分支代码,而不是 Tag 快照 + # 这样修复代码后可以手动重新触发 workflow 而无需重新打 tag + ref: main - name: Extract version from tag id: version run: | - VERSION=${{ github.ref_name }} + # Tag 触发时使用 github.ref_name, 手动触发时使用 inputs.tag_name + if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then + VERSION="${{ github.event.inputs.tag_name }}" + echo "🔧 Manual trigger mode: Using tag $VERSION from input" + else + VERSION="${{ github.ref_name }}" + echo "🏷️ Tag trigger mode: Using tag $VERSION from GitHub" + fi echo "version=${VERSION#v}" >> $GITHUB_OUTPUT echo "Tag: $VERSION" echo "Version: ${VERSION#v}" @@ -44,6 +61,7 @@ jobs: - name: Build with uv run: | + uv sync uv build - name: Check distribution @@ -76,7 +94,7 @@ jobs: environment: name: test-pypi - url: https://test.pypi.org/p/base-model + url: https://test.pypi.org/p/base-model-trpgner permissions: id-token: write @@ -95,11 +113,12 @@ jobs: name: Publish to PyPI needs: build runs-on: ubuntu-latest - if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') + # Tag 推送时自动发布, 或手动触发且未指定测试模式时发布 + if: (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')) || (github.event_name == 'workflow_dispatch' && inputs.create_test != true) environment: name: pypi - url: https://pypi.org/p/base-model 
+ url: https://pypi.org/p/base-model-trpgner permissions: id-token: write @@ -118,22 +137,27 @@ jobs: name: Create GitHub Release with ONNX needs: [build, publish-pypi] runs-on: ubuntu-latest - if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') + # Tag 推送时自动创建, 或手动触发且未指定测试模式时创建 + if: (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')) || (github.event_name == 'workflow_dispatch' && inputs.create_test != true) permissions: contents: write steps: - - uses: actions/checkout@v6 + - name: Checkout code + uses: actions/checkout@v6 with: fetch-depth: 0 + # 使用 main 分支代码生成 CHANGELOG + ref: main - name: Generate CHANGELOG id: changelog uses: requarks/changelog-action@v1 with: token: ${{ github.token }} - tag: ${{ github.ref_name }} + # 根据触发类型选择正确的 tag + tag: ${{ github.event_name == 'workflow_dispatch' && inputs.tag_name || github.ref_name }} includeInvalidCommits: true changelogFilePath: CHANGELOG.md writeToFile: true @@ -150,29 +174,34 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | - VERSION="${{ github.ref_name }}" + # 根据触发类型选择正确的 tag + if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then + VERSION="${{ github.event.inputs.tag_name }}" + else + VERSION="${{ github.ref_name }}" + fi cat > release_notes.md << 'EOF' ## 📦 安装 ### pip 安装 ```bash - pip install base-model + pip install base-model-trpgner ``` ### 使用 uv(推荐) ```bash - uv pip install base-model + uv pip install base-model-trpgner ``` ### 训练模式 ```bash - pip install base-model[train] + pip install base-model-trpgner[train] ``` ## 🚀 快速开始 ```python - from basemodel import TRPGParser + from base_model_trpgner import TRPGParser parser = TRPGParser() result = parser.parse("风雨 2024-06-08 21:44:59 剧烈的疼痛...") @@ -184,18 +213,43 @@ jobs: ${{ steps.changelog.outputs.changes }} EOF - # 上传 Python 包和 ONNX 模型 - gh release create "${VERSION}" \ - --repo "${{ github.repository }}" \ - --notes-file release_notes.md \ - --title "🚀 ${VERSION}" \ - artifacts/dist/* \ - 
artifacts/onnx-artifact/* || true + # 检查 release 是否已存在 + if gh release view "${VERSION}" --repo "${{ github.repository }}" >/dev/null 2>&1; then + echo "📝 Release ${VERSION} 已存在,更新 release 资源..." + + # 删除旧的 assets 以便上传新的 + ASSETS=$(gh release view "${VERSION}" --repo "${{ github.repository }}" --json assets -q '.assets[].name') + for asset in $ASSETS; do + echo " 删除旧资源: $asset" + gh release delete-asset "${VERSION}" "$asset" --repo "${{ github.repository }}" || true + done + + # 更新 release notes 和 assets + gh release edit "${VERSION}" \ + --repo "${{ github.repository }}" \ + --notes-file release_notes.md \ + --title "🚀 ${VERSION}" + + # 上传新的资源 + gh release upload "${VERSION}" \ + artifacts/dist/* \ + artifacts/onnx-artifact/* \ + --repo "${{ github.repository }}" --clobber + else + echo "✨ 创建新 release ${VERSION}..." + gh release create "${VERSION}" \ + --repo "${{ github.repository }}" \ + --notes-file release_notes.md \ + --title "🚀 ${VERSION}" \ + artifacts/dist/* \ + artifacts/onnx-artifact/* + fi - name: Commit CHANGELOG.md if: hashFiles('CHANGELOG.md') != '' uses: stefanzweifel/git-auto-commit-action@v7 with: branch: main - commit_message: "docs: update CHANGELOG.md for ${{ github.ref_name }} [skip ci]" + # 根据触发类型选择正确的 tag + commit_message: "docs: update CHANGELOG.md for ${{ github.event_name == 'workflow_dispatch' && inputs.tag_name || github.ref_name }} [skip ci]" file_pattern: CHANGELOG.md diff --git a/pyproject.toml b/pyproject.toml index d9b6d38..a1dcc20 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,13 +6,19 @@ build-backend = "hatchling.build" name = "base-model-trpgner" version = "0.1.1" description = "HydroRoll TRPG NER 模型 - 桌上角色扮演游戏日志命名实体识别" -authors = [ - { name = "HsiangNianian", email = "leader@hydroroll.team" } -] +authors = [{ name = "HsiangNianian", email = "leader@hydroroll.team" }] readme = "README.md" requires-python = ">=3.12" license = { text = "AFL-3.0" } -keywords = ["hydroroll", "trpg", "nlp", "ner", "chinese", "onnx", "robot 
framework"] +keywords = [ + "hydroroll", + "trpg", + "nlp", + "ner", + "chinese", + "onnx", + "robot framework", +] classifiers = [ "Development Status :: 3 - Alpha", "Intended Audience :: Developers", @@ -24,11 +30,7 @@ classifiers = [ "Topic :: Text Processing :: Linguistic", ] -dependencies = [ - "numpy>=1.24.0", - "onnxruntime>=1.23.2", - "transformers>=4.57.3", -] +dependencies = ["numpy>=1.24.0", "onnxruntime>=1.23.2", "transformers>=4.57.3"] [project.optional-dependencies] train = [ @@ -43,14 +45,8 @@ dev = [ "black>=24.0.0", "ruff>=0.1.0", ] -webui = [ - "base-model-trpgner[train]", - "gradio>=6.2.0", - "scikit-learn>=1.4.0", -] -all = [ - "base-model-trpgner[train,webui,dev]", -] +webui = ["base-model-trpgner[train]", "gradio>=6.2.0", "scikit-learn>=1.4.0"] +all = ["base-model-trpgner[train,webui,dev]"] [project.urls] Homepage = "https://ailab.hydroroll.team/" @@ -59,10 +55,9 @@ Documentation = "https://ailab.hydroroll.team/" Issues = "https://github.com/HydroRoll-Team/base-model/issues" [tool.hatch.build.targets.wheel] -packages = ["src/basemodel"] -# 只包含 ONNX 推理需要的文件(约 41MB) +packages = ["src/base_model_trpgner"] artifacts = [ - "src/basemodel/**/*.py", + "src/base_model_trpgner/**/*.py", "models/trpg-final/model.onnx", "models/trpg-final/model.onnx.data", "models/trpg-final/config.json", @@ -73,23 +68,39 @@ artifacts = [ ] [tool.hatch.build.targets.wheel.shared-data] -"models/trpg-final" = "basemodel/models/trpg-final" +"models/trpg-final" = "base_model_trpgner/models/trpg-final" [tool.hatch.build.targets.sdist] -include = [ - "/src", - "/models/trpg-final", - "/README.md", - "/COPYING", -] +include = ["/src", "/models/trpg-final", "/README.md", "/COPYING"] [tool.ruff] exclude = [ - ".bzr", ".direnv", ".eggs", ".git", ".git-rewrite", ".hg", - ".ipynb_checkpoints", ".mypy_cache", ".nox", ".pants.d", - ".pyenv", ".pytest_cache", ".pytype", ".ruff_cache", ".svn", - ".tox", ".venv", ".vscode", "__pypackages__", "_build", - "buck-out", "build", "dist", 
"node_modules", "site-packages", "venv", + ".bzr", + ".direnv", + ".eggs", + ".git", + ".git-rewrite", + ".hg", + ".ipynb_checkpoints", + ".mypy_cache", + ".nox", + ".pants.d", + ".pyenv", + ".pytest_cache", + ".pytype", + ".ruff_cache", + ".svn", + ".tox", + ".venv", + ".vscode", + "__pypackages__", + "_build", + "buck-out", + "build", + "dist", + "node_modules", + "site-packages", + "venv", ] line-length = 100 indent-width = 4 @@ -112,4 +123,4 @@ line-length = 100 target-version = ["py312"] [tool.uv] -dev-dependencies = []
\ No newline at end of file +dev-dependencies = [] diff --git a/src/basemodel/__init__.py b/src/base_model_trpgner/__init__.py index 7287df4..9796c83 100644 --- a/src/basemodel/__init__.py +++ b/src/base_model_trpgner/__init__.py @@ -1,28 +1,28 @@ """ -base-model - HydroRoll TRPG NER 模型 SDK +base-model-trpgner - HydroRoll TRPG NER 模型 SDK 这是一个用于 TRPG(桌上角色扮演游戏)日志命名实体识别的 Python SDK。 基本用法: - >>> from basemodel import TRPGParser + >>> from base_model_trpgner import TRPGParser >>> parser = TRPGParser() >>> result = parser.parse("风雨 2024-06-08 21:44:59 剧烈的疼痛...") >>> print(result) {'metadata': {'speaker': '风雨', 'timestamp': '2024-06-08 21:44:59'}, 'content': [...]} 训练功能(需要额外安装): - >>> pip install base-model[train] - >>> from basemodel.training import train_ner_model + >>> pip install base-model-trpgner[train] + >>> from base_model_trpgner.training import train_ner_model >>> train_ner_model(conll_data="./data", output_dir="./model") """ -from basemodel.inference import TRPGParser, parse_line, parse_lines +from base_model_trpgner.inference import TRPGParser, parse_line, parse_lines try: from importlib.metadata import version - __version__ = version("base-model") + __version__ = version("base_model_trpgner") except Exception: - __version__ = "0.1.0.dev" + __version__ = "0.1.1.dev" __all__ = [ "__version__", diff --git a/src/basemodel/download_model.py b/src/base_model_trpgner/download_model.py index 2d65099..2d65099 100644 --- a/src/basemodel/download_model.py +++ b/src/base_model_trpgner/download_model.py diff --git a/src/basemodel/inference/__init__.py b/src/base_model_trpgner/inference/__init__.py index 93a185f..93a185f 100644 --- a/src/basemodel/inference/__init__.py +++ b/src/base_model_trpgner/inference/__init__.py diff --git a/src/basemodel/training/__init__.py b/src/base_model_trpgner/training/__init__.py index 5671c42..ccf3c03 100644 --- a/src/basemodel/training/__init__.py +++ b/src/base_model_trpgner/training/__init__.py @@ -36,7 +36,7 @@ def train_ner_model( 
resume_from_checkpoint: 恢复检查点路径 Examples: - >>> from basemodel.training import train_ner_model + >>> from base_model_trpgner.training import train_ner_model >>> train_ner_model( ... conll_data="./data", ... output_dir="./my_model", @@ -59,7 +59,7 @@ def train_ner_model( ) from e # 导入数据处理函数 - from basemodel.utils.conll import load_conll_dataset, tokenize_and_align_labels + from base_model_trpgner.utils.conll import load_conll_dataset, tokenize_and_align_labels print(f"🚀 Starting training...") diff --git a/src/basemodel/utils/__init__.py b/src/base_model_trpgner/utils/__init__.py index 12a3ef4..12a3ef4 100644 --- a/src/basemodel/utils/__init__.py +++ b/src/base_model_trpgner/utils/__init__.py diff --git a/utils/word_conll_to_char_conll.py b/utils/word_conll_to_char_conll.py index e52405f..9f4217a 100644 --- a/utils/word_conll_to_char_conll.py +++ b/utils/word_conll_to_char_conll.py @@ -52,4 +52,5 @@ if __name__ == "__main__": with open(output_fp, "w", encoding="utf-8") as f: f.write("\n".join(char_conll_lines) + "\n") - print(f"Converted {input_fp} to character-level CoNLL format at {output_fp}")
\ No newline at end of file + print(f"Converted {input_fp} to character-level CoNLL format at {output_fp}") + |
