Manual Dispatch - Wikipedia Extraction #32
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Manually starting a Wikipedia Extraction for a given language.
# This is the main way to trigger an extraction after merging
# a PR with rule file additions. This can also be used to re-trigger
# extracts to validate changes in rule files.
name: Manual Dispatch - Wikipedia Extraction

# This workflow has a single trigger: it only runs when started by hand
# (workflow_dispatch). The person dispatching it supplies the language code
# that is forwarded to the extraction script.
on:
  workflow_dispatch:
    inputs:
      language:
        description: "Language Code"
        # Mandatory input; the empty default only leaves the form field blank.
        required: true
        default: ""
jobs:
  # Single job: prepare the runner, run the extraction for the requested
  # language, deduplicate the result and publish the output directory as a
  # build artifact.
  extract:
    runs-on: ${{ matrix.os }}
    strategy:
      # One-entry matrix; it is used to keep the runner image and the pinned
      # Rust toolchain in one place.
      matrix:
        os: [ubuntu-latest]
        rust: [nightly-2023-06-28]
    steps:
      # SETUP
      # Reclaims runner disk space by removing preinstalled toolchains.
      # Kept ahead of checkout, matching the original step order (the
      # action's README asks for it to run before actions/checkout).
      - name: Maximize build space
        uses: easimon/maximize-build-space@b4d02c14493a9653fe7af06cc89ca5298071c66e
        with:
          root-reserve-mb: 512
          swap-size-mb: 1024
          remove-dotnet: "true"
          remove-android: "true"
          remove-haskell: "true"
      - uses: hecrj/setup-rust-action@50a120e4d34903c2c1383dec0e9b1d349a9cc2b1
        with:
          rust-version: ${{ matrix.rust }}
      # v3 runs on the deprecated Node 16 runtime; v4 is the drop-in successor.
      - uses: actions/checkout@v4

      # EXTRACTION
      - name: Full Wikipedia Extraction - ${{ github.event.inputs.language }}
        env:
          # The user-supplied input is handed over through the environment
          # instead of being interpolated into the command text, so it is
          # never expanded as part of the shell script itself.
          LANGUAGE: ${{ github.event.inputs.language }}
        run: ./scripts/extraction.sh extract "$LANGUAGE"
      - name: Deduplicate Wikipedia Extraction
        run: ./scripts/dedupe.sh extract.txt

      # UPLOAD
      # upload-artifact v2 has been retired by GitHub and no longer runs;
      # v4 accepts the same `name` / `path` inputs used here.
      - name: Upload extraction artifact
        uses: actions/upload-artifact@v4
        with:
          name: extraction
          path: output/*