# Manual Dispatch - Wikipedia Extraction (run #32)

# Manually starting a Wikipedia Extraction for a given language
# This is the main way to trigger an extraction after merging
# a PR with rule file additions. This can also be used to re-trigger
# extracts to validate changes in rule files.
name: Manual Dispatch - Wikipedia Extraction

# Runs only when started by hand from the Actions tab (or the API);
# the caller must supply the language code to extract.
on:
  workflow_dispatch:
    inputs:
      language:
        description: "Language Code"
        required: true
        default: ""

jobs:
  # Single job: prepare the runner, run the extraction script for the
  # requested language, deduplicate the result and publish it as an artifact.
  extract:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest]
        # Pinned nightly toolchain so runs are reproducible.
        rust: [nightly-2023-06-28]
    steps:
      # SETUP
      # Third-party actions are pinned to full commit SHAs.
      - name: Maximize build space
        uses: easimon/maximize-build-space@b4d02c14493a9653fe7af06cc89ca5298071c66e
        with:
          root-reserve-mb: 512
          swap-size-mb: 1024
          # Quoted so the action receives the string "true", not a YAML boolean.
          remove-dotnet: "true"
          remove-android: "true"
          remove-haskell: "true"
      - uses: hecrj/setup-rust-action@50a120e4d34903c2c1383dec0e9b1d349a9cc2b1
        with:
          rust-version: ${{ matrix.rust }}
      # v4 runs on a supported Node runtime (v3 relied on deprecated Node 16).
      - uses: actions/checkout@v4

      # EXTRACTION
      - name: Full Wikipedia Extraction - ${{ github.event.inputs.language }}
        env:
          # The user-supplied input is passed through the environment rather
          # than interpolated into the script, so it cannot inject shell code.
          LANGUAGE: ${{ github.event.inputs.language }}
        run: ./scripts/extraction.sh extract "$LANGUAGE"
      - name: Deduplicate Wikipedia Extraction
        run: ./scripts/dedupe.sh extract.txt

      # UPLOAD
      # upload-artifact v1/v2 (and v3) have been retired by GitHub and now fail
      # the job; v4 is the supported major version. Only one artifact with
      # this name is uploaded per run, as v4 requires.
      - uses: actions/upload-artifact@v4
        with:
          name: extraction
          path: output/*