{
  "$schema": "https://humangarden.ai/.well-known/skill-spec.schema.json",
  "canonical_url": "https://humangarden.ai/skills/docx/spec.json",
  "slug": "docx",
  "last_tested": "2026-06-05",
  "tested_by": "humangarden",
  "verdict_one_line": "pandoc + docx-js + an opinionated XML round-tripper, glued by an unusually well-written SKILL.md — runs clean end-to-end in ~1 second on a 3-page memo",
  "fires_when": [
    "user has a .docx file and wants its text content as markdown or plain text",
    "user wants to programmatically generate a new Word document",
    "user wants to edit an existing docx via XML round-trip (unpack → edit → repack)",
    "user wants to preserve tracked changes during extraction (--track-changes=all)",
    "user is building agent workflows that round-trip Word documents"
  ],
  "skip_when": [
    "user wants to export a docx to PDF (requires LibreOffice, not bundled)",
    "user has .doc legacy files needing conversion (requires LibreOffice)",
    "user wants to accept/reject tracked changes programmatically (requires LibreOffice path)",
    "user expects a one-shot install script (you bring your own dependencies)"
  ],
  "inputs": [
    {
      "type": "file",
      "format": "docx",
      "constraint": "any modern Word document; .doc legacy not supported without LibreOffice"
    },
    {
      "type": "text",
      "format": "structured-spec",
      "constraint": "for new-doc generation, supply a description or JSON describing intended structure"
    }
  ],
  "outputs": [
    {
      "type": "file",
      "format": "docx",
      "quality_note": "~90% trustworthy; round-trip preserves styles, tables, numbering exactly; validate.py auto-checks paragraph count after pack"
    },
    {
      "type": "text",
      "format": "markdown",
      "quality_note": "pandoc extraction is clean for headings, tables, lists; page breaks silently dropped"
    }
  ],
  "installation": {
    "pip": [
      "python-docx",
      "lxml"
    ],
    "npm": [
      "docx"
    ],
    "system": {
      "universal": "pandoc (system package — brew install / apt install pandoc)",
      "optional_libreoffice": "brew install --cask libreoffice (only if you need PDF export or accept-changes)"
    },
    "notes": "No requirements.txt or package.json ships in the skill folder. Required deps must be read off SKILL.md. CommonJS gotcha: the skill's docx-js examples use require() — fails in worktrees where parent package.json has \"type\": \"module\"."
  },
  "artifacts": [
    {
      "kind": "office",
      "file": "https://humangarden.ai/spec-artifacts/docx/q2-memo.docx",
      "caption": "Input — a 3-page Q2 memo (headings, a table, bulleted + numbered lists, appendix)",
      "role": "input",
      "hero": true
    },
    {
      "kind": "text",
      "inline": "# Throughput Metrics\n\nBelow is the Q2 throughput breakdown. Curators averaged 12 minutes per skill, ...\n\n  -----------------------------------------------------------------------\n  Month                   Skills tested           Pass rate\n  ----------------------- ----------------------- -----------------------\n  April                   14                      79%\n  May                     18                      83%\n  June (partial)          15                      80%\n  -----------------------------------------------------------------------\n\n# Notable wins\n\n- Subagent worktree isolation: zero cross-contamination across 60+ parallel runs\n- pptx skill now generates valid 16:9 decks with native charts in <100ms\n- pdf skill exposed two silent-failure modes in pdfplumber tables — flagged in spec\n",
      "caption": "pandoc extraction. Headings preserved, table converted to a pandoc grid, bullets clean. Page breaks silently dropped.",
      "role": "output",
      "hero": true
    }
  ],
  "caveats": [
    "docx-js example fails with \"ReferenceError: require is not defined in ES module scope\" if parent package.json sets type=module (rename to .cjs)",
    "PDF export path silently unavailable without LibreOffice",
    "validate.py against unmodified python-docx output may flag pre-existing schema gaps (not introduced by edits)",
    "tracked-changes editing, comments, and image insertion via XML rels are documented but untested in our run"
  ],
  "needs_credentials": [],
  "source_repo": "https://github.com/anthropics/skills/tree/main/docx",
  "human_review_url": "https://humangarden.ai/skills/docx/"
}