Created
April 11, 2026 03:39
-
-
Save pleabargain/1b22200b41026a9c67298f9a08dddf96 to your computer and use it in GitHub Desktop.
Converts all .docx files in a directory to Markdown (.md) on Windows 11. Requires PowerShell (pwsh) and Pandoc.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Requires -Version 5.1
<#
.SYNOPSIS
    Convert every *.docx in the current directory to Markdown using Pandoc.
.DESCRIPTION
    Scope:
      Only the working directory (Get-Location) is scanned; subfolders are
      not visited.
    Output naming:
      <BaseName>.md is used when no file of that name exists. Otherwise
      <BaseName>-1.md, <BaseName>-2.md, ... are tried until an unused name
      is found.
    Media:
      Embedded images and media are extracted to <Stem>_media next to the
      .md file, where <Stem> is the markdown file name without the .md
      extension.
.EXAMPLE
    Set-Location 'C:\path\to\folder'
    .\Convert-DocxToMarkdown.ps1
#>
[CmdletBinding()]
param()

# Treat every error as terminating so a failure stops the script immediately.
$ErrorActionPreference = 'Stop'

# Pandoc is an external executable; stop early when it cannot be resolved.
$pandocCommand = Get-Command pandoc -ErrorAction SilentlyContinue
if ($null -eq $pandocCommand) {
    # NOTE(review): with $ErrorActionPreference = 'Stop' this Write-Error is
    # expected to terminate the script by itself, which would leave the
    # following 'exit 1' as a fallback only -- confirm before relying on it.
    Write-Error 'Pandoc was not found on PATH. Install Pandoc from https://pandoc.org/installing.html and reopen your terminal.'
    exit 1
}
function Get-OutputMarkdownInfo {
    <#
    .SYNOPSIS
        Pick an unused Markdown output path and its matching media folder.
    .DESCRIPTION
        Tries '<BaseName>.md' first, then '<BaseName>-1.md', '<BaseName>-2.md',
        ... and returns the first candidate for which Test-Path reports that
        nothing exists at that path inside WorkingDirectory. Only the .md path
        is probed; the media folder is not checked for existence.
    .PARAMETER BaseName
        Source file name without its extension.
    .PARAMETER WorkingDirectory
        Directory in which the output paths are built and probed.
    .OUTPUTS
        PSCustomObject with three properties:
          MdPath   - full path of the Markdown file to create
          Stem     - MdPath's file name without the '.md' extension
          MediaDir - full path of '<Stem>_media' in WorkingDirectory
    #>
    param(
        [Parameter(Mandatory)]
        [string]$BaseName,
        [Parameter(Mandatory)]
        [string]$WorkingDirectory
    )

    # Attempt 0 is the unsuffixed name; attempt N (N >= 1) appends '-N'.
    for ($attempt = 0; ; $attempt++) {
        if ($attempt -eq 0) {
            $candidateStem = $BaseName
        }
        else {
            $candidateStem = "${BaseName}-${attempt}"
        }

        $candidatePath = Join-Path -Path $WorkingDirectory -ChildPath "${candidateStem}.md"
        if (Test-Path -LiteralPath $candidatePath) {
            # Name already taken; move on to the next numeric suffix.
            continue
        }

        return [pscustomobject]@{
            MdPath   = $candidatePath
            Stem     = $candidateStem
            MediaDir = Join-Path -Path $WorkingDirectory -ChildPath "${candidateStem}_media"
        }
    }
}
# Main script body: convert each .docx in the current location with Pandoc.
# ProviderPath yields the underlying provider path of the current location.
$workingDirectory = (Get-Location).ProviderPath

# @(...) forces an array so .Count is reliable even for zero or one match.
$docxFiles = @(Get-ChildItem -LiteralPath $workingDirectory -Filter '*.docx' -File)
if ($docxFiles.Count -eq 0) {
    Write-Host 'No .docx files found in the current directory.'
    exit 0
}

foreach ($file in $docxFiles) {
    $baseName = [System.IO.Path]::GetFileNameWithoutExtension($file.Name)
    $out = Get-OutputMarkdownInfo -BaseName $baseName -WorkingDirectory $workingDirectory

    # Pandoc adjusts image references so they point at the --extract-media
    # directory. Passing the absolute MediaDir would therefore embed
    # machine-specific absolute paths in the Markdown. Pass only the folder
    # name instead: PowerShell starts native commands in the current
    # file-system location (which is $workingDirectory), so the media still
    # lands in <Stem>_media next to the .md file, and the links stay relative.
    $mediaDirName = Split-Path -Leaf $out.MediaDir
    $mediaArg = '--extract-media=' + $mediaDirName

    Write-Host ("Converting: {0} -> {1}" -f $file.Name, (Split-Path -Leaf $out.MdPath))

    # Arguments are passed as an array so paths with spaces stay intact.
    $pandocArgs = @(
        $file.FullName
        '-f', 'docx'
        '-t', 'markdown'
        $mediaArg
        '-o', $out.MdPath
    )
    & pandoc $pandocArgs

    # Native commands do not throw on failure; check the exit code explicitly.
    if ($LASTEXITCODE -ne 0) {
        throw "Pandoc failed for '$($file.FullName)' (exit code $LASTEXITCODE)."
    }
}

Write-Host 'Done.'
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment