Skip to content

Instantly share code, notes, and snippets.

@pleabargain
Created April 11, 2026 03:39
Show Gist options
  • Select an option

  • Save pleabargain/1b22200b41026a9c67298f9a08dddf96 to your computer and use it in GitHub Desktop.

Select an option

Save pleabargain/1b22200b41026a9c67298f9a08dddf96 to your computer and use it in GitHub Desktop.
Converts all .docx files in a directory to Markdown. Written for Windows 11; requires PowerShell (pwsh) and Pandoc.
#Requires -Version 5.1
<#
.SYNOPSIS
Convert every *.docx in the current directory to Markdown using Pandoc.
.DESCRIPTION
- Non-recursive: only the working directory (Get-Location), not subfolders.
- Output: <BaseName>.md if that file does not exist; otherwise <BaseName>-1.md,
<BaseName>-2.md, ... until an unused name is found. Existing .md files are
never overwritten.
- Embedded images and media are extracted to <Stem>_media next to the .md file,
where <Stem> is the markdown file name without the .md extension.
- Pandoc must be discoverable on PATH. If it is not, an error is written and
the script exits with code 1.
.EXAMPLE
Set-Location 'C:\path\to\folder'
.\Convert-DocxToMarkdown.ps1
#>
[CmdletBinding()]
param()
# Make cmdlet errors terminating for the rest of the script.
$ErrorActionPreference = 'Stop'
if (-not (Get-Command pandoc -ErrorAction SilentlyContinue)) {
    # Under $ErrorActionPreference = 'Stop' a plain Write-Error is itself a
    # terminating error, which would make the 'exit 1' below unreachable.
    # Override the action for this one call so the message is reported and the
    # script then exits with the intended code.
    Write-Error 'Pandoc was not found on PATH. Install Pandoc from https://pandoc.org/installing.html and reopen your terminal.' -ErrorAction Continue
    exit 1
}
function Get-OutputMarkdownInfo {
    <#
    .SYNOPSIS
    Choose an unused Markdown output name for a document and derive its media folder.
    .DESCRIPTION
    Tries <BaseName>.md first, then <BaseName>-1.md, <BaseName>-2.md, ... and stops
    at the first path for which Test-Path reports that nothing exists.
    Nothing is created on disk; only paths are computed.
    .PARAMETER BaseName
    File name of the source document without its extension.
    .PARAMETER WorkingDirectory
    Directory in which the candidate .md paths are built and checked.
    .OUTPUTS
    PSCustomObject with three properties:
    MdPath   - full path of the chosen .md file
    Stem     - chosen file name without the .md extension
    MediaDir - <WorkingDirectory>\<Stem>_media
    #>
    param(
        [Parameter(Mandatory)]
        [string]$BaseName,
        [Parameter(Mandatory)]
        [string]$WorkingDirectory
    )
    # Attempt 0 is the plain base name; attempts 1, 2, ... append "-<attempt>".
    for ($attempt = 0; ; $attempt++) {
        $candidateStem = if ($attempt -eq 0) { $BaseName } else { $BaseName + '-' + $attempt }
        $candidatePath = Join-Path $WorkingDirectory ($candidateStem + '.md')
        if (Test-Path -LiteralPath $candidatePath) {
            # Name already taken; move on to the next suffix.
            continue
        }
        return [pscustomobject]@{
            MdPath = $candidatePath
            Stem = $candidateStem
            MediaDir = Join-Path $WorkingDirectory ($candidateStem + '_media')
        }
    }
}
# Main flow: convert each .docx that sits directly in the current location.
$workingDirectory = (Get-Location).ProviderPath
# @(...) guarantees an array, so .Count is valid for zero or one match as well.
$docxFiles = @(Get-ChildItem -LiteralPath $workingDirectory -Filter '*.docx' -File)
if ($docxFiles.Count -eq 0) {
    Write-Host 'No .docx files found in the current directory.'
    exit 0
}
foreach ($file in $docxFiles) {
    $baseName = [System.IO.Path]::GetFileNameWithoutExtension($file.Name)
    $out = Get-OutputMarkdownInfo -BaseName $baseName -WorkingDirectory $workingDirectory
    # Pandoc rewrites image references by prefixing the --extract-media value
    # exactly as given. Passing the absolute directory would bake
    # machine-specific absolute paths into the Markdown, so pass only the
    # folder name. It resolves against pandoc's working directory, which is
    # the current location ($workingDirectory), so the files land in the same
    # place while the links stay relative to the .md file beside them.
    $mediaFolderName = [System.IO.Path]::GetFileName($out.MediaDir)
    $mediaArg = '--extract-media=' + $mediaFolderName
    Write-Host ("Converting: {0} -> {1}" -f $file.Name, (Split-Path -Leaf $out.MdPath))
    & pandoc @(
        $file.FullName
        '-f', 'docx'
        '-t', 'markdown'
        $mediaArg
        '-o', $out.MdPath
    )
    # Native commands do not raise PowerShell errors on failure; check the exit
    # code and abort the whole run on the first failed conversion.
    if ($LASTEXITCODE -ne 0) {
        throw "Pandoc failed for '$($file.FullName)' (exit code $LASTEXITCODE)."
    }
}
Write-Host 'Done.'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment