<#
.SYNOPSIS
Migration validation: SharePoint 2013 -> SharePoint Online
.DESCRIPTION
Compares lists/libraries, item counts, file hashes, permissions (web/list/item where unique),
homepage links. Uses parallel background jobs to compute on-prem file hashes, then compares
with SPO files. Filters results by priority (default shows only Priority 1).
.PARAMETER MappingsPath
CSV file path with SourceURL,DestinationURL
.PARAMETER OutputFolder
Folder where reports/logs will be written
.PARAMETER ThrottleLimit
Maximum concurrent background jobs computing on-prem file hashes (default 4)
.PARAMETER ShowAll
If set, show all results (Priority 1 & 2). Otherwise show only Priority 1.
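.PARAMETER UseCSOM
If set, skip loading the on-prem snap-in and request CSOM-based on-prem connectivity (useful if not running on SP server).
NOTE: CSOM mode is a separate code path that requires Microsoft.SharePoint.Client assemblies. It is not implemented in this script yet: each mapping is reported as a Priority 1 "CSOM Mode Required" issue and skipped until CSOM connection code is added.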
#>
param(
    # CSV file with SourceURL,DestinationURL columns
    [string]$MappingsPath = ".\SiteMappings.csv",
    # Folder that receives the reports and the log file
    [string]$OutputFolder = ".\MigrationReports",
    # Maximum number of concurrently running hash jobs.
    # Must be at least 1: the throttle loop waits while
    # "running jobs -ge ThrottleLimit", which can never become false for 0 or a
    # negative value, so such input would hang the script forever.
    [ValidateRange(1, [int]::MaxValue)]
    [int]$ThrottleLimit = 4,
    # Show Priority 1 and 2 results instead of Priority 1 only
    [switch]$ShowAll,
    # Request the CSOM code path instead of the on-prem snap-in
    [switch]$UseCSOM
)
# --- Prepare the report folder and a timestamped log file
if (-not (Test-Path $OutputFolder)) {
    $null = New-Item -Path $OutputFolder -ItemType Directory
}
$ts = Get-Date -Format yyyyMMdd_HHmmss
$logFile = Join-Path -Path $OutputFolder -ChildPath "MigrationValidator_$ts.log"

# Writes one timestamped, tab-separated entry. Tee-Object appends it to
# $logFile and also passes it down the pipeline, so it shows up on the console
# when Log is called at script level.
function Log {
    param($m)
    $entry = "$(Get-Date -Format 'yyyy-MM-dd HH:mm:ss')`t$m"
    $entry | Tee-Object -FilePath $logFile -Append
}

Log "Starting Migration Validator (parallel + item-level perms) - $(Get-Date)"
# --- Modules check
# PnP.PowerShell is required for every SharePoint Online call; install it for
# the current user when it is not available, then fail hard if it cannot load.
if (-not (Get-Module -ListAvailable -Name PnP.PowerShell)) {
    Log "PnP.PowerShell missing. Installing for current user..."
    Install-Module -Name PnP.PowerShell -Force -Scope CurrentUser
}
Import-Module PnP.PowerShell -ErrorAction Stop

# Try to load on-prem snapin if not using CSOM
# $usingOnPremSnapin stays $false unless the snap-in really loaded.
$usingOnPremSnapin = $false
if (-not $UseCSOM) {
    try {
        Add-PSSnapin Microsoft.SharePoint.PowerShell -ErrorAction Stop
        $usingOnPremSnapin = $true
        Log "Loaded Microsoft.SharePoint.PowerShell snap-in."
    } catch {
        Log "On-prem snap-in not available. Use -UseCSOM to run with CSOM (remote) or run on SP server."
        $usingOnPremSnapin = $false
    }
} else {
    Log "CSOM mode requested (-UseCSOM). Ensure Microsoft.SharePoint.Client assemblies are available and modify CSOM credentials as needed."
}

# Ensure mappings CSV exists
if (-not (Test-Path $MappingsPath)) {
    Log "Mappings CSV not found at $MappingsPath"
    throw "Mappings CSV not found at $MappingsPath"
}

# @() forces an array even for a one-row CSV. Without it Import-Csv returns a
# bare PSCustomObject, which has no Count property in Windows PowerShell, and
# the log line below would print an empty number.
$mappings = @(Import-Csv $MappingsPath)
Log "Loaded $($mappings.Count) mappings."
# Helper functions
function Get-ByteHash([byte[]]$bytes) {
    # Returns the SHA-256 digest of $bytes as a 64-character lowercase hex string.
    # $null is hashed as an empty buffer (ComputeHash itself would throw on
    # $null), so a zero-length payload that arrives as $null still yields the
    # well-defined empty-input digest.
    if ($null -eq $bytes) { $bytes = [byte[]]@() }

    $sha = [System.Security.Cryptography.SHA256]::Create()
    try {
        $hash = $sha.ComputeHash($bytes)
    } finally {
        # SHA256 is IDisposable; release it even when ComputeHash throws.
        $sha.Dispose()
    }
    -join ($hash | ForEach-Object { $_.ToString('x2') })
}
function NormalizeUrl([string]$u) {
    # Canonical form used as a lookup key: surrounding whitespace removed,
    # everything from the first '?' dropped, trailing slashes stripped,
    # lower-cased. Empty input yields $null.
    if ([string]::IsNullOrEmpty($u)) { return $null }

    $clean = $u.Trim()
    $queryStart = $clean.IndexOf([char]'?')
    if ($queryStart -ge 0) {
        $clean = $clean.Substring(0, $queryStart)
    }
    return $clean.TrimEnd('/').ToLower()
}
# Collected findings; every entry is a PSObject that carries its own Priority
$issues = New-Object -TypeName 'System.Collections.Generic.List[PSObject]'
# Scratch directory (under %TEMP%, named after this run's timestamp) for the
# per-library hash CSVs
$tempFolder = Join-Path -Path $env:TEMP -ChildPath "SP_Mig_Validator_$ts"
$null = New-Item -Path $tempFolder -ItemType Directory -Force
# Function to enqueue a job that computes on-prem file hashes for a library
function Start-HashJob {
    <#
    .SYNOPSIS
    Starts a background job that writes one CSV row per file of an on-prem library.
    .DESCRIPTION
    The job opens the web with Get-SPWeb, locates the library, walks all folders
    (skipping any folder named "Forms") and writes the columns
    ServerRelativeUrl, Name, Length, Hash, ItemId, Error to OutCsv.
    Hash is a lowercase hex SHA-256 digest; it is empty and Error is filled when
    a file could not be read. If the library is not found, or the job fails as a
    whole, OutCsv contains a single plain-text message line instead of CSV rows.
    .PARAMETER MappingId
    Accepted for the caller's bookkeeping; not used by the job itself.
    .PARAMETER SrcWebUrl
    URL of the on-prem web passed to Get-SPWeb.
    .PARAMETER ListTitle
    Library title; used as a fallback lookup key and in the "not found" message.
    .PARAMETER ListRootFolderServerRelativeUrl
    Server-relative URL of the library root folder; the preferred lookup key.
    .PARAMETER OutCsv
    Path of the file the job writes.
    .OUTPUTS
    The job object returned by Start-Job.
    #>
    param(
        [string]$MappingId,
        [string]$SrcWebUrl,
        [string]$ListTitle,
        [string]$ListRootFolderServerRelativeUrl,
        [string]$OutCsv
    )

    # create script block for job - this runs in background process on same machine
    $sb = {
        param($srcWebUrl, $listTitle, $rootRelUrl, $outCsv)

        # Import SharePoint snapin inside job if available (job runs in separate process)
        try { Add-PSSnapin Microsoft.SharePoint.PowerShell -ErrorAction SilentlyContinue } catch {}

        $web = $null
        try {
            $web = Get-SPWeb $srcWebUrl -ErrorAction Stop

            # Locate the library: root-folder URL first, title only as a fallback.
            # Select-Object -First 1 guarantees a single list even when several
            # candidates match, so $target.RootFolder is never an array.
            $visibleLists = @($web.Lists | Where-Object { -not $_.Hidden })
            $wantedRoot = ([string]$rootRelUrl).TrimEnd('/')
            $target = $null
            if ($wantedRoot) {
                $target = $visibleLists |
                    Where-Object { $_.RootFolder.ServerRelativeUrl.TrimEnd('/') -eq $wantedRoot } |
                    Select-Object -First 1
            }
            if (-not $target) {
                $target = $visibleLists | Where-Object { $_.Title -eq $listTitle } | Select-Object -First 1
            }
            if (-not $target) {
                "`"$($listTitle)`" not found on web $srcWebUrl" | Out-File -FilePath $outCsv -Encoding UTF8
                return
            }

            # Iterative folder walk with an explicit stack; rows go into a List
            # so adding stays cheap (array += copies the whole array each time).
            $rows = [System.Collections.Generic.List[object]]::new()
            $pending = [System.Collections.Stack]::new()
            $pending.Push($target.RootFolder)
            while ($pending.Count -gt 0) {
                $folder = $pending.Pop()
                foreach ($sub in $folder.SubFolders) {
                    if ($sub.Name -eq "Forms") { continue }
                    $pending.Push($sub)
                }
                foreach ($f in $folder.Files) {
                    $hash = ""
                    $errorText = ""
                    $stream = $null
                    $sha = $null
                    try {
                        # Hash from a stream so large files are not loaded into memory at once.
                        $stream = $f.OpenBinaryStream()
                        $sha = [System.Security.Cryptography.SHA256]::Create()
                        $hash = -join ($sha.ComputeHash($stream) | ForEach-Object { $_.ToString('x2') })
                    } catch {
                        # Keep the file in the report; an empty Hash marks it as unreadable.
                        $hash = ""
                        $errorText = $_.Exception.Message
                    } finally {
                        if ($null -ne $sha) { $sha.Dispose() }
                        if ($null -ne $stream) { $stream.Dispose() }
                    }
                    # Every row carries the same property set (including Error):
                    # Export-Csv takes its columns from the first object only, so
                    # a property present on later rows alone would be dropped.
                    $rows.Add([PSCustomObject]@{
                        # Server-relative (leading '/'), matching the column name;
                        # SPFile.Url would be relative to the web instead.
                        ServerRelativeUrl = $f.ServerRelativeUrl
                        Name              = $f.Name
                        Length            = $f.Length
                        Hash              = $hash
                        ItemId            = ($f.ItemId -as [string])
                        Error             = $errorText
                    })
                }
            }

            if ($rows.Count -gt 0) {
                $rows | Export-Csv -Path $outCsv -NoTypeInformation -Encoding UTF8
            } else {
                # Empty library: write the header explicitly so the file always
                # exists and parses as a CSV with zero rows.
                '"ServerRelativeUrl","Name","Length","Hash","ItemId","Error"' | Out-File -FilePath $outCsv -Encoding UTF8
            }
        } catch {
            "ERROR: $($_.Exception.Message)" | Out-File -FilePath $outCsv -Encoding UTF8
        } finally {
            if ($null -ne $web) {
                try { $web.Dispose() } catch {}
            }
        }
    }

    # Start job
    $job = Start-Job -ScriptBlock $sb -ArgumentList $SrcWebUrl, $ListTitle, $ListRootFolderServerRelativeUrl, $OutCsv
    return $job
}
# iterate mappings
foreach ($map in $mappings) {
$srcUrl = $map.SourceURL.Trim()
$dstUrl = $map.DestinationURL.Trim()
$mapId = [Guid]::NewGuid().ToString()
Log "Processing mapping: $srcUrl => $dstUrl"
# Connect to SPO (interactive once per mapping)
try {
Connect-PnPOnline -Url $dstUrl -Interactive -ErrorAction Stop
Log "Connected to SPO: $dstUrl"
} catch {
Log "Failed to connect to SPO: $($_.Exception.Message)"
$issues.Add([PSCustomObject]@{ SourceSite=$srcUrl; DestSite=$dstUrl; Item="SPO Connect"; IssueType="SPO Connect Failed"; Priority=1; Details=$_.Exception.Message })
continue
}
# Connect to on-prem or use CSOM
if ($UseCSOM) {
Log "CSOM mode: not fully implemented in this script - you must provide CSOM connection code."
$issues.Add([PSCustomObject]@{ SourceSite=$srcUrl; DestSite=$dstUrl; Item="OnPrem Connect"; IssueType="CSOM Mode Required"; Priority=1; Details="CSOM mode requested; modify script to provide credentials and client assemblies." })
Disconnect-PnPOnline -ErrorAction SilentlyContinue
continue
} else {
if (-not $usingOnPremSnapin) {
Log "On-prem snapin unavailable. Use -UseCSOM or run script on SP2013 server."
$issues.Add([PSCustomObject]@{ SourceSite=$srcUrl; DestSite=$dstUrl; Item="OnPrem Connect"; IssueType="Snapin Missing"; Priority=1; Details="Run on SP server or run with -UseCSOM" })
Disconnect-PnPOnline -ErrorAction SilentlyContinue
continue
}
# open SPWeb
try {
$srcWeb = Get-SPWeb $srcUrl -ErrorAction Stop
Log "Connected to on-prem web: $srcUrl"
} catch {
Log "Failed to open on-prem web: $($_.Exception.Message)"
$issues.Add([PSCustomObject]@{ SourceSite=$srcUrl; DestSite=$dstUrl; Item="OnPrem Connect"; IssueType="Open SPWeb Failed"; Priority=1; Details=$_.Exception.Message })
Disconnect-PnPOnline -ErrorAction SilentlyContinue
continue
}
}
# enumerate lists (non-hidden)
$srcLists = @()
try {
$srcLists = $srcWeb.Lists | Where-Object { -not $_.Hidden }
Log "On-prem lists found: $($srcLists.Count)"
} catch {
Log "Failed enumerating on-prem lists: $($_.Exception.Message)"
}
# get SPO lists metadata
try {
$dstLists = Get-PnPList -Includes RootFolder,ItemCount -ErrorAction Stop | Where-Object { -not $_.Hidden }
Log "SPO lists found: $($dstLists.Count)"
} catch {
Log "Failed enumerating SPO lists: $($_.Exception.Message)"
$dstLists = @()
}
# Build a dictionary for destination lists by title for quick lookup
$dstListByTitle = @{}
foreach ($l in $dstLists) { $dstListByTitle[$l.Title] = $l }
# SERIAL checks: list existence and item counts & queue hash jobs for doc libraries
$jobs = @()
foreach ($srcList in $srcLists) {
$title = $srcList.Title
if (-not $dstListByTitle.ContainsKey($title)) {
$issues.Add([PSCustomObject]@{ SourceSite=$srcUrl; DestSite=$dstUrl; Item=$title; IssueType="Missing List/Library"; Priority=1; Details="List not found at destination" })
Log "Missing list/library: $title"
continue
}
$dstList = $dstListByTitle[$title]
# Compare counts (may be large)
$srcCount = $srcList.ItemCount
try {
$dstCount = (Get-PnPListItem -List $title -PageSize 5000 -Fields "ID" -ErrorAction Stop).Count
} catch {
$dstCount = $dstList.ItemCount
}
if ($srcCount -ne $dstCount) {
$issues.Add([PSCustomObject]@{ SourceSite=$srcUrl; DestSite=$dstUrl; Item=$title; IssueType="Item Count Mismatch"; Priority=2; Details="Source:$srcCount Dest:$dstCount" })
Log "Item count mismatch: $title (src:$srcCount dst:$dstCount)"
}
# If document library, start a background job to compute hashes
if ($srcList.BaseType -eq "DocumentLibrary") {
$rootRel = $srcList.RootFolder.ServerRelativeUrl
$outCsv = Join-Path $tempFolder ("hash_" + ([Guid]::NewGuid().ToString()) + ".csv")
# Wait for throttle
while (($jobs | Where-Object { $_.State -eq 'Running' }).Count -ge $ThrottleLimit) {
Start-Sleep -Seconds 2
}
$job = Start-HashJob -MappingId $mapId -SrcWebUrl $srcUrl -ListTitle $title -ListRootFolderServerRelativeUrl $rootRel -OutCsv $outCsv
$job | Add-Member -MemberType NoteProperty -Name SrcListTitle -Value $title
$job | Add-Member -MemberType NoteProperty -Name SrcListRoot -Value $rootRel
$job | Add-Member -MemberType NoteProperty -Name OutCsv -Value $outCsv
$jobs += $job
Log "Started hash job for library '$title' (job id $($job.Id))"
}
}
# Wait for all jobs to finish for this mapping
Log "Waiting for $($jobs.Count) background hash job(s) to complete..."
if ($jobs.Count -gt 0) {
Receive-Job -Job $jobs -Keep -ErrorAction SilentlyContinue | Out-Null # flush any immediate output
while (($jobs | Where-Object { $_.State -in @('Running','NotStarted') }).Count -gt 0) {
Start-Sleep -Seconds 2
}
Log "All hash jobs completed."
}
# Now: For each completed job (i.e., each library), compare to SPO files and check item-level perms for unique items
foreach ($job in $jobs) {
try {
$job | Receive-Job -ErrorAction SilentlyContinue | Out-Null
} catch {}
$outCsv = $job.OutCsv
$listTitle = $job.SrcListTitle
Log "Processing results for library $listTitle (csv: $outCsv)"
if (-not (Test-Path $outCsv)) {
Log "Hash CSV missing for $listTitle. Marking as error."
$issues.Add([PSCustomObject]@{ SourceSite=$srcUrl; DestSite=$dstUrl; Item=$listTitle; IssueType="HashJobFailed"; Priority=1; Details="Hash CSV missing: $outCsv" })
continue
}
# Read source file hashes
$srcFileHashes = @{}
try {
$rows = Import-Csv $outCsv
foreach ($r in $rows) {
if ($r.ServerRelativeUrl -and $r.Hash) {
$srcFileHashes[NormalizeUrl($r.ServerRelativeUrl)] = @{ Hash=$r.Hash; Name=$r.Name; Length=$r.Length; ItemId=$r.ItemId }
} else {
# rows with errors
if ($r.ServerRelativeUrl) {
$issues.Add([PSCustomObject]@{ SourceSite=$srcUrl; DestSite=$dstUrl; Item=$r.ServerRelativeUrl; IssueType="Source File Read Error"; Priority=1; Details=($r.Error -or "No hash") })
}
}
}
} catch {
Log "Failed reading CSV $outCsv: $($_.Exception.Message)"
continue
}
# Fetch destination items for this library
try {
$dstItems = Get-PnPListItem -List $listTitle -PageSize 5000 -Fields "FileRef","FileLeafRef","FileDirRef","ID" -ErrorAction Stop
$dstMap = @{}
foreach ($it in $dstItems) {
$fr = $it.FieldValues.FileRef
if ($fr) { $dstMap[NormalizeUrl($fr)] = $it }
}
} catch {
Log "Failed fetching SPO items for $listTitle: $($_.Exception.Message)"
$issues.Add([PSCustomObject]@{ SourceSite=$srcUrl; DestSite=$dstUrl; Item=$listTitle; IssueType="SPO List Items Fetch Failed"; Priority=1; Details=$_.Exception.Message })
continue
}
# Compare files
foreach ($kv in $srcFileHashes.GetEnumerator()) {
$srcRel = $kv.Key
$srcMeta = $kv.Value
if (-not $dstMap.ContainsKey($srcRel)) {
$issues.Add([PSCustomObject]@{ SourceSite=$srcUrl; DestSite=$dstUrl; Item=$srcRel; IssueType="Missing File"; Priority=1; Details="File not present in SPO" })
continue
}
# Download SPO file bytes and compute hash
try {
$dstFieldItem = $dstMap[$srcRel]
$dstFileBytes = Get-PnPFile -Url $dstFieldItem.FieldValues.FileRef -AsByteArray -ErrorAction Stop
$dstHash = Get-ByteHash -bytes $dstFileBytes
if ($dstHash -ne $srcMeta.Hash) {
$issues.Add([PSCustomObject]@{ SourceSite=$srcUrl; DestSite=$dstUrl; Item=$srcRel; IssueType="File Hash Mismatch"; Priority=1; Details="SourceHash:$($srcMeta.Hash) DestHash:$dstHash" })
}
} catch {
$issues.Add([PSCustomObject]@{ SourceSite=$srcUrl; DestSite=$dstUrl; Item=$srcRel; IssueType="SPO File Download Error"; Priority=1; Details=$_.Exception.Message })
}
# ITEM-LEVEL PERMISSIONS: only when source item had unique permissions OR item-level perms differ in SPO
if ($srcMeta.ItemId -and $srcMeta.ItemId -ne "") {
try {
# get on-prem item object and check HasUniqueRoleAssignments
$srcListObj = $srcWeb.Lists[$listTitle]
$srcItem = $srcListObj.GetItemById([int]$srcMeta.ItemId)
$srcItem.Context.Load($srcItem)
$srcItem.Context.ExecuteQuery() # NOTE: this is CSOM style; in snapin mode we can access property directly
} catch {
# If we can't access via CSOM call, fallback to checking via SPListItem via snapin
try {
$srcItem = $srcListObj.Items.GetItemById([int]$srcMeta.ItemId)
} catch {
$srcItem = $null
}
}
$compareItemPerms = $false
try {
if ($srcItem -ne $null) {
# SPListItem has HasUniqueRoleAssignments property in server-side object
if ($srcItem.HasUniqueRoleAssignments) { $compareItemPerms = $true }
}
} catch {
# if unknown, skip
}
if ($compareItemPerms) {
# Get source role assignments
$srcRAs = @()
try {
foreach ($ra in $srcItem.RoleAssignments) {
$member = $ra.Member.LoginName
$roles = ($ra.RoleDefinitionBindings | ForEach-Object { $_.Name }) -join ";"
$srcRAs += "$member -> $roles"
}
} catch {
Log "Error reading on-prem item perms for $srcRel: $($_.Exception.Message)"
}
# Get destination item's role assignments
try {
# locate dst list item id
$dstIt = $dstMap[$srcRel]
$dstItemId = $dstIt.Id
# Get role assignments for list item via PnP
$dstRAs = @()
$dstRoles = Get-PnPRoleAssignment -List $listTitle -ListItem $dstItemId -ErrorAction Stop
foreach ($dra in $dstRoles) {
$m = $dra.Member.LoginName
$r = ($dra.RoleDefinitionBindings | ForEach-Object { $_.Name }) -join ";"_