Sunday, 10 August 2025

Migration validation: SharePoint 2013 -> SharePoint Online

 

  • Adds parallel processing (throttled background jobs) for computing on-prem file hashes (the slowest I/O work), then compares those results against SPO files.

  • Adds item-level permission diffs but only for items that have unique permissions (this limits the performance hit).

  • Adds a high-priority filter so the default output surfaces only critical issues (Priority 1). Use -ShowAll to get everything.

  • Keeps previous features: list existence, item counts, file presence, file-hash comparison, homepage link checks, and web/list-level permission checks.

Compatibility & notes (important)

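  • The script is written to run on a machine that can access your SharePoint 2013 farm (typically a SharePoint server with the Microsoft.SharePoint.PowerShell snap-in). If that snap-in isn't available, the intended alternative is -UseCSOM, but CSOM mode is only a stub in this version: the script logs a notice, records a Priority 1 issue for each mapping, and skips that mapping until you add the CSOM connection code and client assemblies yourself.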

  • Background jobs compute on-prem file hashes only (no SPO remote connections inside jobs). After jobs finish the main process compares to SPO files. This avoids needing an interactive SPO login inside each job.

  • The script uses PnP.PowerShell for SPO actions. The main session calls Connect-PnPOnline -Interactive once per site mapping, so an interactive sign-in may be requested for each destination site (unless the sign-in is reused); background jobs never connect to SPO.

  • Item-level permission comparison uses on-prem RoleAssignments and PnP role assignments on SPO. Due to variations in identity mapping, some permission comparisons may need tuning to normalize accounts (e.g., AD account vs. Azure AD user mapping).


.\SPMigrationValidator_Parallel_ItemPerms.ps1 `
    -MappingsPath .\SiteMappings.csv `
    -OutputFolder .\Reports `
    -ThrottleLimit 4



<#
.SYNOPSIS
    Migration validation: SharePoint 2013 -> SharePoint Online
.DESCRIPTION
    Compares lists/libraries, item counts, file hashes, permissions (web/list/item where unique),
    homepage links. Uses parallel background jobs to compute on-prem file hashes, then compares
    with SPO files. Filters results by priority (default shows only Priority 1).
.PARAMETER MappingsPath
    CSV file path with SourceURL,DestinationURL
.PARAMETER OutputFolder
    Folder where reports/logs will be written
.PARAMETER ThrottleLimit
    Maximum concurrent background jobs computing on-prem file hashes (default 4)
.PARAMETER ShowAll
    If set, show all results (Priority 1 & 2). Otherwise show only Priority 1.
.PARAMETER UseCSOM
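    If set, request CSOM-based on-prem connectivity (useful if not running on SP server).
    NOTE: CSOM mode is a separate code path that is not implemented yet: each mapping is reported
    as a Priority 1 issue and skipped until CSOM connection code (Microsoft.SharePoint.Client
    assemblies and credentials) is added.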
#>

param(
    # Path to the CSV file that lists the site pairs (columns: SourceURL, DestinationURL).
    [string]$MappingsPath = ".\SiteMappings.csv",

    # Folder that receives the log file; it is created when it does not exist.
    [string]$OutputFolder = ".\MigrationReports",

    # Maximum number of hash jobs allowed to run at the same time.
    # Must be at least 1: the throttle loop waits while the number of running jobs is
    # greater than or equal to this value, so 0 or a negative number would wait forever.
    [ValidateRange(1, [int]::MaxValue)]
    [int]$ThrottleLimit = 4,

    # Per the help text: show all results (Priority 1 & 2) instead of only Priority 1.
    [switch]$ShowAll,

    # Request the CSOM code path instead of the on-prem SharePoint snap-in.
    [switch]$UseCSOM
)

# --- Prepare the output folder and derive the timestamped log file path
$outputFolderExists = Test-Path $OutputFolder
if (-not $outputFolderExists) {
    # Discard the DirectoryInfo object so it does not show up in the script output
    $null = New-Item -Path $OutputFolder -ItemType Directory
}
# $ts is reused further down to name the temp folder for this run
$ts = Get-Date -Format 'yyyyMMdd_HHmmss'
$logFile = Join-Path -Path $OutputFolder -ChildPath "MigrationValidator_$ts.log"
# Prefixes the message with a timestamp and a tab, appends the line to $logFile
# and also passes the same line on to the output stream (Tee-Object).
function Log {
    param($m)

    $stamp = Get-Date -Format 'yyyy-MM-dd HH:mm:ss'
    "$stamp`t$m" | Tee-Object -FilePath $logFile -Append
}

Log "Starting Migration Validator (parallel + item-level perms) - $(Get-Date)"

# --- Modules check: install PnP.PowerShell for the current user when no copy is discoverable,
#     then import it (a failed import stops the script).
$pnpModule = Get-Module -ListAvailable -Name PnP.PowerShell
if (-not $pnpModule) {
    Log "PnP.PowerShell missing. Installing for current user..."
    Install-Module -Name PnP.PowerShell -Force -Scope CurrentUser
}
Import-Module PnP.PowerShell -ErrorAction Stop

# Load the on-prem SharePoint snap-in unless CSOM mode was requested.
# $usingOnPremSnapin is $true only after the snap-in was added successfully.
# NOTE(review): confirm that the installed PnP.PowerShell version runs in the same
# PowerShell edition that hosts the SharePoint snap-in.
$usingOnPremSnapin = $false
if ($UseCSOM) {
    Log "CSOM mode requested (-UseCSOM). Ensure Microsoft.SharePoint.Client assemblies are available and modify CSOM credentials as needed."
}
else {
    try {
        Add-PSSnapin Microsoft.SharePoint.PowerShell -ErrorAction Stop
        $usingOnPremSnapin = $true
        Log "Loaded Microsoft.SharePoint.PowerShell snap-in."
    }
    catch {
        Log "On-prem snap-in not available. Use -UseCSOM to run with CSOM (remote) or run on SP server."
        $usingOnPremSnapin = $false
    }
}

# Ensure mappings CSV exists; without it there is nothing to validate, so stop the script.
if (-not (Test-Path $MappingsPath)) {
    Log "Mappings CSV not found at $MappingsPath"
    throw "Mappings CSV not found at $MappingsPath"
}
# Wrap the import in @() so the result is always an array. A CSV with a single data row
# otherwise comes back as one lone object, and its .Count can be empty on Windows PowerShell,
# which made the log line below read "Loaded  mappings.".
$mappings = @(Import-Csv $MappingsPath)
Log "Loaded $($mappings.Count) mappings."

# Helper functions
# Returns the SHA-256 digest of $bytes as a lowercase hexadecimal string (64 characters).
function Get-ByteHash([byte[]]$bytes) {
    $sha = [System.Security.Cryptography.SHA256]::Create()
    try {
        $hash = $sha.ComputeHash($bytes)
    }
    finally {
        # The hash object holds native resources; release it even if hashing throws.
        $sha.Dispose()
    }
    -join ($hash | ForEach-Object { $_.ToString('x2') })
}
# Produces a comparison key for a URL: trims whitespace, drops the query string,
# removes trailing slashes and lower-cases the result.
# Returns $null for an empty or missing value.
function NormalizeUrl([string]$u) {
    if (-not $u) { return $null }
    $u = $u.Trim()
    $u = $u.Split('?')[0] # drop query
    $u = $u.TrimEnd('/')
    # Invariant lower-casing keeps the key identical regardless of the session culture
    # (culture-sensitive ToLower() maps "I" differently under e.g. tr-TR, so "FILE" and
    # "file" would no longer produce the same key).
    return $u.ToLowerInvariant()
}

# Collects the issue records found during validation (each record carries a Priority value)
$issues = New-Object 'System.Collections.Generic.List[PSObject]'

# Scratch directory for the per-library hash CSV files, named after this run's timestamp
$tempFolder = Join-Path -Path $env:TEMP -ChildPath "SP_Mig_Validator_$ts"
$null = New-Item -Path $tempFolder -ItemType Directory -Force

# Function to enqueue a job that computes on-prem file hashes for a library
function Start-HashJob {
    <#
    .SYNOPSIS
        Starts a background job that writes SHA-256 hashes of one on-prem library's files to a CSV.
    .DESCRIPTION
        The job opens the source web with Get-SPWeb, locates the library (by root folder URL first,
        then by title), walks every folder except "Forms" and exports one row per file with the
        columns ServerRelativeUrl, Name, Length, Hash, ItemId and Error.
        A file that cannot be read gets an empty Hash and the exception text in Error.
        If the library is not found, or the job fails as a whole, a single text line is written
        to the output path instead of CSV rows.
    .PARAMETER MappingId
        Identifier of the mapping being processed. Kept for caller compatibility; the job does not use it.
    .PARAMETER SrcWebUrl
        URL of the on-prem web handed to Get-SPWeb inside the job.
    .PARAMETER ListTitle
        Title of the library; fallback lookup key and part of the "not found" message.
    .PARAMETER ListRootFolderServerRelativeUrl
        Server-relative URL of the library's root folder (compared ignoring trailing slashes).
    .PARAMETER OutCsv
        Path of the file the job writes.
    .OUTPUTS
        The job object returned by Start-Job.
    #>
    param(
        [string]$MappingId,
        [string]$SrcWebUrl,
        [string]$ListTitle,
        [string]$ListRootFolderServerRelativeUrl,
        [string]$OutCsv
    )

    # create script block for job - this runs in background process on same machine
    $sb = {
        param($srcWebUrl, $listTitle, $rootRelUrl, $outCsv)

        # Import SharePoint snapin inside job if available (job runs in separate process)
        try { Add-PSSnapin Microsoft.SharePoint.PowerShell -ErrorAction SilentlyContinue } catch {}

        # Hashes a stream with SHA-256 and returns lowercase hex; the hash object is always disposed.
        function Get-StreamHashLocal([System.IO.Stream]$stream) {
            $sha = [System.Security.Cryptography.SHA256]::Create()
            try {
                $digest = $sha.ComputeHash($stream)
            } finally {
                $sha.Dispose()
            }
            -join ($digest | ForEach-Object { $_.ToString('x2') })
        }

        # Emits every file below $folder, descending into sub folders but skipping "Forms".
        # Files are streamed to the pipeline instead of being accumulated with "+=".
        function Get-FilesRecursive($folder) {
            foreach ($file in $folder.Files) { $file }
            foreach ($sub in $folder.SubFolders) {
                if ($sub.Name -eq "Forms") { continue }
                Get-FilesRecursive $sub
            }
        }

        $web = $null
        try {
            # open web
            $web = Get-SPWeb $srcWebUrl -ErrorAction Stop

            # Find the library: an exact root-folder match wins, the title is only a fallback.
            # Taking the first match guarantees a single list object even if both criteria
            # would select different lists.
            $visibleLists = @($web.Lists | Where-Object { -not $_.Hidden })
            $wantedRoot = ([string]$rootRelUrl).TrimEnd('/')
            $target = $visibleLists |
                Where-Object { $_.RootFolder.ServerRelativeUrl.TrimEnd('/') -eq $wantedRoot } |
                Select-Object -First 1
            if (-not $target) {
                $target = $visibleLists | Where-Object { $_.Title -eq $listTitle } | Select-Object -First 1
            }
            if (-not $target) {
                "`"$($listTitle)`" not found on web $srcWebUrl" | Out-File -FilePath $outCsv -Encoding UTF8
                return
            }

            $rows = New-Object 'System.Collections.Generic.List[object]'
            foreach ($f in (Get-FilesRecursive $target.RootFolder)) {
                # NOTE(review): SPFile may not expose an ItemId property; fall back to the
                # ID of the associated list item so the column is not silently empty.
                $itemId = ""
                try {
                    $rawId = $f.ItemId
                    if ($null -eq $rawId -and $null -ne $f.Item) { $rawId = $f.Item.ID }
                    if ($null -ne $rawId) { $itemId = [string]$rawId }
                } catch { }

                $hash = ""
                $errText = ""
                $stream = $null
                try {
                    # Hash from a stream so large files are not loaded into memory as one array.
                    $stream = $f.OpenBinaryStream()
                    $hash = Get-StreamHashLocal $stream
                } catch {
                    # Keep the row with an empty hash; the reason goes into the Error column.
                    $errText = $_.Exception.Message
                } finally {
                    if ($null -ne $stream) { $stream.Dispose() }
                }

                # Every row has the same six properties: Export-Csv takes its header from the
                # first object, so an Error column present only on failed rows would be dropped.
                # ServerRelativeUrl (not the web-relative Url) is written because the column is
                # later matched against the server-relative FileRef of the destination items.
                $rows.Add([PSCustomObject]@{
                    ServerRelativeUrl = $f.ServerRelativeUrl
                    Name              = $f.Name
                    Length            = $f.Length
                    Hash              = $hash
                    ItemId            = $itemId
                    Error             = $errText
                })
            }
            $rows | Export-Csv -Path $outCsv -NoTypeInformation -Encoding UTF8
        } catch {
            "ERROR: $($_.Exception.Message)" | Out-File -FilePath $outCsv -Encoding UTF8
        } finally {
            if ($null -ne $web) {
                try { $web.Dispose() } catch {}
            }
        }
    }

    # Start job
    $job = Start-Job -ScriptBlock $sb -ArgumentList $SrcWebUrl, $ListTitle, $ListRootFolderServerRelativeUrl, $OutCsv
    return $job
}

# iterate mappings
foreach ($map in $mappings) {
    $srcUrl = $map.SourceURL.Trim()
    $dstUrl = $map.DestinationURL.Trim()
    $mapId = [Guid]::NewGuid().ToString()
    Log "Processing mapping: $srcUrl => $dstUrl"

    # Connect to SPO (interactive once per mapping)
    try {
        Connect-PnPOnline -Url $dstUrl -Interactive -ErrorAction Stop
        Log "Connected to SPO: $dstUrl"
    } catch {
        Log "Failed to connect to SPO: $($_.Exception.Message)"
        $issues.Add([PSCustomObject]@{ SourceSite=$srcUrl; DestSite=$dstUrl; Item="SPO Connect"; IssueType="SPO Connect Failed"; Priority=1; Details=$_.Exception.Message })
        continue
    }

    # Connect to on-prem or use CSOM
    if ($UseCSOM) {
        Log "CSOM mode: not fully implemented in this script - you must provide CSOM connection code."
        $issues.Add([PSCustomObject]@{ SourceSite=$srcUrl; DestSite=$dstUrl; Item="OnPrem Connect"; IssueType="CSOM Mode Required"; Priority=1; Details="CSOM mode requested; modify script to provide credentials and client assemblies." })
        Disconnect-PnPOnline -ErrorAction SilentlyContinue
        continue
    } else {
        if (-not $usingOnPremSnapin) {
            Log "On-prem snapin unavailable. Use -UseCSOM or run script on SP2013 server."
            $issues.Add([PSCustomObject]@{ SourceSite=$srcUrl; DestSite=$dstUrl; Item="OnPrem Connect"; IssueType="Snapin Missing"; Priority=1; Details="Run on SP server or run with -UseCSOM" })
            Disconnect-PnPOnline -ErrorAction SilentlyContinue
            continue
        }
        # open SPWeb
        try {
            $srcWeb = Get-SPWeb $srcUrl -ErrorAction Stop
            Log "Connected to on-prem web: $srcUrl"
        } catch {
            Log "Failed to open on-prem web: $($_.Exception.Message)"
            $issues.Add([PSCustomObject]@{ SourceSite=$srcUrl; DestSite=$dstUrl; Item="OnPrem Connect"; IssueType="Open SPWeb Failed"; Priority=1; Details=$_.Exception.Message })
            Disconnect-PnPOnline -ErrorAction SilentlyContinue
            continue
        }
    }

    # enumerate lists (non-hidden)
    $srcLists = @()
    try {
        $srcLists = $srcWeb.Lists | Where-Object { -not $_.Hidden } 
        Log "On-prem lists found: $($srcLists.Count)"
    } catch {
        Log "Failed enumerating on-prem lists: $($_.Exception.Message)"
    }

    # get SPO lists metadata
    try {
        $dstLists = Get-PnPList -Includes RootFolder,ItemCount -ErrorAction Stop | Where-Object { -not $_.Hidden }
        Log "SPO lists found: $($dstLists.Count)"
    } catch {
        Log "Failed enumerating SPO lists: $($_.Exception.Message)"
        $dstLists = @()
    }

    # Build a dictionary for destination lists by title for quick lookup
    $dstListByTitle = @{}
    foreach ($l in $dstLists) { $dstListByTitle[$l.Title] = $l }

    # SERIAL checks: list existence and item counts & queue hash jobs for doc libraries
    $jobs = @()
    foreach ($srcList in $srcLists) {
        $title = $srcList.Title
        if (-not $dstListByTitle.ContainsKey($title)) {
            $issues.Add([PSCustomObject]@{ SourceSite=$srcUrl; DestSite=$dstUrl; Item=$title; IssueType="Missing List/Library"; Priority=1; Details="List not found at destination" })
            Log "Missing list/library: $title"
            continue
        }
        $dstList = $dstListByTitle[$title]

        # Compare counts (may be large)
        $srcCount = $srcList.ItemCount
        try {
            $dstCount = (Get-PnPListItem -List $title -PageSize 5000 -Fields "ID" -ErrorAction Stop).Count
        } catch {
            $dstCount = $dstList.ItemCount
        }
        if ($srcCount -ne $dstCount) {
            $issues.Add([PSCustomObject]@{ SourceSite=$srcUrl; DestSite=$dstUrl; Item=$title; IssueType="Item Count Mismatch"; Priority=2; Details="Source:$srcCount Dest:$dstCount" })
            Log "Item count mismatch: $title (src:$srcCount dst:$dstCount)"
        }

        # If document library, start a background job to compute hashes
        if ($srcList.BaseType -eq "DocumentLibrary") {
            $rootRel = $srcList.RootFolder.ServerRelativeUrl
            $outCsv = Join-Path $tempFolder ("hash_" + ([Guid]::NewGuid().ToString()) + ".csv")
            # Wait for throttle
            while (($jobs | Where-Object { $_.State -eq 'Running' }).Count -ge $ThrottleLimit) {
                Start-Sleep -Seconds 2
            }
            $job = Start-HashJob -MappingId $mapId -SrcWebUrl $srcUrl -ListTitle $title -ListRootFolderServerRelativeUrl $rootRel -OutCsv $outCsv
            $job | Add-Member -MemberType NoteProperty -Name SrcListTitle -Value $title
            $job | Add-Member -MemberType NoteProperty -Name SrcListRoot -Value $rootRel
            $job | Add-Member -MemberType NoteProperty -Name OutCsv -Value $outCsv
            $jobs += $job
            Log "Started hash job for library '$title' (job id $($job.Id))"
        }
    }

    # Wait for all jobs to finish for this mapping
    Log "Waiting for $($jobs.Count) background hash job(s) to complete..."
    if ($jobs.Count -gt 0) {
        Receive-Job -Job $jobs -Keep -ErrorAction SilentlyContinue | Out-Null # flush any immediate output
        while (($jobs | Where-Object { $_.State -in @('Running','NotStarted') }).Count -gt 0) {
            Start-Sleep -Seconds 2
        }
        Log "All hash jobs completed."
    }

    # Now: For each completed job (i.e., each library), compare to SPO files and check item-level perms for unique items
    foreach ($job in $jobs) {
        try {
            $job | Receive-Job -ErrorAction SilentlyContinue | Out-Null
        } catch {}
        $outCsv = $job.OutCsv
        $listTitle = $job.SrcListTitle
        Log "Processing results for library $listTitle (csv: $outCsv)"

        if (-not (Test-Path $outCsv)) {
            Log "Hash CSV missing for $listTitle. Marking as error."
            $issues.Add([PSCustomObject]@{ SourceSite=$srcUrl; DestSite=$dstUrl; Item=$listTitle; IssueType="HashJobFailed"; Priority=1; Details="Hash CSV missing: $outCsv" })
            continue
        }

        # Read source file hashes
        $srcFileHashes = @{}
        try {
            $rows = Import-Csv $outCsv
            foreach ($r in $rows) {
                if ($r.ServerRelativeUrl -and $r.Hash) {
                    $srcFileHashes[NormalizeUrl($r.ServerRelativeUrl)] = @{ Hash=$r.Hash; Name=$r.Name; Length=$r.Length; ItemId=$r.ItemId }
                } else {
                    # rows with errors
                    if ($r.ServerRelativeUrl) {
                        $issues.Add([PSCustomObject]@{ SourceSite=$srcUrl; DestSite=$dstUrl; Item=$r.ServerRelativeUrl; IssueType="Source File Read Error"; Priority=1; Details=($r.Error -or "No hash") })
                    }
                }
            }
        } catch {
            Log "Failed reading CSV $outCsv: $($_.Exception.Message)"
            continue
        }

        # Fetch destination items for this library
        try {
            $dstItems = Get-PnPListItem -List $listTitle -PageSize 5000 -Fields "FileRef","FileLeafRef","FileDirRef","ID" -ErrorAction Stop
            $dstMap = @{}
            foreach ($it in $dstItems) {
                $fr = $it.FieldValues.FileRef
                if ($fr) { $dstMap[NormalizeUrl($fr)] = $it }
            }
        } catch {
            Log "Failed fetching SPO items for $listTitle: $($_.Exception.Message)"
            $issues.Add([PSCustomObject]@{ SourceSite=$srcUrl; DestSite=$dstUrl; Item=$listTitle; IssueType="SPO List Items Fetch Failed"; Priority=1; Details=$_.Exception.Message })
            continue
        }

        # Compare files
        foreach ($kv in $srcFileHashes.GetEnumerator()) {
            $srcRel = $kv.Key
            $srcMeta = $kv.Value
            if (-not $dstMap.ContainsKey($srcRel)) {
                $issues.Add([PSCustomObject]@{ SourceSite=$srcUrl; DestSite=$dstUrl; Item=$srcRel; IssueType="Missing File"; Priority=1; Details="File not present in SPO" })
                continue
            }

            # Download SPO file bytes and compute hash
            try {
                $dstFieldItem = $dstMap[$srcRel]
                $dstFileBytes = Get-PnPFile -Url $dstFieldItem.FieldValues.FileRef -AsByteArray -ErrorAction Stop
                $dstHash = Get-ByteHash -bytes $dstFileBytes
                if ($dstHash -ne $srcMeta.Hash) {
                    $issues.Add([PSCustomObject]@{ SourceSite=$srcUrl; DestSite=$dstUrl; Item=$srcRel; IssueType="File Hash Mismatch"; Priority=1; Details="SourceHash:$($srcMeta.Hash) DestHash:$dstHash" })
                }
            } catch {
                $issues.Add([PSCustomObject]@{ SourceSite=$srcUrl; DestSite=$dstUrl; Item=$srcRel; IssueType="SPO File Download Error"; Priority=1; Details=$_.Exception.Message })
            }

            # ITEM-LEVEL PERMISSIONS: only when source item had unique permissions OR item-level perms differ in SPO
            if ($srcMeta.ItemId -and $srcMeta.ItemId -ne "") {
                try {
                    # get on-prem item object and check HasUniqueRoleAssignments
                    $srcListObj = $srcWeb.Lists[$listTitle]
                    $srcItem = $srcListObj.GetItemById([int]$srcMeta.ItemId)
                    $srcItem.Context.Load($srcItem)
                    $srcItem.Context.ExecuteQuery() # NOTE: this is CSOM style; in snapin mode we can access property directly
                } catch {
                    # If we can't access via CSOM call, fallback to checking via SPListItem via snapin
                    try {
                        $srcItem = $srcListObj.Items.GetItemById([int]$srcMeta.ItemId)
                    } catch {
                        $srcItem = $null
                    }
                }

                $compareItemPerms = $false
                try {
                    if ($srcItem -ne $null) {
                        # SPListItem has HasUniqueRoleAssignments property in server-side object
                        if ($srcItem.HasUniqueRoleAssignments) { $compareItemPerms = $true }
                    }
                } catch {
                    # if unknown, skip
                }

                if ($compareItemPerms) {
                    # Get source role assignments
                    $srcRAs = @()
                    try {
                        foreach ($ra in $srcItem.RoleAssignments) {
                            $member = $ra.Member.LoginName
                            $roles = ($ra.RoleDefinitionBindings | ForEach-Object { $_.Name }) -join ";"
                            $srcRAs += "$member -> $roles"
                        }
                    } catch {
                        Log "Error reading on-prem item perms for $srcRel: $($_.Exception.Message)"
                    }

                    # Get destination item's role assignments
                    try {
                        # locate dst list item id
                        $dstIt = $dstMap[$srcRel]
                        $dstItemId = $dstIt.Id
                        # Get role assignments for list item via PnP
                        $dstRAs = @()
                        $dstRoles = Get-PnPRoleAssignment -List $listTitle -ListItem $dstItemId -ErrorAction Stop
                        foreach ($dra in $dstRoles) {
                            $m = $dra.Member.LoginName
                            $r = ($dra.RoleDefinitionBindings | ForEach-Object { $_.Name }) -join ";"_