From 1dfbc7f6c0130f091d74f91891802f9a3da67ef0 Mon Sep 17 00:00:00 2001 From: Luke Arp Date: Wed, 11 Jun 2025 11:38:48 -0400 Subject: [PATCH] Commit --- .gitignore | 40 ++ AzureBlob.cs | 82 +++++ Demo.cs | 6 + FileSystem.cs | 67 ++++ GetConfig.cs | 43 +++ Program.cs | 31 ++ README.md | 56 ++- Sharepoint-Migrate-Console.csproj | 13 + Sharepoint-Migrate-Console.sln | 24 ++ Sharepoint.cs | 583 ++++++++++++++++++++++++++++++ settings.json | 20 + 11 files changed, 963 insertions(+), 2 deletions(-) create mode 100644 .gitignore create mode 100644 AzureBlob.cs create mode 100644 Demo.cs create mode 100644 FileSystem.cs create mode 100644 GetConfig.cs create mode 100644 Program.cs create mode 100644 Sharepoint-Migrate-Console.csproj create mode 100644 Sharepoint-Migrate-Console.sln create mode 100644 Sharepoint.cs create mode 100644 settings.json diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f937b23 --- /dev/null +++ b/.gitignore @@ -0,0 +1,40 @@ +# ---> C++ +# Prerequisites +*.d + +# Compiled Object files +*.slo +*.lo +*.o +*.obj + +# Precompiled Headers +*.gch +*.pch + +# Compiled Dynamic libraries +*.so +*.dylib +*.dll + +# Fortran module files +*.mod +*.smod + +# Compiled Static libraries +*.lai +*.la +*.a +*.lib + +# Executables +*.exe +*.out +*.app + +bin/ +publish/ +obj/ + +Demo/ +settings copy.json \ No newline at end of file diff --git a/AzureBlob.cs b/AzureBlob.cs new file mode 100644 index 0000000..3eac187 --- /dev/null +++ b/AzureBlob.cs @@ -0,0 +1,82 @@ +public class AzureBlobDownload +{ + private readonly static HttpClient blobClient; + private readonly static SharePointConfig config; + + static AzureBlobDownload() + { + config = SharePointConfig.GetConfig(); + blobClient = new HttpClient() { BaseAddress = new Uri(config.blobEndpointUrl), Timeout = TimeSpan.FromMinutes(10) }; + } + + + public static async Task UploadBlob(string fullPath, SharePointObject file, object fileLock, string logFilePath, string token) + { + string requestUri 
= $"/{config.blobContainer}{fullPath}"; + string dateInRfc1123Format = DateTime.UtcNow.ToString("R"); + string blobType = "BlockBlob"; + CopyResult result = new CopyResult(); + try + { + HttpRequestMessage checkOnBlob = new HttpRequestMessage(HttpMethod.Head, requestUri); + checkOnBlob.Headers.Add("x-ms-date", dateInRfc1123Format); + checkOnBlob.Headers.Add("x-ms-version", "2020-10-02"); + checkOnBlob.Headers.Add("Authorization", $"Bearer {token}"); + string lastModified = ""; + try + { + HttpResponseMessage metaCheck = await blobClient.SendAsync(checkOnBlob); + lastModified = metaCheck.Headers.FirstOrDefault(x => x.Key == "x-ms-meta-lastmodifieddatetime").Value.FirstOrDefault(""); + } + catch + { + } + if (lastModified != file.lastModifiedDateTime.ToString()) + { + using (HttpResponseMessage response = await blobClient.GetAsync(file.downloadUrl)) + { + response.EnsureSuccessStatusCode(); + HttpRequestMessage request = new HttpRequestMessage(HttpMethod.Put, requestUri); + request.Content = new StreamContent(await response.Content.ReadAsStreamAsync()); + request.Headers.Add("x-ms-date", dateInRfc1123Format); + request.Headers.Add("x-ms-version", "2020-10-02"); + request.Headers.Add("x-ms-blob-type", blobType); + request.Headers.Add("Authorization", $"Bearer {token}"); + request.Headers.Add("x-ms-meta-createdate", file.createdDateTime.ToString()); + request.Headers.Add("x-ms-meta-lastmodifieddatetime", file.lastModifiedDateTime.ToString()); + HttpResponseMessage blobResponse = await blobClient.SendAsync(request); + blobResponse.EnsureSuccessStatusCode(); + } + result.sourcePath = file.downloadUrl; + result.targetPath = $"{config.blobEndpointUrl}{config.blobContainer}{fullPath}"; + result.result = "OK"; + lock (fileLock) + { + File.AppendAllText(logFilePath, $"Upload Blob Success: {file.name} uploaded to {config.blobEndpointUrl}{config.blobContainer}{fullPath}{Environment.NewLine}"); + } + } + else + { + result.sourcePath = file.downloadUrl; + result.targetPath = 
$"{config.blobEndpointUrl}{config.blobContainer}{fullPath}"; + result.result = "Exists"; + lock (fileLock) + { + File.AppendAllText(logFilePath, $"Blob Exists: {file.name} exists at {config.blobEndpointUrl}{config.blobContainer}{fullPath}{Environment.NewLine}"); + } + } + } + catch + { + result.sourcePath = file.downloadUrl; + result.targetPath = $"{config.blobEndpointUrl}{config.blobContainer}{fullPath}"; + result.result = "FAILED"; + lock (fileLock) + { + File.AppendAllText(logFilePath, $"Upload Blob Failure: {file.name} uploaded to {config.blobEndpointUrl}{config.blobContainer}{fullPath}{Environment.NewLine}"); + } + } + + return result; + } +} \ No newline at end of file diff --git a/Demo.cs b/Demo.cs new file mode 100644 index 0000000..9519ffa --- /dev/null +++ b/Demo.cs @@ -0,0 +1,6 @@ +using System.Text.Json.Serialization; + +public class JsonTest { + [JsonPropertyName("@test.luke.com")] + public string luke {get;set;} +} diff --git a/FileSystem.cs b/FileSystem.cs new file mode 100644 index 0000000..da1d28d --- /dev/null +++ b/FileSystem.cs @@ -0,0 +1,67 @@ +public class FileSystemDownload +{ + private static HttpClient fileDownloads; + + static FileSystemDownload() + { + fileDownloads = new HttpClient() { Timeout = TimeSpan.FromMinutes(15) }; + } + + public static async Task DownloadToFileSystem(string fullPath, SharePointObject file, object fileLock, string logFilePath) + { + CopyResult result = new CopyResult(); + if (!File.Exists(fullPath) || File.GetLastWriteTimeUtc(fullPath) != file.lastModifiedDateTime.ToUniversalTime()) /* download when the file is missing OR stale; compare in UTC so the local time zone cannot cause a false mismatch */ + { + using (HttpResponseMessage response = await fileDownloads.GetAsync(file.downloadUrl)) + { + if (response.StatusCode == System.Net.HttpStatusCode.OK) + { + using (Stream contentStream = await response.Content.ReadAsStreamAsync(), + fileStream = new FileStream(fullPath, FileMode.Create, FileAccess.Write, FileShare.None, 4096, true)) + { + await contentStream.CopyToAsync(fileStream); + } + } + else + { + lock (fileLock) + { + 
File.AppendAllText(logFilePath, $"Paused: API Throttled on Download, try again in 2 minutes: {response.StatusCode} {file.name} attempted to {fullPath}{Environment.NewLine}"); + } + await Task.Delay(120000); + using (HttpResponseMessage attempt2 = await fileDownloads.GetAsync(file.downloadUrl)) + { + + attempt2.EnsureSuccessStatusCode(); + using (Stream contentStream = await attempt2.Content.ReadAsStreamAsync(), + fileStream = new FileStream(fullPath, FileMode.Create, FileAccess.Write, FileShare.None, 4096, true)) + { + await contentStream.CopyToAsync(fileStream); + } + } + } + result.sourcePath = file.downloadUrl; + result.targetPath = fullPath; + result.result = "OK"; + lock (fileLock) + { + File.AppendAllText(logFilePath, $"Downloaded Success: {file.name} downloaded to {fullPath}{Environment.NewLine}"); + } + } + } + else + { + result.sourcePath = file.downloadUrl; + result.targetPath = fullPath; + result.result = "FileExists"; + lock (fileLock) + { + File.AppendAllText(logFilePath, $"File Existed: {file.name} existed at {fullPath}{Environment.NewLine}"); + } + } + await Task.Delay(100); + File.SetCreationTime(fullPath, file.createdDateTime); + File.SetLastWriteTime(fullPath, file.lastModifiedDateTime); + return result; + } +} \ No newline at end of file diff --git a/GetConfig.cs b/GetConfig.cs new file mode 100644 index 0000000..1e04bb6 --- /dev/null +++ b/GetConfig.cs @@ -0,0 +1,43 @@ +using System.Formats.Tar; +using System.Text; +using System.Text.Json; + +public class SharePointConfig { + public EntraInfo entra {get;set;} + public string logFileName {get;set;} + public string siteId {get;set;} + public string driveId {get;set;} + public string sharePointBaseFolder {get;set;} + public string targetType {get;set;} + public string targetBase {get;set;} + public string blobEndpointUrl {get;set;} + public string blobContainer {get;set;} + public string blobBaseAddress {get;set;} + public bool downloadFiles {get;set;} + public bool recursive {get;set;} + public 
bool foldersOnly {get;set;} + + static public SharePointConfig GetConfig() { + SharePointConfig sharePointConfig = JsonSerializer.Deserialize(string.Join("",File.ReadAllLines("settings.json",Encoding.UTF8))); + switch(sharePointConfig.targetType) { + case "FileSystem": + sharePointConfig.targetType = "FileSystem"; + break; + case "AzureBlob": + sharePointConfig.targetType = "AzureBlob"; + break; + default: { + sharePointConfig.targetType = "FileSystem"; + break; + } + } + return sharePointConfig; + } +} + +public class EntraInfo { + public string client_id {get;set;} + public string client_secret {get;set;} + public string tenantId {get;set;} + public string tokenBaseAddress {get;set;} +} \ No newline at end of file diff --git a/Program.cs b/Program.cs new file mode 100644 index 0000000..3f3d480 --- /dev/null +++ b/Program.cs @@ -0,0 +1,31 @@ +// See https://aka.ms/new-console-template for more information +using System.Diagnostics.Metrics; +using System.Net.Security; +using System.Text.Json; + +args = Environment.GetCommandLineArgs(); +Console.WriteLine(string.Join(" ", args)); +SharePointConfig config = SharePointConfig.GetConfig(); +bool downloadFiles = bool.Parse(args.SkipWhile(x => x != "--downloadFiles").Skip(1).FirstOrDefault(config.downloadFiles.ToString()).ToString()); +bool foldersOnly = bool.Parse(args.SkipWhile(x => x != "--foldersOnly").Skip(1).FirstOrDefault(config.foldersOnly.ToString()).ToString()); +bool recursive = bool.Parse(args.SkipWhile(x => x != "--recursive").Skip(1).FirstOrDefault(config.recursive.ToString()).ToString()); +bool findDriveId = args.Contains("--findDriveId") && (!bool.TryParse(args.SkipWhile(x => x != "--findDriveId").Skip(1).FirstOrDefault(), out bool findDriveIdValue) || findDriveIdValue); /* a bare --findDriveId switch enables the mode; an explicit true/false after it is still honoured */ +if (findDriveId) +{ + List sites = SharePoint.GetSharePointSites(); + foreach (SharePointSite site in sites) + { + Console.WriteLine($"Site: {site.name}, SiteId: {site.id}, WebUrl: {site.webUrl}"); + List drives = SharePoint.GetSharePointDrives(site.id); + foreach(SharePointDrive drive in drives) + { + 
Console.WriteLine($"DriveName: {drive.name}, DriveId: {drive.id}, WebUrl: {drive.webUrl}"); + } + } +} +else +{ + Console.WriteLine($"Download Files: {downloadFiles}, Folders Only: {foldersOnly}, Recursive: {recursive}"); + List files = SharePoint.GetItems(foldersOnly, recursive, downloadFiles); + Console.WriteLine($"Total files: {files.Count}, size of all files: {files.Sum(x => x.size)}"); +} \ No newline at end of file diff --git a/README.md b/README.md index 25a9eed..2715a16 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,55 @@ -# SharePoint-Migrate-Console +# What does this do -Mass Download from Share Point sites to local and cloud storage. \ No newline at end of file +This is still a work in progress + +* Lists files and total file size in sharepoint folders +* Downloads files and writes to a local File System or Uploads to Azure Blob with correct directory structure + +# How to use + +Fill out the settings.json file: + +```json +{ + "entra": { + "client_id": "", //Application ID that has Sites.Read.All Application permissions in Entra ID + "client_secret": "", //Secret of App ID + "tenantId": "", //Tenant ID of the Entra Tenant that has App registered + "tokenBaseAddress": "https://login.microsoftonline.com/" //Base endpoint for your Token URL (Change based on cloud you are in) + }, + "logFileName": "Files.txt", //Name of log file that will be created in the root directory of the .exe + "siteId": "", //Sharepoint Site ID + "driveId": "", //Sharepoint Drive ID + "sharePointBaseFolder": "", //Base folder that you want to count or copy. Example: /AnotherDepth/B2c-custom + "targetType": "FileSystem", //Type of copy if you are downloading. Accepted Values FileSystem and AzureBlob + "targetBase": "", //Target base of where the files are to be coppied. 
Example: FileSystem = C:\MyFiles Blob = /MyFiles + "blobEndpointUrl": "", //URL of the blob storage account in this format: https://mystorageaccount.blob.core.windows.net/ The DNS suffix may be different depending on Cloud + "blobContainer": "", //Blob container name, Example: sharepoint + "blobBaseAddress": "https://storage.azure.com/", //App URI that will be the audience of the token used to write to the storage account. This value will be different depending on Cloud. + "downloadFiles": true, //If you don't want to download files, set to false + "recursive": true, //If you want to only get files in the specified directory but not subdirectories, set to false + "foldersOnly": false //If you only want a log of folders and not files, set to true. +} +``` + +You can also override the settings.json behavior using console commands + +```cmd +.\Sharepoint-Migrate-Console.exe --downloadFiles true --recursive false --foldersOnly false +``` + +If you choose targetType as FileSystem, you will not need to give a value to blobEndpointUrl, blobContainer, or blobBaseAddress + +If you want to print all the Drive Ids in your SharePoint environment to the terminal, you can do the following: + +```cmd +.\Sharepoint-Migrate-Console.exe --findDriveId +``` + +--findDriveId only requires the entra portion of the settings.json to be filled out. + +# Known Issues and Future Features + +Observed when running against SharePoint environments that have a lot of files: threads seem to die without downloading the files. I am working to understand what is causing this, but I do check to see if the file already exists before downloading. I also check to see if the SharePoint file has been updated since the last run, so you should be safe to run it over again and it will start downloading where it left off. + +Different Console outputs depending on Settings.json options. 
\ No newline at end of file diff --git a/Sharepoint-Migrate-Console.csproj b/Sharepoint-Migrate-Console.csproj new file mode 100644 index 0000000..bfa3595 --- /dev/null +++ b/Sharepoint-Migrate-Console.csproj @@ -0,0 +1,13 @@ + + + Exe + net8.0 + enable + enable + + + + PreserveNewest + + + diff --git a/Sharepoint-Migrate-Console.sln b/Sharepoint-Migrate-Console.sln new file mode 100644 index 0000000..55ed0a1 --- /dev/null +++ b/Sharepoint-Migrate-Console.sln @@ -0,0 +1,24 @@ +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.5.2.0 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Sharepoint-Migrate-Console", "Sharepoint-Migrate-Console.csproj", "{DE86EC9E-11F1-57BA-2854-A3B93D22FB63}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {DE86EC9E-11F1-57BA-2854-A3B93D22FB63}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {DE86EC9E-11F1-57BA-2854-A3B93D22FB63}.Debug|Any CPU.Build.0 = Debug|Any CPU + {DE86EC9E-11F1-57BA-2854-A3B93D22FB63}.Release|Any CPU.ActiveCfg = Release|Any CPU + {DE86EC9E-11F1-57BA-2854-A3B93D22FB63}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {3EA93FFA-A60C-406F-9481-B48FCA088306} + EndGlobalSection +EndGlobal diff --git a/Sharepoint.cs b/Sharepoint.cs new file mode 100644 index 0000000..fd16cb0 --- /dev/null +++ b/Sharepoint.cs @@ -0,0 +1,583 @@ +using System.Collections; +using System.Collections.Concurrent; +using System.Net.Http.Headers; +using System.Text.Json; +using System.Text.Json.Serialization; +using System.Text; +using System.Threading.Tasks; +using 
System.Runtime.CompilerServices; +using System.Reflection; + +public class SharePoint +{ + private static HttpClient sharePointClient; + private static HttpClient tokenEndpoint; + private static FormUrlEncodedContent blobTokenBody; + private static string tokenEndpointUrl; + private static FormUrlEncodedContent tokenBody; + private static readonly object fileLock = new object(); + private static string logFilePath; + readonly private static SharePointConfig sharePointConfig; + private static SemaphoreSlim semaphore = new SemaphoreSlim(10); + + static SharePoint() + { + sharePointConfig = SharePointConfig.GetConfig(); + sharePointClient = new HttpClient() { BaseAddress = new Uri("https://graph.microsoft.com/"), Timeout = TimeSpan.FromMinutes(5) }; + tokenEndpoint = new HttpClient() { BaseAddress = new Uri(sharePointConfig.entra.tokenBaseAddress) }; + tokenEndpointUrl = $"/{sharePointConfig.entra.tenantId}/oauth2/v2.0/token"; + tokenBody = new FormUrlEncodedContent(new[] + { + new KeyValuePair("grant_type", "client_credentials"), + new KeyValuePair("client_id", $"{sharePointConfig.entra.client_id}"), + new KeyValuePair("client_secret", $"{sharePointConfig.entra.client_secret}"), + new KeyValuePair("scope", "https://graph.microsoft.com/.default"), + }); + logFilePath = Directory.GetCurrentDirectory() + $"/{sharePointConfig.logFileName}"; + if (File.Exists(logFilePath)) + { + File.Delete(logFilePath); + } + } + + private static TokenResponse GetToken() + { + HttpRequestMessage message = new HttpRequestMessage(HttpMethod.Post, tokenEndpointUrl); + message.Headers.Add("ContentType", "application/x-www-form-urlencoded"); + message.Content = tokenBody; + TokenResponse token; + HttpContent responseContent = tokenEndpoint.SendAsync(message).Result.Content; + token = JsonSerializer.Deserialize(responseContent.ReadAsStringAsync().Result); + return token; + } + + private static TokenResponse GetTokenBlob() + { + HttpRequestMessage message = new 
HttpRequestMessage(HttpMethod.Post, tokenEndpointUrl); + blobTokenBody = new FormUrlEncodedContent(new[] + { + new KeyValuePair("grant_type", "client_credentials"), + new KeyValuePair("client_id", $"{sharePointConfig.entra.client_id}"), + new KeyValuePair("client_secret", $"{sharePointConfig.entra.client_secret}"), + new KeyValuePair("scope", $"{sharePointConfig.blobBaseAddress}.default"), + }); + message.Headers.Add("ContentType", "application/x-www-form-urlencoded"); + message.Content = blobTokenBody; + TokenResponse token; + HttpContent responseContent = tokenEndpoint.SendAsync(message).Result.Content; + token = JsonSerializer.Deserialize(responseContent.ReadAsStringAsync().Result); + return token; + } + + public static List GetItems(bool foldersOnly, bool recursive, bool downloadFiles) + { + TokenResponse token; + try + { + token = GetToken(); + } + catch + { + Console.WriteLine("Failed to get token"); + return new List(); + } + string path = "/v1.0/sites/" + sharePointConfig.siteId + "/drives/" + sharePointConfig.driveId + "/root:" + sharePointConfig.sharePointBaseFolder + ":/children"; + HttpRequestMessage sharepointMessage = new HttpRequestMessage(HttpMethod.Get, path); + sharepointMessage.Headers.Add("Authorization", "Bearer " + token.access_token); + sharepointMessage.Headers.Add("ContentType", "application/json"); + SharePointObjectReturn sharePointResponse; + List returnData = new List(); + HttpContent sharePointResponseContent; + if (recursive == true) + { + try + { + ConcurrentBag allFiles = new ConcurrentBag(); + ProcessPathsInParrallel(token.access_token, path, sharePointConfig.siteId, allFiles, logFilePath, downloadFiles); + if (foldersOnly) + { + returnData = allFiles.ToList().Where(x => x.downloadUrl == null).ToList(); + } + else + { + returnData = allFiles.ToList().Where(x => x.downloadUrl != null).ToList(); + } + } + catch + { + lock (fileLock) + { + File.AppendAllText(logFilePath, $"Failure: {path}{Environment.NewLine}"); + } + } + } + else + { 
+ try + { + sharePointResponseContent = sharePointClient.SendAsync(sharepointMessage).Result.Content; + sharePointResponse = JsonSerializer.Deserialize(sharePointResponseContent.ReadAsStringAsync().Result); + if (foldersOnly) + { + returnData = sharePointResponse.value.Where(x => x.downloadUrl == null).ToList(); + } + else + { + returnData = sharePointResponse.value.Where(x => x.downloadUrl != null).ToList(); + } + } + catch + { + lock (fileLock) + { + File.AppendAllText(logFilePath, $"Failure: {path}{Environment.NewLine}"); + } + } + + } + return returnData; + } + + static async Task ProcessPathsInParrallel(string token, string path, string siteid, ConcurrentBag allObjects, string logFilePath, bool downloadFiles) + { + List bodies = new List() { new SharePointBatchRequestBody() }; + HttpRequestMessage sharepointMessage = new HttpRequestMessage(HttpMethod.Get, path); + sharepointMessage.Headers.Add("Authorization", "Bearer " + token); + sharepointMessage.Headers.Add("ContentType", "application/json"); + HttpContent sharePointResponseContent; + SharePointObjectReturn sharePointResponse; + List tasks = new List(); + try + { + sharePointResponseContent = sharePointClient.SendAsync(sharepointMessage).Result.Content; + sharePointResponse = JsonSerializer.Deserialize(sharePointResponseContent.ReadAsStringAsync().Result); + List sharePointObjects = new List(); + sharePointObjects.AddRange(sharePointResponse.value); + while (sharePointResponse.nextLink != null) + { + sharepointMessage = new HttpRequestMessage(HttpMethod.Get, sharePointResponse.nextLink); + sharepointMessage.Headers.Add("Authorization", "Bearer " + token); + sharepointMessage.Headers.Add("ContentType", "application/json"); + sharePointResponseContent = sharePointClient.SendAsync(sharepointMessage).Result.Content; + sharePointResponse = JsonSerializer.Deserialize<SharePointObjectReturn>(sharePointResponseContent.ReadAsStringAsync().Result); /* parse each new page so nextLink advances; previously the first page was re-added forever */ + sharePointObjects.AddRange(sharePointResponse.value); + } + ConcurrentBag downloadedFiles = new ConcurrentBag(); + if 
(downloadFiles) + { + tasks.Add(DownloadFiles(sharePointObjects.Where(x => x.downloadUrl != null).ToList(), logFilePath, downloadedFiles)); + } + Task.WaitAll(tasks.ToArray()); + foreach (SharePointObject file in sharePointObjects) + { + allObjects.Add(file); + lock (fileLock) + { + if (file.downloadUrl != null) + { + File.AppendAllText(logFilePath, $"File added: {file.parentReference.path}/{file.name}{Environment.NewLine}"); + } + else + { + File.AppendAllText(logFilePath, $"Folder: {file.parentReference.path}/{file.name}{Environment.NewLine}"); + } + } + } + int folderCount = 1; + int bodyIndex = 0; + foreach (SharePointObject folder in sharePointObjects.Where(x => x.downloadUrl == null)) + { + bodies[bodyIndex].requests.Add(new SharePointBatchRequest() { id = folderCount.ToString(), method = "GET", url = $"/sites/{siteid}{folder.parentReference.path}/{folder.name}:/children" }); + folderCount++; + if (folderCount == 11) + { + bodies.Add(new SharePointBatchRequestBody()); + bodyIndex++; + folderCount = 1; + } + } + string batchUrl = "/v1.0/$batch"; + string bodiesJson = JsonSerializer.Serialize>(bodies); + Parallel.ForEach(bodies, body => + { + ProcessDirectory(batchUrl, token, siteid, body, allObjects, logFilePath, downloadFiles, downloadedFiles); + }); + } + catch + { + lock (fileLock) + { + File.AppendAllText(logFilePath, $"Failure: {path}{Environment.NewLine}"); + } + } + } + + static void ProcessDirectory(string rootPath, string token, string siteid, SharePointBatchRequestBody body, ConcurrentBag allObjects, string logFilePath, bool downloadFiles, ConcurrentBag downloadedFiles) + { + HttpRequestMessage sharepointMessage = new HttpRequestMessage(HttpMethod.Post, rootPath); + sharepointMessage.Headers.Add("Authorization", "Bearer " + token); + sharepointMessage.Headers.Add("ContentType", "application/json"); + string testBody = JsonSerializer.Serialize(body); + sharepointMessage.Content = new StringContent(JsonSerializer.Serialize(body), Encoding.UTF8, 
"application/json"); + HttpContent sharePointResponseContent; + SharePointBatch sharePointResponse; + try + { + sharePointResponseContent = sharePointClient.SendAsync(sharepointMessage).Result.Content; + sharePointResponse = JsonSerializer.Deserialize(sharePointResponseContent.ReadAsStringAsync().Result); + while (sharePointResponse.responses.Where(y => y.status == 429).ToList().Count > 0) + { + lock (fileLock) + { + File.AppendAllText(logFilePath, $"Paused: API Throttle, will try request again in 2 minutes.{Environment.NewLine}"); /* terminate the entry so the next log line is not appended to it */ + } + Task.Delay(120000).Wait(); + sharepointMessage = new HttpRequestMessage(HttpMethod.Post, rootPath); + sharepointMessage.Headers.Add("Authorization", "Bearer " + token); + sharepointMessage.Headers.Add("ContentType", "application/json"); + sharepointMessage.Content = new StringContent(JsonSerializer.Serialize(body), Encoding.UTF8, "application/json"); + sharePointResponseContent = sharePointClient.SendAsync(sharepointMessage).Result.Content; + sharePointResponse = JsonSerializer.Deserialize(sharePointResponseContent.ReadAsStringAsync().Result); + } + List sharePointObjects = new List(); + List nextLinkBodies = new List() { }; + int nextLinkCount = 1; + int folderCount = 1; + int bodyIndex = 0; + List tasks = new List(); + foreach (SharePointBatchResponse response in sharePointResponse.responses) + { + sharePointObjects.AddRange(response.body.value); + if (downloadFiles && response.body.value.Where(x => x.downloadUrl != null).ToList().Count > 0) + { + List downloadObjects = response.body.value.Where(x => x.downloadUrl != null).ToList(); + tasks.Add(DownloadFiles(downloadObjects, logFilePath, downloadedFiles)); + } + Parallel.ForEach(response.body.value, item => + { + allObjects.Add(item); + lock (fileLock) + { + if (item.downloadUrl != null) + { + File.AppendAllText(logFilePath, $"File added: {item.parentReference.path}/{item.name}{Environment.NewLine}"); + } + else + { + File.AppendAllText(logFilePath, $"Folder: 
{item.parentReference.path}/{item.name}{Environment.NewLine}"); + } + } + }); + if (response.body.nextLink != null) + { + if (nextLinkBodies.Count == 0) + { + nextLinkBodies.Add(new SharePointBatchRequestBody()); + } + nextLinkBodies[bodyIndex].requests.Add(new SharePointBatchRequest() { id = nextLinkCount.ToString(), method = "GET", url = response.body.nextLink }); + nextLinkCount++; + if (nextLinkCount == 11) + { + nextLinkBodies.Add(new SharePointBatchRequestBody()); + bodyIndex++; + nextLinkCount = 1; + } + } + } + Task.WaitAll(tasks.ToArray()); + foreach (SharePointBatchRequestBody newBody in nextLinkBodies) + { + ProcessDirectory(rootPath, token, siteid, newBody, allObjects, logFilePath, downloadFiles, downloadedFiles); + } + List bodies = new List() { }; + folderCount = 1; + bodyIndex = 0; + foreach (SharePointObject folder in sharePointObjects.Where(x => x.downloadUrl == null)) + { + if (bodies.Count == 0) + { + bodies.Add(new SharePointBatchRequestBody()); + } + bodies[bodyIndex].requests.Add(new SharePointBatchRequest() { id = folderCount.ToString(), method = "GET", url = $"/sites/{siteid}{folder.parentReference.path}/{folder.name}:/children" }); + folderCount++; + if (folderCount == 11) + { + bodies.Add(new SharePointBatchRequestBody()); + bodyIndex++; + folderCount = 1; + } + } + if (bodies.Count != 0) + { + if (bodies[0].requests.Count > 0) + { + foreach (SharePointBatchRequestBody newBody in bodies) + { + ProcessDirectory(rootPath, token, siteid, newBody, allObjects, logFilePath, downloadFiles, downloadedFiles); + } + } + } + } + catch + { + lock (fileLock) + { + File.AppendAllText(logFilePath, $"Failure: Who Knows{Environment.NewLine}"); + } + } + } + + static async Task DownloadFiles(List files, string logFilePath, ConcurrentBag downloadedFiles) + { + TokenResponse token; + try + { + if (sharePointConfig.targetType == "FileSystem") + { + token = new TokenResponse(); + } + else + { + token = GetTokenBlob(); + } + } + catch + { + lock (fileLock) + { + 
File.AppendAllText(logFilePath, $"Downloads Not Started, Failed to Get Token, trying again{Environment.NewLine}"); + } + await Task.Delay(2000); + token = GetTokenBlob(); + if (token.access_token == null) + { + lock (fileLock) + { + File.AppendAllText(logFilePath, $"Downloads Not Started, no Token{Environment.NewLine}"); + } + } + else + { + lock (fileLock) + { + File.AppendAllText(logFilePath, $"Download token, {Environment.NewLine}"); + } + } + } + CopyResult result; + string path; + string drive; + foreach (SharePointObject file in files) + { + await semaphore.WaitAsync(); + try + { + string fullPath = $"{sharePointConfig.targetBase}/{sharePointConfig.sharePointBaseFolder.Split("/")[^1]}{file.parentReference.path.Split(":")[1].Split(sharePointConfig.sharePointBaseFolder)[1]}/{file.name}"; + result = new CopyResult(); + try + { + if (sharePointConfig.targetType == "FileSystem") + { + Directory.CreateDirectory(Path.GetDirectoryName(fullPath)); + result = await FileSystemDownload.DownloadToFileSystem(fullPath, file, fileLock, logFilePath); + } + else if (sharePointConfig.targetType == "AzureBlob") + { + result = await AzureBlobDownload.UploadBlob(fullPath, file, fileLock, logFilePath, token.access_token); + } + } + catch (Exception ex) + { + Console.WriteLine($"An error occurred: {ex.Message}"); + result.sourcePath = file.downloadUrl; + result.targetPath = fullPath; + result.result = "FAILED"; + lock (fileLock) + { + File.AppendAllText(logFilePath, $"Downloaded Failed: {file.name} from {file.downloadUrl}{Environment.NewLine}"); + } + } + } + finally + { + semaphore.Release(); + } + downloadedFiles.Add(result); + } + } + + public static List GetSharePointSites() + { + TokenResponse token = GetToken(); + HttpRequestMessage message = new HttpRequestMessage(HttpMethod.Get, "/v1.0/sites"); + message.Headers.Add("Authorization", "Bearer " + token.access_token); + message.Headers.Add("ContentType", "application/json"); + List sites = new List(); + try + { + 
SharePointSiteResponse response = JsonSerializer.Deserialize(sharePointClient.SendAsync(message).Result.Content.ReadAsStringAsync().Result); + sites.AddRange(response.value); + while (response.nextLink != null) + { + message = new HttpRequestMessage(HttpMethod.Get, response.nextLink); + message.Headers.Add("Authorization", "Bearer " + token.access_token); + message.Headers.Add("ContentType", "application/json"); + response = JsonSerializer.Deserialize(sharePointClient.SendAsync(message).Result.Content.ReadAsStringAsync().Result); + sites.AddRange(response.value); + } + } + catch + { + + } + return sites; + } + + public static List GetSharePointDrives(string sharpointId) + { + TokenResponse token = GetToken(); + HttpRequestMessage message = new HttpRequestMessage(HttpMethod.Get, $"/v1.0/sites/{sharpointId}/drives"); + message.Headers.Add("Authorization", "Bearer " + token.access_token); + message.Headers.Add("ContentType", "application/json"); + List drives = new List(); + try + { + SharePointDriveResponse response = JsonSerializer.Deserialize(sharePointClient.SendAsync(message).Result.Content.ReadAsStringAsync().Result); + drives.AddRange(response.value); + while (response.nextLink != null) + { + message = new HttpRequestMessage(HttpMethod.Get, response.nextLink); + message.Headers.Add("Authorization", "Bearer " + token.access_token); + message.Headers.Add("ContentType", "application/json"); + response = JsonSerializer.Deserialize(sharePointClient.SendAsync(message).Result.Content.ReadAsStringAsync().Result); + drives.AddRange(response.value); + } + } + catch + { + + } + return drives; + } +} + +public class SharePointSiteResponse +{ + [JsonPropertyName("@odata.nextLink")] + public string nextLink { get; set; } + public List value { get; set; } +} + +public class SharePointDriveResponse +{ + [JsonPropertyName("@odata.nextLink")] + public string nextLink { get; set; } + public List value { get; set; } +} + +public class SharePointDrive +{ + public string id { get; 
set; } + public string name { get; set; } + public string webUrl { get; set; } + public string driveType { get; set; } + public SharePointCreatedBy createdBy { get; set; } +} + +public class SharePointSite +{ + public string id { get; set; } + public string name { get; set; } + public string webUrl { get; set; } + public SharePointCreatedBy user { get; set; } + public DateTime createdDateTime { get; set; } +} + +public class CopyResult +{ + public string targetPath; + public string sourcePath; + public string result; +} +public class TokenResponse +{ + public string access_token { get; set; } +} + +public class SharePointObjectReturn +{ + public List value { get; set; } + [JsonPropertyName("@odata.nextLink")] + public string? nextLink { get; set; } + + public SharePointObjectReturn() + { + value = new List(); + } +} + +public class SharePointBatchRequest +{ + public string id { get; set; } + public string method { get; set; } + public string url { get; set; } +} + +public class SharePointBatchRequestBody +{ + public List requests { get; set; } + public SharePointBatchRequestBody() + { + requests = new List(); + } +} + +public class SharePointBatch +{ + public List responses { get; set; } + public SharePointBatch() + { + responses = new List(); + } +} + +public class SharePointBatchResponse +{ + public string id { get; set; } + public int status { get; set; } + public SharePointObjectReturn body { get; set; } +} + +public class SharePointObject +{ + [JsonPropertyName("@microsoft.graph.downloadUrl")] + public string? 
downloadUrl { get; set; } + public string id { get; set; } + public string name { get; set; } + public SharePointCreatedBy createdBy { get; set; } + public DateTime createdDateTime { get; set; } + public DateTime lastModifiedDateTime { get; set; } + public Int64 size { get; set; } + public SharePointParrentReference parentReference { get; set; } + + public SharePointObject() + { + size = 0; + } +} + +public class SharePointParrentReference +{ + public string path { get; set; } +} + +public class SharePointCreatedBy +{ + public SharePointUser user { get; set; } +} + +public class SharePointUser +{ + public string email { get; set; } + public string displayName { get; set; } +} \ No newline at end of file diff --git a/settings.json b/settings.json new file mode 100644 index 0000000..a36be09 --- /dev/null +++ b/settings.json @@ -0,0 +1,20 @@ +{ + "entra": { + "client_id": "", + "client_secret": "", + "tenantId": "", + "tokenBaseAddress": "https://login.microsoftonline.com/" + }, + "logFileName": "Files.txt", + "siteId": "", + "driveId": "", + "sharePointBaseFolder": "", + "targetType": "FileSystem", + "targetBase": "", + "blobEndpointUrl": "", + "blobContainer": "", + "blobBaseAddress": "https://storage.azure.com/", + "downloadFiles": true, + "recursive": true, + "foldersOnly": false +} \ No newline at end of file