First working version

- Move from separate dirs for upload to a unified one (identify
  media/metadata files by file extension)
- Prevent uploading when an import is already scheduled
- Allow setting custom, parser-specific settings
- Add CLI
- Implement WebDAV upload
- Implement checking of upload folders for uploadable contents

Close #6, close #7, close #9, close #3, close #1, close #4
This commit is contained in:
Joshua Ramon Enslin 2025-02-27 17:29:20 +01:00
parent 9a5c432186
commit 7cfd3bb1de
Signed by: jrenslin
GPG Key ID: 46016F84501B70AE
12 changed files with 553 additions and 72 deletions

2
go.mod
View File

@ -1,3 +1,5 @@
module gitea.armuli.eu/museum-digital/museum-digital-webdav-uploader
go 1.24.0
require github.com/studio-b12/gowebdav v0.10.0 // indirect

2
go.sum Normal file
View File

@ -0,0 +1,2 @@
github.com/studio-b12/gowebdav v0.10.0 h1:Yewz8FFiadcGEu4hxS/AAJQlHelndqln1bns3hcJIYc=
github.com/studio-b12/gowebdav v0.10.0/go.mod h1:bHA7t77X/QFExdeAnDzK6vKM34kEZAcE1OX4MfiwjkE=

16
main.go
View File

@ -41,11 +41,21 @@ func main() {
} else if slices.Contains(os.Args, "--run-manual-setup") {
cli.RunManualSetup()
} else if slices.Contains(os.Args, "--set-additional-setting") {
// cli.SetAdditionalSetting()
cli.SetAdditionalSetting()
} else if slices.Contains(os.Args, "--upload") {
// upload()
cli.HandleUpload()
} else if slices.Contains(os.Args, "--webdav-remote-list-toplevel") {
cli.ListRemoteToplevel()
} else if slices.Contains(os.Args, "--webdav-remote-list-metadata-dir") {
cli.ListRemoteMetadataDir()
} else if slices.Contains(os.Args, "--webdav-remote-list-media-dir") {
cli.ListRemoteMediaDir()
} else if slices.Contains(os.Args, "--local-list-metadata") {
cli.ListLocalMetadata()
} else if slices.Contains(os.Args, "--local-list-media") {
cli.ListLocalMedia()
} else {
cli.RunManualSetup()
// cli.RunManualSetup()
}
}

View File

@ -3,10 +3,12 @@ package cli
import (
"bufio"
"fmt"
"os"
"strconv"
"strings"
"os"
"gitea.armuli.eu/museum-digital/museum-digital-webdav-uploader/src/configloader"
"gitea.armuli.eu/museum-digital/museum-digital-webdav-uploader/src/uploadsrcdir"
"gitea.armuli.eu/museum-digital/museum-digital-webdav-uploader/src/webdavupload"
)
// Internal wrapper around opening a new bufio.Reader and getting an input
@ -155,10 +157,10 @@ func requestParser() string {
}
// Prompts the user to enter a valid metadata folder.
func requestMetadataFolder() string {
// Prompts the user to enter a valid folder for uploads.
func requestUploadFolder() string {
fmt.Println("\nPlease enter the full filepath of a directory for metadata files / exports from your local collection management system that should be automatically uploaded and imported.")
fmt.Println("\nPlease enter the full filepath of a directory in which the program should look for files to upload.")
fmt.Println("Note that the folder needs to exist.")
input := getConsoleInput()
@ -169,28 +171,7 @@ func requestMetadataFolder() string {
fmt.Print("----\nThe specific error is:\n")
fmt.Println(validationErr)
fmt.Print("----\n\n")
return requestMetadataFolder()
}
return output
}
// Prompts the user to enter a valid media folder.
func requestMediaFolder() string {
fmt.Println("\nPlease enter the full filepath of a directory for media files / exports from your local collection management system that should be automatically uploaded and imported.")
fmt.Println("Note that the folder needs to exist.")
input := getConsoleInput()
output, validationErr := configloader.ValidateUploadDir(input)
if validationErr != nil {
fmt.Println("This is not a valid metadata folder. Please try again.")
fmt.Print("----\nThe specific error is:\n")
fmt.Println(validationErr)
fmt.Print("----\n\n")
return requestMediaFolder()
return requestUploadFolder()
}
return output
@ -215,6 +196,20 @@ func requestPublishOnImport() bool {
}
// storeConfigToFile hands config to configloader.StoreConfigToFile, passing
// an empty string as the second argument. If storing fails, the reason is
// printed to stdout and the program panics with an empty message.
func storeConfigToFile(config configloader.MDWebDavUploaderConfig) {
	if storeErr := configloader.StoreConfigToFile(config, ""); storeErr != nil {
		fmt.Println("\nFailed to store config")
		fmt.Println("Reason:")
		fmt.Println(storeErr)
		panic("")
	}
}
// Queries the user for each of the relevant values for setup.
func RunManualSetup() {
@ -229,42 +224,186 @@ func RunManualSetup() {
config.Mail = requestMail()
config.WebDavAuthToken = requestWebDavAuthToken()
config.Parser = requestParser()
config.MetadataFolder = requestMetadataFolder()
config.MediaFolder = requestMediaFolder()
config.UploadDir = requestUploadFolder()
config.PublishOnImport = requestPublishOnImport()
err := configloader.StoreConfigToFile(config, "")
if err != nil {
fmt.Println("\nFailed to store config")
fmt.Println("Reason:")
storeConfigToFile(config)
fmt.Println("Great! Your configuration has been stored. If you need to set further parser-specific settings, use --set-additional-setting. Else, use --upload.")
}
// Wrapper around configloader.LoadFromFile() that prints errors.
func loadConfigFromFile() configloader.MDWebDavUploaderConfig {
config, requiresSetup, err := configloader.LoadFromFile("")
// Re-run manual setup and then load anew.
if requiresSetup == true {
fmt.Println("Your config is incomplete. Please run the setup command before proceeding.")
RunManualSetup()
config, _, rerunerr := configloader.LoadFromFile("")
if rerunerr != nil {
fmt.Println("Another error occured in loading your configuration:")
fmt.Println(rerunerr)
panic("")
}
return config
} else if err != nil {
fmt.Println("Another error occured in loading your configuration:")
fmt.Println(err)
panic("")
}
fmt.Println("Great! Your configuration has been stored. If you need to set further parser-specific settings, use --set-additional-setting. Else, use --upload.")
return config
}
// Prints the current configuration.
func ShowCurrentConfig() {
config, requiresSetup, err := configloader.LoadFromFile("")
if requiresSetup == true {
fmt.Println("Your config is incomplete. Please run the setup command before proceeding")
return
}
if err != nil {
fmt.Println("Another error occured:")
fmt.Println(err)
panic("")
}
config := loadConfigFromFile()
fmt.Println(config)
}
// SetAdditionalSetting interactively sets one entry of the configuration's
// Settings map.
//
// It loads the current configuration, prints the comment of every parser
// whose title equals the configured parser, asks on the console for a
// setting key and a value, and stores the updated configuration through
// storeConfigToFile.
func SetAdditionalSetting() {
	conf := loadConfigFromFile()

	// Welcome msg
	fmt.Println("You are trying to set an additional setting. For this, you will need to enter 1) which setting you want to manipulate and 2) what you want to set it to.")
	fmt.Println("-----\n")

	// Print parser-specific settings
	fmt.Println("Your currently configured parser is: " + conf.Parser)
	fmt.Println("You can learn more about the available settings for this parser in the parser documentation. Printing it now.")
	for _, parser := range configloader.ListParsers() {
		if parser.Title != conf.Parser {
			continue
		}
		fmt.Println(parser.Comment)
	}
	fmt.Println("\n-----\n")

	// Get values
	fmt.Println("Now, please enter which setting you would like to manipulate.")
	key := getConsoleInput()
	fmt.Println("What would you like to set the setting to?")
	value := getConsoleInput()

	// Writing to a nil map panics, so the map is created on first use.
	if conf.Settings == nil {
		conf.Settings = map[string]string{}
	}
	conf.Settings[key] = value

	storeConfigToFile(conf)
}
// printFolderContents prints the result of a directory listing to stdout.
//
// If err is non-nil, a failure notice followed by the error itself is
// printed and files is ignored. Otherwise the name of every entry is printed
// on its own line; an empty listing produces a short notice instead.
func printFolderContents(files []os.FileInfo, err error) {
	if err != nil {
		// This helper is shared by all listing commands, so the message
		// must not claim a specific folder, and the cause must be shown
		// for the user to be able to act on it.
		fmt.Println("Failed to list folder contents")
		fmt.Println("Reason:")
		fmt.Println(err)
		return
	}

	if len(files) == 0 {
		fmt.Print("There are no files to print here.\n")
		return
	}

	for _, f := range files {
		fmt.Println(f.Name())
	}
}
// Prints the top level folder contents of the WebDAV remote
func ListRemoteToplevel() {
config := loadConfigFromFile()
files, err := webdavupload.ListTopLevelContents(config)
printFolderContents(files, err)
}
// Prints the current contents of the remote metadata directory.
func ListRemoteMetadataDir() {
config := loadConfigFromFile()
files, err := webdavupload.ListMetadataDir(config)
printFolderContents(files, err)
}
// Prints the current contents of the remote media directory.
func ListRemoteMediaDir() {
config := loadConfigFromFile()
files, err := webdavupload.ListMediaDir(config)
printFolderContents(files, err)
}
// Prints the current contents of the local metadata directory.
func ListLocalMetadata() {
fmt.Println("Printing uploadable metadata files")
config := loadConfigFromFile()
files := uploadsrcdir.ListUploadableMetadata(config)
printFolderContents(files, nil)
}
// Prints the current contents of the local media directory.
func ListLocalMedia() {
fmt.Println("Printing uploadable media files")
config := loadConfigFromFile()
files := uploadsrcdir.ListUploadableMedia(config)
printFolderContents(files, nil)
}
// HandleUpload runs the complete upload workflow.
//
// Steps:
//  1. Load the configuration and collect the uploadable metadata and media
//     files; stop if there are none.
//  2. Open a WebDAV client and stop if the remote is reported as occupied.
//  3. Upload metadata files, then media files.
//  4. Write the import config to the remote. If that write fails, the reason
//     is printed and the process exits with status 1, because without the
//     import config the uploaded files were not announced to the remote.
func HandleUpload() {
	config := loadConfigFromFile()
	uploadableMetadata, uploadableMedia := uploadsrcdir.GetUploadableFiles(config)

	// If there are no files to upload, do nothing
	if len(uploadableMetadata) == 0 && len(uploadableMedia) == 0 {
		fmt.Println("No uploadable files identified.")
		return
	}

	// TODO: Check, that the import files are not too new

	// Open WebDAV client
	c := webdavupload.GetWebdavClient(config)

	// Check that the remote is not currently occupied.
	if !webdavupload.CheckRemoteIsFree(c) {
		fmt.Println("The remote is currently occupied (very recent files, or an import config is currently waiting to be processed, are present).")
		return
	}

	// Upload the files
	if len(uploadableMetadata) > 0 {
		webdavupload.UploadMetadataFiles(c, uploadableMetadata)
	}
	if len(uploadableMedia) > 0 {
		webdavupload.UploadMediaFiles(c, uploadableMedia)
	}

	// Generate the import config. The error must not be dropped: a failed
	// write would otherwise look like a successful run.
	if err := webdavupload.SetImportConfigToTheRemote(c, config); err != nil {
		fmt.Println("Failed to write the import config to the remote")
		fmt.Println("Reason:")
		fmt.Println(err)
		os.Exit(1)
	}
}

View File

@ -28,7 +28,6 @@ func ListParsers() []ParserListItem {
parserListFromApi := []ParserListItem{}
// parserListFromApi := new(ApiParserListFormat)
json.Unmarshal(rawParserList, &parserListFromApi)
print(parserListFromApi)
return parserListFromApi

View File

@ -14,8 +14,7 @@ type MDWebDavUploaderConfig struct {
WebDavAuthToken string `json:"token"`
InstitutionId int `json:"institution_id"`
Parser string `json:"parser"`
MetadataFolder string `json:"metadata_folder"`
MediaFolder string `json:"media_folder"`
UploadDir string `json:"upload_directory"`
PublishOnImport bool `json:"visible"`
Settings map[string]string `json:"settings"`
}
@ -125,19 +124,12 @@ func ValidateConfig(conf MDWebDavUploaderConfig) (MDWebDavUploaderConfig, error)
}
conf.Parser = parserLink
// Validate and clean metadata folder
metadataFolder, mFolderErr := ValidateUploadDir(conf.MetadataFolder)
// Validate and clean upload folder
uploadDir, mFolderErr := ValidateUploadDir(conf.UploadDir)
if mFolderErr != nil {
return conf, mFolderErr
}
conf.MetadataFolder = metadataFolder
// Validate and clean media folder
mediaFolder, mediaFolderErr := ValidateUploadDir(conf.MediaFolder)
if mediaFolderErr != nil {
return conf, mediaFolderErr
}
conf.MediaFolder = mediaFolder
conf.UploadDir = uploadDir
return conf, nil

View File

@ -23,8 +23,7 @@ func getTestConfig() MDWebDavUploaderConfig {
panic("Test failure: Failed to create test dir")
}
input.MetadataFolder = testDir
input.MediaFolder = testDir
input.UploadDir = testDir
return input
@ -128,13 +127,13 @@ func TestValidateUploaderConfigAcceptsValidParser(t *testing.T) {
}
// Test that ValidateConfig() fails on non-existent folder.
func TestValidateUploaderConfigFailsOnInvalidMetadataFolder(t *testing.T) {
func TestValidateUploaderConfigFailsOnInvalidUploadDir(t *testing.T) {
input := getTestConfig()
input.MetadataFolder = "nonexistentfolder"
input.UploadDir = "nonexistentfolder"
_, err := ValidateConfig(input)
if err == nil {
t.Fatalf("ValidateConfig() does not return an error on an invalid metadata folder")
t.Fatalf("ValidateConfig() does not return an error on an invalid upload folder")
}
}
@ -145,14 +144,14 @@ func TestWritingAndReadingConfigWorks(t *testing.T) {
input := getTestConfig()
input, _ = ValidateConfig(input)
writeErr := StoreConfigToFile(input, input.MetadataFolder + "/config.json")
writeErr := StoreConfigToFile(input, input.UploadDir + "/config.json")
if writeErr != nil {
t.Log("Error:")
t.Log(writeErr)
t.Fatalf("Failed to write config to override path")
}
loadedFromFile, setupRequired, err := LoadFromFile(input.MetadataFolder + "/config.json")
loadedFromFile, setupRequired, err := LoadFromFile(input.UploadDir + "/config.json")
if setupRequired != false {
t.Fatalf("Expected no setup to be required, but return value indicated thus")
@ -161,7 +160,7 @@ func TestWritingAndReadingConfigWorks(t *testing.T) {
t.Fatalf("Returned an error on trying to load config file")
}
// Golang can't compare structs with slices or maps
if input.InstanceLink != loadedFromFile.InstanceLink || input.Mail != loadedFromFile.Mail || input.WebDavAuthToken != loadedFromFile.WebDavAuthToken || input.InstitutionId != loadedFromFile.InstitutionId || input.Parser != loadedFromFile.Parser || input.MetadataFolder != loadedFromFile.MetadataFolder || input.MediaFolder != loadedFromFile.MediaFolder || input.PublishOnImport != loadedFromFile.PublishOnImport {
if input.InstanceLink != loadedFromFile.InstanceLink || input.Mail != loadedFromFile.Mail || input.WebDavAuthToken != loadedFromFile.WebDavAuthToken || input.InstitutionId != loadedFromFile.InstitutionId || input.Parser != loadedFromFile.Parser || input.UploadDir != loadedFromFile.UploadDir || input.PublishOnImport != loadedFromFile.PublishOnImport {
t.Log("Input")
t.Log(input)

View File

@ -0,0 +1,20 @@
package importconfiggen
import (
	"maps"
	"slices"
	"strings"

	"gitea.armuli.eu/museum-digital/museum-digital-webdav-uploader/src/configloader"
)
// GenerateImportConfig renders the text of an import config for the given
// uploader configuration.
//
// The output consists of a "# Generated by user: " comment line, a "mail: "
// line, a "parser: " line and one "setting: <key>: <value>" line per entry
// of config.Settings. Lines are separated by "\n" and there is no trailing
// newline. Settings are emitted in ascending key order so that the same
// configuration always yields the same text (Go map iteration order is
// randomized).
func GenerateImportConfig(config configloader.MDWebDavUploaderConfig) string {
	var b strings.Builder
	b.WriteString("# Generated by user: " + config.Username)
	b.WriteString("\nmail: " + config.Mail)
	b.WriteString("\nparser: " + config.Parser)

	// Iterating the keys of a nil map yields nothing, so no nil check is
	// needed here.
	for _, key := range slices.Sorted(maps.Keys(config.Settings)) {
		b.WriteString("\nsetting: " + key + ": " + config.Settings[key])
	}

	return b.String()
}

View File

@ -0,0 +1,84 @@
package uploadsrcdir
import (
"path/filepath"
"slices"
"os"
"io/fs"
"fmt"
"strings"
"gitea.armuli.eu/museum-digital/museum-digital-webdav-uploader/src/configloader"
)
// MetadataFileExts lists the file extensions (lower case, with leading dot)
// by which files in the upload directory are classified as metadata files.
var MetadataFileExts = []string{".xml", ".csv", ".json"}
// MediaFileExts lists the file extensions (lower case, with leading dot)
// by which files in the upload directory are classified as media files.
var MediaFileExts = []string{".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tif", ".tiff", ".pdf", ".mp3", ".mp4"}
// getFilesInDir walks startPath recursively and returns the file info of
// every non-directory entry whose extension is contained in fileExts.
//
// An extension matches either verbatim or after lower-casing, so "a.XML"
// matches ".xml". Directories are never returned, even if their name ends
// in a listed extension. If the walk fails (e.g. startPath does not exist),
// the error is printed and the entries collected so far are returned.
// The returned slice is never nil.
func getFilesInDir(startPath string, fileExts []string) []os.FileInfo {
	allFiles := []os.FileInfo{}

	walkErr := filepath.WalkDir(startPath, func(path string, entry fs.DirEntry, err error) error {
		if err != nil {
			// entry may be nil here, so bail out before touching it.
			return err
		}

		// A directory named e.g. "export.xml" is not an uploadable file.
		if entry.IsDir() {
			return nil
		}

		ext := filepath.Ext(entry.Name())
		if !slices.Contains(fileExts, ext) && !slices.Contains(fileExts, strings.ToLower(ext)) {
			return nil
		}

		info, infoErr := entry.Info()
		if infoErr != nil {
			// Best effort: report the file and continue with the rest.
			fmt.Println("Error retrieving file info for:", path, infoErr)
			return nil
		}
		allFiles = append(allFiles, info)
		return nil
	})
	if walkErr != nil {
		fmt.Println("Error listing files in:", startPath, walkErr)
	}

	return allFiles
}
// ListUploadableMetadata returns the file info of all files below
// config.UploadDir whose extension is listed in MetadataFileExts.
func ListUploadableMetadata(config configloader.MDWebDavUploaderConfig) []os.FileInfo {
	uploadDir := config.UploadDir
	return getFilesInDir(uploadDir, MetadataFileExts)
}
// ListUploadableMedia returns the file info of all files below
// config.UploadDir whose extension is listed in MediaFileExts.
func ListUploadableMedia(config configloader.MDWebDavUploaderConfig) []os.FileInfo {
	uploadDir := config.UploadDir
	return getFilesInDir(uploadDir, MediaFileExts)
}
// GetUploadableFiles walks config.UploadDir recursively and returns the
// paths of the uploadable files, grouped into a) metadata files and
// b) media files.
//
// A file belongs to a group if its extension is listed in MetadataFileExts
// or MediaFileExts respectively, either verbatim or after lower-casing.
// Directories are skipped even if their name ends in a listed extension,
// as they cannot be opened for upload as files. If the walk fails, the error
// is printed and the paths collected so far are returned. Both returned
// slices are never nil.
func GetUploadableFiles(config configloader.MDWebDavUploaderConfig) ([]string, []string) {
	metadataFiles := []string{}
	mediaFiles := []string{}

	// hasExt reports whether ext is listed in exts, ignoring upper-case
	// spellings of the file's extension.
	hasExt := func(exts []string, ext string) bool {
		return slices.Contains(exts, ext) || slices.Contains(exts, strings.ToLower(ext))
	}

	walkErr := filepath.WalkDir(config.UploadDir, func(path string, entry fs.DirEntry, err error) error {
		if err != nil {
			// entry may be nil here, so bail out before touching it.
			return err
		}
		if entry.IsDir() {
			return nil
		}

		ext := filepath.Ext(entry.Name())
		if hasExt(MetadataFileExts, ext) {
			metadataFiles = append(metadataFiles, path)
		}
		if hasExt(MediaFileExts, ext) {
			mediaFiles = append(mediaFiles, path)
		}
		return nil
	})
	if walkErr != nil {
		fmt.Println("Error listing files in:", config.UploadDir, walkErr)
	}

	return metadataFiles, mediaFiles
}

View File

@ -0,0 +1,115 @@
package webdavupload
import (
"os"
"sync"
"time"
"github.com/studio-b12/gowebdav"
"gitea.armuli.eu/museum-digital/museum-digital-webdav-uploader/src/configloader"
)
// listWebDavFolderContents creates a new WebDAV client from config and
// returns the result of reading the remote directory at path.
// NOTE(review): the return value of Connect() is discarded; only the error
// from ReadDir reaches the caller.
func listWebDavFolderContents(config configloader.MDWebDavUploaderConfig, path string) ([]os.FileInfo, error) {
	client := gowebdav.NewClient(getWebDavHost(config), config.Username, config.WebDavAuthToken)
	client.Connect()
	return client.ReadDir(path)
}
// ListTopLevelContents lists the entries at the top level (".") of the
// WebDAV remote described by config.
func ListTopLevelContents(config configloader.MDWebDavUploaderConfig) ([]os.FileInfo, error) {
	const remotePath = "."
	return listWebDavFolderContents(config, remotePath)
}
// ListMetadataDir lists the entries of the remote metadata directory
// ("./IMPORT_XML") of the WebDAV remote described by config.
func ListMetadataDir(config configloader.MDWebDavUploaderConfig) ([]os.FileInfo, error) {
	const remotePath = "./IMPORT_XML"
	return listWebDavFolderContents(config, remotePath)
}
// ListMediaDir lists the entries of the remote media directory
// ("./IMPORT_IMG") of the WebDAV remote described by config.
func ListMediaDir(config configloader.MDWebDavUploaderConfig) ([]os.FileInfo, error) {
	const remotePath = "./IMPORT_IMG"
	return listWebDavFolderContents(config, remotePath)
}
// checkImportConfigExists stats "./import_config.txt" on the remote.
// It returns false only if gowebdav.IsErrNotFound reports true for the
// error returned by Stat; in every other case it returns true.
func checkImportConfigExists(c *gowebdav.Client) bool {
	_, statErr := c.Stat("./import_config.txt")
	return !gowebdav.IsErrNotFound(statErr)
}
// checkRemoteDirIsFree reports whether the remote directory at path may be
// written to: it must be readable and none of its entries may have a
// modification time less than 10 minutes before now. The reason for a
// negative result is emitted through the builtin print.
func checkRemoteDirIsFree(c *gowebdav.Client, path string) bool {
	const quietPeriod = 10 * time.Minute
	now := time.Now()

	entries, readErr := c.ReadDir(path)
	if readErr != nil {
		print("Failed to load remote contents")
		return false
	}

	for _, entry := range entries {
		if now.Sub(entry.ModTime()) >= quietPeriod {
			continue
		}
		print("File " + entry.Name() + " has been recently renamed")
		return false
	}
	return true
}
// checkRemoteImportFilesAreTooRecent checks "./IMPORT_XML" and
// "./IMPORT_IMG" concurrently with checkRemoteDirIsFree and returns true if
// at least one of them is reported as not free. Such a result is taken as a
// sign of a concurrent upload.
func checkRemoteImportFilesAreTooRecent(c *gowebdav.Client) bool {
	var (
		pending      sync.WaitGroup
		metadataFree bool
		mediaFree    bool
	)

	// Each probe writes to its own variable, so no further locking is needed.
	probe := func(dir string, free *bool) {
		defer pending.Done()
		*free = checkRemoteDirIsFree(c, dir)
	}

	pending.Add(2)
	go probe("./IMPORT_XML", &metadataFree)
	go probe("./IMPORT_IMG", &mediaFree)
	pending.Wait()

	return !(metadataFree && mediaFree)
}
// CheckRemoteIsFree reports whether the remote is free for a new upload.
// It is not free if checkImportConfigExists reports an import config, or if
// checkRemoteImportFilesAreTooRecent reports recent files in the remote
// metadata or media directory. The second check only runs if the first one
// found no import config.
func CheckRemoteIsFree(c *gowebdav.Client) bool {
	if checkImportConfigExists(c) {
		return false
	}
	return !checkRemoteImportFilesAreTooRecent(c)
}

View File

@ -0,0 +1,35 @@
package webdavupload
import (
"net/url"
"strconv"
"github.com/studio-b12/gowebdav"
"gitea.armuli.eu/museum-digital/museum-digital-webdav-uploader/src/configloader"
)
// getWebDavHost builds the WebDAV endpoint URL for the configured instance
// and institution: scheme "https", the host of config.InstanceLink and the
// path "/musdb/webdav/<InstitutionId>". Panics if config.InstanceLink cannot
// be parsed as a URL.
func getWebDavHost(config configloader.MDWebDavUploaderConfig) string {
	instance, parseErr := url.Parse(config.InstanceLink)
	if parseErr != nil {
		panic("Your settings must be broken. Failed to load instance URL.")
	}

	endpoint := url.URL{Scheme: "https", Host: instance.Host}
	endpoint.Path = "/musdb/webdav/" + strconv.Itoa(config.InstitutionId)
	return endpoint.String()
}
// GetWebdavClient creates a WebDAV client for the endpoint and credentials
// of config, calls Connect() on it and returns it.
// NOTE(review): the return value of Connect() is discarded.
func GetWebdavClient(config configloader.MDWebDavUploaderConfig) *gowebdav.Client {
	host := getWebDavHost(config)
	client := gowebdav.NewClient(host, config.Username, config.WebDavAuthToken)
	client.Connect()
	return client
}

View File

@ -0,0 +1,84 @@
package webdavupload
import (
"fmt"
"path/filepath"
"os"
"runtime"
"sync"
"sync/atomic"
"github.com/studio-b12/gowebdav"
"gitea.armuli.eu/museum-digital/museum-digital-webdav-uploader/src/configloader"
"gitea.armuli.eu/museum-digital/museum-digital-webdav-uploader/src/importconfiggen"
)
// SetImportConfigToTheRemote generates the import config for config and
// writes it to "import_config.txt" on the remote with mode 0660. The error
// of the write is returned.
func SetImportConfigToTheRemote(c *gowebdav.Client, config configloader.MDWebDavUploaderConfig) error {
	payload := []byte(importconfiggen.GenerateImportConfig(config))
	return c.Write("import_config.txt", payload, 0660)
}
// uploadFiles uploads the local files to "./<remoteTarget>/<basename>" on
// the remote, running a bounded number of uploads concurrently.
//
// c is the WebDAV client to write with, files the local paths to upload,
// remoteTarget the remote directory name and outputContext a label that
// prefixes every progress line.
//
// Progress and upload failures are printed to stdout. A file that cannot be
// opened aborts the program with a panic; a file whose upload fails is
// reported and not counted as done. The function returns once all uploads
// have finished.
func uploadFiles(c *gowebdav.Client, files []string, remoteTarget string, outputContext string) {
	total := len(files)
	var counter atomic.Uint64

	// Determine the number of upload tasks to be processed concurrently.
	// 10 will be a hard maximum to not spam the server.
	maxConcTasks := min(10, runtime.NumCPU())

	// Set a semaphore to restrict the number of concurrent upload tasks.
	semaphore := make(chan struct{}, maxConcTasks)
	var wg sync.WaitGroup

	fmt.Printf("Will upload %v files. Processing %v tasks at a time.\n", total, maxConcTasks)

	for _, f := range files {
		semaphore <- struct{}{} // acquire
		wg.Add(1)

		go func(localPath string) {
			defer wg.Done()
			// Release in a defer so the slot is freed on every exit path.
			defer func() { <-semaphore }()

			basename := filepath.Base(localPath)

			file, fOpenErr := os.Open(localPath)
			if fOpenErr != nil {
				panic("Failed to read file: " + localPath + ": " + fOpenErr.Error())
			}
			defer file.Close()

			if writeErr := c.WriteStream("./"+remoteTarget+"/"+basename, file, 0644); writeErr != nil {
				fmt.Printf("%s. Failed to upload file %v: %v\n", outputContext, basename, writeErr)
				return
			}

			// Use the value returned by Add: the atomic itself must not be
			// copied, and formatting it would print the struct, not the count.
			done := counter.Add(1)
			fmt.Printf("%s. File %v of %v. Done. (File: %v)\n", outputContext, done, total, basename)
		}(f)
	}

	wg.Wait()
	fmt.Println("Done")
}
// UploadMetadataFiles uploads the given local files into the remote
// "IMPORT_XML" directory.
func UploadMetadataFiles(c *gowebdav.Client, files []string) {
	const (
		remoteDir = "IMPORT_XML"
		label     = "Uploading metadata files"
	)
	uploadFiles(c, files, remoteDir, label)
}
// UploadMediaFiles uploads the given local files into the remote
// "IMPORT_IMG" directory.
func UploadMediaFiles(c *gowebdav.Client, files []string) {
	const (
		remoteDir = "IMPORT_IMG"
		label     = "Uploading media files"
	)
	uploadFiles(c, files, remoteDir, label)
}