From 43d8b878d1363d010d7c048604b9ef4964ad3482 Mon Sep 17 00:00:00 2001 From: Joshua Ramon Enslin Date: Mon, 23 Jan 2023 15:21:35 +0100 Subject: [PATCH] Initial --- README.md | 3 + parsers/FDH-Import-By-Dir-Structure.php | 253 ++++++++ parsers/FDH-Konvolute.php | 135 ++++ parsers/Readme.md | 3 + parsers/SRU-MODS.php | 369 +++++++++++ parsers/becollect_dtmb.php | 805 ++++++++++++++++++++++++ parsers/csv.php | 169 +++++ parsers/csv_filemaker_muehlenhaupt.php | 262 ++++++++ parsers/csv_spsg.php | 409 ++++++++++++ parsers/faust_haendelhaus.php | 332 ++++++++++ parsers/google-arts-and-culture.php | 286 +++++++++ parsers/gos.php | 94 +++ parsers/imdas_pro_sql.php | 597 ++++++++++++++++++ parsers/lido-mspt.php | 115 ++++ parsers/museo.php | 392 ++++++++++++ 15 files changed, 4224 insertions(+) create mode 100644 README.md create mode 100644 parsers/FDH-Import-By-Dir-Structure.php create mode 100644 parsers/FDH-Konvolute.php create mode 100644 parsers/Readme.md create mode 100644 parsers/SRU-MODS.php create mode 100644 parsers/becollect_dtmb.php create mode 100644 parsers/csv.php create mode 100644 parsers/csv_filemaker_muehlenhaupt.php create mode 100644 parsers/csv_spsg.php create mode 100644 parsers/faust_haendelhaus.php create mode 100644 parsers/google-arts-and-culture.php create mode 100644 parsers/gos.php create mode 100644 parsers/imdas_pro_sql.php create mode 100644 parsers/lido-mspt.php create mode 100644 parsers/museo.php diff --git a/README.md b/README.md new file mode 100644 index 0000000..573f24a --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# Archive for Rarely Used Parsers Until 2022 + +This repository is an archive repository for rarely used parsers that were written before the move to full OOP in the importer. They may be of use in the future as a base upon which OOP versions of these parsers may be written when the need arises. 
diff --git a/parsers/FDH-Import-By-Dir-Structure.php b/parsers/FDH-Import-By-Dir-Structure.php
new file mode 100644
index 0000000..8a21bf9
--- /dev/null
+++ b/parsers/FDH-Import-By-Dir-Structure.php
@@ -0,0 +1,253 @@
+ * @author Stefan Rohde-Enslin
+ * @link https://imports.museum-digital.org/importer/parsers/csvxml.php
+ */
+declare(strict_types = 1);
+
+/**
+ * Splits a folder path into the object groups it lies in and its inventory number.
+ *
+ * The last path segment is taken as the inventory number, every segment above
+ * it as an object group. Every occurrence of 'jpg ' is removed from each segment.
+ *
+ * @param string $folder_path Folder path, segments separated by '/'.
+ *
+ * @return array{0: array, 1: string} Object groups (outermost first), inventory number.
+ */
+function parseFolderToObjGroupInvNo(string $folder_path):array {
+
+    $parts = explode('/', $folder_path);
+
+    // Take the last segment off by position. Removing it by value (array_diff
+    // against the already cleaned inventory number) left it in the group list
+    // whenever it carried the 'jpg ' marker, and dropped any parent folder
+    // that happened to have the same name as the inventory number.
+    $invNo = str_replace('jpg ', '', (string)array_pop($parts));
+
+    $objGroups = [];
+    foreach ($parts as $part) {
+        $objGroups[] = str_replace('jpg ', '', $part);
+    }
+
+    return [$objGroups, $invNo];
+
+}
+
+/**
+ * Recurses down a folder directory to list all folders with JPG files in them.
+ *
+ * A folder is listed once if at least one of its direct entries has the MIME
+ * type image/jpeg. Matching sub-folders are listed before the folder that
+ * contains them.
+ *
+ * NOTE(review): relies on MD_STD::scandir() not returning '.' and '..',
+ * otherwise the recursion would not terminate - confirm.
+ *
+ * @param string $folder_path Folder path.
+ *
+ * @return array
+ */
+function getFoldersWithJpgs(string $folder_path):array {
+
+    $filesFolders = MD_STD::scandir($folder_path);
+
+    $addToList = false;
+    $output = [];
+    foreach ($filesFolders as $fileOrFolderName) {
+
+        $fileOrFolder = $folder_path . '/' . $fileOrFolderName;
+
+        // Descend first; results of sub-folders are collected as they are found.
+        if (is_dir($fileOrFolder)) {
+            $output = array_merge($output, getFoldersWithJpgs($fileOrFolder));
+            continue;
+        }
+
+        // If is_file
+        if (in_array(mime_content_type($fileOrFolder), ['image/jpeg'], true)) {
+            $addToList = true;
+        }
+
+    }
+
+    if ($addToList === true) {
+        $output[] = $folder_path;
+    }
+
+    return $output;
+
+}
+
+/**
+ * Parse function.
+ *
+ * @param array $version Instance to import into.
+ * @param integer $institution_id Institution to import to.
+ * @param non-empty-string $XMLFolder Folder of the XML files to import.
+ * @param string $dataFolder Data folder.
+ * @param integer $sammlung_id Collection ID. Optional.
+ * @param boolean $visibility Import objects to be directly visible?.
+ * @param boolean $insertOnly If set to true, only new objects are added,
+ * old are not updated.
+ *
+ * @return void
+ *
+ * @throws MDFileDoesNotExist If the XML import folder does not exist.
+ * @throws Exception If no data folder is given.
+ */
+function parseImportXML(array $version, int $institution_id, string $XMLFolder, string $dataFolder = "", int $sammlung_id = 0, bool $visibility = false, bool $insertOnly = false) {
+
+    if (!is_dir(MD_IMPORTER_CONF::$import_dir_xml . "{$XMLFolder}")) throw new MDFileDoesNotExist("The folder to import from ($XMLFolder) does not exist.");
+
+    // Objects are built from the image files alone, so a data folder is required.
+    if (empty($dataFolder)) {
+        throw new Exception("You need to select a data folder to run image-only imports");
+    }
+
+    // Set up writers
+
+    $outputHandler = new MDOutputHandler;
+    $outputHandler->setVerbosity(2);
+
+    $objectWriter = new MDObjectWriter($version['mainDB'], $version['nodaDB'], $version['link'], $version['filepath'], $version['dataFolderLink']);
+
+    /*
+    $objectWriter->disableUpdateBaseData = true;
+    $objectWriter->disableImportAdditionalData = true;
+    $objectWriter->disableImportImagesResources = true;
+    $objectWriter->disableImportTags = true;
+    $objectWriter->disableImportLiterature = true;
+    $objectWriter->disableImportHyperlinks = true;
+    $objectWriter->disableImportSeries = true;
+    $objectWriter->disableImportCollections = true;
+    $objectWriter->disableImportObjectRecords = true;
+    $objectWriter->disableImportTranscriptions = true;
+    $objectWriter->disableImportMarkings = true;
+    $objectWriter->disableImportExhibitions = true;
+    $objectWriter->disableImportReception = true;
+    */
+
+    $i = 0;
+    // Objects whose running number is below this value are skipped (to resume a run).
+    $startAtCounter = 0;
+
+    // Get folders with JPGS and group the JPEG files by the inventory number
+    // encoded in the file name (everything before the first underscore).
+
+    $filesByInvNo = [];
+
+    $foldersToSkip = 0;
+    $foldersSkipped = 0;
+    foreach (MD_STD::scandir(MD_IMPORTER_CONF::$import_dir_files . $dataFolder) as $tFolder) {
+
+        if ($foldersSkipped < $foldersToSkip) {
+            echo 'Skipped folder ' . $tFolder . PHP_EOL;
+            ++$foldersSkipped;
+            continue;
+        }
+
+        $foldersToImport = getFoldersWithJpgs(MD_IMPORTER_CONF::$import_dir_files . $dataFolder . '/' . $tFolder);
+
+        foreach ($foldersToImport as $folderToImport) {
+
+            /*
+            $folderName = str_replace(MD_IMPORTER_CONF::$import_dir_files . $dataFolder . '/', "", $folderToImport);
+            echo $folderName . PHP_EOL;
+
+            list($objectGroups, $invNo) = parseFolderToObjGroupInvNo($folderName);
+
+            print_r($objectGroups);
+            print_r($invNo);
+            */
+            foreach (MD_STD::scandir($folderToImport) as $tFile) {
+
+                if (!in_array(mime_content_type($folderToImport . '/' . $tFile), ['image/jpeg'], true)) {
+                    continue;
+                }
+
+                $invNo = explode('_', $tFile)[0];
+                if (empty($filesByInvNo[$invNo])) $filesByInvNo[$invNo] = [];
+                $filesByInvNo[$invNo][] = $folderToImport . '/' . $tFile;
+            }
+
+        }
+
+    }
+
+    $tagWriter = new MDTagWriter($version['nodaDB']);
+    $seriesWriter = new MDSeriesWriter($version['mainDB']);
+    $literatureWriter = new MDLiteratureWriter($version['mainDB']);
+    foreach ($filesByInvNo as $invNoKey => $imageFiles) {
+
+        if (empty($invNoKey)) continue;
+
+        // PHP stores purely numeric array keys as integers. Cast back so the
+        // inventory number is handed on as a string, as the other parsers do.
+        $invNo = (string)$invNoKey;
+
+        ++$i;
+        // Was `$i > $startAtCounter || $i < 20`, which is true for every object
+        // while $startAtCounter is 0, so nothing was ever imported. Use the same
+        // resume check as the sibling parsers.
+        if ($i < $startAtCounter) {
+            continue;
+        }
+        $outputHandler->toLog("Starting to process file #{$i}", 2);
+
+        $object = new MDObject($version['mainDB'], $version['nodaDB'], $version['language'], $institution_id, $invNo, "Handschrift", $invNo, "Handschrift Friedrich von Hardenbergs.", $outputHandler);
+        $images = [];
+
+        foreach ($imageFiles as $cur_img) {
+
+            // The image title is the file name without its extension.
+            $image_title = pathinfo($cur_img, PATHINFO_FILENAME);
+            $image = new MDImage($version['mainDB'], $image_title, $cur_img);
+            # $image->set_image_beschreibung($img_description);
+            /*
+            if (isset($objectData["image_rights" . $suffix])) $image->set_image_rights($objectData["image_rights" . $suffix]);
+            if (isset($objectData["image_owner" . $suffix])) $image->set_image_owner($objectData["image_owner" . $suffix]);
+            if (isset($objectData["image_creator" . $suffix])) $image->set_image_creator($objectData["image_creator" . $suffix]);
+            */
+            $image->set_visible(true);
+            $images[] = $image;
+
+            $outputHandler->toLog("Added image $image_title", 2);
+            usleep(1800);
+
+        }
+
+        // The first image found becomes the object's main image.
+        if (!empty($images)) $images[0]->set_main_image(true);
+        foreach ($images as $image) $object->appendImage($image);
+
+        while ($version['mainDB']->more_results()) $version['mainDB']->next_result();
+        while ($version['nodaDB']->more_results()) $version['nodaDB']->next_result();
+        $object->appendTagByID(576);
+
+        // Get object groups to link: they are derived from the folder (relative
+        // to the data folder) in which the object's first image lies.
+        $folderName = str_replace(MD_IMPORTER_CONF::$import_dir_files . $dataFolder . '/', "", pathinfo($imageFiles[0], PATHINFO_DIRNAME));
+        [$objectGroups] = parseFolderToObjGroupInvNo($folderName);
+
+        foreach ($objectGroups as $objectGroup) {
+            $object->appendSeriesByName($objectGroup, $objectGroup, $seriesWriter, ["serie_weitergabe" => "100"]);
+            // Groups containing "Kiste" double as the object's location.
+            if (strpos($objectGroup, 'Kiste') !== false) {
+                $object->set_string("standort_eigentlich", $objectGroup);
+            }
+        }
+
+        if ($sammlung_id !== 0) {
+            $object->appendCollectionByID($sammlung_id);
+        }
+
+        $event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], 7, $outputHandler);
+        $event->set_persinst_id("Georg Philipp Friedrich von Hardenberg", "1772", "1801");
+        $event->set_ereignis_persinst_sicher(true);
+        $object->appendEvent($event);
+
+        // Get literature link: the last underscore-separated part of each image
+        // file name (after removing "_000"); only values starting with "HKA" are linked.
+        $litAlreadyLinked = [];
+        foreach ($imageFiles as $imageFile) {
+            $imageFile = trim($imageFile, "_ &%");
+            $imgFileEnd = explode('_', str_replace("_000", '', pathinfo($imageFile, PATHINFO_FILENAME)));
+            if (($litLink = end($imgFileEnd)) && !in_array($litLink, $litAlreadyLinked, true)) {
+                $litAlreadyLinked[] = $litLink;
+                if (substr($litLink, 0, 3) === 'HKA') {
+                    if (($litNameEnd = strpos($litLink, ',')) !== false) {
+                        // "<title>,<place in title>": link the title and add series for both levels.
+                        $litName = substr($litLink, 0, $litNameEnd);
+                        $litInLit = substr($litLink, $litNameEnd + 1);
+                        $object->appendLiteratureByName($litName, "", "", "", $litInLit, $literatureWriter);
+                        $object->appendSeriesByName($litName, $litName, $seriesWriter, ["serie_weitergabe" => "100"]);
+                        $object->appendSeriesByName($litName . ' ' . $litInLit, $litName . ' ' . $litInLit, $seriesWriter, ["serie_weitergabe" => "100"]);
+                    }
+                    else {
+                        $object->appendLiteratureByName($litLink, "", "", "", "", $literatureWriter);
+                    }
+                }
+            }
+        }
+
+        $object->set_objekt_publik($visibility);
+        $outputHandler->toLog("Importing file #{$i}, inv. no: $invNo", 2);
+        $objectWriter->writeObject($object, true, $insertOnly, $outputHandler);
+        usleep(2000);
+
+    }
+
+}
diff --git a/parsers/FDH-Konvolute.php b/parsers/FDH-Konvolute.php
new file mode 100644
index 0000000..2807b8f
--- /dev/null
+++ b/parsers/FDH-Konvolute.php
@@ -0,0 +1,135 @@
+ * @link https://imports.museum-digital.org/importer/parsers/csvxml.php
+ */
+declare(strict_types = 1);
+
+/**
+ * Parse function.
+ *
+ * @param array $version Instance to import into.
+ * @param integer $institution_id Institution to import to.
+ * @param non-empty-string $XMLFolder Folder of the XML files to import.
+ * @param string $dataFolder Data folder.
+ * @param integer $sammlung_id Collection ID. Optional.
+ * @param boolean $visibility Import objects to be directly visible?.
+ * @param boolean $insertOnly If set to true, only new objects are added,
+ * old are not updated.
+ *
+ * @return void
+ */
+function parseImportXML(array $version, int $institution_id, string $XMLFolder, string $dataFolder = "", int $sammlung_id = 0, bool $visibility = false, bool $insertOnly = false) {
+
+    if (!is_dir(MD_IMPORTER_CONF::$import_dir_xml . 
"{$XMLFolder}")) throw new MDFileDoesNotExist("The folder to import from ($XMLFolder) does not exist."); + + if (empty($dataFolder)) { + throw new Exception("You need to select a data folder to run image-only imports"); + } + else $importImages = true; + + // Set up writers + + $outputHandler = new MDOutputHandler; + $outputHandler->setVerbosity(2); + + $objectWriter = new MDObjectWriter($version['mainDB'], $version['nodaDB'], $version['link'], $version['filepath'], $version['dataFolderLink']); + + $objectWriter->disableUpdateBaseData = true; + $objectWriter->disableImportAdditionalData = true; + # $objectWriter->disableImportImagesResources = true; + $objectWriter->disableImportTags = true; + $objectWriter->disableImportLiterature = true; + $objectWriter->disableImportHyperlinks = true; + $objectWriter->disableImportSeries = true; + $objectWriter->disableImportCollections = true; + $objectWriter->disableImportObjectRecords = true; + $objectWriter->disableImportTranscriptions = true; + $objectWriter->disableImportMarkings = true; + $objectWriter->disableImportExhibitions = true; + $objectWriter->disableImportReception = true; + + $i = 0; + $startAtCounter = 0; + + foreach (MD_STD::scandir(MD_IMPORTER_CONF::$import_dir_xml . "{$XMLFolder}") as $xmlFile) { + + if (pathinfo($xmlFile, PATHINFO_EXTENSION) !== "txt") continue; + + ++$i; + if ($i < $startAtCounter) { + continue; + } + $outputHandler->toLog("Starting to process file #{$i}", 2); + + // File name is inventory number + $inventory_number = pathinfo($xmlFile, PATHINFO_FILENAME); + + $all_images_raw = MD_STD::file_get_contents(MD_IMPORTER_CONF::$import_dir_xml . "{$XMLFolder}" . '/' . $xmlFile); + + // Clean away spaces after newlines + while (strpos($all_images_raw, PHP_EOL . " ") !== false) { + $all_images_raw = str_replace(PHP_EOL . " ", PHP_EOL, $all_images_raw); + } + // Clean away tabs after newlines + while (strpos($all_images_raw, PHP_EOL . "\t") !== false) { + $all_images_raw = str_replace(PHP_EOL . 
"\t", PHP_EOL, $all_images_raw); + } + while (strpos($all_images_raw, "\u{feff}") !== false) { + $all_images_raw = str_replace("\u{feff}", "", $all_images_raw); + } + + // All single images are separated by two newlines + $all_images = explode(PHP_EOL . PHP_EOL . '[', $all_images_raw); + + $object = new MDObject($version['mainDB'], $version['nodaDB'], $version['language'], $institution_id, $inventory_number, "ignored_anyway", "ignored_anyway", "ignored_anyway", $outputHandler); + $images = []; + + foreach ($all_images as $cur_img) { + + $img_info = array_diff(explode(PHP_EOL, $cur_img), ['', ' ']); + + $filename_base = trim($img_info[0], "[] \t["); + $filename_base = strtr($filename_base, ["[" => ""]); + if (file_exists(MD_IMPORTER_CONF::$import_dir_files . $dataFolder . "/" . $filename_base . '.jpg')) { + $filename = $filename_base . '.jpg'; + } + else if (file_exists(MD_IMPORTER_CONF::$import_dir_files . $dataFolder . "/" . $filename_base . '.JPG')) { + $filename = $filename_base . '.JPG'; + } + else { + echo MD_IMPORTER_CONF::$import_dir_files . $dataFolder . "/" . $filename_base . '.jpg'; + throw new MDFileDoesNotExist("There is no corresponding file for " . $filename_base); + } + + // Image title is the first line after the file name. Everything after is the description. + $image_title = $img_info[1]; + unset($img_info[0], $img_info[1]); + $img_description = implode(PHP_EOL, $img_info); + + $image = new MDImage($version['mainDB'], $image_title, MD_IMPORTER_CONF::$import_dir_files . $dataFolder . "/" . $filename); + $image->set_image_beschreibung($img_description); + /* + if (isset($objectData["image_rights" . $suffix])) $image->set_image_rights($objectData["image_rights" . $suffix]); + if (isset($objectData["image_owner" . $suffix])) $image->set_image_owner($objectData["image_owner" . $suffix]); + if (isset($objectData["image_creator" . $suffix])) $image->set_image_creator($objectData["image_creator" . 
$suffix]); + */ + $image->set_visible(true); + # if (isset($objectData["image_master_filename" . $suffix])) $image->set_image_master_filename($objectData["image_master_filename" . $suffix]); + $images[] = $image; + + usleep(50); + + } + + $images[0]->set_main_image(true); + foreach ($images as $image) $object->appendImage($image); + + $newObjectID = $objectWriter->writeObject($object, true, $insertOnly, $outputHandler); + usleep(IMPORTER_DELAY_PER_OBJECT); + + } + +} diff --git a/parsers/Readme.md b/parsers/Readme.md new file mode 100644 index 0000000..cce1ba3 --- /dev/null +++ b/parsers/Readme.md @@ -0,0 +1,3 @@ +# Archive for deprecated parsers + +These parsers were written for an outdated architecture for the importer. All remaining in this folder are also relatively rarely used and kept to be migrated, if a new use case shall ever arise again. diff --git a/parsers/SRU-MODS.php b/parsers/SRU-MODS.php new file mode 100644 index 0000000..4ae4731 --- /dev/null +++ b/parsers/SRU-MODS.php @@ -0,0 +1,369 @@ + + * @author Stefan Rohde-Enslin + * @link https://imports.museum-digital.org/importer/parsers/csvxml.php + */ +declare(strict_types = 1); + +/** + * Parse function. + * + * @param array $version Instance to import into. + * @param integer $institution_id Institution to import to. + * @param non-empty-string $XMLFolder Folder of the XML files to import. + * @param string $dataFolder Data folder. + * @param integer $sammlung_id Collection ID. Optional. + * @param boolean $visibility Import objects to be directly visible?. + * @param boolean $insertOnly If set to true, only new objects are added, + * old are not updated. + * + * @return void + */ +function parseImportXML(array $version, int $institution_id, string $XMLFolder, string $dataFolder = "", int $sammlung_id = 0, bool $visibility = false, bool $insertOnly = false) { + + if (!is_dir(MD_IMPORTER_CONF::$import_dir_xml . 
"{$XMLFolder}")) throw new MDFileDoesNotExist("The folder to import from ($XMLFolder) does not exist."); + + if (empty($dataFolder)) { + $importImages = false; + } + else $importImages = true; + + // Set up writers + + $collectionWriter = new MDCollectionWriter($version['mainDB']); + $linkWriter = new MDLinkWriter($version['mainDB']); + $seriesWriter = new MDSeriesWriter($version['mainDB']); + $exhibitionWriter = new MDExhibitionWriter($version['mainDB']); + $objectRecordWriter = new MDObjectRecordWriter($version['mainDB']); + $tagWriter = new MDTagWriter($version['nodaDB']); + + $outputHandler = new MDOutputHandler; + $outputHandler->setVerbosity(2); + + $objectWriter = new MDObjectWriter($version['mainDB'], $version['nodaDB'], $version['link'], $version['filepath'], $version['dataFolderLink']); + + /* + $objectWriter->disableUpdateBaseData = true; + $objectWriter->disableImportAdditionalData = true; + */ + $objectWriter->disableImportImagesResources = true; + $objectWriter->disableImportTranscriptions = true; + $objectWriter->disableImportSeries = true; + $objectWriter->disableImportObjectRecords = true; + $objectWriter->disableImportExhibitions = true; + $objectWriter->disableImportTags = true; + $objectWriter->disableImportLiterature = true; + $objectWriter->disableImportMarkings = true; + /* + $objectWriter->disableImportCollections = true; + $objectWriter->disableImportHyperlinks = true; + $objectWriter->disableImportReception = true; + */ + + $i = 0; + $startAtCounter = 0; + + foreach (MD_STD::scandir(MD_IMPORTER_CONF::$import_dir_xml . "{$XMLFolder}") as $xmlFile) { + + $fileContents = MD_STD::file_get_contents(MD_IMPORTER_CONF::$import_dir_xml . 
"{$XMLFolder}/{$xmlFile}"); + $allRecords = explode('_____-----_____', $fileContents); + $fileContents = null; + + $languages_iso639 = array_flip(MDLanguagesSet::LANGUAGES_ISO639_2B); + + foreach ($allRecords as $recordStr) { + + ++$i; + if ($i < $startAtCounter) { + continue; + } + + if (!($objectData = simplexml_load_string($recordStr, "SimpleXMLElement", LIBXML_NOCDATA))) { + throw new Exception("Cannot load raw data into SimpleXML ({$recordStr})"); + } + + unset($invNo); + if (!empty((string)$objectData->location->shelfLocator) + and (\preg_match("/^Hs\-[0-9][0-9][0-9][0-9]$/", (string)$objectData->location->shelfLocator) + || \preg_match("/^Hs\-[0-9][0-9][0-9][0-9][0-9]$/", (string)$objectData->location->shelfLocator)) + ) { + $invNo = (string)$objectData->location->shelfLocator; + $outputHandler->toLog("Using inventory number $invNo (Hs- set) - " . $objectData->identifier, 2); + } + else if (!empty((string)$objectData->location->shelfLocator) + and (\preg_match("/^[0-9][0-9][0-9][0-9]$/", (string)$objectData->location->shelfLocator) + || \preg_match("/^[0-9][0-9][0-9][0-9][0-9]$/", (string)$objectData->location->shelfLocator)) + ) { + $invNo = 'Hs-' . (string)$objectData->location->shelfLocator; + $outputHandler->toLog("Using inventory number $invNo", 2); + } + + if (empty($invNo)) { + $invNo = substr((string)$objectData->identifier, strrpos((string)$objectData->identifier, '/') + 1); + } + + $description = $objectData->abstract . PHP_EOL . PHP_EOL . $objectData->physicalDescription; + + /* + if ($invNo !== 'Hs-28865') continue; + print_r($objectData); + exit; + */ + + $title = (string)$objectData->titleInfo->title; + if (!empty((string)$objectData->titleInfo->subTitle)) { + $title .= ': ' . 
(string)$objectData->titleInfo->subTitle; + unset($objectData->titleInfo->subTitle); + } + + // Check if inventory number is known already + $result = $version['mainDB']->query_by_stmt("SELECT 1 + FROM `objekt` + WHERE `objekt_inventarnr` = ?", "s", $invNo); + + if ($result->num_rows !== 0) { + $result->close(); + $result = null; + continue; + } + $result->close(); + $result = null; + + $object = new MDObject($version['mainDB'], $version['nodaDB'], $version['language'], $institution_id, $invNo, (string)$objectData->genre, $title, $description, $outputHandler); + + if (!empty($objectData->physicalDescription->extent)) { + if (str_ends_with((string)$objectData->physicalDescription->extent, " Br.")) { + if (strlen((string)$objectData->physicalDescription->extent) < 8) { + $object->set_string("stueckzahl", str_replace(" Br.", "", (string)$objectData->physicalDescription->extent)); + } + } + else { + $object->append_objekt_beschreibung((string)$objectData->physicalDescription->extent); + } + } + + unset($objectData->titleInfo->title, $objectData->abstract, $objectData->physicalDescription); + if (count($objectData->titleInfo->children()) === 0) { + unset($objectData->titleInfo); + } + // Will later use $objectData->identifier for setting a link to the source repository + // Will later use $objectData->genre for tags + + $object->appendTagByName((string)$objectData->genre, "", $tagWriter); + unset($objectData->genre); + + if (count($objectData->note) > 1) { + foreach ($objectData->note as $note) { + $object->append_string("notizen_text1", PHP_EOL . 
(string)$note); + } + } + else if (!empty((string)$objectData->note)) { + $object->set_string("notizen_text1", (string)$objectData->note); + } + unset($objectData->note); + + unset($objectData->location->physicalLocation); + + if (!empty((string)$objectData->location->shelfLocator)) { + $object->append_string("standort_eigentlich", (string)$objectData->location->shelfLocator); + } + unset($objectData->location->shelfLocator); + unset($objectData->location); + + foreach ($objectData->name as $actor) { + + $linkTypeName = null; + if (!empty((string)$actor->role->roleTerm[1])) { + $linkTypeName = (string)$actor->role->roleTerm[1]; + } + else if (!empty((string)$actor->role->roleTerm)) { + $linkTypeName = (string)$actor->role->roleTerm; + } + + if (!isset(MDConcActor::ACTOR_ROLES_TO_EVENT_TYPE[$linkTypeName])) { + throw new Exception("Unknown actor type: " . (string)$linkTypeName . ' in ' . (string)$objectData->identifier . ' for ' . (string)$actor->namePart . ' ///// ' . PHP_EOL . PHP_EOL . var_export($actor->role->roleTerm, true)); + } + + $linkType = MDConcActor::ACTOR_ROLES_TO_EVENT_TYPE[$linkTypeName]; + + $gndUrl = (string)$actor->attributes()->valueURI; + $gndId = substr($gndUrl, strrpos($gndUrl, '/') + 1); + + $md_event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], $linkType, $outputHandler); + + $md_event->set_persinst_id((string)$actor->namePart, "", "", ['gnd' => $gndId]); + if ($md_event->get_persinst_id() !== 0) { + + // Handle origin information for production events + if (!empty($objectData->originInfo)) { + if (in_array($linkType, MDEventsSet::EVENTS_PRODUCTION, true) and !empty($objectData->originInfo->dateCreated)) { + + if (str_starts_with((string)$objectData->originInfo->dateCreated[1], 'o.D. 
[')) { + $md_event->set_zeiten_id(trim(substr((string)$objectData->originInfo->dateCreated[1], 5), " []")); + } + + else if (str_contains((string)$objectData->originInfo->dateCreated[1], "[") + and strlen((string)$objectData->originInfo->dateCreated[1]) === 23 + and substr((string)$objectData->originInfo->dateCreated[1], 0, 10) === substr((string)$objectData->originInfo->dateCreated[1], 12, 10) + ) { + $md_event->set_zeiten_id(substr((string)$objectData->originInfo->dateCreated[1], 0, 10)); + } + + /* + else if (str_contains((string)$objectData->originInfo->dateCreated[1], "[")) { + echo substr((string)$objectData->originInfo->dateCreated[1], 12, 10); + print_r($objectData->originInfo->dateCreated); + echo "strlen:" . strlen((string)$objectData->originInfo->dateCreated[1]); + exit; + } + */ + else $md_event->set_zeiten_id((string)$objectData->originInfo->dateCreated[1]); + unset($objectData->originInfo->dateCreated); + } + + if (in_array($linkType, MDEventsSet::EVENTS_PRODUCTION, true) and !empty($objectData->originInfo->place)) { + $md_event->set_orte_id((string)$objectData->originInfo->place->placeTerm); + unset($objectData->originInfo->place->placeTerm); + if (empty($objectData->originInfo->place->children())) unset($objectData->originInfo->place); + } + if (empty($objectData->originInfo->children())) unset($objectData->originInfo); + } + + $object->appendEvent($md_event); + } + + } + unset($objectData->name); + + // If originInfo is still set here, try handling it. 
+ if (!empty($objectData->originInfo)) { + $md_event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], 1, $outputHandler); + + // Handle origin information for production events + if (!empty($objectData->originInfo->dateCreated)) { + $md_event->set_zeiten_id((string)$objectData->originInfo->dateCreated[1]); + unset($objectData->originInfo->dateCreated); + } + + if (!empty($objectData->originInfo->place)) { + $md_event->set_orte_id((string)$objectData->originInfo->place->placeTerm); + unset($objectData->originInfo->place->placeTerm); + if (empty($objectData->originInfo->place->children())) unset($objectData->originInfo->place); + } + if (empty($objectData->originInfo->children())) unset($objectData->originInfo); + + if ($md_event->get_zeiten_id() !== 0 || $md_event->get_orte_id() !== 0) { + $object->appendEvent($md_event); + } + } + + if (!empty($objectData->language)) { + $object->set_string("content_language", $languages_iso639[(string)$objectData->language->languageTerm[0]]); + $object->set_bool("content_language_show_md", true); + $object->set_bool("content_language_show_extern", true); + unset($objectData->language); + } + + if (!empty($objectData->relatedItem)) { + foreach ($objectData->relatedItem as $relatedItem) { + + if ((string)$relatedItem->attributes()->type !== 'constituent') { + continue; + } + + if (in_array((string)$relatedItem->recordInfo->recordIdentifier, ["ead_DE-F25_37_VirtuellerBestand", "ead_DE-F25_37_VirtuellerBestand_added"], true)) { + $object->appendCollectionByID(5); + } + else if ((string)$relatedItem->recordInfo->recordIdentifier === "DE-611-BF-9624") { + $object->appendCollectionByID(521); + } + else if ((string)$relatedItem->recordInfo->recordIdentifier === "DE-611-BF-9626") { + $object->appendCollectionByID(534); + } + else if ((string)$relatedItem->recordInfo->recordIdentifier === "DE-611-BF-9627") { + $object->appendCollectionByID(543); + } + else if ((string)$relatedItem->recordInfo->recordIdentifier === 
"DE-611-BF-9629") { + $object->appendCollectionByID(541); + } + else if ((string)$relatedItem->recordInfo->recordIdentifier === "DE-611-BF-9630") { + $object->appendCollectionByID(540); + } + else if ((string)$relatedItem->recordInfo->recordIdentifier === "DE-611-BF-9621") { + $object->appendCollectionByID(539); + } + else if ((string)$relatedItem->recordInfo->recordIdentifier === "DE-611-BF-9623") { + $object->appendCollectionByID(538); + } + else if ((string)$relatedItem->recordInfo->recordIdentifier === "DE-611-BF-9625") { + $object->appendCollectionByID(537); + } + else if ((string)$relatedItem->recordInfo->recordIdentifier === "DE-611-BF-9622") { + $object->appendCollectionByID(536); + } + else if ((string)$relatedItem->recordInfo->recordIdentifier === "DE-611-BF-37593") { + $object->appendCollectionByID(545); + } + else if ((string)$relatedItem->recordInfo->recordIdentifier === "DE-611-BF-9636") { + $object->appendCollectionByID(544); + } + else if (str_starts_with((string)$relatedItem->titleInfo->title, 'Nachlaß') + || str_starts_with((string)$relatedItem->titleInfo->title, 'Teilnachlaß') + ) { + continue; + } + else { + throw new Exception("Unknown collection: " . var_export($relatedItem, true)); + } + + } + } + + // For now: skip relatedItem + unset($objectData->relatedItem); + unset($objectData->recordInfo->recordContentSource); + + $object->appendLinkByName((string)$objectData->identifier, "Das Objekt bei Kalliope"); + unset($objectData->identifier); + + unset($objectData->recordInfo->recordIdentifier); + + // Import edit history at kalliope + $editHistory = []; + $editHistory[] = "Ersterfassung bei Kalliope: " . $objectData->recordInfo->recordCreationDate; + foreach ($objectData->recordInfo->recordChangeDate as $changeDate) { + $editHistory[] = "Bearbeitung bei Kalliope: " . (string)$changeDate; + } + $editHistStr = implode(PHP_EOL, $editHistory); + + $object->append_string("notizen_text1", PHP_EOL . PHP_EOL . 
$editHistStr);
+
+            unset($objectData->recordInfo->recordCreationDate, $objectData->recordInfo->recordChangeDate);
+            if (count($objectData->recordInfo->children()) === 0) {
+                unset($objectData->recordInfo);
+            }
+
+            if (count($objectData->children()) === 0) {
+                unset($objectData);
+            }
+
+            $object->set_objekt_publik($visibility);
+            $newObjectID = $objectWriter->writeObject($object, true, $insertOnly, $outputHandler);
+
+            if (!empty($objectData)) {
+                throw new MDParserIncomplete("Incomplete parser: " . var_export($objectData, true));
+            }
+
+            $outputHandler->toLog("Done with object $i", 2);
+
+            // Sleep for a millisecond
+            usleep(IMPORTER_DELAY_PER_OBJECT);
+
+        }
+
+    }
+
+}
diff --git a/parsers/becollect_dtmb.php b/parsers/becollect_dtmb.php
new file mode 100644
index 0000000..58f54e4
--- /dev/null
+++ b/parsers/becollect_dtmb.php
@@ -0,0 +1,805 @@
+ * @author Stefan Rohde-Enslin
+ * @link https://imports.museum-digital.org/importer/parsers/csvxml.php
+ */
+declare(strict_types = 1);
+
+/**
+ * Splits a string at any of several delimiters.
+ *
+ * All delimiters are first replaced by the first one in the list, then the
+ * string is split at that delimiter.
+ *
+ * @param array $delimiters List of delimiters. Must start (index 0) with a non-empty string.
+ * @param string $string Haystack.
+ *
+ * @return array
+ *
+ * @throws Exception If there is no usable first delimiter.
+ */
+function multiexplode(array $delimiters, string $string):array {
+
+    // explode() never returns an empty array, so the former check of its return
+    // value could not fire. The case that really fails is a missing or empty
+    // first delimiter, which is therefore checked up front.
+    $separator = $delimiters[0] ?? '';
+    if (!is_string($separator) || $separator === '') {
+        throw new Exception("Error exploding");
+    }
+
+    return explode($separator, str_replace($delimiters, $separator, $string));
+
+}
+
+/**
+ * Wrapper around $mdEvent->set_zeiten_id.
+ *
+ * The time name is read from the element's text_date child. If both year_from
+ * and year_to are non-empty, they are passed along as integers; otherwise only
+ * the name is set.
+ *
+ * @param MDEvent $event Event.
+ * @param SimpleXMLElement $time Time to handle.
+ *
+ * @return MDEvent The event passed in.
+ */
+function setTimeMaybeWithDates(MDEvent $event, SimpleXMLElement $time):MDEvent {
+
+    if (!empty((string)$time->year_from) and !empty((string)$time->year_to)) {
+        $event->set_zeiten_id((string)$time->text_date, intval((string)$time->year_from), intval((string)$time->year_to));
+    }
+    else {
+        $event->set_zeiten_id((string)$time->text_date);
+    }
+
+    return $event;
+
+}
+
+/**
+ * Function for parsing events. 
Returns objects of type MDEvent.
+ *
+ * For each event type, actors, places and times are combined as follows:
+ * - each actor gets one event per place/time pair; a place-only event per
+ *   place if no time was paired with it; an actor/time event for each time
+ *   not paired with a place; and, if nothing was produced so far, an event
+ *   naming only the actor,
+ * - places not used by any actor get one event per time, or a place-only event,
+ * - times that are still unused get a time-only event.
+ * Events whose place, time and actor IDs are all 0 are not returned.
+ *
+ * @param array $version Version information (e.g. DB connections).
+ * @param MDOutputHandler $outputHandler Output handler.
+ * @param array $allEvents All events, keyed by event type. Each entry is read
+ *                         through its keys 'persinst', 'places' and 'times'.
+ *
+ * @return array
+ */
+function parseEvents(array $version, MDOutputHandler $outputHandler, array $allEvents):array {
+
+    $output = [];
+
+    foreach ($allEvents as $eventType => $eventSubjects) {
+
+        // Places and times that ended up in any event of this event type.
+        $placesUsed = $timesUsed = [];
+
+        foreach ($eventSubjects['persinst'] as $persinst_id) {
+
+            // Places and times already combined with this particular actor.
+            $placesUsedForPersinst = $timesUsedForPersinst = [];
+
+            $persinstEvents = [];
+
+            foreach ($eventSubjects['places'] as $place_id) {
+
+                // Actor + place + time
+                foreach ($eventSubjects['times'] as $time_id) {
+
+                    $event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], $eventType, $outputHandler);
+                    $event->set_orte_id($place_id);
+                    $event->set_persinst_id($persinst_id['name']);
+                    $event = setTimeMaybeWithDates($event, $time_id);
+
+                    if ($event->get_orte_id() !== 0 || $event->get_zeiten_id() !== 0 || $event->get_persinst_id() !== 0)
+                        $persinstEvents[] = $event;
+
+                    $timesUsed[] = $timesUsedForPersinst[] = $time_id;
+                    $placesUsed[] = $placesUsedForPersinst[] = $place_id;
+
+                }
+
+                // Actor + place: only reached if the loop above did not run,
+                // i.e. there are no times for this event type.
+                if (!in_array($place_id, $placesUsedForPersinst, true)) {
+
+                    $event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], $eventType, $outputHandler);
+                    $event->set_orte_id($place_id);
+                    $event->set_persinst_id($persinst_id['name']);
+                    if ($event->get_orte_id() !== 0 || $event->get_zeiten_id() !== 0 || $event->get_persinst_id() !== 0)
+                        $persinstEvents[] = $event;
+
+                    $placesUsed[] = $placesUsedForPersinst[] = $place_id;
+
+                }
+
+            }
+
+            // Actor + time, for times not combined with a place above.
+            foreach ($eventSubjects['times'] as $time_id) {
+
+                if (in_array($time_id, $timesUsedForPersinst, true)) continue;
+
+                $event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], $eventType, $outputHandler);
+                $event->set_persinst_id($persinst_id['name']);
+                $event = setTimeMaybeWithDates($event, $time_id);
+                if ($event->get_orte_id() !== 0 || $event->get_zeiten_id() !== 0 || $event->get_persinst_id() !== 0)
+                    $persinstEvents[] = $event;
+
+                $timesUsed[] = $time_id;
+
+            }
+
+            // Fallback: an event naming the actor only.
+            if ($persinstEvents === []) {
+
+                $event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], $eventType, $outputHandler);
+                $event->set_persinst_id($persinst_id['name']);
+                if ($event->get_orte_id() !== 0 || $event->get_zeiten_id() !== 0 || $event->get_persinst_id() !== 0)
+                    $persinstEvents[] = $event;
+
+            }
+
+            $output = array_merge($output, $persinstEvents);
+
+        }
+
+        // Places that were not linked through any actor.
+        foreach ($eventSubjects['places'] as $place_id) {
+
+            if (in_array($place_id, $placesUsed, true)) continue;
+
+            $placeEvents = [];
+
+            // Place + time
+            foreach ($eventSubjects['times'] as $time_id) {
+
+                $event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], $eventType, $outputHandler);
+                $event->set_orte_id($place_id);
+                $event = setTimeMaybeWithDates($event, $time_id);
+                if ($event->get_orte_id() !== 0 || $event->get_zeiten_id() !== 0 || $event->get_persinst_id() !== 0)
+                    $placeEvents[] = $event;
+
+                $timesUsed[] = $time_id;
+
+            }
+
+            // Fallback: an event naming the place only.
+            if ($placeEvents === []) {
+
+                $event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], $eventType, $outputHandler);
+                $event->set_orte_id($place_id);
+                if ($event->get_orte_id() !== 0 || $event->get_zeiten_id() !== 0 || $event->get_persinst_id() !== 0)
+                    $placeEvents[] = $event;
+
+                $placesUsed[] = $place_id;
+
+            }
+
+            $output = array_merge($output, $placeEvents);
+
+        }
+
+        // Times that were linked neither through an actor nor through a place.
+        foreach ($eventSubjects['times'] as $time_id) {
+
+            if (in_array($time_id, $timesUsed, true)) continue;
+
+            $event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], $eventType, $outputHandler);
+            $event = setTimeMaybeWithDates($event, $time_id);
+            if ($event->get_orte_id() !== 0 || $event->get_zeiten_id() !== 0 || $event->get_persinst_id() !== 0)
+                $output[] = $event;
+
+            $timesUsed[] = $time_id;
+
+        }
+
+    }
+
+    return $output;
+
+}
+
+/**
+ * 
/**
 * Translates a y/n flag to a boolean: only the exact string "y" is true.
 *
 * @param string $input Input string.
 *
 * @return boolean
 */
function translateYNToBool(string $input):bool {

    return $input === "y";

}

/**
 * Looks up a subject by its ID and returns its name. If the subject's URI
 * points to d-nb.info/gnd, the last path segment of the URI is returned as
 * well.
 *
 * @param SimpleXMLElement $subjects  SimpleXMLElement containing all subjects.
 * @param integer          $subjectID Subject ID.
 *
 * @return array Keys "gnd" (optional) and "name"; empty if no subject matches.
 */
function getSubjectName(SimpleXMLElement $subjects, int $subjectID):array {

    foreach ($subjects->subject as $candidate) {

        $attributes = $candidate->attributes();

        // Loose comparison on purpose: the attribute is an XML node, not an integer.
        if ($attributes->subject_id != $subjectID) continue;

        $result = [];

        if (!empty($attributes->uri)) {
            $uri = strval($attributes->uri);
            $lastSlash = strrpos($uri, "/");
            if (strpos($uri, "d-nb.info/gnd") !== false && $lastSlash !== false) {
                $result["gnd"] = substr($uri, $lastSlash + 1);
            }
        }

        $result["name"] = strval($candidate->name);

        return $result;

    }

    return [];

}
/**
 * Parse function.
 *
 * Reads every *.xml file of the given folder, treats each file as the
 * description of a single object and writes that object, together with its
 * literature, tags, material/technique, measurements, events and images.
 *
 * @param array            $version        Instance to import into.
 * @param integer          $institution_id Institution to import to.
 * @param non-empty-string $XMLFolder      Folder of the XML files to import.
 * @param string           $dataFolder     Data folder. Image files are looked
 *                                         up below this folder.
 * @param integer          $sammlung_id    Collection ID. Required in practice:
 *                                         parsing the collection from the XML
 *                                         is not implemented.
 * @param boolean          $visibility     Import objects to be directly visible?.
 * @param boolean          $insertOnly     If set to true, only new objects are added,
 *                                         old are not updated.
 *
 * @throws MDFileDoesNotExist If the import folder does not exist.
 * @throws Exception          If a file cannot be loaded, the collection ID is
 *                            missing, or an unknown time, place or actor type
 *                            is encountered.
 *
 * @return void
 */
function parseImportXML(array $version, int $institution_id, string $XMLFolder, string $dataFolder = "", int $sammlung_id = 0, bool $visibility = false, bool $insertOnly = false) {

    $importDir = MD_IMPORTER_CONF::$import_dir_xml . "{$XMLFolder}";

    if (!is_dir($importDir)) throw new MDFileDoesNotExist("The folder to import from ($XMLFolder) does not exist.");

    // Set up writers (not all of them are used by the code below)

    $collectionWriter   = new MDCollectionWriter($version['mainDB']);
    $literatureWriter   = new MDLiteratureWriter($version['mainDB']);
    $linkWriter         = new MDLinkWriter($version['mainDB']);
    $seriesWriter       = new MDSeriesWriter($version['mainDB']);
    $exhibitionWriter   = new MDExhibitionWriter($version['mainDB']);
    $objectRecordWriter = new MDObjectRecordWriter($version['mainDB']);
    $tagWriter          = new MDTagWriter($version['nodaDB']);

    $outputHandler = new MDOutputHandler;
    $outputHandler->setVerbosity(2);

    $objectWriter = new MDObjectWriter($version['mainDB'], $version['nodaDB'], $version['link'], $version['filepath'], $version['dataFolderLink']);

    // NOTE(review): in this copy of the source the separator literal used for
    // "online_text" and "literature" is a bare line break. If it originally
    // was an HTML line break tag that got lost, restore it here.
    $lineBreak = "\n";

    // Mappings from the type labels used in the XML to event type IDs.
    // Error messages about unknown labels point to this line.
    $mappingLine = __LINE__;

    $eventTypeByDatingType = [
        ""                            => 1,
        "Herstellungsjahr"            => 1,
        "Herst.-Zeitraum"             => 1,
        "Herstellungsjahr (Nachbau)"  => 1,
        "Herstellungsjahr (Original)" => 4,
        "Entwurf"                     => 35,
    ];

    $eventTypeByPlaceType = [
        "Herstellungsort" => 1,
        "Herkunft"        => 8,
    ];

    $eventTypeByActorType = [
        "Hersteller"                     => 1,
        "Herstellung"                    => 1,
        "Reparatur"                      => 1,
        "Herkunft"                       => 8,
        "Entwurf"                        => 35,
        "Auftraggeber/in"                => 25,
        "Entwicklung"                    => 4,
        "Nutzung"                        => 6,
        "Künstler/in"                    => 1,
        "Herstellung elektr. Ausrüstung" => 1,
        "Mit dem Objekt verbunden"       => 23,
        "Lieferant"                      => 23,
    ];

    foreach (MD_STD::scandir($importDir) as $xmlFile) {

        if (pathinfo($xmlFile, PATHINFO_EXTENSION) !== "xml") continue;

        $outputHandler->toLog("Attempting to load XML file {$xmlFile}", 2);

        // Remove byte order marks before parsing
        $rawData = MD_STD::file_get_contents("{$importDir}/{$xmlFile}");
        $rawData = MD_STD::preg_replace_str('/\x{FEFF}/u', '', $rawData);

        if (!($xmlData = simplexml_load_string($rawData))) {
            throw new Exception("$xmlFile is corrupt. Check it lah");
        }

        if (!($json_encoded = MD_STD::json_encode_object($xmlData)) || !($objectData = json_decode($json_encoded, true))) {
            throw new Exception("Could not load file {$xmlFile}");
        }
        $objectData  = $objectData['objects']['object'];
        $subjectData = $xmlData->subjects;
        $xmlObject   = $xmlData->objects->object;

        //
        // Basic object data
        //
        $objectData['object_type']  = 'Man-made object';
        $objectData['object_title'] = (string)$xmlObject->object_name;
        if (!empty($objectData['online_text']) and $objectData['online_text'] != 'ERSATZ') {
            // Normalize line breaks, then strip any markup
            $objectData['online_text'] = str_replace($lineBreak, "\n", $objectData['online_text']);
            $objectData['online_text'] = str_replace("\n ", "\n", $objectData['online_text']);
            $objectData['online_text'] = str_replace("\n\n", "\n", $objectData['online_text']);
            $objectData['object_description'] = trim(MD_STD::preg_replace_str('/<[^>]*>/i', '', $objectData['online_text']));
        }
        else $objectData['object_description'] = 'Beschreibung für ' . $objectData['object_title'] . ' folgt.';

        //
        // Literature: one entry per line, also appended to the description
        //
        if (isset($objectData['literature']) and !is_array($objectData['literature'])) {
            $literatur = explode($lineBreak, $objectData['literature']);
            $objectData['object_description'] .= PHP_EOL . PHP_EOL . "Weiterführend: ";
            foreach ($literatur as $litEntry) {
                if (!empty($litEntry)) $objectData['object_description'] .= PHP_EOL . $litEntry;
            }
        }
        else $literatur = [];

        $outputHandler->toLog("Successfully loaded XML file {$xmlFile} (Object: {$objectData['inventory_number']})", 2);

        // Object base data
        $object = new MDObject($version['mainDB'], $version['nodaDB'], $version['language'], $institution_id, (string)$xmlObject->inventory_number, $objectData['object_type'], $objectData['object_title'], $objectData['object_description'], $outputHandler);

        if ($sammlung_id === 0) {
            throw new Exception("Missing collection ID, parsing collection from XML is not yet implemented!");
        }
        $object->appendCollectionByID($sammlung_id);

        foreach ($literatur as $litEntry) {
            if (!empty($litEntry)) $object->appendLiteratureByName(trim($litEntry), "", "", "", "", $literatureWriter);
        }

        //
        // Tags
        //
        if (isset($objectData['keywords']) and !is_array($objectData['keywords'])) {
            $schlagwort = trim(MD_STD::preg_replace_str('/<[^>]*>/i', '', $objectData['keywords']));
            // ', ' separates actual keywords, ',' separates classification entries
            $schlagwort = multiexplode(['; ', ', ', ','], $schlagwort);
        }
        else $schlagwort = [];
        $notag = ['Allgemeines', 'Übergreifendes', '.'];

        foreach ($schlagwort as $swEntry) {
            if (is_numeric(substr($swEntry, 0, 1))) {
                // Cut the leading number off classification entries
                // (e.g. "10. Nachrichtentechnik").
                // NOTE(review): stristr() returns the rest of the entry
                // including the leading space, so it can never strictly equal
                // an entry of $notag; the exclusion list presumably has no
                // effect. Confirm the intended handling before changing it.
                $remainder = stristr($swEntry, ' ');
                if (!in_array($remainder, $notag, true)) $swEntry = $remainder ?: "";
            }
            if (substr($swEntry, -1) == '.') $swEntry = substr($swEntry, 0, -1);

            $tagName = trim($swEntry);
            // Trailing separators and number-only entries leave nothing to tag
            if ($tagName === '') continue;

            $object->appendTagByName($tagName, "", $tagWriter);
        }

        //
        // Material / technique
        //
        if (!empty($objectData['materials_techniques']) and is_array($objectData['materials_techniques']['material_technique'])) {
            $object->set_objekt_material_technik(implode(', ', $objectData['materials_techniques']['material_technique']));
        }

        //
        // Measurements
        //
        $measurements = [];
        foreach ($xmlObject->dimensions->dimension as $dimension) {
            // The attribute is an XML node; under strict types str_replace()
            // only accepts it after an explicit cast to string.
            $tAttribute = trim(str_replace("Objektmaß", "", (string)$dimension->attributes()->type));
            $measurements[] = $tAttribute . ": " . strval($dimension);
        }
        $object->set_objekt_masse(implode('; ', $measurements));

        /*
         * Handle events
         */

        $emptyEvent = [
            "places"   => [],
            "persinst" => [],
            "times"    => [],
        ];
        $allEvents = [
            1 => $emptyEvent, // Production
        ];

        // Events, 1. All times

        foreach ($xmlObject->datings->dating as $dating) {

            if (empty((string)$dating->text_date) || in_array((string)$dating->text_date, ["?"], true)) continue;

            $tAttribute = trim((string)$dating->attributes()->type);

            if (!isset($eventTypeByDatingType[$tAttribute])) {
                throw new Exception("Unknown time type: " . $tAttribute . PHP_EOL . "Please enter it at " . __FILE__ . " around line #" . $mappingLine);
            }

            $eventTypeId = $eventTypeByDatingType[$tAttribute];
            // Always start from a complete entry, so all three lists exist
            if (!isset($allEvents[$eventTypeId])) $allEvents[$eventTypeId] = $emptyEvent;
            $allEvents[$eventTypeId]['times'][] = $dating;

        }

        // Events, 2. All places

        foreach ($xmlObject->geographic_references->geographic_reference as $geographic_reference) {

            if ((string)$geographic_reference === "unbekannt") continue;

            $tAttribute = trim((string)$geographic_reference->attributes()->type);

            if (!isset($eventTypeByPlaceType[$tAttribute])) {
                throw new Exception("Unknown place type: " . $tAttribute . PHP_EOL . "Please enter it at " . __FILE__ . " around line #" . $mappingLine);
            }

            $eventTypeId = $eventTypeByPlaceType[$tAttribute];
            if (!isset($allEvents[$eventTypeId])) $allEvents[$eventTypeId] = $emptyEvent;
            $allEvents[$eventTypeId]['places'][] = strval($geographic_reference);

        }

        // Events, 3. All actors

        foreach ($xmlObject->subject_references->subject_reference as $subject_reference) {

            $tAttribute = trim((string)$subject_reference->attribution);

            if (!isset($eventTypeByActorType[$tAttribute])) {
                throw new Exception("Unknown actor type: " . $tAttribute . PHP_EOL . "Please enter it at " . __FILE__ . " around line #" . $mappingLine);
            }

            $eventTypeId = $eventTypeByActorType[$tAttribute];
            if (!isset($allEvents[$eventTypeId])) $allEvents[$eventTypeId] = $emptyEvent;
            $allEvents[$eventTypeId]['persinst'][] = getSubjectName($subjectData, intval((string)$subject_reference->subject_id));

        }

        // Write events based on data parsed beforehand

        foreach (parseEvents($version, $outputHandler, $allEvents) as $event) {
            $object->appendEvent($event);
        }

        //
        // Images
        //
        // NOTE(review): images are attached even if $dataFolder is empty;
        // confirm whether the image import should be skipped in that case.
        foreach ($xmlObject->media->medium as $medium) {
            $image = new MDImage($version['mainDB'], $objectData["object_title"], MD_IMPORTER_CONF::$import_dir_files . $dataFolder . "/" . $medium->key_file_name[0]);
            $image->set_image_owner("Stiftung Deutsches Technikmuseum Berlin");
            $image->set_image_rights("CC BY-NC-SA");
            $image->set_main_image((string)$medium->status === "online Hauptbild");

            $object->appendImage($image);
        }

        //
        // Write it!
        //
        $object->set_objekt_publik($visibility);
        $objectWriter->writeObject($object, true, $insertOnly, $outputHandler);

    }

}
const CSV_SEPARATOR = ',';

/**
 * Parse function.
 *
 * Reads every *.csv file of the given folder. The first row of a file holds
 * the column names. Each following row is matched to an existing object of
 * the institution by the column "Inventarnummer"; rows without an inventory
 * number or without a matching object are skipped. Any column that is left
 * unhandled makes the parser stop with MDParserIncomplete.
 *
 * @param array            $version        Instance to import into.
 * @param integer          $institution_id Institution to import to.
 * @param non-empty-string $XMLFolder      Folder of the CSV files to import.
 * @param string           $dataFolder     Data folder. Not used by this parser.
 * @param integer          $sammlung_id    Collection ID. Not used by this parser.
 * @param boolean          $visibility     Import objects to be directly visible?.
 *                                         Not used by this parser.
 * @param boolean          $insertOnly     If set to true, only new objects are added,
 *                                         old are not updated.
 *
 * @throws MDFileDoesNotExist If the import folder does not exist.
 * @throws Exception          If a file cannot be opened or has no header row.
 * @throws MDParserIncomplete If a row contains columns the parser does not handle.
 *
 * @return void
 */
function parseImportXML(array $version, int $institution_id, string $XMLFolder, string $dataFolder = "", int $sammlung_id = 0, bool $visibility = false, bool $insertOnly = false) {

    $importDir = MD_IMPORTER_CONF::$import_dir_xml . "{$XMLFolder}";

    if (!is_dir($importDir)) throw new MDFileDoesNotExist("The folder to import from ($XMLFolder) does not exist.");

    // Set up writers (not all of them are used by the code below)

    $collectionWriter   = new MDCollectionWriter($version['mainDB']);
    $literatureWriter   = new MDLiteratureWriter($version['mainDB']);
    $linkWriter         = new MDLinkWriter($version['mainDB']);
    $seriesWriter       = new MDSeriesWriter($version['mainDB']);
    $exhibitionWriter   = new MDExhibitionWriter($version['mainDB']);
    $objectRecordWriter = new MDObjectRecordWriter($version['mainDB']);
    $tagWriter          = new MDTagWriter($version['nodaDB']);

    $outputHandler = new MDOutputHandler;
    $outputHandler->setVerbosity(2);

    $objectWriter = new MDObjectWriter($version['mainDB'], $version['nodaDB'], $version['link'], $version['filepath'], $version['dataFolderLink']);

    /*
    $objectWriter->disableImportAdditionalData = true;
    */
    $objectWriter->disableUpdateBaseData = true;
    $objectWriter->disableImportCollections = true;
    $objectWriter->disableImportEvents = true;
    $objectWriter->disableImportTags = true;
    $objectWriter->disableImportLiterature = true;
    $objectWriter->disableImportHyperlinks = true;
    // $objectWriter->disableImportSeries = true;
    $objectWriter->disableImportImagesResources = true;
    $objectWriter->disableImportObjectRecords = true;
    $objectWriter->disableImportTranscriptions = true;
    $objectWriter->disableImportMarkings = true;
    $objectWriter->disableImportExhibitions = true;
    $objectWriter->disableImportReception = true;
    $objectWriter->importObjectTypeAsTag = true;

    // Set up prepared statement for checking if a tag of a given name exists
    $tagNameExistsStmt = $version['nodaDB']->do_prepare("SELECT `tag_id`
        FROM `tag`
        WHERE `tag_name` = ?
        LIMIT 1");

    try {

        foreach (MD_STD::scandir($importDir) as $xmlFile) {

            if (pathinfo($xmlFile, PATHINFO_EXTENSION) !== "csv") continue;

            $outputHandler->toLog("Attempting to load CSV file {$xmlFile}", 2);

            if (!($handle = fopen("{$importDir}/{$xmlFile}", "r"))) {
                throw new Exception("File {$xmlFile} cannot be opened");
            }

            // Make sure the file is closed even if a row makes the parser stop
            try {

                if (!($fileHeaders = fgetcsv($handle, 5000000, CSV_SEPARATOR))) {
                    throw new Exception("Failed to read headers for file $xmlFile");
                }

                // A UTF-8 byte order mark would become part of the first
                // column name and keep it from ever being matched.
                if (is_string($fileHeaders[0]) && strncmp($fileHeaders[0], "\xEF\xBB\xBF", 3) === 0) {
                    $fileHeaders[0] = substr($fileHeaders[0], 3);
                }

                while (($data = fgetcsv($handle, 5000000, CSV_SEPARATOR)) !== false) {

                    // Create associative array for easier parsing
                    $objectData = [];
                    foreach ($data as $key => $value) {
                        // fgetcsv() reports a blank line as a single null field
                        if ($value === null) continue;
                        $cur = trim($value);
                        // Only skip really empty cells; "0" is a value
                        if ($cur === '') continue;
                        $objectData[$fileHeaders[$key]] = $cur;
                    }

                    $inventory_number = $objectData['Inventarnummer'] ?? '';
                    if ($inventory_number === '') continue;
                    unset($objectData['Inventarnummer'], $objectData['laufende Nummer']);

                    $result = $version['mainDB']->query_by_stmt("SELECT *
                        FROM `objekt`
                        WHERE `institution_id` = ?
                            AND `objekt_inventarnr` = ?", "is", $institution_id, $inventory_number);

                    $origData = $result->fetch_assoc() ?: [];
                    $result->close();

                    // Only objects that already exist are handled
                    if (empty($origData)) continue;

                    // Existing base data is kept; the inventory number fills in for empty fields
                    $object = new MDObject($version['mainDB'], $version['nodaDB'], $version['language'], $institution_id, $inventory_number, $origData["objektart"] ?: $inventory_number, $origData["objekt_name"] ?: $inventory_number, $origData["objekt_beschreibung"] ?: $inventory_number, $outputHandler);

                    /*
                    if (isset($objectData['Standort'])) {
                        $object->set_string("standort_eigentlich", $objectData['Standort'] . ' / ' . $objectData['Zusatz']);
                        unset($objectData['Standort'], $objectData['Zusatz']);
                    }

                    // Object base data
                    $object = new MDObject($version['mainDB'], $version['nodaDB'], $version['language'], $institution_id, $objectData['Inventarnummer'], $objectData["Objektbezeichnung"], $objectData["Titel"] ?: $objectData['Inventarnummer'], $objectData["Bemerkung"], $outputHandler);

                    unset($objectData['Inventarnummer'], $objectData["Objektbezeichnung"], $objectData["Titel"], $objectData["Bemerkung"]);

                    $object->set_objekt_publik($visibility);

                    if (isset($objectData['Breite'])) {
                        $object->set_string("mass2_breite_wert", $objectData['Breite']);
                        $object->set_length_unit("mass2_breite_einheit", $objectData['Maßeinheit']);
                        unset($objectData['Breite'], $objectData['Maßeinheit']);
                    }

                    if (isset($objectData['Höhe'])) {
                        $object->set_string("mass2_hoehe_wert", $objectData['Höhe']);
                        $object->set_length_unit("mass2_hoehe_einheit", $objectData['Maßeinheit2']);
                        unset($objectData['Höhe'], $objectData['Maßeinheit2']);
                    }

                    if (isset($objectData['Standort'])) {
                        $object->set_string("standort_eigentlich", $objectData['Standort']);
                        unset($objectData['Standort']);
                    }

                    unset($objectData['Objektart']);
                    $object->appendSeriesByName("Schultafeln", "Schultafeln", $seriesWriter);

                    */

                    // Everything that has not been handled (and unset) by now is unknown to the parser
                    if (!empty($objectData)) {
                        throw new MDParserIncomplete(var_export($objectData, true));
                    }

                    $objectWriter->writeObject($object, true, $insertOnly, $outputHandler);

                    // Pause between objects
                    usleep(IMPORTER_DELAY_PER_OBJECT);

                }

            }
            finally {
                fclose($handle);
            }

        }

    }
    finally {
        $tagNameExistsStmt->close();
        unset($tagNameExistsStmt);
    }

}
const CSV_SEPARATOR = ',';
const MUEHLENHAUPT_START_AT_FILE_NO = 0;
const MUEHLENHAUPT_DELAY = 200;

/**
 * Parse function.
 *
 * Reads every *.csv file of the import folder. The first row of a file is
 * used as header row, every further row describes one object. Each known
 * column is transferred to the object and removed from the row; if any
 * column is left over afterwards, the import is aborted.
 *
 * Rows without an object number get the inventory number "Import_obj_<row>".
 * Inventory numbers already used during this run get "_" appended until
 * they are unique.
 *
 * @param array            $version        Instance to import into.
 * @param integer          $institution_id Institution to import to.
 * @param non-empty-string $XMLFolder      Folder of the XML files to import.
 * @param string           $dataFolder     Data folder.
 * @param integer          $sammlung_id    Collection ID. Optional. Not used by this parser.
 * @param boolean          $visibility     Import objects to be directly visible?. Not used by this parser.
 * @param boolean          $insertOnly     If set to true, only new objects are added,
 *                                         old are not updated.
 *
 * @throws MDFileDoesNotExist If the import folder does not exist.
 * @throws MDParserIncomplete If a row contains columns this parser does not handle.
 * @throws Exception          If a CSV file cannot be opened or has no header row.
 *
 * @return void
 */
function parseImportXML(array $version, int $institution_id, string $XMLFolder, string $dataFolder = "", int $sammlung_id = 0, bool $visibility = false, bool $insertOnly = false) {

    if (!is_dir(MD_IMPORTER_CONF::$import_dir_xml . "{$XMLFolder}")) throw new MDFileDoesNotExist("The folder to import from ($XMLFolder) does not exist.");

    if (empty($dataFolder)) {
        $importImages = false;
    }
    else $importImages = true;

    // Both parameters are unused here; presumably assigned only to silence
    // unused-parameter warnings of static analysis - TODO confirm.
    $ignore = $visibility;
    $ignore = $sammlung_id;

    // Set up writers

    $collectionWriter = new MDCollectionWriter($version['mainDB']);
    $literatureWriter = new MDLiteratureWriter($version['mainDB']);
    $linkWriter = new MDLinkWriter($version['mainDB']);
    $seriesWriter = new MDSeriesWriter($version['mainDB']);
    $exhibitionWriter = new MDExhibitionWriter($version['mainDB']);
    $objectRecordWriter = new MDObjectRecordWriter($version['mainDB']);
    $tagWriter = new MDTagWriter($version['nodaDB']);

    $outputHandler = new MDOutputHandler;
    $outputHandler->setVerbosity(2);

    $objectWriter = new MDObjectWriter($version['mainDB'], $version['nodaDB'], $version['link'], $version['filepath'], $version['dataFolderLink']);

    /*
    $objectWriter->disableImportImagesResources = true;
    $objectWriter->disableUpdateBaseData = true;
    $objectWriter->disableImportAdditionalData = true;
    $objectWriter->disableImportCollections = true;
    $objectWriter->disableImportEvents = true;
    $objectWriter->disableImportTags = true;
    $objectWriter->disableImportLiterature = true;
    $objectWriter->disableImportHyperlinks = true;
    // $objectWriter->disableImportSeries = true;
    $objectWriter->disableImportObjectRecords = true;
    $objectWriter->disableImportTranscriptions = true;
    $objectWriter->disableImportMarkings = true;
    $objectWriter->disableImportExhibitions = true;
    $objectWriter->disableImportReception = true;
    */
    $objectWriter->importObjectTypeAsTag = true;

    // Inventory numbers assigned so far, used to keep them unique per run.
    $importedInvNos = [];

    foreach (MD_STD::scandir(MD_IMPORTER_CONF::$import_dir_xml . "{$XMLFolder}") as $xmlFile) {

        if (pathinfo($xmlFile, PATHINFO_EXTENSION) !== "csv") continue;

        $outputHandler->toLog("Attempting to load CSV file {$xmlFile}", 2);

        if (!($handle = fopen(MD_IMPORTER_CONF::$import_dir_xml . "{$XMLFolder}/{$xmlFile}", "r"))) {
            throw new Exception("File {$xmlFile} cannot be opened");
        }

        // try/finally: the handle is released even if a row aborts the import.
        try {

            if (!($fileHeaders = fgetcsv($handle, 5000000, CSV_SEPARATOR))) {
                throw new Exception("Failed to read headers for file $xmlFile");
            }

            $i = 0;

            while (($data = fgetcsv($handle, 5000000, CSV_SEPARATOR)) !== false) {

                ++$i;

                // fgetcsv() returns [null] for a blank line. Skip it after
                // counting, so row numbers stay aligned with the file and
                // trim(null) cannot throw under strict_types.
                if ($data === [null]) {
                    continue;
                }

                if ($i < MUEHLENHAUPT_START_AT_FILE_NO) {
                    continue;
                }

                # if ($i > 20) break;

                $outputHandler->toLog("Starting to process line #{$i}", 2);

                // Create associative array for easier parsing
                $objectData = [];
                foreach ($data as $key => $value) {
                    $objectData[$fileHeaders[$key]] = trim($value);
                }

                $inventory_number = $objectData['O_0002_Objektnummer02'] ?: "Import_obj_" . $i;

                while (in_array($inventory_number, $importedInvNos, true)) {
                    $inventory_number .= '_';
                }
                $importedInvNos[] = $inventory_number;

                $object = new MDObject($version['mainDB'], $version['nodaDB'], $version['language'], $institution_id, $inventory_number, $objectData['O_0202_Gegenstand'], $objectData['O_01122_Titel_Jahr'] ?: $inventory_number, $objectData['O_02011_Beschreibung_1'], $outputHandler);

                unset($objectData['O_0002_Objektnummer02'], $objectData['O_02011_Beschreibung_1'],
                    $objectData['O_0202_Gegenstand'], $objectData['O_01122_Titel_Jahr']);

                if (!empty($objectData["O_0600_Besitz"])) {
                    $object->set_string("nutzungsrechte", $objectData["O_0600_Besitz"]);
                }
                unset($objectData["O_0600_Besitz"]);

                if (!empty($objectData["P_0206_Technik"])) {
                    $object->set_string("technik2", $objectData["P_0206_Technik"]);
                    $object->set_objekt_material_technik($objectData["P_0206_Technik"]);
                }
                unset($objectData["P_0206_Technik"]);

                if (!empty($objectData["Versicherungswert"])) {
                    $object->set_string("wert2_zahl", $objectData["Versicherungswert"]);
                }
                unset($objectData["Versicherungswert"]);

                if (!empty($objectData["Objekt_Standort"])) {
                    $object->set_string("standort_aktuell", $objectData["Objekt_Standort"]);
                }
                unset($objectData["Objekt_Standort"]);

                $object->set_string("standort_eigentlich", $objectData["O_0301_Archiv_Ort"] . ' / ' . $objectData["O_0302_Archiv_Haus"] . ' / ' . $objectData["O_0303_Archiv_Ebene"] . ' / ' . $objectData["O_01037_Code_Lager_Regal_2F"]);
                unset($objectData["O_0301_Archiv_Ort"], $objectData["O_0302_Archiv_Haus"], $objectData["O_0303_Archiv_Ebene"], $objectData["O_01037_Code_Lager_Regal_2F"]);

                if (!empty($objectData['P_0207_Zyklus'])) {
                    $object->set_string("teilvon", $objectData['P_0207_Zyklus']);
                    $object->appendSeriesByName('Zyklus: ' . $objectData['P_0207_Zyklus'], "", $seriesWriter);
                }
                unset($objectData['P_0207_Zyklus']);

                if (!empty($objectData['P_0204_Gruppe'])) {
                    $groups = explode(', ', $objectData['P_0204_Gruppe']);
                    foreach ($groups as $group) {
                        $object->appendTagByName($group, "", $tagWriter);
                    }
                }
                unset($objectData['P_0204_Gruppe']);

                if (!empty($objectData['O_0205_Stichworte'])) {
                    $object->append_objekt_beschreibung(PHP_EOL . PHP_EOL . "Stichworte: " . $objectData['O_0205_Stichworte']);
                    # $tagNames = explode(', ', $objectData['O_0205_Stichworte']);
                    # foreach ($tagNames as $tagName) {
                    #     $object->appendTagByName($tagName, "", $tagWriter);
                    # }
                }
                unset($objectData['O_0205_Stichworte']);

                // The size column holds "<width>x<height>".
                $sizes = explode('x', $objectData['O_0113_Größe_BxH_neu']);

                if (!empty($sizes[0])) {
                    $object->set_string("mass2_breite_wert", $sizes[0]);
                }
                if (!empty($sizes[1])) {
                    $object->set_string("mass2_hoehe_wert", $sizes[1]);
                }
                $object->set_length_unit("mass2_breite_einheit", "cm");
                $object->set_length_unit("mass2_hoehe_einheit", "cm");

                $object->set_objekt_masse(implode(' x ', $sizes) . ' cm');
                unset($objectData['O_0113_Größe_BxH_neu']);

                if (!empty($objectData["P_0111_Jahr"])) {
                    $event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], 1, $outputHandler);
                    $event->set_zeiten_id($objectData["P_0111_Jahr"]);
                    // Only keep the event if a time ID was actually set.
                    if ($event->get_zeiten_id() !== 0) {
                        $object->appendEvent($event);
                    }
                }
                unset($objectData["P_0111_Jahr"]);

                // Other notes fields

                $notesFields = [
                    'Anmerkung_Übernahme',
                    'Anmerkung_Übernahme_ohne',
                    'Lager_Label_Tabelle',
                ];
                foreach ($notesFields as $fieldName) {
                    if (!empty($objectData[$fieldName])) {
                        $object->append_string("notizen_text1", PHP_EOL . $fieldName . ': ' . $objectData[$fieldName]);
                    }
                    unset($objectData[$fieldName]);
                }

                // Image
                // NOTE(review): images are only looked up while
                // disableImportImagesResources is true - this looks inverted
                // (and $importImages is never consulted). Left unchanged,
                // confirm the intent before reusing this parser.
                if (!empty($objectData["O_0101_Archivnummer1"]) && $objectWriter->disableImportImagesResources === true) {

                    $imgFolder = MD_IMPORTER_CONF::$import_dir_files . $dataFolder . "/";

                    $imgFilename = "";

                    // Try the archive number as is, then without "-FM" and
                    // with the extensions .JPG / .jpg.
                    if (file_exists($imgFolder . $objectData["O_0101_Archivnummer1"])) {
                        $imgFilename = $imgFolder . $objectData["O_0101_Archivnummer1"];
                    }
                    else if (file_exists($imgFolder . str_replace("-FM", "", $objectData["O_0101_Archivnummer1"]) . '.JPG')) {
                        $imgFilename = $imgFolder . str_replace("-FM", "", $objectData["O_0101_Archivnummer1"]) . '.JPG';
                    }
                    else if (file_exists($imgFolder . str_replace("-FM", "", $objectData["O_0101_Archivnummer1"]) . '.jpg')) {
                        $imgFilename = $imgFolder . str_replace("-FM", "", $objectData["O_0101_Archivnummer1"]) . '.jpg';
                    }
                    else if (file_exists($imgFolder . $objectData["O_0101_Archivnummer1"] . '.JPG')) {
                        $imgFilename = $imgFolder . $objectData["O_0101_Archivnummer1"] . '.JPG';
                    }
                    else if (file_exists($imgFolder . $objectData["O_0101_Archivnummer1"] . '.jpg')) {
                        $imgFilename = $imgFolder . $objectData["O_0101_Archivnummer1"] . '.jpg';
                    }

                    if ($imgFilename !== '') {

                        $image = new MDImage($version['mainDB'], (string)$object->get_string("objekt_name"), $imgFilename);
                        $image->set_image_master_filename($objectData["O_0101_Archivnummer1"]);
                        $object->appendImage($image);

                    }
                    else {
                        // No file found: keep the archive number in the notes.
                        $object->append_string("notizen_text2", $objectData["O_0101_Archivnummer1"]);
                    }

                }
                else $object->append_string("notizen_text2", $objectData["O_0101_Archivnummer1"]);
                unset($objectData["O_0101_Archivnummer1"]);

                // Remove unwanted entries
                unset($objectData['P_Summe']);

                // Anything still left in the row has not been handled above.
                if (!empty($objectData)) {
                    throw new MDParserIncomplete(var_export($objectData, true));
                }

                $objectWriter->writeObject($object, true, $insertOnly, $outputHandler);

                // Throttle between objects (usleep() takes microseconds).
                usleep(IMPORTER_DELAY_PER_OBJECT);

            }

        }
        finally {
            fclose($handle);
        }

    }

}
diff --git a/parsers/csv_spsg.php b/parsers/csv_spsg.php new file mode 100644 index 0000000..1b21dae --- /dev/null +++ b/parsers/csv_spsg.php @@ -0,0 +1,409 @@ + + */ +declare(strict_types = 1);

/**
 * Returns true if the input is "y", false otherwise.
 *
 * @param string $input Input string.
 *
 * @return boolean
 */
function translateYNToBool(string $input):bool {

    return $input === "y";

}

/**
 * Function for parsing eventtypes from the CSV.
 *
 * @param string $funktion Eventtype information.
+ * + * @return integer + */ +function getCSVEventtype(string $funktion):int { + switch ($funktion) { + case "Manufaktur": + case "Hersteller": + case "Radierer": + case "Sticker": + case "Uhrmacher": + case "Bildhauer": + case "Ebenist": + case "Bronzegießer": + case "Mosaikkünstler": + case "ohne": + $tEventType = 1; + break; + case "Verleger": + $tEventType = 3; + break; + case "Maler der Vorlage": + $tEventType = 4; + break; + case "Dargestellt/ -er": + $tEventType = 5; + break; + case "Eigentümer": + $tEventType = 6; + break; + case "Maler": + case "Maler\oder": + $tEventType = 9; + break; + case "Kopist": + $tEventType = 12; + break; + case "Besteller, Auftraggeber": + $tEventType = 25; + break; + case "Modelleur": + $tEventType = 31; + break; + case "Entwerfer": + $tEventType = 35; + break; + default: + $tEventType = 1; + break; + } + return $tEventType; + +} + +/** + * Function for parsing events from the CSV. + * + * @param array $objectData Object information. + * + * @return array> + */ +function parseCSVEvents(array $objectData):array { + + $event = $ereignis = []; + + for ($i = 1; $i < 6; $i++) { + + $tEvent = []; + if (!empty($objectData["Personen_" . $i]) and $objectData["Personen_" . $i] !== 'ERSATZ') $tEvent['actor'] = $objectData["Personen_" . $i]; + else $tEvent['actor'] = ""; + + if (!empty($objectData["Person-Funktion_" . $i]) and $objectData["Person-Funktion_" . $i] !== 'ERSATZ') $tEvent['ereignistyp'] = getCSVEventtype($objectData["Person-Funktion_" . $i]); + else $tEvent['ereignistyp'] = 1; + + if (!empty($objectData["Person-Bemerkung_" . $i])) { + if ($objectData["Person-Bemerkung_" . $i] === '?' or $objectData["Person-Bemerkung_" . $i] === '(?)') $tEvent['actor_sure'] = 'n'; + else $tEvent['actor_sure'] = 'y'; + } + else $tEvent['actor_sure'] = 'y'; + + if (!empty($objectData["Entstehungsort_" . $i]) and $objectData["Entstehungsort_" . $i] !== 'ERSATZ') $tEvent['place'] = $objectData["Entstehungsort_" . 
$i]; + else $tEvent['place'] = ""; + + if (!empty($objectData["Entstehungszeit_" . $i]) and $objectData["Entstehungszeit_" . $i] !== 'ERSATZ') { + $zei = explode('-', str_replace(' - ', '-', $objectData['Entstehungszeit_' . $i])); + if (count($zei) === 2 and $zei[0] == $zei[1]) { + $objectData['Entstehungszeit_' . $i] = $zei[0]; + unset($zei); + } + $tEvent['time'] = trim($objectData["Entstehungszeit_" . $i]); + } + else $tEvent["time"] = ""; + + $cur = []; + + if (!empty($tEvent['actor']) || !empty($tEvent['place']) || !empty($tEvent['time'])) { + $cur['ereignisart'] = $tEvent['ereignistyp']; + $cur['akteur_name'] = $tEvent['actor']; + $cur['persinst_sicher'] = $tEvent['actor_sure']; + $cur['ort'] = $tEvent['place']; + $cur['ort_sicher'] = 'y'; + $cur['zeit_name'] = $tEvent['time']; + $cur['zeit_sicher'] = 'y'; + $cur['ereignis_anmerkung'] = ""; + + $ereignis[] = $cur; + } + } + + return $ereignis; + +} + +/** + * Parse function. + * + * @param array $version Instance to import into. + * @param integer $institution_id Institution to import to. + * @param non-empty-string $XMLFolder Folder of the XML files to import. + * @param string $dataFolder Data folder. + * @param integer $sammlung_id Collection ID. Optional. + * @param boolean $visibility Import objects to be directly visible?. + * @param boolean $insertOnly If set to true, only new objects are added, + * old are not updated. + * + * @return void + */ +function parseImportXML(array $version, int $institution_id, string $XMLFolder, string $dataFolder = "", int $sammlung_id = 0, bool $visibility = false, bool $insertOnly = false) { + + if (!is_dir(MD_IMPORTER_CONF::$import_dir_xml . 
"{$XMLFolder}")) throw new MDFileDoesNotExist("The folder to import from ($XMLFolder) does not exist.");

    // Images are only imported if a data folder has been passed.
    if (empty($dataFolder)) {
        $importImages = false;
    }
    else $importImages = true;

    // Set up writers

    $collectionWriter = new MDCollectionWriter($version['mainDB']);
    $literatureWriter = new MDLiteratureWriter($version['mainDB']);
    $linkWriter = new MDLinkWriter($version['mainDB']);
    $seriesWriter = new MDSeriesWriter($version['mainDB']);
    $exhibitionWriter = new MDExhibitionWriter($version['mainDB']);
    $objectRecordWriter = new MDObjectRecordWriter($version['mainDB']);
    $tagWriter = new MDTagWriter($version['nodaDB']);

    $outputHandler = new MDOutputHandler;
    $outputHandler->setVerbosity(2);

    $objectWriter = new MDObjectWriter($version['mainDB'], $version['nodaDB'], $version['link'], $version['filepath'], $version['dataFolderLink']);

    // $startAtCounter allows resuming an aborted import: files counted
    // below this number are skipped.
    $i = 0;
    $startAtCounter = 0;

    foreach (MD_STD::scandir(MD_IMPORTER_CONF::$import_dir_xml . "{$XMLFolder}") as $xmlFile) {

        if (pathinfo($xmlFile, PATHINFO_EXTENSION) !== "xml") continue;

        ++$i;
        if ($i < $startAtCounter) {
            continue;
        }
        $outputHandler->toLog("Starting to process file #{$i}", 2);

        $outputHandler->toLog("Attempting to load XML file {$xmlFile}", 2);

        $rawData = MD_STD::file_get_contents(MD_IMPORTER_CONF::$import_dir_xml . "{$XMLFolder}/{$xmlFile}");
        // Strip byte order marks (U+FEFF) before parsing.
        $rawData = MD_STD::preg_replace_str('/\x{FEFF}/u', '', $rawData);

        if (!($xmlData = simplexml_load_string($rawData, "SimpleXMLElement", LIBXML_NOCDATA))) {
            throw new Exception("Cannot load raw data into SimpleXML (file: {$xmlFile})");
        }

        // Round trip through JSON to turn the XML into a plain array.
        if (!($json_encoded = MD_STD::json_encode_object($xmlData)) || !($objectData = json_decode($json_encoded, true))) {
            throw new Exception("Could not load file {$xmlFile}");
        }

        // The inventory number of this format is stored in "Inventarnummer".
        $loggedInvNo = $objectData['Inventarnummer'] ?? '';
        if (is_array($loggedInvNo)) $loggedInvNo = '';
        $outputHandler->toLog("Successfully loaded XML file {$xmlFile} (Object: {$loggedInvNo})", 2);

        $availableKeys = array_keys($objectData);

        // Object base data
        if (isset($objectData['Autor']) and !is_array($objectData['Autor']) and isset($objectData['Red_freigegeb_Text'])) {
            $objectData['Red_freigegeb_Text'] = $objectData['Red_freigegeb_Text'] . ' [' . $objectData['Autor'] . ']';
        }

        if (!isset($objectData['Objektbezeichnung'])) $objectData['Objektbezeichnung'] = "Eintrag folgt";

        if (!isset($objectData["Material-Technik"])) $objectData["Material-Technik"] = "";

        $object = new MDObject($version['mainDB'], $version['nodaDB'], $version['language'], $institution_id, $objectData['Inventarnummer'], $objectData['Objektbezeichnung'], $objectData['Titel'], $objectData['Red_freigegeb_Text'], $outputHandler);
        $object->set_objekt_material_technik($objectData["Material-Technik"]);
        if (!empty($objectData["Masse"])) $object->set_objekt_masse($objectData["Masse"]);
        unset($objectData['Red_freigegeb_Text'], $objectData["Material-Technik"], $objectData['Objektbezeichnung'], $objectData['Inventarnummer']);
        /*
        echo '<pre>';
        print_r($objectData);
        echo '</pre>';
        /*
        // Aufenthalt
        if (!empty($objectData['abode_regular'])) $object->set_string("standort_eigentlich", $objectData['abode_regular']);

        // Rechte
        if (!empty($objectData['copyright'])) $object->set_string("urheberrechte", $objectData['copyright']);
        if (!empty($objectData['rights_annotation'])) $object->set_string("rechte_anmerkungen", $objectData['rights_annotation']);

        // Notizen
        if (!empty($objectData['remarks_short'])) $object->set_string("notizen_text2", $objectData['remarks_short']);

        // Collections
        if (!empty($objectData['collection_name1'])) {

            $searchTarget = "collection_name";
            $availableEntities = [];
            foreach ($availableKeys as $key) {
                if (substr((string)$key, 0, strlen($searchTarget)) === $searchTarget) {
                    $availableEntities[] = substr((string)$key, strlen($searchTarget));
                }
            }
        }
        */
        // An explicitly passed collection ID wins over the collection
        // named in the data.
        if ($sammlung_id !== 0) {
            $object->appendCollectionByID($sammlung_id);
        }
        else {
            $object->appendCollectionByName($objectData["Sammlungsbereich"]);
        }

        /*
        //
        / Zusatz
        //
        if (!empty($objectData['detailed_description'])) {
            $object->set_string("detaileddescription2", $objectData['detailed_description']);
            if (!empty($objectData['detailed_description_md']) and $objectData['detailed_description_md'] === "y") {
                $object->set_bool("detaileddescription2show_md", true);
            }
            else $object->set_bool("detaileddescription2show_md", false);
            if (!empty($objectData['detailed_description_extern']) and $objectData['detailed_description_extern'] === "y") {
                $object->set_bool("detaileddescription2show_extern", true);
            }
            else $object->set_bool("detaileddescription2show_extern", false);
        }

        //
        / Events
        //
        // Related place
        if (!empty($objectData['related_place1'])) {

            $searchTarget = "related_place";
            $availableEntities = [];
            foreach ($availableKeys as $key) {
                if (substr((string)$key, 0, strlen($searchTarget)) === $searchTarget) {
                    if (is_numeric(substr((string)$key, strlen($searchTarget))))
                        $availableEntities[] = substr((string)$key, strlen($searchTarget));
                }
            }

            foreach ($availableEntities as $suffix) {
                $event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], 22, $outputHandler);
                if (!empty($objectData[$searchTarget . $suffix])) $event->set_orte_id($objectData[$searchTarget . $suffix]);
                if (!empty($objectData[$searchTarget . "_sure" . $suffix])) $event->set_ereignis_orte_sicher(translateYNToBool($objectData[$searchTarget . "_sure" . $suffix]));
                $object->appendEvent($event);
            }

        }

        // Related actor
        if (!empty($objectData['related_actor1'])) {

            $searchTarget = "related_actor";
            $availableEntities = [];
            foreach ($availableKeys as $key) {
                if (substr((string)$key, 0, strlen($searchTarget)) === $searchTarget) {
                    if (is_numeric(substr((string)$key, strlen($searchTarget))))
                        $availableEntities[] = substr((string)$key, strlen($searchTarget));
                }
            }

            foreach ($availableEntities as $suffix) {
                $event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], 23, $outputHandler);
                if (!empty($objectData[$searchTarget . $suffix])) $event->set_persinst_id($objectData[$searchTarget . $suffix]);
                if (!empty($objectData[$searchTarget . "_sure" . $suffix])) $event->set_ereignis_persinst_sicher(translateYNToBool($objectData[$searchTarget . "_sure" . $suffix]));
                $object->appendEvent($event);
            }

        }
        */

        $ereignis = parseCSVEvents($objectData);

        foreach ($ereignis as $tEvent) {

            $event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], intval($tEvent["ereignisart"]), $outputHandler);
            if (!empty($tEvent["ort"])) $event->set_orte_id((string)$tEvent["ort"]);
            if (!empty($tEvent["ort_sicher"])) $event->set_ereignis_orte_sicher(translateYNToBool((string)$tEvent["ort_sicher"]));
            if (!empty($tEvent["akteur_name"])) $event->set_persinst_id((string)$tEvent["akteur_name"]);
            if (!empty($tEvent["persinst_sicher"])) $event->set_ereignis_persinst_sicher(translateYNToBool((string)$tEvent["persinst_sicher"]));
            if (!empty($tEvent["zeit_name"])) $event->set_zeiten_id((string)$tEvent["zeit_name"]);
            if (!empty($tEvent["zeit_sicher"])) $event->set_ereignis_zeit_sicher(translateYNToBool((string)$tEvent["zeit_sicher"]));
            // Pass the event's annotation, not the name of its array key.
            if (!empty($tEvent["ereignis_anmerkung"])) $event->set_ereignis_anmerkung((string)$tEvent["ereignis_anmerkung"]);
            $object->appendEvent($event);

        }

        //
        // Tags
        //
        if (!empty($objectData['Schlagwort_1'])) {

            // Collect the suffixes of all "Schlagwort_<suffix>" fields.
            $searchTarget = "Schlagwort_";
            $availableEntities = [];
            foreach ($availableKeys as $key) {
                if (substr((string)$key, 0, strlen($searchTarget)) === $searchTarget) {
                    $availableEntities[] = substr((string)$key, strlen($searchTarget));
                }
            }
            foreach ($availableEntities as $suffix) {
                // Skip placeholders and non-scalar (array) values.
                if ($objectData[$searchTarget . $suffix] === "ERSATZ") continue;
                if (is_array($objectData[$searchTarget . $suffix])) continue;
                $object->appendTagByName($objectData[$searchTarget . $suffix], "", $tagWriter);
            }

        }

        //
        // Literatur
        //
        if (!empty($objectData['Literatur_1'])) {

            // Collect the suffixes of all "Literatur_<suffix>" fields.
            $searchTarget = "Literatur_";
            $availableEntities = [];
            foreach ($availableKeys as $key) {
                if (substr((string)$key, 0, strlen($searchTarget)) === $searchTarget) {
                    $availableEntities[] = substr((string)$key, strlen($searchTarget));
                }
            }
            foreach ($availableEntities as $suffix) {
                if ($objectData[$searchTarget . $suffix] === "ERSATZ") continue;
                if (is_array($objectData[$searchTarget . $suffix])) continue;
                // Drop a trailing full stop.
                if (substr((string)$objectData[$searchTarget . $suffix], -1, 1) === '.') $objectData[$searchTarget . $suffix] = substr((string)$objectData[$searchTarget . $suffix], 0, -1);
                // Limit the title to 200 characters; mb_substr() does not
                // cut multi-byte characters (umlauts) in half.
                $object->appendLiteratureByName(mb_substr($objectData[$searchTarget . $suffix], 0, 200), "", "", "", "", $literatureWriter);
            }
        }

        //
        // Images
        //
        if ($importImages === true) {
            $image = new MDImage($version['mainDB'], $objectData["Bild_Titel"], MD_IMPORTER_CONF::$import_dir_files . $dataFolder . "/" . $objectData["Bild_Datei"] . ".jpg");
            $image->set_image_name($objectData["Bild_Titel"]);
            $image->set_image_beschreibung("Aufnahme " . $objectData["Aufnahmedatum"]);
            $image->set_image_owner($objectData["Nutzungsrechte"]);
            $image->set_image_creator($objectData["Fotograf"]);
            if (isset($objectData["image_rights"])) $image->set_image_rights($objectData["image_rights"]);
            if (isset($objectData["image_visible"])) $image->set_visible(translateYNToBool($objectData["image_visible"]));
            //if (isset($objectData["image_main" . $suffix]))
            // There is one image per object, so it is always the main image.
            $image->set_main_image(true);

            $object->appendImage($image);

            unset($objectData["Bild_Titel"], $objectData["Aufnahmedatum"], $objectData["Nutzungsrechte"],
                $objectData["Fotograf"], $objectData["image_rights"], $objectData["image_visible"]);
        }

        //
        // Write it!
        //
        $object->set_objekt_publik($visibility);
        $newObjectID = $objectWriter->writeObject($object, true, $insertOnly, $outputHandler);

        /*
        if (!empty($objectData)) {
            throw new MDParserIncomplete(var_export($objectData, true));
        }
        */

        // Throttle between objects (usleep() takes microseconds).
        usleep(IMPORTER_DELAY_PER_OBJECT);

    }

}
diff --git a/parsers/faust_haendelhaus.php b/parsers/faust_haendelhaus.php new file mode 100644 index 0000000..e988e42 --- /dev/null +++ b/parsers/faust_haendelhaus.php @@ -0,0 +1,332 @@ + + * @author Joshua Ramon Enslin + */ +declare(strict_types = 1); + +/** + * Parse function. + * + * @param array $version Instance to import into. + * @param integer $institution_id Institution to import to. + * @param non-empty-string $XMLFolder Folder of the XML files to import. + * @param string $dataFolder Data folder. + * @param integer $sammlung_id Collection ID. Optional. + * @param boolean $visibility Import objects to be directly visible?. + * @param boolean $insertOnly If set to true, only new objects are added, + * old are not updated. + * + * @return void + */ +function parseImportXML(array $version, int $institution_id, string $XMLFolder, string $dataFolder = "", int $sammlung_id = 0, bool $visibility = false, bool $insertOnly = false) { + + if (!is_dir(MD_IMPORTER_CONF::$import_dir_xml . 
"{$XMLFolder}")) throw new MDFileDoesNotExist("The folder to import from ($XMLFolder) does not exist."); + + if (empty($dataFolder)) { + $importImages = false; + } + else $importImages = true; + + // Set up writers + + $collectionWriter = new MDCollectionWriter($version['mainDB']); + $literatureWriter = new MDLiteratureWriter($version['mainDB']); + $linkWriter = new MDLinkWriter($version['mainDB']); + $seriesWriter = new MDSeriesWriter($version['mainDB']); + $exhibitionWriter = new MDExhibitionWriter($version['mainDB']); + $objectRecordWriter = new MDObjectRecordWriter($version['mainDB']); + $tagWriter = new MDTagWriter($version['nodaDB']); + $resourceWriter = new MDResourceWriter($version['mainDB']); + + $outputHandler = new MDOutputHandler; + $outputHandler->setVerbosity(2); + + $objectWriter = new MDObjectWriter($version['mainDB'], $version['nodaDB'], $version['link'], $version['filepath'], $version['dataFolderLink']); + + $startCounter = 0; + $iCounter = 0; + + foreach (MD_STD::scandir(MD_IMPORTER_CONF::$import_dir_xml . "{$XMLFolder}") as $xmlFile) { + + if ($iCounter < $startCounter) { + ++$iCounter; + continue; + } + + if (pathinfo($xmlFile, PATHINFO_EXTENSION) !== "json") continue; + + $outputHandler->toLog("Attempting to load JSON file {$xmlFile}", 2); + + $rawData = MD_STD::file_get_contents(MD_IMPORTER_CONF::$import_dir_xml . "{$XMLFolder}/{$xmlFile}"); + $rawData = str_replace('\u0096', '-', $rawData); + $rawData = str_replace('\u0092', "'", $rawData); + $rawData = str_replace('\u0091', "'", $rawData); + $rawData = str_replace('\u0094', '"', $rawData); + $rawData = str_replace('\u0093', '"', $rawData); + $rawData = str_replace('\u0084', '"', $rawData); + $objectData = json_decode($rawData, true); + //print_r($rawData); + //print_r($objectData); + + if (empty($objectData['Signatur'])) $objectData['Signatur'] = '[' . $objectData["IMDAS_ID"] . 
']'; + + $outputHandler->toLog("Successfully loaded XML file {$xmlFile} (Object: {$objectData['Signatur']})", 2); + + // Handle contents + + // Object base data + + $objektart = ""; + if (isset($objectData['Objektart']) and $objectData['Objektart'] != 'ERSATZ') $objektart = trim(MD_STD::preg_replace_str('/<[^>]*>/i', '', $objectData['Objektart'])); + else $objektart = 'KEINE ANGABE IM IMPORT'; + unset($objectData['Objektart']); + + $obj_nam = $objectData['Objekttitel']; + + if (empty($obj_nam)) $obj_nam = $objektart; + + $objektbeschreibung = $objectData['Beschreibung']; + if (!empty($objectData['K_x129x_nstlersignatur_x032x__x040x_nach_x032x_Vorlage_x041x_'])) $objektbeschreibung = $objektbeschreibung . "\n\nSignatur: " . $objectData['K_x129x_nstlersignatur_x032x__x040x_nach_x032x_Vorlage_x041x_']; + if (!empty($objectData['Aufschrift_x032x__x040x_nach_x032x_Vorlage_x041x_'])) $objektbeschreibung = $objektbeschreibung . "\n\nBeschriftung: " . $objectData['Aufschrift_x032x__x040x_nach_x032x_Vorlage_x041x_']; + if (!empty($objectData['Wasserzeichen'])) $objektbeschreibung = $objektbeschreibung . "\n\nWasserzeichen: " . $objectData['Wasserzeichen']; + if (!empty($objectData['Quelle_x032x__x040x_Graphik_x032x_entnommen_x032x_aus_x041x_'])) $objektbeschreibung = $objektbeschreibung . "\n\nQuelle: " . 
$objectData['Quelle_x032x__x040x_Graphik_x032x_entnommen_x032x_aus_x041x_']; + + $objektbeschreibung = str_replace("\[W9]\\", "\n", (string)$objektbeschreibung); + if (!$objektbeschreibung) $objektbeschreibung = 'Ein beschreibender Text war im Import nicht enthalten'; + + //echo PHP_EOL;print_r($objectData['Signatur']);echo PHP_EOL; + //echo PHP_EOL;print_r($objektart);echo PHP_EOL; + //echo PHP_EOL;print_r($obj_nam);echo PHP_EOL; + + unset($objectData['Beschreibung'], $objectData['K_x129x_nstlersignatur_x032x__x040x_nach_x032x_Vorlage_x041x_'], $objectData['Aufschrift_x032x__x040x_nach_x032x_Vorlage_x041x_'], $objectData['Provenienz_x047x_Herkunft'], $objectData['Wasserzeichen'], $objectData['Quelle_x032x__x040x_Graphik_x032x_entnommen_x032x_aus_x041x_']); + + $object = new MDObject($version['mainDB'], $version['nodaDB'], $version['language'], $institution_id, $objectData['Signatur'], $objektart, $obj_nam, $objektbeschreibung, $outputHandler); + unset($objectData['Signatur']); + + if (!empty($objectData['Zustandsbeschreibung']) and $objectData['Zustandsbeschreibung'] != "ERSATZ") { + if (mb_strlen($objectData['Zustandsbeschreibung']) < 200) $object->set_string("zustand", $objectData['Zustandsbeschreibung']); + else $object->set_string("restaurierung", $objectData['Zustandsbeschreibung']); + } + if (!empty($objectData['Provenienz_x047x_Herkunft']) and $objectData['Provenienz_x047x_Herkunft'] != "ERSATZ") $object->set_string("objektgeschichte", $objectData['Provenienz_x047x_Herkunft']); + if (!empty($objectData['Literatur_x032x__x040x_Freitext_x041x_']) and $objectData['Literatur_x032x__x040x_Freitext_x041x_'] != "ERSATZ") { + if (is_array($objectData['Literatur_x032x__x040x_Freitext_x041x_'])) $object->set_string("notizen_text1", implode('PHP_EOL', $objectData['Literatur_x032x__x040x_Freitext_x041x_'])); + else $object->set_string("notizen_text1", $objectData['Literatur_x032x__x040x_Freitext_x041x_']); + } + + if 
(!empty($objectData['Literatur_x032x__x040x_Freitext_x041x_'])) { + if (is_array($objectData['Literatur_x032x__x040x_Freitext_x041x_'])) { + foreach ($objectData['Literatur_x032x__x040x_Freitext_x041x_'] as $litEntryName) { + if (substr($litEntryName, 0, 10) === 'Sasse 1962') $object->appendLiteratureByID(273); + else if (substr($litEntryName, 0, 10) === 'Sasse 1964') $object->appendLiteratureByID(436); + } + } + else { + if (substr($objectData['Literatur_x032x__x040x_Freitext_x041x_'], 0, 10) === 'Sasse 1962') $object->appendLiteratureByID(273); + else if (substr($objectData['Literatur_x032x__x040x_Freitext_x041x_'], 0, 10) === 'Sasse 1964') $object->appendLiteratureByID(436); + } + } + + unset($objectData['Zustandsbeschreibung'], $objectData['Provenienz_x047x_Herkunft'], $objectData['Literatur_x032x__x040x_Freitext_x041x_']); + + /* + * Logic: Get collection name from either COLL_OBJ -> Bereich or COLL_OBJ -> Sammlung + * Attention: COLL_OBJ -> Sammlung might be an array + */ + if ($sammlung_id !== 0) { + $object->appendCollectionByID($sammlung_id); + } + else { + if (!empty($objectData['Sammlung_x032x__x040x_geh_x148x_rt_x032x_zu_x041x_']) and !is_array($objectData['Sammlung_x032x__x040x_geh_x148x_rt_x032x_zu_x041x_']) and $objectData['Sammlung_x032x__x040x_geh_x148x_rt_x032x_zu_x041x_'] != 'ERSATZ') { + $object->appendCollectionByName($objectData['Sammlung_x032x__x040x_geh_x148x_rt_x032x_zu_x041x_'], "", $collectionWriter); + } + else if (!empty($objectData['Sammlung_x032x__x040x_geh_x148x_rt_x032x_zu_x041x_']) and is_array($objectData['Sammlung_x032x__x040x_geh_x148x_rt_x032x_zu_x041x_'])) { + foreach ($objectData['Sammlung_x032x__x040x_geh_x148x_rt_x032x_zu_x041x_'] as $value) { + $object->appendCollectionByName($value, "", $collectionWriter); + } + } + }; + unset($objectData['Sammlung_x032x__x040x_geh_x148x_rt_x032x_zu_x041x_']); + + /**/ + if (!empty($objectData["Material_x047x_Technik"]) and $objectData["Material_x047x_Technik"] != "ERSATZ") 
$object->set_objekt_material_technik($objectData["Material_x047x_Technik"]); + if (!empty($objectData["Umfang_x047x_Format"]) and $objectData["Umfang_x047x_Format"] != "ERSATZ") $object->set_objekt_masse($objectData["Umfang_x047x_Format"]); + if (!empty($objectData['aktueller_x032x_Standort']) and $objectData['aktueller_x032x_Standort'] != "ERSATZ") { + if (is_array($objectData['aktueller_x032x_Standort'])) { + $object->set_string("standort_aktuell", implode("; ", $objectData['aktueller_x032x_Standort'])); + } + else $object->set_string("standort_aktuell", $objectData['aktueller_x032x_Standort']); + } + unset($objectData['aktueller_x032x_Standort']); + + /* + * Events + */ + + $eventsConcordance = [ + 'verlegt' => 3, + 'Vorlage erstellt' => 4, + 'wurde abgebildet' => 5, + 'gemalt' => 9, + 'Druckplatte hergestellt' => 12, + 'gezeichnet' => 19, + 'gedruckt' => 26, + 'modelliert' => 31, + ]; + + if (!empty($objectData['Ereignis-Block']) and empty($objectData['Ereignis-Block'][0])) $objectData['Ereignis-Block'] = [$objectData['Ereignis-Block']]; + + /* + * Duplicate events in case of multiple actors or places + */ + + if (!isset($objectData['Ereignis-Block'])) $objectData['Ereignis-Block'] = []; + + // Duplicate event in case of multiple actors + foreach ($objectData['Ereignis-Block'] as $ix => $tEvent) { + + if (empty($tEvent['Ereignis-Typ'])) { + unset($objectData['Ereignis-Block'][$ix]); + continue; + } + + if (!empty($tEvent['wer_x063x__x032x__x040x_Ereignis_x041x_']) + and is_array($tEvent['wer_x063x__x032x__x040x_Ereignis_x041x_']) + ) { + $copyEvent = $tEvent; + $copyEvent['wer_x063x__x032x__x040x_Ereignis_x041x_'] = $copyEvent['wer_x063x__x032x__x040x_Ereignis_x041x_'][1]; + $objectData['Ereignis-Block'][$ix]['wer_x063x__x032x__x040x_Ereignis_x041x_'] = $objectData['Ereignis-Block'][$ix]['wer_x063x__x032x__x040x_Ereignis_x041x_'][0]; + $copyEvent['GND_x032x_Person_x032x__x040x_Ereignis_x041x_'] = $copyEvent['GND_x032x_Person_x032x__x040x_Ereignis_x041x_'][1]; + 
$objectData['Ereignis-Block'][$ix]['GND_x032x_Person_x032x__x040x_Ereignis_x041x_'] = $objectData['Ereignis-Block'][$ix]['GND_x032x_Person_x032x__x040x_Ereignis_x041x_'][0]; + $objectData['Ereignis-Block'][] = $copyEvent; + } + } + + // Duplicate event in case of multiple places + foreach ($objectData['Ereignis-Block'] as $ix => $tEvent) { + if (!empty($tEvent['wo_x063x__x032x__x040x_Ereignis_x041x_']) + and is_array($tEvent['wo_x063x__x032x__x040x_Ereignis_x041x_']) + ) { + $copyEvent = $tEvent; + $copyEvent['wo_x063x__x032x__x040x_Ereignis_x041x_'] = $copyEvent['wo_x063x__x032x__x040x_Ereignis_x041x_'][1]; + $objectData['Ereignis-Block'][$ix]['wo_x063x__x032x__x040x_Ereignis_x041x_'] = $objectData['Ereignis-Block'][$ix]['wo_x063x__x032x__x040x_Ereignis_x041x_'][0]; + $objectData['Ereignis-Block'][] = $copyEvent; + } + } + + foreach ($objectData['Ereignis-Block'] as $ix => $tEvent) { + + if (!isset($eventsConcordance[$tEvent['Ereignis-Typ']])) { + throw new Exception("Unknown event type: '{$tEvent['Ereignis-Typ']}'"); + } + + $event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], $eventsConcordance[$tEvent['Ereignis-Typ']], $outputHandler); + + if (!empty($objectData['Ereignis-Block'][$ix]['GND_x032x_Person_x032x__x040x_Ereignis_x041x_'])) { + $event->set_persinst_id($objectData['Ereignis-Block'][$ix]['wer_x063x__x032x__x040x_Ereignis_x041x_'], "", "", $objectData['Ereignis-Block'][$ix]['GND_x032x_Person_x032x__x040x_Ereignis_x041x_']); + } + else if (!empty($objectData['Ereignis-Block'][$ix]['wer_x063x__x032x__x040x_Ereignis_x041x_'])) $event->set_persinst_id($objectData['Ereignis-Block'][$ix]['wer_x063x__x032x__x040x_Ereignis_x041x_']); + if (!empty($objectData['Ereignis-Block'][$ix]['wann_x063x__x032x__x040x_Ereignis_x041x_'])) $event->set_zeiten_id($objectData['Ereignis-Block'][$ix]['wann_x063x__x032x__x040x_Ereignis_x041x_']); + + if (!empty($objectData['Ereignis-Block'][$ix]['wo_x063x__x032x__x040x_Ereignis_x041x_'])) 
$event->set_orte_id($objectData['Ereignis-Block'][$ix]['wo_x063x__x032x__x040x_Ereignis_x041x_']); + $object->appendEvent($event); + + unset($objectData['Ereignis-Block'][$ix]['Ereignis-Typ'], $objectData['Ereignis-Block'][$ix]['wer_x063x__x032x__x040x_Ereignis_x041x_'], $objectData['Ereignis-Block'][$ix]['GND_x032x_Person_x032x__x040x_Ereignis_x041x_'], $objectData['Ereignis-Block'][$ix]['wann_x063x__x032x__x040x_Ereignis_x041x_'], $objectData['Ereignis-Block'][$ix]['wo_x063x__x032x__x040x_Ereignis_x041x_']); + if (empty($objectData['Ereignis-Block'][$ix])) unset ($objectData['Ereignis-Block'][$ix]); + + } + if (empty($objectData['Ereignis-Block'])) unset ($objectData['Ereignis-Block']); + + /* + * Tags + */ + if (!empty($objectData['Sachschlagwort'])) { + foreach ($objectData['Sachschlagwort'] as $tTag) { + $object->appendTagByName($tTag, "", $tagWriter); + } + unset($objectData['Sachschlagwort']); + } + + if ($importImages === true) { + + // Images, Vorderseite + $objectData["Dateiname_x032x_Vorderseite"] = str_replace("/", "-", $objectData["Dateiname_x032x_Vorderseite"]) . ".jpg"; + if (!file_exists(MD_IMPORTER_CONF::$import_dir_files . $dataFolder . "/" . $objectData["Dateiname_x032x_Vorderseite"])) { + + /* + $imageNameFound = false; + if ($imageNameFound === false) { + } + */ + unset($objectData["Dateiname_x032x_Vorderseite"]); + //continue; + } + else { + $image = new MDImage($version['mainDB'], (string)$object->get_string("objekt_name"), MD_IMPORTER_CONF::$import_dir_files . $dataFolder . "/" . $objectData["Dateiname_x032x_Vorderseite"]); + $image->set_image_owner("Stiftung Händelhaus, Halle"); + $image->set_visible(true); + $image->set_main_image(true); + $image->set_image_rights("CC BY-NC-SA"); + $object->appendImage($image); + } + + unset($objectData["Dateiname_x032x_Vorderseite"], $image); + + // Images, Rückseite + $objectData["Dateiname_x032x_R_x129x_ckseite"] = str_replace("/", "-", $objectData["Dateiname_x032x_R_x129x_ckseite"]) . 
".jpg"; + if (!file_exists(MD_IMPORTER_CONF::$import_dir_files . $dataFolder . "/" . $objectData["Dateiname_x032x_R_x129x_ckseite"])) { + /* + $imageNameFound = false; + + if ($imageNameFound === false) { + unset($objectData["Dateiname_x032x_R_x129x_ckseite"]); + //continue; + } + */ + } else { + $image = new MDImage($version['mainDB'], (string)$object->get_string("objekt_name"), MD_IMPORTER_CONF::$import_dir_files . $dataFolder . "/" . $objectData["Dateiname_x032x_R_x129x_ckseite"]); + $image->set_image_owner("Stiftung Händelhaus, Halle"); + $image->set_visible(true); + $image->set_main_image(false); + $image->set_image_rights("CC BY-NC-SA"); + $object->appendImage($image); + } + unset($objectData["Dateiname_x032x_R_x129x_ckseite"], $image); + + } + + unset($objectData["Inventar-Nummer"], $objectData["Objekttitel"], + $objectData['Maler_x047x_Zeichner'], + $objectData['GND_x032x_Maler_x047x_Zeichner'], + $objectData["Ersteller_x032x_der_x032x_Druckplatte"], + $objectData["GND_x032x_Ersteller_x032x_der_x032x_Druckplatte"], + $objectData["Bildhauer_x047x_Graveur"], + $objectData["GND_x032x_Bildhauer_x047x_Graveur"], + $objectData["Ersteller_x032x_der_x032x_Vorlage"], + $objectData["GND_x032x_Ersteller_x032x_der_x032x_Vorlage"], $objectData["Dargestellte_x032x_Person"], $objectData["GND_x032x_dargestellte_x032x_Person"], $objectData["Datierung"], $objectData["Datierung_x032x_textuell"], $objectData["Material_x047x_Technik"], $objectData["Umfang_x047x_Format"], $objectData["Fotos"], $objectData["Fotosammlung_x032x_Positiv"], $objectData["Fotosammlung_x032x_Negativ"], $objectData["Erfassung"], $objectData["Korrektur"], $objectData["Verleger"], $objectData["GND_x032x_Verleger"], $objectData["Ort"], $objectData["Bemerkungen"], $objectData["Restaurierung"], $objectData["Objektsch_x132x_tzung"], $objectData["Fotosammlung_x032x_Dia"]); + + foreach ($objectData as $key => $value) { + if ($value === "ERSATZ") unset($objectData[$key]); + } + /* + * Write it! 
/**
 * Imports objects from the CSV files of a Google Arts & Culture export.
 *
 * Every "*.csv" file in the import folder is read as a semicolon-separated
 * table whose first row holds the column names. Each further row becomes one
 * MDObject: base data in the default language, collection, one object record
 * per "title/<lang>" column, one creation event per "creator#<n>" column (or a
 * single creator-less event) and, if a tag named like the object type already
 * exists, that tag. The object is then written through MDObjectWriter.
 *
 * Every consumed column is removed from the row. If any column is left over
 * after the object has been written, MDParserIncomplete is thrown so that
 * unhandled fields are noticed rather than silently dropped.
 *
 * NOTE: image import is not implemented. When $dataFolder is set, the image
 * columns are not consumed and therefore trigger MDParserIncomplete.
 *
 * @param array            $version        Instance to import into.
 * @param integer          $institution_id Institution to import to.
 * @param non-empty-string $XMLFolder      Folder of the files to import.
 * @param string           $dataFolder     Data folder. Empty means: no images.
 * @param integer          $sammlung_id    Collection ID. Optional; 0 means that the
 *                                         collection is taken from the CSV.
 * @param boolean          $visibility     Import objects to be directly visible?.
 * @param boolean          $insertOnly     If set to true, only new objects are added,
 *                                         old are not updated.
 *
 * @throws MDFileDoesNotExist If the import folder does not exist.
 * @throws MDParserIncomplete If the object type is unknown or unparsed columns remain.
 * @throws Exception          If a CSV file cannot be opened or has no header row.
 *
 * @return void
 */
function parseImportXML(array $version, int $institution_id, string $XMLFolder, string $dataFolder = "", int $sammlung_id = 0, bool $visibility = false, bool $insertOnly = false) {

    $importDir = MD_IMPORTER_CONF::$import_dir_xml . "{$XMLFolder}";
    if (!is_dir($importDir)) {
        throw new MDFileDoesNotExist("The folder to import from ($XMLFolder) does not exist.");
    }

    $importImages = !empty($dataFolder);

    // Set up writers.
    // NOTE(review): only $collectionWriter is referenced below. The other writers
    // are still constructed because their constructors are not visible from here;
    // confirm they have no required side effects and drop them.
    $collectionWriter   = new MDCollectionWriter($version['mainDB']);
    $literatureWriter   = new MDLiteratureWriter($version['mainDB']);
    $linkWriter         = new MDLinkWriter($version['mainDB']);
    $seriesWriter       = new MDSeriesWriter($version['mainDB']);
    $exhibitionWriter   = new MDExhibitionWriter($version['mainDB']);
    $objectRecordWriter = new MDObjectRecordWriter($version['mainDB']);
    $tagWriter          = new MDTagWriter($version['nodaDB']);

    $outputHandler = new MDOutputHandler;
    $outputHandler->setVerbosity(2);

    $objectWriter = new MDObjectWriter($version['mainDB'], $version['nodaDB'], $version['link'], $version['filepath'], $version['dataFolderLink']);

    // Prepared statement for checking if a tag of a given name exists
    $tagNameExistsStmt = $version['nodaDB']->do_prepare("SELECT `tag_id`
        FROM `tag`
        WHERE `tag_name` = ?
        LIMIT 1");

    // Language whose columns ("type/de", "title/de", ...) provide the base data
    $defaultLang = "de";

    // Event type ID used for the creation event, by (English) object type
    $creationEventTypes = [
        "print"     => 26,
        "drawing"   => 19,
        "pastel"    => 9,
        "painting"  => 9,
        "sculpture" => 31,
        "medal"     => 1,
    ];

    // Columns that only matter when images are imported
    $imageColumns = ['customtext: photographer', 'filespec', 'filetype'];

    // Returns the remainder of every key that starts with $prefix.
    $suffixesOf = static function (array $keys, string $prefix): array {
        $suffixes = [];
        foreach ($keys as $key) {
            $key = (string)$key;
            if (strncmp($key, $prefix, strlen($prefix)) === 0) {
                $suffixes[] = substr($key, strlen($prefix));
            }
        }
        return $suffixes;
    };

    // Sets the time of a creation event from the display date in the default
    // language, falling back to the English one. Does nothing if neither is set.
    $setCreationTime = static function ($event, array $row) use ($defaultLang): void {
        foreach ([$defaultLang, "en"] as $lang) {
            if (!empty($row["dateCreated:display/$lang"])) {
                $event->set_zeiten_id(
                    $row["dateCreated:display/$lang"],
                    (int)($row['dateCreated:start'] ?? 0),
                    (int)($row['dateCreated:end'] ?? 0)
                );
                return;
            }
        }
    };

    try {

        foreach (MD_STD::scandir($importDir) as $xmlFile) {

            if (pathinfo($xmlFile, PATHINFO_EXTENSION) !== "csv") continue;

            $outputHandler->toLog("Attempting to load CSV file {$xmlFile}", 2);

            $handle = fopen("{$importDir}/{$xmlFile}", "r");
            if ($handle === false) {
                throw new Exception("File {$xmlFile} cannot be opened");
            }

            // The handle is closed in the finally block, also when a row aborts the import
            try {

                $fileHeaders = fgetcsv($handle, 5000000, ";");
                if (!$fileHeaders) {
                    throw new Exception("Failed to read headers for file $xmlFile");
                }

                while (is_array($data = fgetcsv($handle, 5000000, ";"))) {

                    // fgetcsv() reports a blank line as [null]: nothing to import
                    if ($data === [null]) continue;

                    // Create associative array for easier parsing
                    $objectData = [];
                    foreach ($data as $key => $value) {
                        $objectData[$fileHeaders[$key] ?? ''] = $value;
                    }

                    // Store type for later use (when parsing creation and tags).
                    // Both values are determined anew for each row, so that a row
                    // without a type never inherits the type of the previous row.
                    $typeDefLang = (string)($objectData["type/$defaultLang"] ?? '');
                    $type = !empty($objectData["type/en"]) ? $objectData["type/en"] : $typeDefLang;

                    // Object base data
                    $object = new MDObject($version['mainDB'], $version['nodaDB'], $version['language'], $institution_id, $objectData['itemid'], $objectData["type/$defaultLang"], $objectData["title/$defaultLang"], $objectData["description/$defaultLang"], $outputHandler);

                    unset($objectData['itemid'], $objectData["type/$defaultLang"], $objectData["title/$defaultLang"], $objectData["description/$defaultLang"]);

                    if (!empty($objectData["format"])) $object->set_objekt_masse($objectData["format"]);
                    if (!empty($objectData["medium/$defaultLang"])) $object->set_objekt_material_technik($objectData["medium/$defaultLang"]);
                    unset($objectData["format"], $objectData["medium/$defaultLang"]);

                    if (!empty($objectData['rights'])) $object->set_string("metadata_rights_holder", $objectData['rights']);
                    unset($objectData['rights']);

                    // Handle collections: an explicitly given collection wins over the CSV
                    if ($sammlung_id !== 0) {
                        $object->appendCollectionByID($sammlung_id);
                    }
                    else if (!empty($objectData['customtext:collection'])) {
                        $object->appendCollectionByName($objectData['customtext:collection'], "", $collectionWriter);
                    }
                    unset($objectData['customtext:collection']);

                    // Snapshot of the remaining column names, used for all prefix searches below
                    $availableKeys = array_keys($objectData);

                    // Handle translations of base data: one record per remaining "title/<lang>"
                    foreach ($suffixesOf($availableKeys, "title/") as $suffix) {

                        $objectRecord = new MDObjectRecord($version['mainDB'], 0, (string)$suffix, $objectData["type/$suffix"] ?? '', $objectData["title/$suffix"], $objectData["description/$suffix"] ?? '');
                        $objectRecord->set_record_objekt_material_technik($objectData["medium/$suffix"] ?? '');
                        $objectRecord->set_record_objekt_masse((string)$object->get_string("objekt_masse"));
                        $object->appendObjectRecord($objectRecord);

                        unset($objectData["type/$suffix"],
                            $objectData["title/$suffix"],
                            $objectData["description/$suffix"],
                            $objectData["medium/$suffix"]);

                    }

                    /**
                     * Parse creation
                     */
                    $creationEventType = $creationEventTypes[(string)$type] ?? 0;
                    if ($creationEventType === 0) {
                        throw new MDParserIncomplete("Unknown creator type for object type: $type");
                    }

                    // "creator#<n>/<lang>" columns: reduce to the distinct "<n>" parts
                    $creatorNumbers = [];
                    foreach ($suffixesOf($availableKeys, "creator#") as $entity) {
                        $creatorNumbers[] = substr($entity, 0, strpos($entity, "/") ?: strlen($entity));
                    }
                    $creatorNumbers = array_unique($creatorNumbers);

                    $eventsCreated = 0;
                    foreach ($creatorNumbers as $suffix) {

                        if (!empty($objectData["creator#" . $suffix . "/$defaultLang"])) {
                            $creator = $objectData["creator#" . $suffix . "/$defaultLang"];
                        }
                        else if (!empty($objectData["creator#" . $suffix . "/en"])) {
                            $creator = $objectData["creator#" . $suffix . "/en"];
                        }
                        else continue;

                        $event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], $creationEventType, $outputHandler);
                        $event->set_persinst_id($creator);
                        $setCreationTime($event, $objectData);
                        $object->appendEvent($event);

                        ++$eventsCreated;

                    }

                    // Without any creator, a creation event is still recorded
                    if ($eventsCreated === 0) {
                        $event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], $creationEventType, $outputHandler);
                        $setCreationTime($event, $objectData);
                        $object->appendEvent($event);
                    }

                    // Remove the columns consumed by the creation events
                    foreach (["creator#", "dateCreated:display/"] as $searchTarget) {
                        foreach ($suffixesOf($availableKeys, $searchTarget) as $suffix) {
                            unset($objectData[$searchTarget . $suffix]);
                        }
                    }
                    unset($objectData['dateCreated:start'], $objectData['dateCreated:end']);

                    // Attempt to set a tag based on the object type of this row
                    if (!empty($typeDefLang)) {
                        $tagNameExistsStmt->bind_param("s", $typeDefLang);
                        $tagNameExistsStmt->execute();
                        $tagNameExistsResult = $tagNameExistsStmt->do_get_result();
                        if ($tagNameExistsResult->num_rows === 1) {
                            $object->appendTagByID($tagNameExistsResult->fetch_row()[0]);
                        }
                        $tagNameExistsResult->close();
                        unset($tagNameExistsResult);
                    }

                    // Handle images: without a data folder the image columns are dropped
                    if ($importImages === false) {
                        foreach ($imageColumns as $imageColumn) {
                            unset($objectData[$imageColumn]);
                        }
                    }

                    /*
                     * Write it!
                     */
                    $object->set_objekt_publik($visibility);
                    $objectWriter->writeObject($object, true, $insertOnly, $outputHandler);

                    // Anything left over is a column this parser does not know about
                    if (!empty($objectData)) {
                        throw new MDParserIncomplete(var_export($objectData, true));
                    }

                    // Throttle between objects
                    usleep(IMPORTER_DELAY_PER_OBJECT);

                }

            }
            finally {
                fclose($handle);
            }

        }

    }
    finally {
        $tagNameExistsStmt->close();
        unset($tagNameExistsStmt);
    }

}
/**
 * Imports every "*.xml" file of the import folder through GosParser.
 *
 * Each file is loaded as a string, its namespace colons are masked (see the
 * inline comment), it is parsed with SimpleXML, converted by GosParser and the
 * resulting object is written through MDObjectWriter.
 *
 * @param array            $version        Instance to import into.
 * @param integer          $institution_id Institution to import to.
 * @param non-empty-string $XMLFolder      Folder of the XML files to import.
 * @param string           $dataFolder     Data folder. Passed on to GosParser.
 * @param integer          $sammlung_id    Collection ID. Optional.
 * @param boolean          $visibility     Import objects to be directly visible?.
 * @param boolean          $insertOnly     If set to true, only new objects are added,
 *                                         old are not updated.
 *
 * @throws MDFileDoesNotExist If the import folder does not exist.
 * @throws Exception          If a file cannot be parsed as XML.
 *
 * @return void
 */
function parseImportXML(array $version, int $institution_id, string $XMLFolder, string $dataFolder = "", int $sammlung_id = 0, bool $visibility = false, bool $insertOnly = false) {

    $importDir = MD_IMPORTER_CONF::$import_dir_xml . "{$XMLFolder}";
    if (!is_dir($importDir)) {
        throw new MDFileDoesNotExist("The folder to import from ($XMLFolder) does not exist.");
    }

    // Set up writers
    $collectionWriter = new MDCollectionWriter($version['mainDB']);
    $seriesWriter     = new MDSeriesWriter($version['mainDB']);
    $literatureWriter = new MDLiteratureWriter($version['mainDB']);
    $tagWriter        = new MDTagWriter($version['nodaDB']);

    $outputHandler = new MDOutputHandler;
    $outputHandler->setVerbosity(2);

    $objectWriter = new MDObjectWriter($version['mainDB'], $version['nodaDB'], $version['link'], $version['filepath'], $version['dataFolderLink']);

    // Manual resume knob: directory entries whose position is below this value
    // are skipped. $counter counts every directory entry, not only XML files.
    $startFile = 0;
    $counter = 0;
    foreach (MD_STD::scandir($importDir) as $xmlFile) {

        ++$counter;
        if (pathinfo($xmlFile, PATHINFO_EXTENSION) !== "xml") {
            continue;
        }
        if ($counter < $startFile) {
            continue;
        }

        // LIDO contains : in tag names. SimpleXML can't handle those.
        // The "lido:" prefix is dropped and every other colon is masked as "___".
        // URL schemes and the xmlns/xsi attribute prefixes are restored afterwards;
        // "https://" is restored just like "http://" so that links stay intact.
        $rawData = MD_STD::file_get_contents("{$importDir}/{$xmlFile}");
        $rawData = strtr(str_replace(":", "___", str_replace("lido:", "", $rawData)), [
            "http___//"  => "http://",
            "https___//" => "https://",
            "xmlns___"   => "xmlns:",
            "xsi___"     => "xsi:",
        ]);

        $xmlData = simplexml_load_string($rawData);
        if ($xmlData === false) {
            throw new Exception("XML couldn't be loaded");
        }

        $lidoObject = new GosParser($version['mainDB'], $version['nodaDB'],
            $version['filepath'],
            $version['language'], $institution_id,
            $xmlData, $dataFolder, $outputHandler,
            $collectionWriter,
            $seriesWriter,
            $literatureWriter,
            $tagWriter,
            $sammlung_id, $visibility,
        );

        // $insertOnly is passed on as given by the caller
        $objectWriter->writeObject($lidoObject->get_mdobject(), true, $insertOnly, $outputHandler);

        $outputHandler->toLog("Done with object $counter", 2);

        // Throttle between objects
        usleep(IMPORTER_DELAY_PER_OBJECT);

    }

}

// ---------------------------------------------------------------------------
// parsers/imdas_pro_sql.php
// ---------------------------------------------------------------------------

// Name of the database that is used to qualify the imdas tables in the queries
const DB_NAME_IMDAS = 'imdas_dreieich';

/**
 * Runs a prepared statement for one ID and returns all rows it yields.
 *
 * The ID is bound as the statement's single integer parameter.
 *
 * @param MDMysqliStmt $stmt      Prepared statement with exactly one placeholder.
 * @param integer      $object_id ID to bind to the placeholder.
 *
 * @return array Result rows as associative arrays (MYSQLI_ASSOC).
 */
function getAllFromStmt(MDMysqliStmt $stmt, int $object_id):array {

    $stmt->bind_param("i", $object_id);
    $stmt->execute();
    $result = $stmt->do_get_result();

    // Release the result set even if fetching fails
    try {
        return $result->fetch_all(MYSQLI_ASSOC);
    }
    finally {
        $result->close();
    }

}
+ * @param boolean $visibility Import objects to be directly visible?. + * @param boolean $insertOnly If set to true, only new objects are added, + * old are not updated. + * + * @return void + */ +function parseImportXML(array $version, int $institution_id, string $XMLFolder, string $dataFolder = "", int $sammlung_id = 0, bool $visibility = false, bool $insertOnly = false) { + + if (!is_dir(MD_IMPORTER_CONF::$import_dir_xml . "{$XMLFolder}")) throw new MDFileDoesNotExist("The folder to import from ($XMLFolder) does not exist."); + + if (empty($dataFolder)) { + $importImages = false; + } + else $importImages = true; + + $ignore = $visibility; + $ignore = $sammlung_id; + + // Set up writers + + $collectionWriter = new MDCollectionWriter($version['mainDB']); + # $literatureWriter = new MDLiteratureWriter($version['mainDB']); + # $linkWriter = new MDLinkWriter($version['mainDB']); + # $seriesWriter = new MDSeriesWriter($version['mainDB']); + $tagWriter = new MDTagWriter($version['nodaDB']); + + $outputHandler = new MDOutputHandler; + $outputHandler->setVerbosity(2); + + $objectWriter = new MDObjectWriter($version['mainDB'], $version['nodaDB'], $version['link'], $version['filepath'], $version['dataFolderLink']); + + /* + $objectWriter->disableImportAdditionalData = true; + $objectWriter->disableUpdateBaseData = true; + $objectWriter->disableImportCollections = true; + $objectWriter->disableImportEvents = true; + $objectWriter->disableImportTags = true; + $objectWriter->disableImportLiterature = true; + $objectWriter->disableImportHyperlinks = true; + // $objectWriter->disableImportSeries = true; + $objectWriter->disableImportImagesResources = true; + $objectWriter->disableImportObjectRecords = true; + $objectWriter->disableImportTranscriptions = true; + $objectWriter->disableImportMarkings = true; + $objectWriter->disableImportExhibitions = true; + $objectWriter->disableImportReception = true; + $objectWriter->importObjectTypeAsTag = true; + */ + + $mysqli = 
$version['mainDB']; + + // Set up prepared statement for checking if a tag of a given name exists + $tagNameExistsStmt = $version['nodaDB']->do_prepare("SELECT `tag_id` + FROM `tag` + WHERE `tag_name` = ? + LIMIT 1"); + + $objResult = $mysqli->do_read_query("SELECT *, + `language`.`code` AS `lang_code`, + `res_type`.`name` AS `res_type_name` + FROM `" . DB_NAME_IMDAS . "`.`object` + LEFT JOIN `" . DB_NAME_IMDAS . "`.`real_obj` + ON `object`.`object_id` = `real_obj`.`object_id` + LEFT JOIN `" . DB_NAME_IMDAS . "`.`res_type` + ON `object`.`res_type_id` = `res_type`.`res_type_id` + LEFT JOIN `" . DB_NAME_IMDAS . "`.`coin_main` + ON `object`.`object_id` = `coin_main`.`object_id` + LEFT JOIN `" . DB_NAME_IMDAS . "`.`media_obj` + ON `object`.`object_id` = `media_obj`.`object_id` + LEFT JOIN `" . DB_NAME_IMDAS . "`.`language` + ON `object`.`language_id` = `language`.`language_id` + LEFT JOIN `" . DB_NAME_IMDAS . "`.`coll_obj` + ON `object`.`object_id` = `coll_obj`.`object_id` + "); + + $colorStmt = $mysqli->do_prepare("SELECT `color_name`, `obj_part`.`name` AS `part_name` + FROM `" . DB_NAME_IMDAS . "`.`color_type`, `" . DB_NAME_IMDAS . "`.`obj_color`, + `" . DB_NAME_IMDAS . "`.`obj_part` + WHERE `object_id` = ? + AND `obj_color`.`color_type_id` = `color_type`.`color_type_id` + AND `obj_color`.`part_id` = `obj_part`.`part_id`"); + + $collectionStmt = $mysqli->do_prepare("SELECT * + FROM `" . DB_NAME_IMDAS . "`.`collection` + WHERE `collection_id` = ?"); + + $specialsStmt = $mysqli->do_prepare("SELECT * + FROM `" . DB_NAME_IMDAS . "`.`obj_specials` + WHERE `object_id` = ?"); + + $tagsStmt = $mysqli->do_prepare("SELECT * + FROM `" . DB_NAME_IMDAS . "`.`term_obj`, `" . DB_NAME_IMDAS . "`.`thes_rel` + WHERE `object_id` = ? + AND `thes_rel`.`thes_rel_id` = `term_obj`.`thes_rel_id`"); + + /* + $vtrStmt = $mysqli->do_prepare("SELECT * + FROM `" . DB_NAME_IMDAS . "`.`vtr`, `" . DB_NAME_IMDAS . "`.`vtr_term` + WHERE `object_id` = ? 
+ AND `vtr_term`.`term_id` = `vtr`.`term_id`"); + */ + + $measurementsStmt = $mysqli->do_prepare("SELECT `obj_value`, `unit`.`name` AS `unit_name`, `value_type`.`name` + FROM `" . DB_NAME_IMDAS . "`.`obj_value`, `" . DB_NAME_IMDAS . "`.`value_type`, `" . DB_NAME_IMDAS . "`.`unit` + WHERE `object_id` = ? + AND `obj_value`.`value_type_id` = `value_type`.`value_type_id` + AND `obj_value`.`unit_id` = `unit`.`unit_id`"); + + $objNumStmt = $mysqli->do_prepare("SELECT `note`, `obj_num`.`num`, `obj_num_type`.`name` AS `type_name` + FROM `" . DB_NAME_IMDAS . "`.`obj_num`, `" . DB_NAME_IMDAS . "`.`obj_num_type` + WHERE `object_id` = ? + AND `obj_num`.`num_type_id` = `obj_num_type`.`num_type_id`"); + + $entryStmt = $mysqli->do_prepare("SELECT * + FROM `" . DB_NAME_IMDAS . "`.`regist_hist`, `" . DB_NAME_IMDAS . "`.`regist_type` + WHERE `object_id` = ? + AND `regist_hist`.`regist_type_id` = `regist_type`.`regist_type_id`"); + + $inscriptionStmt = $mysqli->do_prepare("SELECT `inscription`, `obj_part`.`name` AS `part_name` + FROM `" . DB_NAME_IMDAS . "`.`obj_inscr`, `" . DB_NAME_IMDAS . "`.`obj_part` + WHERE `object_id` = ? + AND `obj_inscr`.`part_id` = `obj_part`.`part_id`"); + + $locationStmt = $mysqli->do_prepare("SELECT * + FROM `" . DB_NAME_IMDAS . "`.`obj_loc_new`, `" . DB_NAME_IMDAS . "`.`location` + WHERE `object_id` = ? + AND `obj_loc_new`.`location_id` = `location`.`location_id`"); + + $personStmt = $mysqli->do_prepare("SELECT `role`.`name` AS `role_name`, + `person`.*, `obj_pers_role`.`note` + FROM `" . DB_NAME_IMDAS . "`.`obj_pers_role`, `" . DB_NAME_IMDAS . "`.`role`, `" . DB_NAME_IMDAS . "`.`person` + WHERE `object_id` = ? + AND `obj_pers_role`.`person_id` = `person`.`person_id` + AND `obj_pers_role`.`role_id` = `role`.`role_id`"); + + $placeStmt = $mysqli->do_prepare("SELECT `place_type`.`name` AS `type_name`, `obj_place`.`note`, `thes_rel`.`name` AS `place_name` + FROM `" . DB_NAME_IMDAS . "`.`obj_place`, `" . DB_NAME_IMDAS . "`.`place_type`, `" . DB_NAME_IMDAS . 
"`.`thes_rel` + WHERE `object_id` = ? + AND `obj_place`.`place_type_id` = `place_type`.`place_type_id` + AND `obj_place`.`thes_rel_id` = `thes_rel`.`thes_rel_id`"); + + echo "HI"; + + while ($objectData = $objResult->fetch_assoc()) { + + $outputHandler->toLog("Reading object " . $objectData['object_id'], 2); + if (empty($objectData['object_id'])) continue; + + $object_id = $objectData['object_id']; + $objectData['color'] = getAllFromStmt($colorStmt, $object_id); + $objectData['collection'] = getAllFromStmt($collectionStmt, $object_id); + $objectData['specials'] = getAllFromStmt($specialsStmt, $object_id); + $objectData['tags'] = getAllFromStmt($tagsStmt, $object_id); + // $objectData['vtr'] = getAllFromStmt($vtrStmt, $object_id); + $objectData['measurements'] = getAllFromStmt($measurementsStmt, $object_id); + $objectData['objNum'] = getAllFromStmt($objNumStmt, $object_id); + $objectData['entryHist'] = getAllFromStmt($entryStmt, $object_id); + $objectData['inscription'] = getAllFromStmt($inscriptionStmt, $object_id); + $objectData['location'] = getAllFromStmt($locationStmt, $object_id); + $objectData['person'] = getAllFromStmt($personStmt, $object_id); + $objectData['place'] = getAllFromStmt($placeStmt, $object_id); + + // Remove unwanted data + + foreach ($objectData as $key => $value) { + if (empty($value)) { + unset($objectData[$key]); + continue; + } + + if (is_array($value)) { + foreach ($value as $innerKey => $innerValue) { + foreach ($innerValue as $innerInnerKey => $innerInnerValue) { + if ($innerInnerValue === null || $innerInnerValue === '') { + unset($objectData[$key][$innerKey][$innerInnerKey]); + } + } + } + if (empty($objectData[$key])) unset($objectData[$key]); + } + + } + + // Remove unwanted values + + unset($objectData['regist_num_serial'], + $objectData['import_id'], + $objectData['updatevtr'], + $objectData['invent_jn'], + $objectData['publikum'], + $objectData['updateobjsort'], + $objectData['default_coll'], + $objectData['media_type_id'], + 
$objectData['invalid'], + $objectData['ser_num'], + $objectData['language_id'], + $objectData['sort_regist_num'], + $objectData['obj_status_id'], + $objectData['elimination'], + $objectData['name'], + $objectData['code'], + $objectData['res_type_id'], + $objectData['regist_type_id'], + $objectData['photographer_id'], + $objectData['owner_original_id'], + $objectData['collection_id'], + $objectData['sort_local_num'], + $objectData['regist_str'], + ); + + // Parse + + if (empty($objectData)) continue; + + if (!empty($objectData['regist_num'])) { + $invNo = $objectData['regist_num']; + } + else { + $invNo = $objectData['object_id']; + } + + $object = new MDObject($version['mainDB'], $version['nodaDB'], $version['language'], $institution_id, $invNo, $objectData["res_type_name"], $objectData["title"] ?? $invNo, $objectData["description"] ?? $objectData['short_descr'] ?? '-', $outputHandler); + + unset($objectData['regist_num'], $objectData['object_id'], $objectData['title'], + $objectData['res_type_name'], $objectData['description']); + + if (!empty($objectData['regist_date'])) { + $object->set_string("ersterfasst_zeitpunkt", (string)$objectData['regist_date']); + } + + if (!empty($objectData['obj_history'])) { + $object->set_string("objektgeschichte", (string)$objectData['obj_history']); + } + unset($objectData['obj_history']); + + if (!empty($objectData['current_loc'])) { + $object->set_string("standort_aktuell", (string)$objectData['current_loc']); + } + unset($objectData['current_loc']); + + if (!empty($objectData['condition'])) { + $object->set_string("restaurierung", (string)$objectData['condition']); + } + unset($objectData['condition']); + + if (!empty($objectData['num_pieces'])) { + $object->set_string("stueckzahl", (string)$objectData['num_pieces']); + } + unset($objectData['num_pieces']); + + if (!empty($objectData['inscription'])) { + foreach ($objectData['inscription'] as $inscription) { + $object->append_string("beschriftung2", PHP_EOL . 
($inscription['part_name'] ?? '') . ': ' . $inscription['inscription'] . PHP_EOL); + } + } + unset($objectData['inscription']); + + if (!empty($objectData['lang_code'])) { + if (isset(MDLanguagesSet::AVAILABLE_LANGUAGES[$objectData['lang_code']])) { + $object->set_string("content_language", $objectData['lang_code']); + } + } + unset($objectData['lang_code']); + + // Images in the museum + + $imgFields = [ + 'studio', + 'rights', + 'statu_o_cons', + ]; + foreach ($imgFields as $fieldName) { + if (!empty($objectData[$fieldName])) { + $object->append_string("bilder_lokal", PHP_EOL . $fieldName . ' (imdas): ' . $objectData[$fieldName]); + } + unset($objectData[$fieldName]); + } + + // Notes + + $notesFields = [ + 'regist_num_year', + 'lastdate', + 'inv_date', + 'guid', + 'newdate', + 'rec_date', + 'altbestand', + 'regist_date', + 'negative_number', + 'positive_number', + 'local_num_text', + 'local_number', + 'remarks', + 'reservation', + 'short_descr', + 'local_sub_number', + ]; + foreach ($notesFields as $fieldName) { + if (!empty($objectData[$fieldName])) { + $object->append_string("notizen_text1", PHP_EOL . $fieldName . ' (imdas): ' . $objectData[$fieldName]); + } + unset($objectData[$fieldName]); + } + + if (!empty($objectData['color'])) { + foreach ($objectData['color'] as $tEntry) { + $object->append_string("notizen_text1", PHP_EOL . implode(': ', $tEntry) . PHP_EOL); + } + } + unset($objectData['color']); + + if (!empty($objectData['collection'])) { + foreach ($objectData['collection'] as $tCollection) { + $object->appendCollectionByName($tCollection['name'], "ID: " . $tCollection['collection_id'] . PHP_EOL . 'Registriert: ' . 
$tCollection['lastdate'], $collectionWriter); + } + } + unset($objectData['collection']); + + if (!empty($objectData['tags'])) { + foreach ($objectData['tags'] as $tEntry) { + if (empty($tEntry['name'])) continue; + $object->appendTagByName($tEntry['name'], "", $tagWriter); + } + } + unset($objectData['tags']); + + if (!empty($objectData['place'])) { + foreach ($objectData['place'] as $tEntry) { + + if (empty($tEntry['place_name'])) continue; + + if (empty(MDConcPlace::PLACE_ROLES_TO_EVENT_TYPE[$tEntry['type_name']])) { + throw new Exception("Unknown place type: " . $tEntry['type_name']); + } + $event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], MDConcPlace::PLACE_ROLES_TO_EVENT_TYPE[$tEntry['type_name']], $outputHandler); + $event->set_orte_id($tEntry['place_name']); + if (!empty($tEntry['note'])) { + $event->set_ereignis_anmerkung($tEntry['note']); + } + if ($event->get_orte_id() !== 0) $object->appendEvent($event); + + } + } + unset($objectData['place']); + + if (!empty($objectData['location'])) { + foreach ($objectData['location'] as $tEntry) { + $object->append_string("standort_eigentlich", ' - ' . $tEntry['name'] . ' (' . $tEntry['abbreviation'] . '): ' . ($tEntry['description'] ?? '')); + } + } + unset($objectData['location']); + + if (!empty($objectData['objNum'])) { + foreach ($objectData['objNum'] as $tEntry) { + $object->append_string("bilder_lokal", PHP_EOL . $tEntry['type_name'] . ': ' . 
$tEntry['num']); + } + } + unset($objectData['objNum']); + + if (!empty($objectData['entryHist'])) { + foreach ($objectData['entryHist'] as $tEntry) { + if (!empty($tEntry['name'])) $object->set_entry_type("zugang_art", $tEntry['name']); + if (!empty($tEntry['regist_hist'])) $object->set_string("zeitpunkt_zugang", $tEntry['regist_hist']); + } + } + unset($objectData['entryHist']); + + if (!empty($objectData['specials'])) { + foreach ($objectData['specials'] as $tKey => $tEntry) { + unset($tEntry['object_id'], $tEntry['lastdate'], $tEntry['lastuser']); + foreach ($tEntry as $innerKey => $innerValue) { + if (empty($innerValue) || $innerValue == '0.0000') { + unset($tEntry[$innerKey]); + } + } + + if (!empty($tEntry['dating_abs_hist'])) { + $event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], 1, $outputHandler); + $event->set_zeiten_id($tEntry['dating_abs_hist']); + if ($event->get_zeiten_id() !== 0) $object->appendEvent($event); + } + unset($tEntry['dating_abs_hist']); + + if (empty($tEntry)) { + unset($objectData['specials'][$tKey]); + } + } + if (empty($objectData['specials'])) { + unset($objectData['specials']); + } + } + + if (!empty($objectData['person'])) { + foreach ($objectData['person'] as $tKey => $tEntry) { + + if ($tEntry['role_name'] === 'Voreigentümer') { + $object->set_string("vorbesitzer", $tEntry['letter_addr'] ?? "" . ' ' . $tEntry['first_name'] ?? "" . ' ' . $tEntry['surename'] ?? "" . ' '); + unset($objectData['person'][$tKey]); + continue; + } + else if ($tEntry['role_name'] === 'Übernehmer') { + $object->append_string("notizen_text1", PHP_EOL . "Überbringer" . $tEntry['letter_addr'] ?? "" . ' ' . $tEntry['first_name'] ?? "" . ' ' . $tEntry['surename'] ?? "" . ' ' . PHP_EOL); + unset($objectData['person'][$tKey]); + continue; + } + else if ($tEntry['role_name'] === 'Überbringer') { + $object->append_string("notizen_text1", PHP_EOL . "Überbringer" . $tEntry['letter_addr'] ?? "" . ' ' . $tEntry['first_name'] ?? "" . 
' ' . $tEntry['surename'] ?? "" . ' ' . PHP_EOL); + unset($objectData['person'][$tKey]); + continue; + } + else if ($tEntry['role_name'] === 'Hersteller') { + $event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], 1, $outputHandler); + $event->set_persinst_id($tEntry['letter_addr'] ?? "" . ' ' . $tEntry['first_name'] ?? "" . ' ' . $tEntry['surename']); + if (!empty($tEntry['note'])) { + $event->set_ereignis_anmerkung($tEntry['note']); + } + if ($event->get_persinst_id() !== 0) $object->appendEvent($event); + unset($objectData['person'][$tKey]); + continue; + } + + } + if (empty($objectData['person'])) unset($objectData['person']); + } + + if (!empty($objectData['measurements'])) { + foreach ($objectData['measurements'] as $tKey => $tEntry) { + + if (in_array($tEntry['name'], ['Schätzpreis 2005', 'Schätzpreis 2001', 'Schätzpreis 2004', 'Schätzpreis 2006', 'Schätzpreis 2003', 'Schätzpreis 2002', 'Schätzpreis 2007', 'Schätzpreis 1999', 'Schätzpreis 1998', 'Schätzpreis 1997', 'Bewertung', 'Schätzpreis 2008', 'Schätzpreis 2009'], true)) { + $object->set_currency("wert_art", $tEntry['unit_name']); + $object->set_string("wert_zahl", (string)$tEntry['obj_value']); + + $object->append_string("notizen_text1", PHP_EOL . $tEntry['name'] . ': ' . $tEntry['obj_value'] . ' ' . $tEntry['unit_name']); + + unset($objectData['measurements'][$tKey]); + continue; + } + if (in_array($tEntry['name'], ['Kaufpreis'], true)) { + $object->set_currency("geld_art", $tEntry['unit_name']); + $object->set_string("ankaufsumme", (string)$tEntry['obj_value']); + + $object->append_string("notizen_text1", PHP_EOL . $tEntry['name'] . ': ' . $tEntry['obj_value'] . ' ' . 
$tEntry['unit_name']); + + unset($objectData['measurements'][$tKey]); + continue; + } + else if (in_array($tEntry['name'], ['Höhe', 'Größe'], true)) { + try { + $object->set_length_unit("mass2_hoehe_einheit", $tEntry['unit_name']); + } + catch (MDInvalidLengthUnit $e) { + } + $object->set_string("mass2_hoehe_wert", (string)$tEntry['obj_value']); + unset($objectData['measurements'][$tKey]); + continue; + } + else if (in_array($tEntry['name'], ['Tiefe', 'Länge'], true)) { + try { + $object->set_length_unit("mass2_laenge_einheit", $tEntry['unit_name']); + } + catch (MDInvalidLengthUnit $e) { + } + $object->set_string("mass2_laenge_wert", (string)$tEntry['obj_value']); + unset($objectData['measurements'][$tKey]); + continue; + } + else if (in_array($tEntry['name'], ['Breite', 'Dicke'], true)) { + try { + $object->set_length_unit("mass2_breite_einheit", $tEntry['unit_name']); + } + catch (MDInvalidLengthUnit $e) { + } + $object->set_string("mass2_breite_wert", (string)$tEntry['obj_value']); + unset($objectData['measurements'][$tKey]); + continue; + } + else if (in_array($tEntry['name'], ['Gewicht'], true)) { + try { + $object->set_weight_unit("mass2_gewicht_einheit", $tEntry['unit_name']); + } + catch (MDInvalidWeightUnit $e) { + } + $object->set_string("mass2_gewicht_wert", (string)$tEntry['obj_value']); + unset($objectData['measurements'][$tKey]); + continue; + } + else if (in_array($tEntry['name'], ['Durchmesser', 'größter Durchmesser', 'Außenmaß (Durchmesser)'], true)) { + try { + $object->set_length_unit("mass2_durchmesser_einheit", $tEntry['unit_name']); + } + catch (MDInvalidLengthUnit $e) { + } + $object->set_string("mass2_durchmesser_wert", (string)$tEntry['obj_value']); + unset($objectData['measurements'][$tKey]); + continue; + } + else if (in_array($tEntry['name'], ['innen', 'Umfang', 'Hängehöhe', 'Ringmaß'], true)) { + + $object->append_string("notizen_text1", PHP_EOL . $tEntry['name'] . ': ' . $tEntry['unit_name'] . ', ' . 
$tEntry['obj_value']); + unset($objectData['measurements'][$tKey]); + continue; + } + + } + if (empty($objectData['measurements'])) unset($objectData['measurements']); + } + + /* + if (isset($objectData['Standort'])) { + $object->set_string("standort_eigentlich", $objectData['Standort'] . ' / ' . $objectData['Zusatz']); + unset($objectData['Standort'], $objectData['Zusatz']); + } + + // Object base data + $object = new MDObject($version['mainDB'], $version['nodaDB'], $version['language'], $institution_id, $objectData['Inventarnummer'], $objectData["Objektbezeichnung"], $objectData["Titel"] ?: $objectData['Inventarnummer'], $objectData["Bemerkung"], $outputHandler); + + unset($objectData['Inventarnummer'], $objectData["Objektbezeichnung"], $objectData["Titel"], $objectData["Bemerkung"]); + + $object->set_objekt_publik($visibility); + + if (isset($objectData['Breite'])) { + $object->set_string("mass2_breite_wert", $objectData['Breite']); + $object->set_length_unit("mass2_breite_einheit", $objectData['Maßeinheit']); + unset($objectData['Breite'], $objectData['Maßeinheit']); + } + + if (isset($objectData['Höhe'])) { + $object->set_string("mass2_hoehe_wert", $objectData['Höhe']); + $object->set_length_unit("mass2_hoehe_einheit", $objectData['Maßeinheit2']); + unset($objectData['Höhe'], $objectData['Maßeinheit2']); + } + + if (isset($objectData['Standort'])) { + $object->set_string("standort_eigentlich", $objectData['Standort']); + unset($objectData['Standort']); + } + + unset($objectData['Objektart']); + $object->appendSeriesByName("Schultafeln", "Schultafeln", $seriesWriter); + + */ + + if (!empty($objectData)) { + throw new MDParserIncomplete(var_export($objectData, true)); + } + + $newObjectID = $objectWriter->writeObject($object, true, $insertOnly, $outputHandler); + $this->outputHandler->toLog("Imported to object #" . 
/**
 * Imports all LIDO records found in the XML files of an import folder.
 *
 * Every file with the extension "xml" in the folder is read, its namespace
 * prefixes are flattened (SimpleXML cannot address tag names containing ":"),
 * and each `lido` child of the document root is passed to LidoParserMspt.
 * The MDObject produced by the parser is then written with MDObjectWriter.
 *
 * @param array<string, mixed> $version        Instance to import into. Read keys:
 *                                             mainDB, nodaDB, link, filepath,
 *                                             dataFolderLink, language.
 * @param integer              $institution_id Institution to import to.
 * @param non-empty-string     $XMLFolder      Folder of the XML files to import,
 *                                             relative to the XML import dir.
 * @param string               $dataFolder     Data folder. Not used by this parser.
 * @param integer              $sammlung_id    Collection ID. Optional.
 * @param boolean              $visibility     Import objects to be directly visible?.
 * @param boolean              $insertOnly     If set to true, only new objects are added,
 *                                             old are not updated.
 *
 * @return void
 *
 * @throws MDFileDoesNotExist If the import folder does not exist.
 * @throws Exception          If an XML file cannot be loaded by SimpleXML.
 */
function parseImportXML(array $version, int $institution_id, string $XMLFolder, string $dataFolder = "", int $sammlung_id = 0, bool $visibility = false, bool $insertOnly = false): void {

    $importDir = MD_IMPORTER_CONF::$import_dir_xml . $XMLFolder;
    if (!is_dir($importDir)) {
        throw new MDFileDoesNotExist("The folder to import from ($XMLFolder) does not exist.");
    }

    $collectionWriter = new MDCollectionWriter($version['mainDB']);
    $seriesWriter     = new MDSeriesWriter($version['mainDB']);
    $literatureWriter = new MDLiteratureWriter($version['mainDB']);
    $tagWriter        = new MDTagWriter($version['nodaDB']);

    // Set up writers
    $outputHandler = new MDOutputHandler;
    $outputHandler->setVerbosity(2);

    $objectWriter = new MDObjectWriter($version['mainDB'], $version['nodaDB'], $version['link'], $version['filepath'], $version['dataFolderLink']);

    // Raise $startFile to resume an interrupted import: directory entries
    // whose (1-based) position is below it are skipped.
    $startFile = 0;
    $counter   = 0;

    foreach (MD_STD::scandir($importDir) as $xmlFile) {

        // The counter advances for every directory entry, not only XML files.
        ++$counter;
        if (pathinfo($xmlFile, PATHINFO_EXTENSION) !== "xml") {
            continue;
        }
        if ($counter < $startFile) {
            continue;
        }

        // LIDO contains : in tag names. SimpleXML can't handle those.
        // The "lido:" prefix is dropped, every remaining ":" becomes "___",
        // and the few places where a literal ":" is required are restored.
        $rawData = MD_STD::file_get_contents($importDir . "/" . $xmlFile);
        $rawData = str_replace("lido:", "", $rawData);
        $rawData = str_replace(":", "___", $rawData);
        $rawData = strtr($rawData, [
            "http___//" => "http://",
            "xmlns___"  => "xmlns:",
            "xsi___"    => "xsi:",
        ]);

        $xmlData = simplexml_load_string($rawData);
        if ($xmlData === false) {
            throw new Exception("XML couldn't be loaded");
        }

        foreach ($xmlData->lido as $lidoRecord) {

            try {

                $lidoParser = new LidoParserMspt($version['mainDB'], $version['nodaDB'],
                    $version['filepath'],
                    $version['language'], $institution_id,
                    $lidoRecord, $outputHandler,
                    $collectionWriter,
                    $seriesWriter,
                    $literatureWriter,
                    $tagWriter,
                    $sammlung_id, $visibility,
                );

                // $insertOnly is passed through as given by the caller.
                $objectWriter->writeObject($lidoParser->get_mdobject(), true, $insertOnly, $outputHandler);

            }
            catch (MDImageCorrupt $e) {
                // A corrupt image only skips this record; the import goes on.
                $outputHandler->toLog("Corrupt image with object $counter", 0);
            }

            $outputHandler->toLog("Done with object $counter", 2);

            // Throttle between objects
            usleep(IMPORTER_DELAY_PER_OBJECT);

        }

    }

}
/**
 * Imports objects from the tables of a "museo" inventory database.
 *
 * Rows are read from `museo_lausitz.inventory` joined with
 * `museo_lausitz.inventory_content` (group 41). The two XML columns `desc`
 * and `desc_lang_independent` are decrypted by the query, parsed and merged
 * into the row. Every known field is then mapped onto an MDObject and removed
 * from the row; anything left over (other than "-") aborts the import with
 * MDParserIncomplete, so that unmapped data is never dropped silently.
 *
 * @param array<string, mixed> $version        Instance to import into. Read keys:
 *                                             mainDB, nodaDB, link, filepath,
 *                                             dataFolderLink, language.
 * @param integer              $institution_id Institution to import to.
 * @param non-empty-string     $XMLFolder      Import folder. Only its existence is
 *                                             checked; the data comes from the DB.
 * @param string               $dataFolder     Data folder (below the file import
 *                                             dir) that holds the object photos.
 * @param integer              $sammlung_id    Collection ID. Optional. Not used here.
 * @param boolean              $visibility     Import objects to be directly visible?.
 * @param boolean              $insertOnly     If set to true, only new objects are added,
 *                                             old are not updated.
 *
 * @return void
 *
 * @throws MDFileDoesNotExist If the import folder does not exist.
 * @throws Exception          If the source query fails.
 * @throws MDParserIncomplete If a row contains fields this parser does not map.
 */
function parseImportXML(array $version, int $institution_id, string $XMLFolder, string $dataFolder = "", int $sammlung_id = 0, bool $visibility = false, bool $insertOnly = false): void {

    if (!is_dir(MD_IMPORTER_CONF::$import_dir_xml . $XMLFolder)) {
        throw new MDFileDoesNotExist("The folder to import from ($XMLFolder) does not exist.");
    }

    // Only the tag writer is used below. The other writers are still
    // instantiated as before, in case their constructors are relied upon.
    $collectionWriter = new MDCollectionWriter($version['mainDB']);
    $seriesWriter     = new MDSeriesWriter($version['mainDB']);
    $literatureWriter = new MDLiteratureWriter($version['mainDB']);
    $tagWriter        = new MDTagWriter($version['nodaDB']);

    // Set up writers
    $outputHandler = new MDOutputHandler;
    $outputHandler->setVerbosity(2);

    $objectWriter = new MDObjectWriter($version['mainDB'], $version['nodaDB'], $version['link'], $version['filepath'], $version['dataFolderLink']);

    // NOTE(review): the AES key, schema name and group ID are hard-coded.
    // A secret committed to the repository should be rotated and injected
    // through configuration instead.
    $result = $version['mainDB']->query("
        SELECT *
             , AES_DECRYPT(`desc`,'19d414f29654fbc402c4287a0a86e1b3') as `desc`, AES_DECRYPT(`desc_lang_independent`,'19d414f29654fbc402c4287a0a86e1b3') as `desc_lang_independent`
          FROM museo_lausitz.inventory, museo_lausitz.inventory_content
         WHERE inventory.group_id = 41 AND inventory_content.object_id = inventory.id
         ORDER BY inventory.id ASC");

    if ($result === false) {
        throw new Exception("Could not query the museo inventory tables");
    }

    // Columns that hold an XML document describing the object.
    $xmlFields = [
        'desc_lang_independent',
        'desc',
    ];

    // Source field => target field, copied 1:1 with set_string().
    $directStringFields = [
        'collector'         => 'erwerbender',
        'qualifier'         => 'ersterfasser',
        'lang'              => 'content_language',
        'current_location'  => 'standort_aktuell',
        'normal_location'   => 'standort_eigentlich',
        'physical_desc'     => 'zustand',
        'acquisition_price' => 'ankaufsumme',
        'acquisition_date'  => 'zeitpunkt_zugang',
    ];

    // Source fields appended (in this order) as plain lines to the notes.
    $noteFields = [
        'last_edit_user',
        'last_edit_time',
        'preparation_technic',
    ];

    // Source field => target field, appended as "<source field>: <value>".
    $labelledAppendFields = [
        'preparation_care'         => 'restaurierung',
        'preparation_preservation' => 'restaurierung',
        'reproduction_rights_note' => 'rechte_anmerkungen',
    ];

    // Raise $startAtCounter to resume an interrupted import.
    $startAtCounter = 0;
    $i = 0;

    while ($objectData = $result->fetch_assoc()) {

        ++$i;
        if ($i < $startAtCounter) {
            continue;
        }
        $outputHandler->toLog("Starting to process entry #{$i}", 2);

        unset($objectData['group_id'], $objectData['crypt_flag'],
            $objectData['object_id'], $objectData['object_photo']);

        // Expand the XML columns into top-level keys of $objectData.
        foreach ($xmlFields as $xmlField) {

            if (empty($objectData[$xmlField])) {
                continue;
            }

            $xmlData = simplexml_load_string($objectData[$xmlField], "SimpleXMLElement", LIBXML_NOCDATA);
            if (!$xmlData) {
                // Unparsable (or empty) XML: the raw value stays in the row.
                continue;
            }
            $objectData = array_merge($objectData, json_decode(MD_STD::json_encode_object($xmlData), true));

            // Measurements carry their unit in an attribute, which the JSON
            // round trip above does not expose as a flat value.
            if (!empty($xmlData->object_mesurements)) {
                foreach ($xmlData->object_mesurements->children() as $field) {
                    $objectData[$field->getName() . '_value'] = (string)$field;
                    $objectData[$field->getName() . '_unit']  = (string)$field->attributes()->unit;
                }
            }

            unset($objectData[$xmlField]);

        }

        if (!empty($objectData['deleted_flag'])) continue;

        // Drop XML attribute bags and every empty value.
        foreach ($objectData as $key => $value) {
            if (is_array($value) && !empty($value['@attributes'])) {
                unset($objectData[$key]['@attributes']);
                unset($value['@attributes']);
            }
            if (empty($value)) unset($objectData[$key]);
        }

        if (!empty($objectData['object_inventory_number'])) $invNo = $objectData['object_inventory_number'];
        else $invNo = 'museo-' . $objectData['id'];

        if (!empty($objectData['object_short_desc'])) $desc = $objectData['object_short_desc'];
        else $desc = '------';

        // An empty title has been unset by the cleanup loop above, so the key
        // must be tested before it is read. Falls back to the inventory number.
        $title = !empty($objectData['object_title']) ? $objectData['object_title'] : $invNo;

        $object = new MDObject($version['mainDB'], $version['nodaDB'], $version['language'], $institution_id, $invNo, mb_substr($title, 0, 40), $title, $desc, $outputHandler);

        unset($objectData['object_title'], $objectData['object_short_desc']);

        // Material / Technique

        $matTech = [];
        if (!empty($objectData['object_material'])) {
            $object->set_string("material2", $objectData['object_material']);
            $matTech[] = $objectData['object_material'];
        }
        if (!empty($objectData['object_technic'])) {
            $object->set_string("technik2", $objectData['object_technic']);
            $matTech[] = $objectData['object_technic'];
        }
        $object->set_objekt_material_technik(implode(', ', $matTech));
        unset($objectData['object_material'], $objectData['object_technic']);

        // Measurements

        $measurements = [];

        if (!empty($objectData['depth_value'])) {
            $object->set_string("mass2_laenge_wert", $objectData['depth_value']);
            if (!empty($objectData['depth_unit'])) {
                $object->set_length_unit("mass2_laenge_einheit", str_replace('cmm', 'cm', strtolower($objectData['depth_unit'])));
            }
            $measurements[] = $objectData['depth_value'];
        }
        unset($objectData['depth_value'], $objectData['depth_unit']);

        if (!empty($objectData['height_value'])) {
            $object->set_string("mass2_hoehe_wert", $objectData['height_value']);
            try {
                if (!empty($objectData['height_unit'])) {
                    $heightUnit = str_replace(' ', '', trim($objectData['height_unit'], ' ,.'));
                    $object->set_length_unit("mass2_hoehe_einheit", str_replace('cmm', 'cm', $heightUnit));
                }
            }
            catch (MDInvalidLengthUnit $e) {
                // The value is kept without a unit; leave a trace in the log.
                $outputHandler->toLog("Ignoring invalid height unit in entry #{$i}", 2);
            }
            $measurements[] = $objectData['height_value'];
        }
        unset($objectData['height_value'], $objectData['height_unit']);

        if (!empty($objectData['width_value'])) {
            $object->set_string("mass2_breite_wert", $objectData['width_value']);
            if (!empty($objectData['width_unit'])) {
                $object->set_length_unit("mass2_breite_einheit", $objectData['width_unit']);
            }
            $measurements[] = $objectData['width_value'];
        }
        unset($objectData['width_value'], $objectData['width_unit']);

        if (!empty($objectData['weight_value'])) {
            $object->set_string("mass2_gewicht_wert", $objectData['weight_value']);
            if (!empty($objectData['weight_unit'])) {
                $object->set_weight_unit("mass2_gewicht_einheit", $objectData['weight_unit']);
            }
            $measurements[] = $objectData['weight_value'];
        }
        unset($objectData['weight_value'], $objectData['weight_unit']);

        $object->set_objekt_masse(implode(', ', $measurements));

        // Fields copied without transformation

        foreach ($directStringFields as $sourceKey => $targetField) {
            if (!empty($objectData[$sourceKey])) {
                $object->set_string($targetField, $objectData[$sourceKey]);
            }
            unset($objectData[$sourceKey]);
        }

        foreach ($noteFields as $sourceKey) {
            if (!empty($objectData[$sourceKey])) {
                $object->append_string('notizen_text1', PHP_EOL . $objectData[$sourceKey]);
            }
            unset($objectData[$sourceKey]);
        }

        // Free-text date: becomes a tag unless it consists of punctuation only.
        if (!empty($objectData['object_date'])
            && !empty(trim($objectData['object_date'], '^ ,.:;-_'))
        ) {
            // The description and tag writer are passed as in the other
            // parsers of this archive (the original call passed the name only).
            // NOTE(review): confirm against the signature of appendTagByName().
            $object->appendTagByName($objectData['object_date'], "", $tagWriter);
        }
        unset($objectData['object_date']);

        // Must run before reproduction_rights_note is appended to the same field.
        if (!empty($objectData['current_owner'])) {
            $object->set_string('rechte_anmerkungen', 'Eigentümer: ' . $objectData['current_owner']);
        }
        unset($objectData['current_owner']);

        // Events

        if (!empty($objectData['object_maker']) || !empty($objectData['production_place']) || !empty($objectData['object_date_year'])) {
            // Event type 1 — presumably "production"; confirm against the event type list.
            $event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], 1, $outputHandler);
            if (!empty($objectData['object_maker'])) $event->set_persinst_id($objectData['object_maker']);
            if (!empty($objectData['production_place'])) $event->set_orte_id(mb_substr($objectData['production_place'], 0, 180));
            if (!empty($objectData['object_date_year'])) $event->set_zeiten_id((string)$objectData['object_date_year']);
            // NOTE(review): a non-empty deviation marks the time as "sicher" — confirm this is intended.
            if (!empty($objectData['object_date_year_deviation'])) $event->set_ereignis_zeit_sicher(true);

            // Only keep the event if at least one part could be resolved.
            if ($event->get_orte_id() !== 0
                || $event->get_persinst_id() !== 0
                || $event->get_zeiten_id() !== 0
            ) {
                $object->appendEvent($event);
            }
        }
        unset($objectData['object_maker'],
            $objectData['production_place'],
            $objectData['object_date_year'],
            $objectData['object_date_year_deviation']);

        if (!empty($objectData['associated_recovery_place'])) {
            $event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], 2, $outputHandler);
            $event->set_orte_id(mb_substr($objectData['associated_recovery_place'], 0, 180));
            if ($event->get_orte_id() !== 0
                || $event->get_persinst_id() !== 0
                || $event->get_zeiten_id() !== 0
            ) {
                $object->appendEvent($event);
            }
        }
        unset($objectData['associated_recovery_place']);

        if (!empty($objectData['associated_place'])) {
            $event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], 22, $outputHandler);
            $event->set_orte_id(mb_substr($objectData['associated_place'], 0, 180));

            if ($event->get_orte_id() !== 0) {
                $object->appendEvent($event);
            }
        }
        unset($objectData['associated_place']);

        // Additions to the description

        if (!empty($objectData['misc_advanced'])) {
            // A single <item> decodes to one associative array, several
            // <item>s decode to a list of such arrays.
            if (!isset($objectData['misc_advanced']['item'][0])) {
                $object->append_objekt_beschreibung(PHP_EOL . implode(': ', $objectData['misc_advanced']['item']));
            }
            else {
                foreach ($objectData['misc_advanced']['item'] as $item) {
                    $object->append_objekt_beschreibung(PHP_EOL . implode(': ', $item));
                }
            }
        }
        unset($objectData['misc_advanced']);

        if (!empty($objectData['object_subject'])) {
            $object->append_objekt_beschreibung(PHP_EOL . 'Thema: ' . (string)$objectData['object_subject']);
        }
        unset($objectData['object_subject']);

        // The source ID is kept as secondary inventory number; the key itself
        // is removed further below.
        if (!empty($objectData['id'])) $object->set_string("invnr2", (string)$objectData['id']);

        if (!empty($objectData['acquisition_method']) && trim($objectData['acquisition_method']) !== '-') {
            $object->set_entry_type("zugang_art", $objectData['acquisition_method']);
            $object->set_string("notizen_text2", 'Zugangsart: ' . $objectData['acquisition_method']);
        }
        unset($objectData['acquisition_method']);

        foreach ($labelledAppendFields as $sourceKey => $targetField) {
            if (!empty($objectData[$sourceKey])) {
                $object->append_string($targetField, PHP_EOL . $sourceKey . ': ' . $objectData[$sourceKey]);
            }
            unset($objectData[$sourceKey]);
        }

        if (!empty($objectData['object_markings'])) {
            $object->append_string("beschriftung2", $objectData['object_markings']);
        }
        unset($objectData['object_markings']);

        // Photo

        if (!empty($objectData['object_photo_url'])) {
            // Prefer the "original."-prefixed file if there is one.
            $photoDir  = MD_IMPORTER_CONF::$import_dir_files . $dataFolder;
            $photo_url = $photoDir . "/original." . $objectData["object_photo_url"];
            if (!file_exists($photo_url)) {
                $photo_url = $photoDir . "/" . $objectData["object_photo_url"];
            }

            try {
                $image = new MDImage($version['mainDB'], (string)$object->get_string("objekt_name"), $photo_url);
                if (!empty($objectData['object_photo_text']) && is_string($objectData['object_photo_text'])) {
                    $image->set_image_beschreibung($objectData['object_photo_text']);
                }
                if (!empty($objectData['object_photo_marker_id_card'])) {
                    $image->set_image_beschreibung($image->get_image_beschreibung() . PHP_EOL . 'Marker (ID card): ' . $objectData['object_photo_marker_id_card']);
                }
                if (!empty($objectData['object_photo_marker_vitrine'])) {
                    $image->set_image_beschreibung($image->get_image_beschreibung() . PHP_EOL . 'Marker (Vitrine): ' . $objectData['object_photo_marker_vitrine']);
                }
                $object->appendImage($image);
            }
            catch (MDFileDoesNotExist $e) {
                // The object is imported without its image; leave a trace in the log.
                $outputHandler->toLog("Image file missing for entry #{$i}: {$photo_url}", 2);
            }
        }
        unset($objectData['object_photo_url'],
            $objectData['object_photo_marker_id_card'],
            $objectData['object_photo_marker_vitrine'],
            $objectData['object_photo_text']);

        // Unset ID and write
        unset($objectData['id'], $objectData['@attributes'],
            $objectData['object_mesurements'],
            $objectData['object_photo_sort'],
            $objectData['version'],
            $objectData['object_photo'],
            $objectData['desc'],
            $objectData['object'],
            $objectData['object_inventory_number']);

        // Anything still present has no mapping. "-" placeholders are tolerated.
        foreach ($objectData as $value) {
            if ($value === '-') continue;
            throw new MDParserIncomplete("Unparsed contents in object: " . var_export($objectData, true));
        }

        $object->set_objekt_publik($visibility);
        $objectWriter->writeObject($object, true, $insertOnly, $outputHandler);

        // Throttle between objects
        usleep(IMPORTER_DELAY_PER_OBJECT);

    }
    $result->close();

}