* @author Stefan Rohde-Enslin
 * @link https://imports.museum-digital.org/importer/parsers/csvxml.php
 */
declare(strict_types = 1);

/**
 * Parse function.
 *
 * Reads every file of the given XML folder, splits its contents into single
 * records at the separator "_____-----_____" and loads each record with
 * SimpleXML. For every record whose inventory number is not yet present in
 * table `objekt`, an MDObject is assembled (base data, tag, notes, location,
 * events, content language, collections, source link, edit history) and
 * handed to the object writer.
 *
 * Each piece of the record is unset once it has been handled. Whatever is
 * left over at the end of a record triggers an MDParserIncomplete exception.
 *
 * @param array            $version        Instance to import into.
 * @param integer          $institution_id Institution to import to.
 * @param non-empty-string $XMLFolder      Folder of the XML files to import.
 * @param string           $dataFolder     Data folder. Not used by this parser.
 * @param integer          $sammlung_id    Collection ID. Optional. Not used by this parser.
 * @param boolean          $visibility     Import objects to be directly visible?.
 * @param boolean          $insertOnly     If set to true, only new objects are added,
 *                                         old are not updated.
 *
 * @throws MDFileDoesNotExist If the folder to import from does not exist.
 * @throws MDParserIncomplete If a record contains data this parser does not handle.
 * @throws Exception          If a record cannot be loaded as XML or references an
 *                            unknown actor role, language or collection.
 *
 * @return void
 */
function parseImportXML(array $version, int $institution_id, string $XMLFolder, string $dataFolder = "", int $sammlung_id = 0, bool $visibility = false, bool $insertOnly = false): void {

    $importDir = MD_IMPORTER_CONF::$import_dir_xml . "{$XMLFolder}";
    if (!is_dir($importDir)) {
        throw new MDFileDoesNotExist("The folder to import from ($XMLFolder) does not exist.");
    }

    // Set up writers
    // NOTE(review): only $tagWriter is referenced further down. The other
    // writers are instantiated but never used in this function; they are kept
    // in case their constructors have side effects - confirm before removing.
    $collectionWriter   = new MDCollectionWriter($version['mainDB']);
    $linkWriter         = new MDLinkWriter($version['mainDB']);
    $seriesWriter       = new MDSeriesWriter($version['mainDB']);
    $exhibitionWriter   = new MDExhibitionWriter($version['mainDB']);
    $objectRecordWriter = new MDObjectRecordWriter($version['mainDB']);
    $tagWriter          = new MDTagWriter($version['nodaDB']);

    $outputHandler = new MDOutputHandler;
    $outputHandler->setVerbosity(2);

    $objectWriter = new MDObjectWriter($version['mainDB'], $version['nodaDB'], $version['link'], $version['filepath'], $version['dataFolderLink']);

    // Switches that are not set by this parser:
    // $objectWriter->disableUpdateBaseData = true;
    // $objectWriter->disableImportAdditionalData = true;
    // $objectWriter->disableImportCollections = true;
    // $objectWriter->disableImportHyperlinks = true;
    // $objectWriter->disableImportReception = true;
    $objectWriter->disableImportImagesResources = true;
    $objectWriter->disableImportTranscriptions  = true;
    $objectWriter->disableImportSeries          = true;
    $objectWriter->disableImportObjectRecords   = true;
    $objectWriter->disableImportExhibitions     = true;
    $objectWriter->disableImportTags            = true;
    $objectWriter->disableImportLiterature      = true;
    $objectWriter->disableImportMarkings        = true;

    // Returns the part of $value following its last "/". If $value contains
    // no "/", it is returned unchanged (strrpos() returns false in that case,
    // which must not be used for offset arithmetic).
    $afterLastSlash = static function (string $value): string {
        $slashPos = strrpos($value, '/');
        return $slashPos === false ? $value : substr($value, $slashPos + 1);
    };

    // Lookup table does not change between files, so it is built only once.
    $languages_iso639 = array_flip(MDLanguagesSet::LANGUAGES_ISO639_2B);

    // Record identifiers of related items mapped to the collection ID the
    // object is to be appended to.
    $collectionsByRecordId = [
        "ead_DE-F25_37_VirtuellerBestand"       => 5,
        "ead_DE-F25_37_VirtuellerBestand_added" => 5,
        "DE-611-BF-9624"                        => 521,
        "DE-611-BF-9626"                        => 534,
        "DE-611-BF-9627"                        => 543,
        "DE-611-BF-9629"                        => 541,
        "DE-611-BF-9630"                        => 540,
        "DE-611-BF-9621"                        => 539,
        "DE-611-BF-9623"                        => 538,
        "DE-611-BF-9625"                        => 537,
        "DE-611-BF-9622"                        => 536,
        "DE-611-BF-37593"                       => 545,
        "DE-611-BF-9636"                        => 544,
    ];

    $i = 0;
    // Records with a running number below this value are skipped.
    $startAtCounter = 0;

    foreach (MD_STD::scandir($importDir) as $xmlFile) {

        $fileContents = MD_STD::file_get_contents("{$importDir}/{$xmlFile}");
        $allRecords = explode('_____-----_____', $fileContents);
        $fileContents = null;

        foreach ($allRecords as $recordStr) {

            ++$i;
            if ($i < $startAtCounter) {
                continue;
            }

            $objectData = simplexml_load_string($recordStr, "SimpleXMLElement", LIBXML_NOCDATA);
            if (!$objectData) {
                throw new Exception("Cannot load raw data into SimpleXML ({$recordStr})");
            }

            // Determine the inventory number: prefer a shelf locator of the
            // form "Hs-" + 4 or 5 digits (or the bare 4 or 5 digits, which get
            // the "Hs-" prefix). Otherwise fall back to the last path segment
            // of the identifier.
            $shelfLocator = (string)$objectData->location->shelfLocator;
            $invNo = '';
            if (\preg_match('/^Hs-[0-9]{4,5}$/', $shelfLocator)) {
                $invNo = $shelfLocator;
                $outputHandler->toLog("Using inventory number $invNo (Hs- set) - " . (string)$objectData->identifier, 2);
            } else if (\preg_match('/^[0-9]{4,5}$/', $shelfLocator)) {
                $invNo = 'Hs-' . $shelfLocator;
                $outputHandler->toLog("Using inventory number $invNo", 2);
            }
            if (empty($invNo)) {
                $invNo = $afterLastSlash((string)$objectData->identifier);
            }

            $description = $objectData->abstract . PHP_EOL . PHP_EOL . $objectData->physicalDescription;

            $title = (string)$objectData->titleInfo->title;
            $subTitle = (string)$objectData->titleInfo->subTitle;
            if (!empty($subTitle)) {
                $title .= ': ' . $subTitle;
                unset($objectData->titleInfo->subTitle);
            }

            // Check if inventory number is known already; known objects are skipped
            $result = $version['mainDB']->query_by_stmt("SELECT 1 FROM `objekt` WHERE `objekt_inventarnr` = ?", "s", $invNo);
            $alreadyKnown = ($result->num_rows !== 0);
            $result->close();
            $result = null;
            if ($alreadyKnown) {
                continue;
            }

            $object = new MDObject($version['mainDB'], $version['nodaDB'], $version['language'], $institution_id, $invNo, (string)$objectData->genre, $title, $description, $outputHandler);

            // An extent ending in " Br." is stored as number of pieces if it
            // is shorter than 8 bytes (longer ones are dropped); any other
            // extent is appended to the description.
            if (!empty($objectData->physicalDescription->extent)) {
                $extent = (string)$objectData->physicalDescription->extent;
                if (str_ends_with($extent, " Br.")) {
                    if (strlen($extent) < 8) {
                        $object->set_string("stueckzahl", str_replace(" Br.", "", $extent));
                    }
                } else {
                    $object->append_objekt_beschreibung($extent);
                }
            }

            unset($objectData->titleInfo->title, $objectData->abstract, $objectData->physicalDescription);
            if (count($objectData->titleInfo->children()) === 0) {
                unset($objectData->titleInfo);
            }

            // $objectData->identifier is used further down for setting a link to the source repository
            // $objectData->genre is used for a tag
            $object->appendTagByName((string)$objectData->genre, "", $tagWriter);
            unset($objectData->genre);

            // Notes
            if (count($objectData->note) > 1) {
                foreach ($objectData->note as $note) {
                    $object->append_string("notizen_text1", PHP_EOL . (string)$note);
                }
            } else if (!empty((string)$objectData->note)) {
                $object->set_string("notizen_text1", (string)$objectData->note);
            }
            unset($objectData->note);

            // Location
            unset($objectData->location->physicalLocation);
            if (!empty($shelfLocator)) {
                $object->append_string("standort_eigentlich", $shelfLocator);
            }
            unset($objectData->location->shelfLocator);
            unset($objectData->location);

            // Actors: each name becomes an event of the type its role maps to
            foreach ($objectData->name as $actor) {

                // The second roleTerm is preferred, if there is one
                $linkTypeName = '';
                if (!empty((string)$actor->role->roleTerm[1])) {
                    $linkTypeName = (string)$actor->role->roleTerm[1];
                } else if (!empty((string)$actor->role->roleTerm)) {
                    $linkTypeName = (string)$actor->role->roleTerm;
                }

                if (!isset(MDConcActor::ACTOR_ROLES_TO_EVENT_TYPE[$linkTypeName])) {
                    throw new Exception("Unknown actor type: " . (string)$linkTypeName . ' in ' . (string)$objectData->identifier . ' for ' . (string)$actor->namePart . ' ///// ' . PHP_EOL . PHP_EOL . var_export($actor->role->roleTerm, true));
                }
                $linkType = MDConcActor::ACTOR_ROLES_TO_EVENT_TYPE[$linkTypeName];

                $gndUrl = (string)$actor->attributes()->valueURI;
                $gndId  = $afterLastSlash($gndUrl);

                $md_event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], $linkType, $outputHandler);
                $md_event->set_persinst_id((string)$actor->namePart, "", "", ['gnd' => $gndId]);

                if ($md_event->get_persinst_id() !== 0) {

                    // Handle origin information for production events
                    if (!empty($objectData->originInfo)) {

                        $isProduction = in_array($linkType, MDEventsSet::EVENTS_PRODUCTION, true);

                        if ($isProduction && !empty($objectData->originInfo->dateCreated)) {

                            // The second dateCreated element is the one evaluated
                            $dateCreated = (string)$objectData->originInfo->dateCreated[1];

                            if (str_starts_with($dateCreated, 'o.D. [')) {
                                // "o.D. [<date>]": use what is inside the brackets
                                $md_event->set_zeiten_id(trim(substr($dateCreated, 5), " []"));
                            } else if (str_contains($dateCreated, "[")
                                && strlen($dateCreated) === 23
                                && substr($dateCreated, 0, 10) === substr($dateCreated, 12, 10)
                            ) {
                                // Same 10-character date twice, the second one in brackets
                                $md_event->set_zeiten_id(substr($dateCreated, 0, 10));
                            } else {
                                $md_event->set_zeiten_id($dateCreated);
                            }

                            unset($objectData->originInfo->dateCreated);

                        }

                        if ($isProduction && !empty($objectData->originInfo->place)) {
                            $md_event->set_orte_id((string)$objectData->originInfo->place->placeTerm);
                            unset($objectData->originInfo->place->placeTerm);
                            if (empty($objectData->originInfo->place->children())) {
                                unset($objectData->originInfo->place);
                            }
                        }

                        if (empty($objectData->originInfo->children())) {
                            unset($objectData->originInfo);
                        }

                    }

                    $object->appendEvent($md_event);

                }

            }
            unset($objectData->name);

            // If originInfo is still set here, try handling it using an event of type 1.
            // NOTE(review): unlike the actor branch above, the date is passed
            // on as is here (no special handling of "o.D. [...]" or doubled
            // dates) - confirm that this is intended.
            if (!empty($objectData->originInfo)) {

                $md_event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], 1, $outputHandler);

                if (!empty($objectData->originInfo->dateCreated)) {
                    $md_event->set_zeiten_id((string)$objectData->originInfo->dateCreated[1]);
                    unset($objectData->originInfo->dateCreated);
                }

                if (!empty($objectData->originInfo->place)) {
                    $md_event->set_orte_id((string)$objectData->originInfo->place->placeTerm);
                    unset($objectData->originInfo->place->placeTerm);
                    if (empty($objectData->originInfo->place->children())) {
                        unset($objectData->originInfo->place);
                    }
                }

                if (empty($objectData->originInfo->children())) {
                    unset($objectData->originInfo);
                }

                // Only events that actually got a time or a place are kept
                if ($md_event->get_zeiten_id() !== 0 || $md_event->get_orte_id() !== 0) {
                    $object->appendEvent($md_event);
                }

            }

            // Content language
            if (!empty($objectData->language)) {
                $languageCode = (string)$objectData->language->languageTerm[0];
                if (!isset($languages_iso639[$languageCode])) {
                    // Fail explicitly instead of passing an undefined array entry on
                    throw new Exception("Unknown language: " . $languageCode . ' in ' . (string)$objectData->identifier);
                }
                $object->set_string("content_language", $languages_iso639[$languageCode]);
                $object->set_bool("content_language_show_md", true);
                $object->set_bool("content_language_show_extern", true);
                unset($objectData->language);
            }

            // Collections: only related items of type "constituent" are evaluated
            if (!empty($objectData->relatedItem)) {
                foreach ($objectData->relatedItem as $relatedItem) {

                    if ((string)$relatedItem->attributes()->type !== 'constituent') {
                        continue;
                    }

                    $recordId = (string)$relatedItem->recordInfo->recordIdentifier;
                    if (isset($collectionsByRecordId[$recordId])) {
                        $object->appendCollectionByID($collectionsByRecordId[$recordId]);
                        continue;
                    }

                    // Unmapped related items titled "Nachlaß..." / "Teilnachlaß..." are ignored
                    $relatedTitle = (string)$relatedItem->titleInfo->title;
                    if (str_starts_with($relatedTitle, 'Nachlaß') || str_starts_with($relatedTitle, 'Teilnachlaß')) {
                        continue;
                    }

                    throw new Exception("Unknown collection: " . var_export($relatedItem, true));

                }
            }
            // Anything else in relatedItem is skipped for now
            unset($objectData->relatedItem);

            unset($objectData->recordInfo->recordContentSource);

            // Link to the source repository
            $object->appendLinkByName((string)$objectData->identifier, "Das Objekt bei Kalliope");
            unset($objectData->identifier);
            unset($objectData->recordInfo->recordIdentifier);

            // Import edit history at kalliope
            $editHistory = ["Ersterfassung bei Kalliope: " . $objectData->recordInfo->recordCreationDate];
            foreach ($objectData->recordInfo->recordChangeDate as $changeDate) {
                $editHistory[] = "Bearbeitung bei Kalliope: " . (string)$changeDate;
            }
            $object->append_string("notizen_text1", PHP_EOL . PHP_EOL . implode(PHP_EOL, $editHistory));

            unset($objectData->recordInfo->recordCreationDate, $objectData->recordInfo->recordChangeDate);
            if (count($objectData->recordInfo->children()) === 0) {
                unset($objectData->recordInfo);
            }

            // If everything has been handled, nothing is left of the record
            if (count($objectData->children()) === 0) {
                unset($objectData);
            }

            $object->set_objekt_publik($visibility);

            // NOTE(review): the object is written before the completeness
            // check below, so a record with unhandled data is already handed
            // to the writer when the exception is thrown - confirm that this
            // order is intended.
            $objectWriter->writeObject($object, true, $insertOnly, $outputHandler);

            if (!empty($objectData)) {
                throw new MDParserIncomplete("Incomplete parser: " . var_export($objectData, true));
            }

            $outputHandler->toLog("Done with object $i", 2);

            // Pause between objects; usleep() takes the delay in microseconds
            usleep(IMPORTER_DELAY_PER_OBJECT);

        }

    }

}