370 lines
17 KiB
PHP
370 lines
17 KiB
PHP
|
<?PHP
|
||
|
/**
|
||
|
* Parser for XML generated through CSVXML.
|
||
|
*
|
||
|
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
|
||
|
* @author Stefan Rohde-Enslin <s.rohde-enslin@museum-digital.de>
|
||
|
* @link https://imports.museum-digital.org/importer/parsers/csvxml.php
|
||
|
*/
|
||
|
declare(strict_types = 1);
|
||
|
|
||
|
/**
 * Parse function.
 *
 * Reads every file in the given XML folder, splits it into single records
 * (separated by "_____-----_____"), maps each record onto an MDObject and
 * writes it using MDObjectWriter. Records whose inventory number is already
 * present in the `objekt` table are skipped.
 *
 * Every XML node that has been consumed is unset from the parsed record; if
 * anything is left over at the end, an MDParserIncomplete exception is thrown
 * so that unhandled input is never silently ignored.
 *
 * @param array<mixed>     $version        Instance to import into.
 * @param integer          $institution_id Institution to import to.
 * @param non-empty-string $XMLFolder      Folder of the XML files to import.
 * @param string           $dataFolder     Data folder. Currently not used by this
 *                                         parser (import of images/resources is disabled).
 * @param integer          $sammlung_id    Collection ID. Optional. Currently not used
 *                                         by this parser.
 * @param boolean          $visibility     Import objects to be directly visible?.
 * @param boolean          $insertOnly     If set to true, only new objects are added,
 *                                         old are not updated.
 *
 * @throws MDFileDoesNotExist If the import folder does not exist.
 * @throws MDParserIncomplete If a record contains nodes this parser does not handle.
 * @throws Exception          If a record cannot be parsed as XML or refers to an
 *                            unknown actor role, language code or collection.
 *
 * @return void
 */
function parseImportXML(array $version, int $institution_id, string $XMLFolder, string $dataFolder = "", int $sammlung_id = 0, bool $visibility = false, bool $insertOnly = false): void {

    $importDir = MD_IMPORTER_CONF::$import_dir_xml . $XMLFolder;
    if (!is_dir($importDir)) {
        throw new MDFileDoesNotExist("The folder to import from ($XMLFolder) does not exist.");
    }

    // Set up writers
    // NOTE(review): all writers except $tagWriter are never used below. They are
    // kept in case their constructors have side effects - confirm and remove.

    $collectionWriter   = new MDCollectionWriter($version['mainDB']);
    $linkWriter         = new MDLinkWriter($version['mainDB']);
    $seriesWriter       = new MDSeriesWriter($version['mainDB']);
    $exhibitionWriter   = new MDExhibitionWriter($version['mainDB']);
    $objectRecordWriter = new MDObjectRecordWriter($version['mainDB']);
    $tagWriter          = new MDTagWriter($version['nodaDB']);

    $outputHandler = new MDOutputHandler;
    $outputHandler->setVerbosity(2);

    $objectWriter = new MDObjectWriter($version['mainDB'], $version['nodaDB'], $version['link'], $version['filepath'], $version['dataFolderLink']);

    // Switch off the import sections this parser does not provide data for.
    // The remaining toggles (disableUpdateBaseData, disableImportAdditionalData,
    // disableImportCollections, disableImportHyperlinks, disableImportReception)
    // are deliberately left at their defaults.
    $objectWriter->disableImportImagesResources = true;
    $objectWriter->disableImportTranscriptions  = true;
    $objectWriter->disableImportSeries          = true;
    $objectWriter->disableImportObjectRecords   = true;
    $objectWriter->disableImportExhibitions     = true;
    $objectWriter->disableImportTags            = true;
    $objectWriter->disableImportLiterature      = true;
    $objectWriter->disableImportMarkings        = true;

    // Returns the part of $value following its last "/". If $value contains no
    // slash, it is returned unchanged (strrpos() returns false in that case, and
    // "false + 1" would otherwise silently cut off the first character).
    $lastPathSegment = static function (string $value): string {
        $slashPos = strrpos($value, '/');
        return $slashPos === false ? $value : substr($value, $slashPos + 1);
    };

    // Reduces the date notations found in originInfo->dateCreated to the part
    // that is passed on to MDEvent::set_zeiten_id():
    // - "o.D. [...]": only the bracketed content is kept.
    // - A 23 byte value containing "[" whose first ten bytes are repeated at
    //   offset 12 (e.g. "1900-01-01 [1900-01-01]"): only the first ten bytes are kept.
    // - Anything else is passed through unchanged.
    $normalizeDateCreated = static function (string $dateCreated): string {
        if (str_starts_with($dateCreated, 'o.D. [')) {
            return trim(substr($dateCreated, 5), " []");
        }
        if (str_contains($dateCreated, "[")
            && strlen($dateCreated) === 23
            && substr($dateCreated, 0, 10) === substr($dateCreated, 12, 10)
        ) {
            return substr($dateCreated, 0, 10);
        }
        return $dateCreated;
    };

    // Record identifiers of constituent relatedItem entries, mapped to the
    // collection ID passed to MDObject::appendCollectionByID().
    $collectionIdsByRecordIdentifier = [
        'ead_DE-F25_37_VirtuellerBestand'       => 5,
        'ead_DE-F25_37_VirtuellerBestand_added' => 5,
        'DE-611-BF-9624'                        => 521,
        'DE-611-BF-9626'                        => 534,
        'DE-611-BF-9627'                        => 543,
        'DE-611-BF-9629'                        => 541,
        'DE-611-BF-9630'                        => 540,
        'DE-611-BF-9621'                        => 539,
        'DE-611-BF-9623'                        => 538,
        'DE-611-BF-9625'                        => 537,
        'DE-611-BF-9622'                        => 536,
        'DE-611-BF-37593'                       => 545,
        'DE-611-BF-9636'                        => 544,
    ];

    // The flipped language list does not depend on the file, so build it once.
    $languages_iso639 = array_flip(MDLanguagesSet::LANGUAGES_ISO639_2B);

    // Running record counter across all files. Raise $startAtCounter to resume
    // an aborted import: records with a lower counter are skipped.
    $i = 0;
    $startAtCounter = 0;

    foreach (MD_STD::scandir($importDir) as $xmlFile) {

        $fileContents = MD_STD::file_get_contents("{$importDir}/{$xmlFile}");
        $allRecords = explode('_____-----_____', $fileContents);
        // Release the raw file contents early; only the split records are needed.
        $fileContents = null;

        foreach ($allRecords as $recordStr) {

            ++$i;
            if ($i < $startAtCounter) {
                continue;
            }

            $objectData = simplexml_load_string($recordStr, "SimpleXMLElement", LIBXML_NOCDATA);
            if (!$objectData) {
                throw new Exception("Cannot load raw data into SimpleXML ({$recordStr})");
            }

            // Determine the inventory number: prefer a shelf locator of the form
            // "Hs-" + 4 or 5 digits (adding the "Hs-" prefix where it is missing),
            // fall back to the last path segment of the identifier.
            $shelfLocator = (string)$objectData->location->shelfLocator;
            $invNo = '';
            if (\preg_match('/^Hs-[0-9]{4,5}$/', $shelfLocator) === 1) {
                $invNo = $shelfLocator;
                $outputHandler->toLog("Using inventory number $invNo (Hs- set) - " . (string)$objectData->identifier, 2);
            }
            else if (\preg_match('/^[0-9]{4,5}$/', $shelfLocator) === 1) {
                $invNo = 'Hs-' . $shelfLocator;
                $outputHandler->toLog("Using inventory number $invNo", 2);
            }

            if ($invNo === '') {
                $invNo = $lastPathSegment((string)$objectData->identifier);
            }

            $description = (string)$objectData->abstract . PHP_EOL . PHP_EOL . (string)$objectData->physicalDescription;

            $title = (string)$objectData->titleInfo->title;
            $subTitle = (string)$objectData->titleInfo->subTitle;
            if (!empty($subTitle)) {
                $title .= ': ' . $subTitle;
                unset($objectData->titleInfo->subTitle);
            }

            // Check if inventory number is known already
            // NOTE(review): the lookup is not restricted to $institution_id - confirm
            // that inventory numbers are meant to be unique across institutions here.
            $result = $version['mainDB']->query_by_stmt("SELECT 1
                FROM `objekt`
                WHERE `objekt_inventarnr` = ?", "s", $invNo);
            $alreadyKnown = ($result->num_rows !== 0);
            $result->close();
            $result = null;

            if ($alreadyKnown) {
                continue;
            }

            $object = new MDObject($version['mainDB'], $version['nodaDB'], $version['language'], $institution_id, $invNo, (string)$objectData->genre, $title, $description, $outputHandler);

            if (!empty($objectData->physicalDescription->extent)) {
                $extent = (string)$objectData->physicalDescription->extent;
                if (str_ends_with($extent, " Br.")) {
                    // Short values ending in " Br." provide the number of pieces.
                    // Longer ones are dropped without further handling.
                    if (strlen($extent) < 8) {
                        $object->set_string("stueckzahl", str_replace(" Br.", "", $extent));
                    }
                }
                else {
                    $object->append_objekt_beschreibung($extent);
                }
            }

            unset($objectData->titleInfo->title, $objectData->abstract, $objectData->physicalDescription);
            if (count($objectData->titleInfo->children()) === 0) {
                unset($objectData->titleInfo);
            }

            // The genre doubles as a tag.
            $object->appendTagByName((string)$objectData->genre, "", $tagWriter);
            unset($objectData->genre);

            // Notes: several notes are appended one per line, a single one is set directly.
            if (count($objectData->note) > 1) {
                foreach ($objectData->note as $note) {
                    $object->append_string("notizen_text1", PHP_EOL . (string)$note);
                }
            }
            else if (!empty((string)$objectData->note)) {
                $object->set_string("notizen_text1", (string)$objectData->note);
            }
            unset($objectData->note);

            // Location: the physical location is discarded, the shelf locator is kept.
            unset($objectData->location->physicalLocation);

            if (!empty($shelfLocator)) {
                $object->append_string("standort_eigentlich", $shelfLocator);
            }
            unset($objectData->location->shelfLocator);
            unset($objectData->location);

            // Actors: each <name> becomes an event of the type its role maps to.
            foreach ($objectData->name as $actor) {

                // Prefer the second roleTerm, fall back to the first one.
                // An empty string yields the same array lookup as null did before,
                // without using null as an array offset.
                $linkTypeName = '';
                $secondRoleTerm = (string)$actor->role->roleTerm[1];
                $firstRoleTerm = (string)$actor->role->roleTerm;
                if (!empty($secondRoleTerm)) {
                    $linkTypeName = $secondRoleTerm;
                }
                else if (!empty($firstRoleTerm)) {
                    $linkTypeName = $firstRoleTerm;
                }

                if (!isset(MDConcActor::ACTOR_ROLES_TO_EVENT_TYPE[$linkTypeName])) {
                    throw new Exception("Unknown actor type: " . (string)$linkTypeName . ' in ' . (string)$objectData->identifier . ' for ' . (string)$actor->namePart . ' ///// ' . PHP_EOL . PHP_EOL . var_export($actor->role->roleTerm, true));
                }

                $linkType = MDConcActor::ACTOR_ROLES_TO_EVENT_TYPE[$linkTypeName];

                // The last path segment of the valueURI attribute is used as GND ID.
                $gndId = $lastPathSegment((string)$actor->attributes()->valueURI);

                $md_event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], $linkType, $outputHandler);
                $md_event->set_persinst_id((string)$actor->namePart, "", "", ['gnd' => $gndId]);

                // Events without a resolved actor are not added.
                if ($md_event->get_persinst_id() === 0) {
                    continue;
                }

                // Handle origin information for production events. Date and place are
                // unset once used, so only the first matching event receives them.
                if (!empty($objectData->originInfo)) {

                    $isProductionEvent = in_array($linkType, MDEventsSet::EVENTS_PRODUCTION, true);

                    if ($isProductionEvent && !empty($objectData->originInfo->dateCreated)) {
                        $md_event->set_zeiten_id($normalizeDateCreated((string)$objectData->originInfo->dateCreated[1]));
                        unset($objectData->originInfo->dateCreated);
                    }

                    if ($isProductionEvent && !empty($objectData->originInfo->place)) {
                        $md_event->set_orte_id((string)$objectData->originInfo->place->placeTerm);
                        unset($objectData->originInfo->place->placeTerm);
                        if (empty($objectData->originInfo->place->children())) unset($objectData->originInfo->place);
                    }
                    if (empty($objectData->originInfo->children())) unset($objectData->originInfo);

                }

                $object->appendEvent($md_event);

            }
            unset($objectData->name);

            // If originInfo is still set here, try handling it in an event of its own.
            if (!empty($objectData->originInfo)) {

                // NOTE(review): event type 1 is presumably the production event - confirm.
                $md_event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], 1, $outputHandler);

                if (!empty($objectData->originInfo->dateCreated)) {
                    // Same normalization as for actor-bound events.
                    $md_event->set_zeiten_id($normalizeDateCreated((string)$objectData->originInfo->dateCreated[1]));
                    unset($objectData->originInfo->dateCreated);
                }

                if (!empty($objectData->originInfo->place)) {
                    $md_event->set_orte_id((string)$objectData->originInfo->place->placeTerm);
                    unset($objectData->originInfo->place->placeTerm);
                    if (empty($objectData->originInfo->place->children())) unset($objectData->originInfo->place);
                }
                if (empty($objectData->originInfo->children())) unset($objectData->originInfo);

                // Only add the event if a time or a place could be linked.
                if ($md_event->get_zeiten_id() !== 0 || $md_event->get_orte_id() !== 0) {
                    $object->appendEvent($md_event);
                }

            }

            if (!empty($objectData->language)) {
                $languageTerm = (string)$objectData->language->languageTerm[0];
                // Fail with a clear message instead of passing on an undefined index.
                if (!isset($languages_iso639[$languageTerm])) {
                    throw new Exception("Unknown language code: " . $languageTerm . ' in ' . (string)$objectData->identifier);
                }
                $object->set_string("content_language", $languages_iso639[$languageTerm]);
                $object->set_bool("content_language_show_md", true);
                $object->set_bool("content_language_show_extern", true);
                unset($objectData->language);
            }

            // Related items of type "constituent" identify the collection(s) of the object.
            if (!empty($objectData->relatedItem)) {
                foreach ($objectData->relatedItem as $relatedItem) {

                    if ((string)$relatedItem->attributes()->type !== 'constituent') {
                        continue;
                    }

                    $recordIdentifier = (string)$relatedItem->recordInfo->recordIdentifier;
                    if (isset($collectionIdsByRecordIdentifier[$recordIdentifier])) {
                        $object->appendCollectionByID($collectionIdsByRecordIdentifier[$recordIdentifier]);
                        continue;
                    }

                    // Estates ("Nachlaß" / "Teilnachlaß") are not mapped to a collection.
                    $relatedTitle = (string)$relatedItem->titleInfo->title;
                    if (str_starts_with($relatedTitle, 'Nachlaß')
                        || str_starts_with($relatedTitle, 'Teilnachlaß')
                    ) {
                        continue;
                    }

                    throw new Exception("Unknown collection: " . var_export($relatedItem, true));

                }
            }

            // Anything else in relatedItem is skipped for now.
            unset($objectData->relatedItem);
            unset($objectData->recordInfo->recordContentSource);

            // Link back to the source repository.
            $object->appendLinkByName((string)$objectData->identifier, "Das Objekt bei Kalliope");
            unset($objectData->identifier);

            unset($objectData->recordInfo->recordIdentifier);

            // Import edit history at kalliope
            $editHistory = ["Ersterfassung bei Kalliope: " . (string)$objectData->recordInfo->recordCreationDate];
            foreach ($objectData->recordInfo->recordChangeDate as $changeDate) {
                $editHistory[] = "Bearbeitung bei Kalliope: " . (string)$changeDate;
            }

            $object->append_string("notizen_text1", PHP_EOL . PHP_EOL . implode(PHP_EOL, $editHistory));

            unset($objectData->recordInfo->recordCreationDate, $objectData->recordInfo->recordChangeDate);
            if (count($objectData->recordInfo->children()) === 0) {
                unset($objectData->recordInfo);
            }

            // A fully consumed record has no children left. Unsetting it marks it
            // as complete for the check that follows the write.
            if (count($objectData->children()) === 0) {
                unset($objectData);
            }

            $object->set_objekt_publik($visibility);
            $objectWriter->writeObject($object, true, $insertOnly, $outputHandler);

            // NOTE(review): the completeness check runs after the object has been
            // written, so an incompletely parsed record is stored before the import
            // aborts - confirm that this order is intended.
            if (!empty($objectData)) {
                throw new MDParserIncomplete("Incomplete parser: " . var_export($objectData, true));
            }

            $outputHandler->toLog("Done with object $i", 2);

            // Pause between objects; usleep() takes the delay in microseconds.
            usleep(IMPORTER_DELAY_PER_OBJECT);

        }

    }

}
|