* @link https://imports.museum-digital.org/parsers/google-arts-and-culture.php
 */
declare(strict_types = 1);

/**
 * Parse function: imports objects from semicolon-separated CSV files.
 *
 * Every file with the extension "csv" in the import folder is read. The first
 * line of each file is used as the list of column headers, each following line
 * describes one object. For each object, base data, translations, creation
 * events and a type-based tag are assembled and the object is then written
 * using an MDObjectWriter.
 *
 * @param array<string, mixed> $version        Instance to import into. The keys read here are
 *                                             'mainDB', 'nodaDB', 'link', 'filepath',
 *                                             'dataFolderLink' and 'language'.
 * @param integer              $institution_id Institution to import to.
 * @param non-empty-string     $XMLFolder      Folder of the files to import, relative to
 *                                             MD_IMPORTER_CONF::$import_dir_xml.
 * @param string               $dataFolder     Data folder. If empty, the image-related columns
 *                                             ('customtext: photographer', 'filespec',
 *                                             'filetype') are discarded.
 * @param integer              $sammlung_id    Collection ID. Optional. If 0, the collection is
 *                                             taken from the column 'customtext:collection'.
 * @param boolean              $visibility     Import objects to be directly visible?.
 * @param boolean              $insertOnly     If set to true, only new objects are added,
 *                                             old are not updated.
 *
 * @throws MDFileDoesNotExist If the import folder does not exist.
 * @throws Exception          If a CSV file cannot be opened or its header line cannot be read.
 * @throws MDParserIncomplete If the object type cannot be mapped to a creation event type,
 *                            or if columns remain that this parser did not handle.
 *
 * @return void
 */
function parseImportXML(array $version, int $institution_id, string $XMLFolder, string $dataFolder = "", int $sammlung_id = 0, bool $visibility = false, bool $insertOnly = false): void {

    $importDir = MD_IMPORTER_CONF::$import_dir_xml . "{$XMLFolder}";

    if (!is_dir($importDir)) {
        throw new MDFileDoesNotExist("The folder to import from ($XMLFolder) does not exist.");
    }

    $importImages = !empty($dataFolder);

    // Set up writers
    $collectionWriter   = new MDCollectionWriter($version['mainDB']);
    // NOTE(review): the following six writers are not referenced again in this
    // function. They are kept in case their construction has side effects —
    // TODO confirm and remove.
    $literatureWriter   = new MDLiteratureWriter($version['mainDB']);
    $linkWriter         = new MDLinkWriter($version['mainDB']);
    $seriesWriter       = new MDSeriesWriter($version['mainDB']);
    $exhibitionWriter   = new MDExhibitionWriter($version['mainDB']);
    $objectRecordWriter = new MDObjectRecordWriter($version['mainDB']);
    $tagWriter          = new MDTagWriter($version['nodaDB']);

    $outputHandler = new MDOutputHandler;
    $outputHandler->setVerbosity(2);

    $objectWriter = new MDObjectWriter($version['mainDB'], $version['nodaDB'], $version['link'], $version['filepath'], $version['dataFolderLink']);

    // Set default language
    $defaultLang = "de";

    // Mapping of (English, or else default language) object types to the
    // event type used for the creation event.
    $creationEventTypes = [
        "print"     => 26,
        "drawing"   => 19,
        "pastel"    => 9,
        "painting"  => 9,
        "sculpture" => 31,
        "medal"     => 1,
    ];

    // Returns those of the given keys (cast to string) that start with $prefix.
    $keysStartingWith = static function (array $keys, string $prefix): array {
        $matches = [];
        foreach ($keys as $key) {
            $key = (string)$key;
            if (strncmp($key, $prefix, strlen($prefix)) === 0) {
                $matches[] = $key;
            }
        }
        return $matches;
    };

    // Sets the time of a creation event from the display date in the default
    // language or, if that is empty, from the English one.
    $setCreationDate = static function (MDEvent $event, array $objectData) use ($defaultLang): void {
        foreach (["dateCreated:display/$defaultLang", "dateCreated:display/en"] as $displayKey) {
            if (!empty($objectData[$displayKey])) {
                $event->set_zeiten_id(
                    $objectData[$displayKey],
                    (int)($objectData['dateCreated:start'] ?? 0),
                    (int)($objectData['dateCreated:end'] ?? 0)
                );
                return;
            }
        }
    };

    // Set up prepared statement for checking if a tag of a given name exists
    $tagNameExistsStmt = $version['nodaDB']->do_prepare("SELECT `tag_id` FROM `tag` WHERE `tag_name` = ? LIMIT 1");

    try {

        foreach (MD_STD::scandir($importDir) as $xmlFile) {

            if (pathinfo($xmlFile, PATHINFO_EXTENSION) !== "csv") continue;

            $outputHandler->toLog("Attempting to load CSV file {$xmlFile}", 2);

            $handle = fopen("{$importDir}/{$xmlFile}", "r");
            if ($handle === false) {
                throw new Exception("File {$xmlFile} cannot be opened");
            }

            // The handle is closed in the finally block, so it is also
            // released if parsing a row throws.
            try {

                $fileHeaders = fgetcsv($handle, 5000000, ";");
                if (empty($fileHeaders)) {
                    throw new Exception("Failed to read headers for file $xmlFile");
                }

                while (($data = fgetcsv($handle, 5000000, ";")) !== false) {

                    // fgetcsv() returns [null] for blank lines; there is no object to import.
                    if ($data === [null]) continue;

                    // Create associative array for easier parsing
                    $objectData = [];
                    foreach ($data as $key => $value) {
                        $objectData[$fileHeaders[$key]] = $value;
                    }

                    // Store types for later use. Both are assigned anew for
                    // every row, so that no value of a previous row is reused.
                    // $type (English preferred) selects the creation event
                    // type, $typeDefLang is used for the tag lookup.
                    $typeDefLang = $objectData["type/$defaultLang"] ?? "";
                    $type        = !empty($objectData["type/en"]) ? $objectData["type/en"] : $typeDefLang;

                    // Object base data
                    $object = new MDObject($version['mainDB'], $version['nodaDB'], $version['language'],
                        $institution_id, $objectData['itemid'], $objectData["type/$defaultLang"],
                        $objectData["title/$defaultLang"], $objectData["description/$defaultLang"],
                        $outputHandler);
                    unset($objectData['itemid'], $objectData["type/$defaultLang"],
                        $objectData['title/' . $defaultLang], $objectData["description/$defaultLang"]);

                    if (!empty($objectData["format"])) {
                        $object->set_objekt_masse($objectData["format"]);
                    }
                    if (!empty($objectData["medium/$defaultLang"])) {
                        $object->set_objekt_material_technik($objectData["medium/$defaultLang"]);
                    }
                    unset($objectData["format"], $objectData["medium/$defaultLang"]);

                    if (!empty($objectData['rights'])) {
                        $object->set_string("metadata_rights_holder", $objectData['rights']);
                    }
                    unset($objectData['rights']);

                    // Handle collections: an explicitly passed collection ID
                    // takes precedence over the collection named in the CSV.
                    if ($sammlung_id !== 0) {
                        $object->appendCollectionByID($sammlung_id);
                    }
                    else if (!empty($objectData['customtext:collection'])) {
                        $object->appendCollectionByName($objectData['customtext:collection'], "", $collectionWriter);
                    }
                    unset($objectData['customtext:collection']);

                    // Handle translations of base data. The default language
                    // title has been unset above, hence every remaining
                    // "title/<lang>" column marks a translation.
                    $availableKeys = array_keys($objectData);

                    foreach ($keysStartingWith($availableKeys, "title/") as $titleKey) {

                        $suffix = substr($titleKey, strlen("title/"));

                        $objectRecord = new MDObjectRecord($version['mainDB'], 0, $suffix,
                            $objectData["type/$suffix"], $objectData["title/$suffix"],
                            $objectData["description/$suffix"]);
                        $objectRecord->set_record_objekt_material_technik($objectData["medium/$suffix"]);
                        $objectRecord->set_record_objekt_masse((string)$object->get_string("objekt_masse"));
                        $object->appendObjectRecord($objectRecord);

                        unset($objectData["type/$suffix"], $objectData["title/$suffix"],
                            $objectData["description/$suffix"], $objectData["medium/$suffix"]);

                    }

                    /**
                     * Parse creation
                     */

                    $creationEventType = $creationEventTypes[$type] ?? 0;
                    if ($creationEventType === 0) {
                        throw new MDParserIncomplete("Unknown creator type for object type: $type");
                    }

                    // Creator columns are named "creator#<index>/<lang>".
                    // Collect the distinct indices.
                    $creatorIndices = [];
                    foreach ($keysStartingWith($availableKeys, "creator#") as $creatorKey) {
                        $entity   = substr($creatorKey, strlen("creator#"));
                        $slashPos = strpos($entity, "/");
                        // Compare against false explicitly: a slash at offset 0 is a match, too.
                        $creatorIndices[] = $slashPos === false ? $entity : substr($entity, 0, $slashPos);
                    }
                    $creatorIndices = array_unique($creatorIndices);

                    // One creation event per creator
                    $eventsCreated = 0;
                    foreach ($creatorIndices as $suffix) {

                        if (!empty($objectData["creator#" . $suffix . "/$defaultLang"])) {
                            $creator = $objectData["creator#" . $suffix . "/$defaultLang"];
                        }
                        else if (!empty($objectData["creator#" . $suffix . "/en"])) {
                            $creator = $objectData["creator#" . $suffix . "/en"];
                        }
                        else continue;

                        $event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], $creationEventType, $outputHandler);
                        $event->set_persinst_id($creator);
                        $setCreationDate($event, $objectData);
                        $object->appendEvent($event);

                        ++$eventsCreated;

                    }

                    // Without any creator, a creation event without an actor is added.
                    if ($eventsCreated === 0) {
                        $event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], $creationEventType, $outputHandler);
                        $setCreationDate($event, $objectData);
                        $object->appendEvent($event);
                    }

                    // Remove the columns consumed for events
                    foreach (["creator#", "dateCreated:display/"] as $consumedPrefix) {
                        foreach ($keysStartingWith($availableKeys, $consumedPrefix) as $consumedKey) {
                            unset($objectData[$consumedKey]);
                        }
                    }
                    unset($objectData['dateCreated:start'], $objectData['dateCreated:end']);

                    // Attempt to set tags based on object type
                    if (!empty($typeDefLang)) {
                        $tagNameExistsStmt->bind_param("s", $typeDefLang);
                        $tagNameExistsStmt->execute();
                        $tagNameExistsResult = $tagNameExistsStmt->do_get_result();
                        if ($tagNameExistsResult->num_rows === 1) {
                            $object->appendTagByID($tagNameExistsResult->fetch_row()[0]);
                        }
                        $tagNameExistsResult->close();
                        unset($tagNameExistsResult);
                    }

                    // Handle images: if no images are to be imported, the
                    // image-related columns are dropped.
                    if ($importImages === false) {
                        unset($objectData['customtext: photographer'], $objectData['filespec'], $objectData['filetype']);
                    }

                    /*
                     * Write it!
                     */

                    $object->set_objekt_publik($visibility);
                    $objectWriter->writeObject($object, true, $insertOnly, $outputHandler);

                    // Every handled column has been unset by now. Anything
                    // left over is a column this parser does not cover yet.
                    if (!empty($objectData)) {
                        throw new MDParserIncomplete(var_export($objectData, true));
                    }

                    // Pause between objects
                    usleep(IMPORTER_DELAY_PER_OBJECT);

                }

            }
            finally {
                fclose($handle);
            }

        }

    }
    finally {
        $tagNameExistsStmt->close();
        unset($tagNameExistsStmt);
    }

}