287 lines
12 KiB
PHP
287 lines
12 KiB
PHP
|
<?PHP
|
||
|
/**
|
||
|
* Parser for CSV files generated in the format that is also used for imports to
|
||
|
* Google Arts and Culture.
|
||
|
*
|
||
|
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
|
||
|
* @link https://imports.museum-digital.org/parsers/google-arts-and-culture.php
|
||
|
*/
|
||
|
declare(strict_types = 1);
|
||
|
|
||
|
/**
 * Parse function.
 *
 * Reads every ";"-separated *.csv file in the given import folder, treats the
 * first line of each file as the header row and turns each following line
 * into one object, which is then written using MDObjectWriter.
 *
 * Per row the following columns are consumed:
 *  - itemid, type/<lang>, title/<lang>, description/<lang>, medium/<lang>
 *    (the default language "de" feeds the object itself, every other
 *    language with a title/<lang> column becomes an object record)
 *  - format, rights, customtext:collection
 *  - creator#<n>/<lang>, dateCreated:display/<lang>, dateCreated:start,
 *    dateCreated:end (turned into creation events)
 * Any column that is left over after parsing makes the function throw
 * MDParserIncomplete, *after* the object has been written.
 *
 * @param array<mixed>     $version        Instance to import into. The keys mainDB,
 *                                         nodaDB, link, filepath, dataFolderLink and
 *                                         language are read.
 * @param integer          $institution_id Institution to import to.
 * @param non-empty-string $XMLFolder      Folder (below MD_IMPORTER_CONF::$import_dir_xml)
 *                                         containing the CSV files to import. The name
 *                                         is kept for compatibility with the other parsers.
 * @param string           $dataFolder     Data folder. If empty, the image-related
 *                                         columns (customtext: photographer, filespec,
 *                                         filetype) are discarded. If set, those columns
 *                                         are not handled by this function and, if
 *                                         present, end up in the MDParserIncomplete report.
 * @param integer          $sammlung_id    Collection ID. Optional. If 0, the collection
 *                                         is taken from the customtext:collection column.
 * @param boolean          $visibility     Import objects to be directly visible?.
 * @param boolean          $insertOnly     If set to true, only new objects are added,
 *                                         old are not updated.
 *
 * @throws MDFileDoesNotExist If the import folder does not exist.
 * @throws MDParserIncomplete If the object type cannot be mapped to a creation
 *                            event type or if unparsed columns remain.
 * @throws Exception          If a CSV file cannot be opened or has no header row.
 *
 * @return void
 */
function parseImportXML(array $version, int $institution_id, string $XMLFolder, string $dataFolder = "", int $sammlung_id = 0, bool $visibility = false, bool $insertOnly = false) {

    $importDir = MD_IMPORTER_CONF::$import_dir_xml . "{$XMLFolder}";

    if (!is_dir($importDir)) throw new MDFileDoesNotExist("The folder to import from ($XMLFolder) does not exist.");

    // Images are only considered if a data folder has been passed.
    $importImages = !empty($dataFolder);

    // Set up writers
    // NOTE(review): only $collectionWriter and $objectWriter are referenced
    // below. The other writers are kept because their constructors are not
    // visible from here - confirm they have no required side effects before
    // removing them.

    $collectionWriter   = new MDCollectionWriter($version['mainDB']);
    $literatureWriter   = new MDLiteratureWriter($version['mainDB']);
    $linkWriter         = new MDLinkWriter($version['mainDB']);
    $seriesWriter       = new MDSeriesWriter($version['mainDB']);
    $exhibitionWriter   = new MDExhibitionWriter($version['mainDB']);
    $objectRecordWriter = new MDObjectRecordWriter($version['mainDB']);
    $tagWriter          = new MDTagWriter($version['nodaDB']);

    $outputHandler = new MDOutputHandler;
    $outputHandler->setVerbosity(2);

    $objectWriter = new MDObjectWriter($version['mainDB'], $version['nodaDB'], $version['link'], $version['filepath'], $version['dataFolderLink']);

    // Set default language
    $defaultLang = "de";

    // Mapping of the object type to the event type ID used for the
    // creation event. Types missing here abort the import of the row.
    $creationEventTypes = [
        "print"     => 26,
        "drawing"   => 19,
        "pastel"    => 9,
        "painting"  => 9,
        "sculpture" => 31,
        "medal"     => 1,
    ];

    // Returns all entries of $keys (as strings) that start with $prefix.
    $keysWithPrefix = static function (array $keys, string $prefix): array {
        $matches = [];
        foreach ($keys as $key) {
            if (strncmp((string)$key, $prefix, strlen($prefix)) === 0) {
                $matches[] = (string)$key;
            }
        }
        return $matches;
    };

    // Sets the time of a creation event from the dateCreated:* columns,
    // preferring the display value in the default language over English.
    // Does nothing if neither display value is filled.
    $setCreationDate = static function (MDEvent $event, array $objectData) use ($defaultLang) {

        if (!empty($objectData["dateCreated:display/$defaultLang"])) {
            $display = $objectData["dateCreated:display/$defaultLang"];
        }
        else if (!empty($objectData["dateCreated:display/en"])) {
            $display = $objectData["dateCreated:display/en"];
        }
        else return;

        // Missing start / end columns are treated like empty ones (= 0).
        $event->set_zeiten_id($display, (int)($objectData['dateCreated:start'] ?? 0), (int)($objectData['dateCreated:end'] ?? 0));

    };

    // Set up prepared statement for checking if a tag of a given name exists
    $tagNameExistsStmt = $version['nodaDB']->do_prepare("SELECT `tag_id`
        FROM `tag`
        WHERE `tag_name` = ?
        LIMIT 1");

    try {

        foreach (MD_STD::scandir($importDir) as $xmlFile) {

            if (pathinfo($xmlFile, PATHINFO_EXTENSION) !== "csv") continue;

            $outputHandler->toLog("Attempting to load CSV file {$xmlFile}", 2);

            if (!($handle = fopen("{$importDir}/{$xmlFile}", "r"))) {
                throw new Exception("File {$xmlFile} cannot be opened");
            }

            // The handle must be released even if parsing a row throws.
            try {

                if (!($fileHeaders = fgetcsv($handle, 5000000, ";"))) {
                    throw new Exception("Failed to read headers for file $xmlFile");
                }

                while (($data = fgetcsv($handle, 5000000, ";")) !== false) {

                    // fgetcsv() reports a blank line as an array holding a
                    // single null field. Such lines carry no object.
                    if ($data === [null]) continue;

                    // Create associative array for easier parsing
                    $objectData = [];
                    foreach ($data as $key => $value) {
                        $objectData[$fileHeaders[$key]] = $value;
                    }

                    // Store type for later use (when parsing creation and tags).
                    // Both values are determined anew for each row, so that
                    // nothing leaks over from the previous row.
                    $typeDefLang = $objectData["type/$defaultLang"] ?? "";
                    $type        = !empty($objectData["type/en"]) ? $objectData["type/en"] : $typeDefLang;

                    // Object base data
                    $object = new MDObject($version['mainDB'], $version['nodaDB'], $version['language'], $institution_id, $objectData['itemid'], $objectData["type/$defaultLang"], $objectData["title/$defaultLang"], $objectData["description/$defaultLang"], $outputHandler);

                    unset($objectData['itemid'], $objectData["type/$defaultLang"], $objectData["title/$defaultLang"], $objectData["description/$defaultLang"]);

                    if (!empty($objectData["format"])) $object->set_objekt_masse($objectData["format"]);
                    if (!empty($objectData["medium/$defaultLang"])) $object->set_objekt_material_technik($objectData["medium/$defaultLang"]);
                    unset($objectData["format"], $objectData["medium/$defaultLang"]);

                    if (!empty($objectData['rights'])) $object->set_string("metadata_rights_holder", $objectData['rights']);
                    unset($objectData['rights']);

                    // Handle collections: An explicitly passed collection ID
                    // takes precedence over the collection named in the CSV.
                    if ($sammlung_id !== 0) {
                        $object->appendCollectionByID($sammlung_id);
                    }
                    else if (!empty($objectData['customtext:collection'])) {
                        $object->appendCollectionByName($objectData['customtext:collection'], "", $collectionWriter);
                    }
                    unset($objectData['customtext:collection']);

                    // Snapshot of the columns remaining at this point. The
                    // default language's title has already been removed, so
                    // the title/ columns left describe translations only.
                    $availableKeys = array_keys($objectData);

                    // Handle translations of base data
                    foreach ($keysWithPrefix($availableKeys, "title/") as $titleKey) {

                        $suffix = substr($titleKey, strlen("title/"));

                        // Only the title column is guaranteed to exist for a
                        // translation; the others default to an empty string.
                        $objectRecord = new MDObjectRecord($version['mainDB'], 0, "$suffix", $objectData["type/$suffix"] ?? "", $objectData["title/$suffix"], $objectData["description/$suffix"] ?? "");
                        $objectRecord->set_record_objekt_material_technik($objectData["medium/$suffix"] ?? "");
                        $objectRecord->set_record_objekt_masse((string)$object->get_string("objekt_masse"));
                        $object->appendObjectRecord($objectRecord);

                        unset($objectData["type/$suffix"],
                            $objectData["title/$suffix"],
                            $objectData["description/$suffix"],
                            $objectData["medium/$suffix"]);

                    }

                    /**
                     * Parse creation
                     */
                    $creationEventType = $creationEventTypes[$type] ?? 0;

                    if ($creationEventType === 0) {
                        throw new MDParserIncomplete("Unknown creator type for object type: $type");
                    }

                    // Collect the distinct creator identifiers, i.e. the part
                    // between "creator#" and the language suffix ("/de", ...).
                    $creatorKeys = $keysWithPrefix($availableKeys, "creator#");
                    $creatorIds  = [];
                    foreach ($creatorKeys as $creatorKey) {
                        $entity   = substr($creatorKey, strlen("creator#"));
                        $slashPos = strpos($entity, "/");
                        // Compare against false explicitly: a slash at
                        // offset 0 is a valid match, not "no slash found".
                        $creatorIds[] = ($slashPos === false) ? $entity : substr($entity, 0, $slashPos);
                    }
                    $creatorIds = array_unique($creatorIds);

                    $eventsCreated = 0;
                    foreach ($creatorIds as $suffix) {

                        if (!empty($objectData["creator#" . $suffix . "/$defaultLang"])) {
                            $creator = $objectData["creator#" . $suffix . "/$defaultLang"];
                        }
                        else if (!empty($objectData["creator#" . $suffix . "/en"])) {
                            $creator = $objectData["creator#" . $suffix . "/en"];
                        }
                        else continue;

                        $event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], $creationEventType, $outputHandler);
                        $event->set_persinst_id($creator);
                        $setCreationDate($event, $objectData);

                        $object->appendEvent($event);

                        ++$eventsCreated;

                    }

                    // Without any named creator, a creation event is still
                    // appended; it carries the creation date if one is given.
                    if ($eventsCreated === 0) {

                        $event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], $creationEventType, $outputHandler);
                        $setCreationDate($event, $objectData);
                        $object->appendEvent($event);

                    }

                    // Remove the columns consumed for events
                    foreach ($creatorKeys as $creatorKey) {
                        unset($objectData[$creatorKey]);
                    }
                    foreach ($keysWithPrefix($availableKeys, "dateCreated:display/") as $dateKey) {
                        unset($objectData[$dateKey]);
                    }
                    unset($objectData['dateCreated:start'], $objectData['dateCreated:end']);

                    // Attempt to set tags based on object type
                    if (!empty($typeDefLang)) {
                        $tagNameExistsStmt->bind_param("s", $typeDefLang);
                        $tagNameExistsStmt->execute();
                        $tagNameExistsResult = $tagNameExistsStmt->do_get_result();
                        if ($tagNameExistsResult->num_rows === 1) {
                            $object->appendTagByID($tagNameExistsResult->fetch_row()[0]);
                        }
                        $tagNameExistsResult->close();
                        unset($tagNameExistsResult);
                    }

                    // Handle images: Without a data folder, the image-related
                    // columns are dropped unread.
                    if ($importImages === false) {
                        unset($objectData['customtext: photographer'],
                            $objectData['filespec'],
                            $objectData['filetype']);
                    }

                    /*
                     * Write it!
                     */
                    $object->set_objekt_publik($visibility);
                    $objectWriter->writeObject($object, true, $insertOnly, $outputHandler);

                    // Any column still present has not been parsed. This is
                    // reported only after the object has been written.
                    if (!empty($objectData)) {
                        throw new MDParserIncomplete(var_export($objectData, true));
                    }

                    // Pause for the configured delay (usleep() takes microseconds)
                    usleep(IMPORTER_DELAY_PER_OBJECT);

                }

            }
            finally {
                fclose($handle);
            }

        }

    }
    finally {
        $tagNameExistsStmt->close();
        unset($tagNameExistsStmt);
    }

}
|