This repository has been archived on 2023-01-23. You can view files and clone it, but cannot push or open issues or pull requests.
importer-parsers-archive-2022/parsers/csv_spsg.php

410 lines
16 KiB
PHP
Raw Normal View History

2023-01-23 15:21:35 +01:00
<?PHP
/**
* Parser for XML coming via CSVXML from SPSG (easydb).
*
* @author Stefan Rohde-Enslin <s.rohde-enslin@museum-digital.de>
*/
declare(strict_types = 1);
/**
 * Translates a yes/no flag into a boolean.
 *
 * Only the exact, lowercase string "y" is regarded as true. Any other
 * value (including "Y", "n" and the empty string) results in false.
 *
 * @param string $input Flag value to translate.
 *
 * @return boolean
 */
function translateYNToBool(string $input):bool {

    return $input === "y";

}
/**
 * Maps the function (role) a person has in the CSV to an event type ID.
 *
 * The lookup is an exact string match against the known role names.
 * Roles that are not listed fall back to event type 1.
 *
 * @param string $funktion Role name as provided in the "Person-Funktion" column.
 *
 * @return integer
 */
function getCSVEventtype(string $funktion):int {

    // Role name => event type ID.
    static $eventTypesByRole = [
        "Manufaktur"              => 1,
        "Hersteller"              => 1,
        "Radierer"                => 1,
        "Sticker"                 => 1,
        "Uhrmacher"               => 1,
        "Bildhauer"               => 1,
        "Ebenist"                 => 1,
        "Bronzegießer"            => 1,
        "Mosaikkünstler"          => 1,
        "ohne"                    => 1,
        "Verleger"                => 3,
        "Maler der Vorlage"       => 4,
        "Dargestellt/ -er"        => 5,
        "Eigentümer"              => 6,
        "Maler"                   => 9,
        // "\o" is no escape sequence, so this key contains a literal backslash.
        "Maler\oder"              => 9,
        "Kopist"                  => 12,
        "Besteller, Auftraggeber" => 25,
        "Modelleur"               => 31,
        "Entwerfer"               => 35,
    ];

    return $eventTypesByRole[$funktion] ?? 1;

}
/**
 * Builds the list of events from the numbered event columns of an object.
 *
 * The column groups 1 to 5 are read ("Personen_N", "Person-Funktion_N",
 * "Person-Bemerkung_N", "Entstehungsort_N", "Entstehungszeit_N"). A group
 * results in an event if it provides at least an actor, a place or a time.
 *
 * Values that are missing, empty, the placeholder "ERSATZ", or not a string
 * (e.g. arrays produced when converting nested XML elements) are treated as
 * not provided.
 *
 * @param array<mixed> $objectData Object information.
 *
 * @return array<array<string|integer>>
 */
function parseCSVEvents(array $objectData):array {

    // Returns the usable string value of a column, or "" if there is none.
    $readField = static function(string $key) use ($objectData):string {
        $value = $objectData[$key] ?? '';
        if (!is_string($value) || empty($value) || $value === 'ERSATZ') return '';
        return $value;
    };

    $events = [];

    for ($i = 1; $i < 6; $i++) {

        $actor = $readField("Personen_" . $i);
        $place = $readField("Entstehungsort_" . $i);

        // Without a usable role the event type falls back to 1.
        $role      = $readField("Person-Funktion_" . $i);
        $eventType = $role !== '' ? getCSVEventtype($role) : 1;

        // A remark consisting of a question mark flags the actor as uncertain.
        $remark    = $readField("Person-Bemerkung_" . $i);
        $actorSure = ($remark === '?' || $remark === '(?)') ? 'n' : 'y';

        $time = $readField("Entstehungszeit_" . $i);
        if ($time !== '') {
            // A range with identical start and end ("1800 - 1800") is
            // reduced to the single value. The parts are compared as
            // strings, so that e.g. "1e3" and "1000" are not seen as equal.
            $parts = explode('-', str_replace(' - ', '-', $time));
            if (count($parts) === 2 && trim($parts[0]) === trim($parts[1])) {
                $time = $parts[0];
            }
            $time = trim($time);
        }

        if (empty($actor) && empty($place) && empty($time)) continue;

        $events[] = [
            'ereignisart'        => $eventType,
            'akteur_name'        => $actor,
            'persinst_sicher'    => $actorSure,
            'ort'                => $place,
            'ort_sicher'         => 'y',
            'zeit_name'          => $time,
            'zeit_sicher'        => 'y',
            'ereignis_anmerkung' => "",
        ];

    }

    return $events;

}
/**
 * Imports all XML files of a folder as objects of an institution.
 *
 * Every file with the extension "xml" in the import folder is loaded,
 * converted to an array and turned into an object with its collection,
 * events, tags, literature and (if a data folder is given) one image.
 * The object is then written using the object writer.
 *
 * @param array<mixed>     $version        Instance to import into.
 * @param integer          $institution_id Institution to import to.
 * @param non-empty-string $XMLFolder      Folder of the XML files to import.
 * @param string           $dataFolder     Data folder. If empty, no images are imported.
 * @param integer          $sammlung_id    Collection ID. Optional. If 0, the collection
 *                                         is determined by name ("Sammlungsbereich").
 * @param boolean          $visibility     Import objects to be directly visible?.
 * @param boolean          $insertOnly     If set to true, only new objects are added,
 *                                         old are not updated.
 *
 * @throws MDFileDoesNotExist If the import folder does not exist.
 * @throws Exception          If a file cannot be parsed as XML or converted to an array.
 *
 * @return void
 */
function parseImportXML(array $version, int $institution_id, string $XMLFolder, string $dataFolder = "", int $sammlung_id = 0, bool $visibility = false, bool $insertOnly = false):void {

    $importDir = MD_IMPORTER_CONF::$import_dir_xml . "{$XMLFolder}";
    if (!is_dir($importDir)) throw new MDFileDoesNotExist("The folder to import from ($XMLFolder) does not exist.");

    // Images are only imported if a data folder has been provided.
    $importImages = !empty($dataFolder);

    // Set up writers
    // NOTE(review): several writers are not used below. They are still
    // instantiated, as their constructors may have side effects - confirm
    // before removing them.
    $collectionWriter   = new MDCollectionWriter($version['mainDB']);
    $literatureWriter   = new MDLiteratureWriter($version['mainDB']);
    $linkWriter         = new MDLinkWriter($version['mainDB']);
    $seriesWriter       = new MDSeriesWriter($version['mainDB']);
    $exhibitionWriter   = new MDExhibitionWriter($version['mainDB']);
    $objectRecordWriter = new MDObjectRecordWriter($version['mainDB']);
    $tagWriter          = new MDTagWriter($version['nodaDB']);

    $outputHandler = new MDOutputHandler;
    $outputHandler->setVerbosity(2);

    $objectWriter = new MDObjectWriter($version['mainDB'], $version['nodaDB'], $version['link'], $version['filepath'], $version['dataFolderLink']);

    // Returns the suffixes of all keys that start with the given prefix
    // (e.g. "1", "2" for "Schlagwort_1", "Schlagwort_2").
    $collectSuffixes = static function(array $keys, string $prefix):array {
        $suffixes = [];
        foreach ($keys as $key) {
            if (substr((string)$key, 0, strlen($prefix)) === $prefix) {
                $suffixes[] = substr((string)$key, strlen($prefix));
            }
        }
        return $suffixes;
    };

    $i = 0;
    // Files with a counter below this value are skipped. This allows to
    // manually resume an import.
    $startAtCounter = 0;

    foreach (MD_STD::scandir($importDir) as $xmlFile) {

        if (pathinfo($xmlFile, PATHINFO_EXTENSION) !== "xml") continue;

        ++$i;
        if ($i < $startAtCounter) {
            continue;
        }

        $outputHandler->toLog("Starting to process file #{$i}", 2);
        $outputHandler->toLog("Attempting to load XML file {$xmlFile}", 2);

        $rawData = MD_STD::file_get_contents("{$importDir}/{$xmlFile}");
        // Remove byte order marks
        $rawData = MD_STD::preg_replace_str('/\x{FEFF}/u', '', $rawData);

        // A successfully parsed, but empty element evaluates to false as
        // well. Hence the explicit comparison with false.
        $xmlData = simplexml_load_string($rawData, "SimpleXMLElement", LIBXML_NOCDATA);
        if ($xmlData === false) {
            throw new Exception("Cannot load raw data into SimpleXML (file: {$xmlFile})");
        }

        if (!($json_encoded = MD_STD::json_encode_object($xmlData)) || !($objectData = json_decode($json_encoded, true))) {
            throw new Exception("Could not load file {$xmlFile}");
        }

        // The inventory number is stored in "Inventarnummer" in this format.
        $inventoryNumber = $objectData['Inventarnummer'] ?? '';
        if (!is_string($inventoryNumber)) $inventoryNumber = '';
        $outputHandler->toLog("Successfully loaded XML file {$xmlFile} (Object: {$inventoryNumber})", 2);

        $availableKeys = array_keys($objectData);

        //
        // Object base data
        //

        // The author is appended to the description in square brackets.
        if (isset($objectData['Autor']) and !is_array($objectData['Autor']) and isset($objectData['Red_freigegeb_Text'])) {
            $objectData['Red_freigegeb_Text'] = $objectData['Red_freigegeb_Text'] . ' [' . $objectData['Autor'] . ']';
        }

        if (!isset($objectData['Objektbezeichnung'])) $objectData['Objektbezeichnung'] = "Eintrag folgt";
        if (!isset($objectData["Material-Technik"])) $objectData["Material-Technik"] = "";

        // NOTE(review): "Inventarnummer", "Titel" and "Red_freigegeb_Text"
        // are read without a check for their existence - confirm that they
        // are always present in the source data.
        $object = new MDObject($version['mainDB'], $version['nodaDB'], $version['language'], $institution_id, $objectData['Inventarnummer'], $objectData['Objektbezeichnung'], $objectData['Titel'], $objectData['Red_freigegeb_Text'], $outputHandler);

        $object->set_objekt_material_technik($objectData["Material-Technik"]);
        if (!empty($objectData["Masse"])) $object->set_objekt_masse($objectData["Masse"]);

        unset($objectData['Red_freigegeb_Text'], $objectData["Material-Technik"], $objectData['Objektbezeichnung'], $objectData['Inventarnummer']);

        //
        // Collection
        //

        if ($sammlung_id !== 0) {
            $object->appendCollectionByID($sammlung_id);
        }
        else {
            $object->appendCollectionByName($objectData["Sammlungsbereich"]);
        }

        //
        // Events
        //

        foreach (parseCSVEvents($objectData) as $tEvent) {

            $event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], intval($tEvent["ereignisart"]), $outputHandler);

            if (!empty($tEvent["ort"])) $event->set_orte_id((string)$tEvent["ort"]);
            if (!empty($tEvent["ort_sicher"])) $event->set_ereignis_orte_sicher(translateYNToBool((string)$tEvent["ort_sicher"]));
            if (!empty($tEvent["akteur_name"])) $event->set_persinst_id((string)$tEvent["akteur_name"]);
            if (!empty($tEvent["persinst_sicher"])) $event->set_ereignis_persinst_sicher(translateYNToBool((string)$tEvent["persinst_sicher"]));
            if (!empty($tEvent["zeit_name"])) $event->set_zeiten_id((string)$tEvent["zeit_name"]);
            if (!empty($tEvent["zeit_sicher"])) $event->set_ereignis_zeit_sicher(translateYNToBool((string)$tEvent["zeit_sicher"]));
            // Pass the note itself, not the name of its array key.
            if (!empty($tEvent["ereignis_anmerkung"])) $event->set_ereignis_anmerkung((string)$tEvent["ereignis_anmerkung"]);

            $object->appendEvent($event);

        }

        //
        // Tags
        //

        if (!empty($objectData['Schlagwort_1'])) {
            foreach ($collectSuffixes($availableKeys, "Schlagwort_") as $suffix) {
                $tagName = $objectData["Schlagwort_" . $suffix];
                // Skip placeholders and non-string values.
                if ($tagName === "ERSATZ" || is_array($tagName)) continue;
                $object->appendTagByName($tagName, "", $tagWriter);
            }
        }

        //
        // Literatur
        //

        if (!empty($objectData['Literatur_1'])) {
            foreach ($collectSuffixes($availableKeys, "Literatur_") as $suffix) {
                $literatureTitle = $objectData["Literatur_" . $suffix];
                // Skip placeholders and non-string values.
                if ($literatureTitle === "ERSATZ" || is_array($literatureTitle)) continue;
                // Remove a single trailing full stop.
                if (substr((string)$literatureTitle, -1, 1) === '.') $literatureTitle = substr((string)$literatureTitle, 0, -1);
                // NOTE(review): substr() counts bytes. Cutting at 200 bytes
                // may split a multi-byte character - confirm whether the
                // limit is meant in bytes or characters.
                $object->appendLiteratureByName(substr($literatureTitle, 0, 200), "", "", "", "", $literatureWriter);
            }
        }

        //
        // Images
        //

        if ($importImages === true) {

            // NOTE(review): the image fields are read without a check for
            // their existence - confirm that they are always present when
            // images are imported.
            $image = new MDImage($version['mainDB'], $objectData["Bild_Titel"], MD_IMPORTER_CONF::$import_dir_files . $dataFolder . "/" . $objectData["Bild_Datei"] . ".jpg");
            $image->set_image_name($objectData["Bild_Titel"]);
            $image->set_image_beschreibung("Aufnahme " . $objectData["Aufnahmedatum"]);
            $image->set_image_owner($objectData["Nutzungsrechte"]);
            $image->set_image_creator($objectData["Fotograf"]);

            if (isset($objectData["image_rights"])) $image->set_image_rights($objectData["image_rights"]);
            if (isset($objectData["image_visible"])) $image->set_visible(translateYNToBool($objectData["image_visible"]));

            // There is only one image per object. It is always the main image.
            $image->set_main_image(true);

            $object->appendImage($image);

            unset($objectData["Bild_Titel"], $objectData["Aufnahmedatum"], $objectData["Nutzungsrechte"],
                $objectData["Fotograf"], $objectData["image_rights"], $objectData["image_visible"]);

        }

        //
        // Write it!
        //

        $object->set_objekt_publik($visibility);
        $objectWriter->writeObject($object, true, $insertOnly, $outputHandler);

        // Disabled check for fields that have not been handled by the parser.
        /*
        if (!empty($objectData)) {
            throw new MDParserIncomplete(var_export($objectData, true));
        }
        */

        // Pause between two objects. usleep() expects microseconds.
        usleep(IMPORTER_DELAY_PER_OBJECT);

    }

}