This repository has been archived on 2023-01-23. You can view files and clone it, but cannot push or open issues or pull requests.
importer-parsers-archive-2022/parsers/faust_haendelhaus.php
2023-01-23 15:21:35 +01:00

333 lines
18 KiB
PHP

<?PHP
/**
* Parser for JSON generated from XML generated by the export function of Faust.
* Tried and tested for exports for the Stiftung Haendelhaus Halle.
*
* @author Stefan Rohde-Enslin <s.rohde-enslin@museum-digital.de>
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
*/
declare(strict_types = 1);
/**
 * Parse function: imports every JSON file of an import folder as one object.
 *
 * Each *.json file in the import folder is decoded and mapped onto an MDObject
 * (base data, condition, provenance, literature, collections, material,
 * measurements, location, events, tags and - if a data folder is given -
 * images). The object is then written using MDObjectWriter. Fields holding the
 * value "ERSATZ" are treated as "no value" throughout.
 *
 * @param array<mixed>     $version        Instance to import into.
 * @param integer          $institution_id Institution to import to.
 * @param non-empty-string $XMLFolder      Folder of the XML files to import.
 * @param string           $dataFolder     Data folder.
 * @param integer          $sammlung_id    Collection ID. Optional.
 * @param boolean          $visibility     Import objects to be directly visible?.
 * @param boolean          $insertOnly     If set to true, only new objects are added,
 *                                         old are not updated.
 *
 * @throws MDFileDoesNotExist If the import folder does not exist.
 * @throws MDParserIncomplete If a record still contains fields this parser did not handle.
 * @throws Exception          If a file cannot be decoded as JSON or an event type is unknown.
 *
 * @return void
 */
function parseImportXML(array $version, int $institution_id, string $XMLFolder, string $dataFolder = "", int $sammlung_id = 0, bool $visibility = false, bool $insertOnly = false): void {

    $importDir = MD_IMPORTER_CONF::$import_dir_xml . "{$XMLFolder}";
    if (!is_dir($importDir)) {
        throw new MDFileDoesNotExist("The folder to import from ($XMLFolder) does not exist.");
    }

    // Images are only imported if a data folder has been passed.
    $importImages = !empty($dataFolder);

    // A field counts as filled if it is non-empty and not the "ERSATZ" marker.
    $isFilled = static function ($value): bool {
        return !empty($value) && $value !== 'ERSATZ';
    };

    // Set up writers
    // NOTE(review): $literatureWriter, $linkWriter, $seriesWriter, $exhibitionWriter,
    // $objectRecordWriter and $resourceWriter are never referenced below. They are
    // kept in case their constructors have side effects - confirm and remove.
    $collectionWriter   = new MDCollectionWriter($version['mainDB']);
    $literatureWriter   = new MDLiteratureWriter($version['mainDB']);
    $linkWriter         = new MDLinkWriter($version['mainDB']);
    $seriesWriter       = new MDSeriesWriter($version['mainDB']);
    $exhibitionWriter   = new MDExhibitionWriter($version['mainDB']);
    $objectRecordWriter = new MDObjectRecordWriter($version['mainDB']);
    $tagWriter          = new MDTagWriter($version['nodaDB']);
    $resourceWriter     = new MDResourceWriter($version['mainDB']);

    $outputHandler = new MDOutputHandler;
    $outputHandler->setVerbosity(2);

    $objectWriter = new MDObjectWriter($version['mainDB'], $version['nodaDB'], $version['link'], $version['filepath'], $version['dataFolderLink']);

    // Field names as they appear in the decoded JSON.
    $keySignature   = 'K_x129x_nstlersignatur_x032x__x040x_nach_x032x_Vorlage_x041x_';
    $keyInscription = 'Aufschrift_x032x__x040x_nach_x032x_Vorlage_x041x_';
    $keySource      = 'Quelle_x032x__x040x_Graphik_x032x_entnommen_x032x_aus_x041x_';
    $keyProvenance  = 'Provenienz_x047x_Herkunft';
    $keyLiterature  = 'Literatur_x032x__x040x_Freitext_x041x_';
    $keyCollection  = 'Sammlung_x032x__x040x_geh_x148x_rt_x032x_zu_x041x_';
    $keyLocation    = 'aktueller_x032x_Standort';
    $keyEvents      = 'Ereignis-Block';
    $keyEventType   = 'Ereignis-Typ';
    $keyWho         = 'wer_x063x__x032x__x040x_Ereignis_x041x_';
    $keyGnd         = 'GND_x032x_Person_x032x__x040x_Ereignis_x041x_';
    $keyWhen        = 'wann_x063x__x032x__x040x_Ereignis_x041x_';
    $keyWhere       = 'wo_x063x__x032x__x040x_Ereignis_x041x_';

    // Labels of the extra fields appended to the description, in output order.
    $descriptionAddenda = [
        'Signatur'      => $keySignature,
        'Beschriftung'  => $keyInscription,
        'Wasserzeichen' => 'Wasserzeichen',
        'Quelle'        => $keySource,
    ];

    // Literature entries starting with these prefixes are linked by literature ID.
    $knownLiterature = [
        'Sasse 1962' => 273,
        'Sasse 1964' => 436,
    ];

    // Event type names used in the export => event type IDs.
    $eventsConcordance = [
        'verlegt'                 => 3,
        'Vorlage erstellt'        => 4,
        'wurde abgebildet'        => 5,
        'gemalt'                  => 9,
        'Druckplatte hergestellt' => 12,
        'gezeichnet'              => 19,
        'gedruckt'                => 26,
        'modelliert'              => 31,
    ];

    // Image file name fields => whether the image is the main image.
    $imageFields = [
        'Dateiname_x032x_Vorderseite'     => true,
        'Dateiname_x032x_R_x129x_ckseite' => false,
    ];

    // Fields that are knowingly not imported.
    $ignoredKeys = [
        "Inventar-Nummer",
        "Objekttitel",
        'Maler_x047x_Zeichner',
        'GND_x032x_Maler_x047x_Zeichner',
        "Ersteller_x032x_der_x032x_Druckplatte",
        "GND_x032x_Ersteller_x032x_der_x032x_Druckplatte",
        "Bildhauer_x047x_Graveur",
        "GND_x032x_Bildhauer_x047x_Graveur",
        "Ersteller_x032x_der_x032x_Vorlage",
        "GND_x032x_Ersteller_x032x_der_x032x_Vorlage",
        "Dargestellte_x032x_Person",
        "GND_x032x_dargestellte_x032x_Person",
        "Datierung",
        "Datierung_x032x_textuell",
        "Material_x047x_Technik",
        "Umfang_x047x_Format",
        "Fotos",
        "Fotosammlung_x032x_Positiv",
        "Fotosammlung_x032x_Negativ",
        "Erfassung",
        "Korrektur",
        "Verleger",
        "GND_x032x_Verleger",
        "Ort",
        "Bemerkungen",
        "Restaurierung",
        "Objektsch_x132x_tzung",
        "Fotosammlung_x032x_Dia",
    ];

    $startCounter = 0;
    $iCounter     = 0;

    foreach (MD_STD::scandir($importDir) as $xmlFile) {

        // Allows resuming an import: skip the first $startCounter entries.
        if ($iCounter < $startCounter) {
            ++$iCounter;
            continue;
        }
        if (pathinfo($xmlFile, PATHINFO_EXTENSION) !== "json") continue;

        $outputHandler->toLog("Attempting to load JSON file {$xmlFile}", 2);
        $rawData = MD_STD::file_get_contents("{$importDir}/{$xmlFile}");

        // Replace escaped control code points (presumably mis-decoded Windows-1252
        // punctuation) in the raw JSON text with plain ASCII. The double quote
        // must be inserted in its escaped form (\"), since a bare quote would
        // terminate the surrounding JSON string and make the file undecodable.
        $rawData = strtr($rawData, [
            '\u0096' => '-',
            '\u0092' => "'",
            '\u0091' => "'",
            '\u0094' => '\"',
            '\u0093' => '\"',
            '\u0084' => '\"',
        ]);

        $objectData = json_decode($rawData, true);
        if (!is_array($objectData)) {
            // Do not continue with an empty record; that would write a junk object.
            throw new Exception("Could not decode JSON file {$xmlFile}: " . json_last_error_msg());
        }

        if (empty($objectData['Signatur'])) {
            $objectData['Signatur'] = '[' . ($objectData["IMDAS_ID"] ?? '') . ']';
        }

        $outputHandler->toLog("Successfully loaded XML file {$xmlFile} (Object: {$objectData['Signatur']})", 2);

        /*
         * Object base data
         */

        // Object type, stripped of any markup tags.
        if (isset($objectData['Objektart']) and $objectData['Objektart'] !== 'ERSATZ') {
            $objektart = trim(MD_STD::preg_replace_str('/<[^>]*>/i', '', $objectData['Objektart']));
        }
        else $objektart = 'KEINE ANGABE IM IMPORT';
        unset($objectData['Objektart']);

        // The object type serves as fallback for a missing title.
        $obj_nam = $objectData['Objekttitel'] ?? null;
        if (empty($obj_nam)) $obj_nam = $objektart;

        // Description: base text plus labelled addenda.
        $objektbeschreibung = $objectData['Beschreibung'] ?? null;
        foreach ($descriptionAddenda as $label => $addendumKey) {
            if (!empty($objectData[$addendumKey])) {
                $objektbeschreibung = $objektbeschreibung . "\n\n{$label}: " . $objectData[$addendumKey];
            }
        }
        // The literal marker \[W9]\ is turned into a line break.
        $objektbeschreibung = str_replace("\[W9]\\", "\n", (string)$objektbeschreibung);
        if (!$objektbeschreibung) $objektbeschreibung = 'Ein beschreibender Text war im Import nicht enthalten';

        // The provenance field must NOT be unset here: it is read further below.
        unset($objectData['Beschreibung'], $objectData[$keySignature], $objectData[$keyInscription], $objectData['Wasserzeichen'], $objectData[$keySource]);

        $object = new MDObject($version['mainDB'], $version['nodaDB'], $version['language'], $institution_id, $objectData['Signatur'], $objektart, $obj_nam, $objektbeschreibung, $outputHandler);
        unset($objectData['Signatur']);

        // Condition: short texts go to "zustand", long ones to "restaurierung".
        if ($isFilled($objectData['Zustandsbeschreibung'] ?? null)) {
            if (mb_strlen($objectData['Zustandsbeschreibung']) < 200) $object->set_string("zustand", $objectData['Zustandsbeschreibung']);
            else $object->set_string("restaurierung", $objectData['Zustandsbeschreibung']);
        }

        // Provenance
        if ($isFilled($objectData[$keyProvenance] ?? null)) {
            $object->set_string("objektgeschichte", $objectData[$keyProvenance]);
        }

        // Literature (free text): stored as a note; known titles are also linked by ID.
        if ($isFilled($objectData[$keyLiterature] ?? null)) {
            $litEntries = (array)$objectData[$keyLiterature];
            $object->set_string("notizen_text1", implode(PHP_EOL, $litEntries));
            foreach ($litEntries as $litEntryName) {
                $litPrefix = substr($litEntryName, 0, 10);
                if (isset($knownLiterature[$litPrefix])) $object->appendLiteratureByID($knownLiterature[$litPrefix]);
            }
        }

        unset($objectData['Zustandsbeschreibung'], $objectData[$keyProvenance], $objectData[$keyLiterature]);

        /*
         * Collections: an explicitly passed collection ID takes precedence over
         * the collection name(s) of the record, which may be a string or a list.
         */
        if ($sammlung_id !== 0) {
            $object->appendCollectionByID($sammlung_id);
        }
        else if (!empty($objectData[$keyCollection])) {
            if (is_array($objectData[$keyCollection])) {
                foreach ($objectData[$keyCollection] as $collectionName) {
                    $object->appendCollectionByName($collectionName, "", $collectionWriter);
                }
            }
            else if ($objectData[$keyCollection] !== 'ERSATZ') {
                $object->appendCollectionByName($objectData[$keyCollection], "", $collectionWriter);
            }
        }
        unset($objectData[$keyCollection]);

        // Material / technique and measurements
        if ($isFilled($objectData["Material_x047x_Technik"] ?? null)) $object->set_objekt_material_technik($objectData["Material_x047x_Technik"]);
        if ($isFilled($objectData["Umfang_x047x_Format"] ?? null)) $object->set_objekt_masse($objectData["Umfang_x047x_Format"]);

        // Current location; multiple values are joined.
        if ($isFilled($objectData[$keyLocation] ?? null)) {
            if (is_array($objectData[$keyLocation])) {
                $object->set_string("standort_aktuell", implode("; ", $objectData[$keyLocation]));
            }
            else $object->set_string("standort_aktuell", $objectData[$keyLocation]);
        }
        unset($objectData[$keyLocation]);

        /*
         * Events
         */

        // A single event is not wrapped in a list: wrap it.
        if (!empty($objectData[$keyEvents]) and empty($objectData[$keyEvents][0])) {
            $objectData[$keyEvents] = [$objectData[$keyEvents]];
        }
        if (!isset($objectData[$keyEvents])) $objectData[$keyEvents] = [];

        // Drop events without a type, and split events with several actors into
        // one event per actor. foreach iterates over a snapshot of the array, so
        // the events appended here are not visited again by this loop.
        foreach ($objectData[$keyEvents] as $ix => $tEvent) {

            if (empty($tEvent[$keyEventType])) {
                unset($objectData[$keyEvents][$ix]);
                continue;
            }
            if (empty($tEvent[$keyWho]) or !is_array($tEvent[$keyWho])) continue;

            // GND IDs are matched to actors by position.
            $gnds = $tEvent[$keyGnd] ?? [];
            if (!is_array($gnds)) {
                // A single GND cannot be attributed to one of several actors.
                if (!empty($gnds)) $outputHandler->toLog("Ignoring GND ID of event with multiple actors, as it cannot be matched to one of them", 2);
                $gnds = [];
            }
            $gnds = array_values($gnds);

            foreach (array_values($tEvent[$keyWho]) as $pos => $actor) {
                $splitEvent           = $tEvent;
                $splitEvent[$keyWho]  = $actor;
                $splitEvent[$keyGnd]  = $gnds[$pos] ?? null;
                if ($pos === 0) $objectData[$keyEvents][$ix] = $splitEvent;
                else $objectData[$keyEvents][] = $splitEvent;
            }

        }

        // Split events with several places into one event per place.
        foreach ($objectData[$keyEvents] as $ix => $tEvent) {

            if (empty($tEvent[$keyWhere]) or !is_array($tEvent[$keyWhere])) continue;

            foreach (array_values($tEvent[$keyWhere]) as $pos => $place) {
                $splitEvent            = $tEvent;
                $splitEvent[$keyWhere] = $place;
                if ($pos === 0) $objectData[$keyEvents][$ix] = $splitEvent;
                else $objectData[$keyEvents][] = $splitEvent;
            }

        }

        // Create the events and attach them to the object.
        foreach ($objectData[$keyEvents] as $ix => $tEvent) {

            if (!isset($eventsConcordance[$tEvent[$keyEventType]])) {
                throw new Exception("Unknown event type: '{$tEvent[$keyEventType]}'");
            }

            $event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], $eventsConcordance[$tEvent[$keyEventType]], $outputHandler);

            if (!empty($tEvent[$keyGnd])) {
                $event->set_persinst_id($tEvent[$keyWho] ?? null, "", "", $tEvent[$keyGnd]);
            }
            else if (!empty($tEvent[$keyWho])) $event->set_persinst_id($tEvent[$keyWho]);

            if (!empty($tEvent[$keyWhen])) $event->set_zeiten_id($tEvent[$keyWhen]);
            if (!empty($tEvent[$keyWhere])) $event->set_orte_id($tEvent[$keyWhere]);

            $object->appendEvent($event);

            // Remove what has been handled; anything left is reported at the end.
            unset($objectData[$keyEvents][$ix][$keyEventType], $objectData[$keyEvents][$ix][$keyWho], $objectData[$keyEvents][$ix][$keyGnd], $objectData[$keyEvents][$ix][$keyWhen], $objectData[$keyEvents][$ix][$keyWhere]);
            if (empty($objectData[$keyEvents][$ix])) unset($objectData[$keyEvents][$ix]);

        }
        if (empty($objectData[$keyEvents])) unset($objectData[$keyEvents]);

        /*
         * Tags: may be a list or a single string.
         */
        if (!empty($objectData['Sachschlagwort'])) {
            $tags = $objectData['Sachschlagwort'];
            if (!is_array($tags)) $tags = ($tags === 'ERSATZ') ? [] : [$tags];
            foreach ($tags as $tTag) {
                $object->appendTagByName($tTag, "", $tagWriter);
            }
            unset($objectData['Sachschlagwort']);
        }

        /*
         * Images (front and back). The file is expected in the data folder, named
         * like the field value with "/" replaced by "-" and ".jpg" appended.
         * Missing fields and missing files are skipped.
         *
         * NOTE(review): if no data folder is given, the file name fields stay in
         * $objectData and are reported by the leftover check below unless their
         * value is "ERSATZ" - confirm that this is intended.
         */
        if ($importImages === true) {
            foreach ($imageFields as $fileKey => $isMainImage) {

                $rawName = $objectData[$fileKey] ?? null;
                unset($objectData[$fileKey]);
                if (!is_scalar($rawName) or (string)$rawName === '') continue;

                $imagePath = MD_IMPORTER_CONF::$import_dir_files . $dataFolder . "/" . str_replace("/", "-", (string)$rawName) . ".jpg";
                if (!file_exists($imagePath)) continue;

                $image = new MDImage($version['mainDB'], (string)$object->get_string("objekt_name"), $imagePath);
                $image->set_image_owner("Stiftung Händelhaus, Halle");
                $image->set_visible(true);
                $image->set_main_image($isMainImage);
                $image->set_image_rights("CC BY-NC-SA");
                $object->appendImage($image);
                unset($image);

            }
        }

        // Drop fields that are knowingly not imported, and placeholder values.
        foreach ($ignoredKeys as $ignoredKey) {
            unset($objectData[$ignoredKey]);
        }
        foreach ($objectData as $key => $value) {
            if ($value === "ERSATZ") unset($objectData[$key]);
        }

        /*
         * Write it!
         */
        $object->set_objekt_publik($visibility);
        $objectWriter->writeObject($object, true, $insertOnly, $outputHandler);

        // Any field left over was not handled by this parser. Note that the
        // object has already been written when this is reported.
        if (!empty($objectData)) {
            throw new MDParserIncomplete(var_export($objectData, true));
        }

        $outputHandler->toLog("Done with object $iCounter", 2);
        ++$iCounter;

        // Throttle the import: usleep() takes the delay in microseconds.
        usleep(IMPORTER_DELAY_PER_OBJECT);

    }

}