This repository has been archived on 2023-01-23. You can view files and clone it, but cannot push or open issues or pull requests.
importer-parsers-archive-2022/parsers/museo.php
2023-01-23 15:21:35 +01:00

393 lines
17 KiB
PHP

<?PHP
/**
* A parser for the museo museum database as used by the Museums of the Lausitz.
*
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
*/
declare(strict_types = 1);
/**
 * Parse function.
 *
 * Reads all rows of `museo_lausitz.inventory` joined with
 * `museo_lausitz.inventory_content` for group 41, merges the XML stored in
 * the `desc` / `desc_lang_independent` columns into each row, maps the
 * resulting fields onto an MDObject (plus events and, optionally, an image)
 * and writes the object through MDObjectWriter.
 *
 * Every source field that is handled gets unset; anything left over at the
 * end (other than a literal '-') aborts the import with MDParserIncomplete,
 * so that no source data is dropped unnoticed.
 *
 * @param array<mixed>     $version        Instance to import into.
 * @param integer          $institution_id Institution to import to.
 * @param non-empty-string $XMLFolder      Folder of the XML files to import.
 *                                         Only its existence is checked here.
 * @param string           $dataFolder     Data folder. If empty, no images
 *                                         are imported.
 * @param integer          $sammlung_id    Collection ID. Optional. Not used
 *                                         by this parser.
 * @param boolean          $visibility     Import objects to be directly visible?.
 * @param boolean          $insertOnly     If set to true, only new objects are added,
 *                                         old are not updated.
 *
 * @throws MDFileDoesNotExist If the XML folder does not exist.
 * @throws MDParserIncomplete If an object contains fields this parser does
 *                            not handle.
 *
 * @return void
 */
function parseImportXML(array $version, int $institution_id, string $XMLFolder, string $dataFolder = "", int $sammlung_id = 0, bool $visibility = false, bool $insertOnly = false): void {

    if (!is_dir(MD_IMPORTER_CONF::$import_dir_xml . "{$XMLFolder}")) throw new MDFileDoesNotExist("The folder to import from ($XMLFolder) does not exist.");

    // Images can only be resolved relative to a data folder.
    $importImages = ($dataFolder !== '');

    // Set up writers.
    // NOTE(review): the following four writers are not referenced again in
    // this function. They are kept because it cannot be told from this file
    // whether their constructors have required side effects.
    $collectionWriter = new MDCollectionWriter($version['mainDB']);
    $seriesWriter     = new MDSeriesWriter($version['mainDB']);
    $literatureWriter = new MDLiteratureWriter($version['mainDB']);
    $tagWriter        = new MDTagWriter($version['nodaDB']);

    $outputHandler = new MDOutputHandler;
    $outputHandler->setVerbosity(2);

    $objectWriter = new MDObjectWriter($version['mainDB'], $version['nodaDB'], $version['link'], $version['filepath'], $version['dataFolderLink']);
    /*
    $objectWriter->disableImportImagesResources = true;
    $objectWriter->disableUpdateBaseData = true;
    $objectWriter->disableImportAdditionalData = true;
    $objectWriter->disableImportTags = true;
    $objectWriter->disableImportEvents = true;
    $objectWriter->disableImportLiterature = true;
    $objectWriter->disableImportHyperlinks = true;
    $objectWriter->disableImportSeries = true;
    $objectWriter->disableImportCollections = true;
    $objectWriter->disableImportObjectRecords = true;
    $objectWriter->disableImportTranscriptions = true;
    $objectWriter->disableImportMarkings = true;
    $objectWriter->disableImportExhibitions = true;
    $objectWriter->disableImportReception = true;
    */

    // NOTE(review): the AES key, the schema name and the group filter are
    // hard-coded in this query. The key in particular should live in
    // configuration rather than in source control.
    $result = $version['mainDB']->query("
SELECT *
, AES_DECRYPT(`desc`,'19d414f29654fbc402c4287a0a86e1b3') as `desc`, AES_DECRYPT(`desc_lang_independent`,'19d414f29654fbc402c4287a0a86e1b3') as `desc_lang_independent`
FROM museo_lausitz.inventory, museo_lausitz.inventory_content
WHERE inventory.group_id = 41 AND inventory_content.object_id = inventory.id
ORDER BY inventory.id ASC");

    // Raise $startAtCounter to skip the first entries, e.g. to resume an
    // interrupted import.
    $startAtCounter = 0;
    $i = 0;

    while ($objectData = $result->fetch_assoc()) {

        ++$i;
        if ($i < $startAtCounter) {
            continue;
        }

        $outputHandler->toLog("Starting to process entry #{$i}", 2);

        unset($objectData['group_id'], $objectData['crypt_flag'],
            $objectData['object_id'], $objectData['object_photo']);

        // Columns holding an XML document whose elements are merged into the row.
        $xmlFields = [
            'desc_lang_independent',
            'desc',
        ];

        foreach ($xmlFields as $xmlField) {

            if (empty($objectData[$xmlField])) {
                continue;
            }

            // simplexml_load_string() returns false on failure, but a
            // successfully parsed element without children also evaluates to
            // false in a boolean context. Hence the strict comparison.
            $xmlData = simplexml_load_string($objectData[$xmlField], "SimpleXMLElement", LIBXML_NOCDATA);
            if ($xmlData === false) {
                continue;
            }

            // Flatten the XML into an array via a JSON round trip. Guard
            // against a failed decode, as array_merge() rejects null.
            $decoded = json_decode(MD_STD::json_encode_object($xmlData), true);
            if (is_array($decoded)) {
                $objectData = array_merge($objectData, $decoded);
            }

            // The JSON round trip loses the "unit" attribute of the
            // measurement elements, so these are read from the XML directly.
            if (!empty($xmlData->object_mesurements)) {
                foreach ($xmlData->object_mesurements->children() as $field) {
                    $objectData[$field->getName() . '_value'] = (string)$field;
                    $objectData[$field->getName() . '_unit'] = (string)$field->attributes()->unit;
                }
            }

            // The raw XML string has been merged; drop it so that it does not
            // trip the check for unparsed contents at the end.
            unset($objectData[$xmlField]);

        }

        if (!empty($objectData['deleted_flag'])) continue;

        // Strip XML attribute residue and drop all empty values. From here on
        // a missing key and an empty value are the same thing.
        foreach ($objectData as $key => $value) {
            if (is_array($value) and !empty($value['@attributes'])) {
                unset($objectData[$key]['@attributes']);
                unset($value['@attributes']);
            }
            if (empty($value)) unset($objectData[$key]);
        }

        if (!empty($objectData['object_inventory_number'])) $invNo = $objectData['object_inventory_number'];
        else $invNo = 'museo-' . $objectData['id'];
        # $invNo = 'eisen-' . $invNo;

        if (!empty($objectData['object_short_desc'])) $desc = $objectData['object_short_desc'];
        else $desc = '------';

        // object_title may have been removed above for being empty; fall back
        // to the inventory number without reading an undefined array key.
        $title = !empty($objectData['object_title']) ? $objectData['object_title'] : $invNo;

        $object = new MDObject($version['mainDB'], $version['nodaDB'], $version['language'], $institution_id, $invNo, mb_substr($title, 0, 40), $title, $desc, $outputHandler);
        unset($objectData['object_title'], $objectData['object_short_desc']);

        // Material / Technique
        $matTech = [];
        if (!empty($objectData['object_material'])) {
            $object->set_string("material2", $objectData['object_material']);
            $matTech[] = $objectData['object_material'];
        }
        if (!empty($objectData['object_technic'])) {
            $object->set_string("technik2", $objectData['object_technic']);
            $matTech[] = $objectData['object_technic'];
        }
        $object->set_objekt_material_technik(implode(', ', $matTech));
        unset($objectData['object_material'], $objectData['object_technic']);

        // Measurements
        $measurements = [];

        if (!empty($objectData['depth_value'])) {
            $object->set_string("mass2_laenge_wert", $objectData['depth_value']);
            if (!empty($objectData['depth_unit'])) $object->set_length_unit("mass2_laenge_einheit", str_replace('cmm', 'cm', strtolower($objectData['depth_unit'])));
            $measurements[] = $objectData['depth_value'];
        }
        unset($objectData['depth_value'], $objectData['depth_unit']);

        if (!empty($objectData['height_value'])) {
            $object->set_string("mass2_hoehe_wert", $objectData['height_value']);
            try {
                if (!empty($objectData['height_unit'])) $object->set_length_unit("mass2_hoehe_einheit", str_replace('cmm', 'cm', str_replace(' ', '', trim($objectData['height_unit'], ' ,.'))));
            }
            catch (MDInvalidLengthUnit $e) {
                // Best effort: keep the value, skip the unit, but leave a trace.
                $outputHandler->toLog("Ignoring invalid height unit of object {$invNo}: " . $e->getMessage(), 2);
            }
            $measurements[] = $objectData['height_value'];
        }
        unset($objectData['height_value'], $objectData['height_unit']);

        if (!empty($objectData['width_value'])) {
            $object->set_string("mass2_breite_wert", $objectData['width_value']);
            if (!empty($objectData['width_unit'])) $object->set_length_unit("mass2_breite_einheit", $objectData['width_unit']);
            $measurements[] = $objectData['width_value'];
        }
        unset($objectData['width_value'], $objectData['width_unit']);

        if (!empty($objectData['weight_value'])) {
            $object->set_string("mass2_gewicht_wert", $objectData['weight_value']);
            if (!empty($objectData['weight_unit'])) $object->set_weight_unit("mass2_gewicht_einheit", $objectData['weight_unit']);
            $measurements[] = $objectData['weight_value'];
        }
        unset($objectData['weight_value'], $objectData['weight_unit']);

        $object->set_objekt_masse(implode(', ', $measurements));

        // Simple one-to-one field mappings
        if (!empty($objectData['collector'])) {
            $object->set_string('erwerbender', $objectData['collector']);
        }
        unset($objectData['collector']);

        if (!empty($objectData['qualifier'])) {
            $object->set_string('ersterfasser', $objectData['qualifier']);
        }
        unset($objectData['qualifier']);

        if (!empty($objectData['lang'])) {
            $object->set_string('content_language', $objectData['lang']);
        }
        unset($objectData['lang']);

        // Fields without a dedicated target are appended to the first notes field.
        if (!empty($objectData['last_edit_user'])) {
            $object->append_string('notizen_text1', PHP_EOL . $objectData['last_edit_user']);
        }
        unset($objectData['last_edit_user']);

        if (!empty($objectData['last_edit_time'])) {
            $object->append_string('notizen_text1', PHP_EOL . $objectData['last_edit_time']);
        }
        unset($objectData['last_edit_time']);

        if (!empty($objectData['preparation_technic'])) {
            $object->append_string('notizen_text1', PHP_EOL . $objectData['preparation_technic']);
        }
        unset($objectData['preparation_technic']);

        if (!empty($objectData['current_location'])) {
            $object->set_string('standort_aktuell', $objectData['current_location']);
        }
        unset($objectData['current_location']);

        if (!empty($objectData['normal_location'])) {
            $object->set_string('standort_eigentlich', $objectData['normal_location']);
        }
        unset($objectData['normal_location']);

        if (!empty($objectData['physical_desc'])) {
            $object->set_string('zustand', $objectData['physical_desc']);
        }
        unset($objectData['physical_desc']);

        // The free-text date becomes a tag, unless it consists of nothing but
        // punctuation / placeholder characters.
        if (!empty($objectData['object_date'])
            && !empty(trim($objectData['object_date'], '^ ,.:;-_'))
        ) {
            $object->appendTagByName($objectData['object_date']);
        }
        unset($objectData['object_date']);

        if (!empty($objectData['current_owner'])) {
            $object->set_string('rechte_anmerkungen', 'Eigentümer: ' . $objectData['current_owner']);
        }
        unset($objectData['current_owner']);

        // Event of type 1, built from maker, production place and year.
        if (!empty($objectData['object_maker']) || !empty($objectData['production_place']) || !empty($objectData['object_date_year'])) {

            $event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], 1, $outputHandler);
            if (!empty($objectData['object_maker'])) $event->set_persinst_id($objectData['object_maker']);
            if (!empty($objectData['production_place'])) $event->set_orte_id(mb_substr($objectData['production_place'], 0, 180));
            if (!empty($objectData['object_date_year'])) $event->set_zeiten_id((string)$objectData['object_date_year']);
            // NOTE(review): a non-empty deviation marks the time as "sicher"
            // here - confirm this is the intended polarity.
            if (!empty($objectData['object_date_year_deviation'])) $event->set_ereignis_zeit_sicher(true);

            // Only attach the event if at least one part could be resolved.
            if ($event->get_orte_id() !== 0
                || $event->get_persinst_id() !== 0
                || $event->get_zeiten_id() !== 0
            ) {
                $object->appendEvent($event);
            }

        }
        unset($objectData['object_maker'],
            $objectData['production_place'],
            $objectData['object_date_year'],
            $objectData['object_date_year_deviation']);

        // Event of type 2, built from the recovery place.
        if (!empty($objectData['associated_recovery_place'])) {

            $event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], 2, $outputHandler);
            $event->set_orte_id(mb_substr($objectData['associated_recovery_place'], 0, 180));

            if ($event->get_orte_id() !== 0
                || $event->get_persinst_id() !== 0
                || $event->get_zeiten_id() !== 0
            ) {
                $object->appendEvent($event);
            }

        }
        unset($objectData['associated_recovery_place']);

        // Event of type 22, built from the associated place.
        if (!empty($objectData['associated_place'])) {

            $event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], 22, $outputHandler);
            $event->set_orte_id(mb_substr($objectData['associated_place'], 0, 180));

            if ($event->get_orte_id() !== 0) {
                $object->appendEvent($event);
            }

        }
        unset($objectData['associated_place']);

        // misc_advanced holds either a single item or a list of items; each
        // item is appended to the description as one "a: b" line.
        if (!empty($objectData['misc_advanced'])) {
            if (!isset($objectData['misc_advanced']['item'][0])) {
                $object->append_objekt_beschreibung(PHP_EOL . implode(': ', $objectData['misc_advanced']['item']));
            }
            else {
                foreach ($objectData['misc_advanced']['item'] as $item) {
                    $object->append_objekt_beschreibung(PHP_EOL . implode(': ', $item));
                }
            }
        }
        unset($objectData['misc_advanced']);

        if (!empty($objectData['object_subject'])) {
            $object->append_objekt_beschreibung(PHP_EOL . 'Thema: ' . (string)$objectData['object_subject']);
        }
        unset($objectData['object_subject']);

        // The source ID is kept as secondary inventory number.
        if (!empty($objectData['id'])) $object->set_string("invnr2", (string)$objectData['id']);

        if (!empty($objectData['acquisition_price'])) {
            $object->set_string("ankaufsumme", $objectData['acquisition_price']);
        }
        unset($objectData['acquisition_price']);

        if (!empty($objectData['acquisition_date'])) {
            $object->set_string("zeitpunkt_zugang", $objectData['acquisition_date']);
        }
        unset($objectData['acquisition_date']);

        if (!empty($objectData['acquisition_method']) and trim($objectData['acquisition_method']) !== '-') {
            $object->set_entry_type("zugang_art", $objectData['acquisition_method']);
            $object->set_string("notizen_text2", 'Zugangsart: ' . $objectData['acquisition_method']);
        }
        unset($objectData['acquisition_method']);

        if (!empty($objectData['preparation_care'])) {
            $object->append_string("restaurierung", PHP_EOL . 'preparation_care: ' . $objectData['preparation_care']);
        }
        unset($objectData['preparation_care']);

        if (!empty($objectData['preparation_preservation'])) {
            $object->append_string("restaurierung", PHP_EOL . 'preparation_preservation: ' . $objectData['preparation_preservation']);
        }
        unset($objectData['preparation_preservation']);

        if (!empty($objectData['reproduction_rights_note'])) {
            $object->append_string("rechte_anmerkungen", PHP_EOL . 'reproduction_rights_note: ' . $objectData['reproduction_rights_note']);
        }
        unset($objectData['reproduction_rights_note']);

        if (!empty($objectData['object_markings'])) {
            $object->append_string("beschriftung2", $objectData['object_markings']);
        }
        unset($objectData['object_markings']);

        // Image: prefer the "original."-prefixed file if it exists.
        if ($importImages === true && !empty($objectData['object_photo_url'])) {

            $photoDir = MD_IMPORTER_CONF::$import_dir_files . $dataFolder;
            if (file_exists($photoDir . "/original." . $objectData["object_photo_url"])) {
                $photo_url = $photoDir . "/original." . $objectData["object_photo_url"];
            }
            else $photo_url = $photoDir . "/" . $objectData["object_photo_url"];

            try {
                $image = new MDImage($version['mainDB'], (string)$object->get_string("objekt_name"), $photo_url);
                if (!empty($objectData['object_photo_text']) and is_string($objectData['object_photo_text'])) {
                    $image->set_image_beschreibung($objectData['object_photo_text']);
                }
                if (!empty($objectData['object_photo_marker_id_card'])) {
                    $image->set_image_beschreibung($image->get_image_beschreibung() . PHP_EOL . 'Marker (ID card): ' . $objectData['object_photo_marker_id_card']);
                }
                if (!empty($objectData['object_photo_marker_vitrine'])) {
                    $image->set_image_beschreibung($image->get_image_beschreibung() . PHP_EOL . 'Marker (Vitrine): ' . $objectData['object_photo_marker_vitrine']);
                }
                $object->appendImage($image);
            }
            catch (MDFileDoesNotExist $e) {
                // A missing image file must not stop the import of the object.
                $outputHandler->toLog("Skipping image of object {$invNo}: " . $e->getMessage(), 2);
            }

        }
        unset($objectData['object_photo_url'],
            $objectData['object_photo_marker_id_card'],
            $objectData['object_photo_marker_vitrine'],
            $objectData['object_photo_text']);

        // Unset ID and the remaining fields that are known and deliberately
        // not imported.
        unset($objectData['id'], $objectData['@attributes'],
            $objectData['object_mesurements'],
            $objectData['object_photo_sort'],
            $objectData['version'],
            $objectData['object_photo'],
            $objectData['desc'],
            $objectData['object'],
            $objectData['object_inventory_number']);

        // Anything still left is a field this parser does not know. Fail
        // loudly instead of dropping data; a bare '-' counts as empty.
        foreach ($objectData as $value) {
            if ($value === '-') continue;
            throw new MDParserIncomplete("Unparsed contents in object: " . var_export($objectData, true));
        }

        $object->set_objekt_publik($visibility);

        $objectWriter->writeObject($object, true, $insertOnly, $outputHandler);

        // Throttle the import. usleep() takes microseconds.
        usleep(IMPORTER_DELAY_PER_OBJECT);
        $objectData = [];

    }

    $result->close();

}