* @link https://imports.museum-digital.org/parsers/google-arts-and-culture.php
 */
declare(strict_types = 1);

const CSV_SEPARATOR = ',';
// 1-based number of the first data row to process; rows with a lower number are skipped.
const MUEHLENHAUPT_START_AT_FILE_NO = 0;
// NOTE(review): not referenced by parseImportXML(), which throttles using
// IMPORTER_DELAY_PER_OBJECT instead - confirm whether this constant is still needed.
const MUEHLENHAUPT_DELAY = 200;

/**
 * Imports every CSV file found in the given import folder.
 *
 * The first row of each file is read as the column headers. Every following
 * row is turned into one MDObject and handed to MDObjectWriter::writeObject().
 * Columns are removed from the row as they are consumed; if any column is
 * left over at the end of a row, the import aborts so that no data is
 * silently dropped.
 *
 * @param array            $version        Instance to import into.
 * @param integer          $institution_id Institution to import to.
 * @param non-empty-string $XMLFolder      Folder (below the configured XML import dir)
 *                                         holding the CSV files to import.
 * @param string           $dataFolder     Folder (below the configured files import dir)
 *                                         holding the images. If empty, no images are attached.
 * @param integer          $sammlung_id    Collection ID. Optional. Not used by this parser.
 * @param boolean          $visibility     Import objects to be directly visible?
 *                                         Not used by this parser.
 * @param boolean          $insertOnly     If set to true, only new objects are added,
 *                                         old are not updated.
 *
 * @throws MDFileDoesNotExist If the import folder does not exist.
 * @throws Exception          If a CSV file cannot be opened or its header row cannot be read.
 * @throws MDParserIncomplete If a row contains columns that this parser does not handle.
 *
 * @return void
 */
function parseImportXML(array $version, int $institution_id, string $XMLFolder, string $dataFolder = "", int $sammlung_id = 0, bool $visibility = false, bool $insertOnly = false): void
{
    $importFolder = MD_IMPORTER_CONF::$import_dir_xml . $XMLFolder;
    if (!is_dir($importFolder)) {
        throw new MDFileDoesNotExist("The folder to import from ($XMLFolder) does not exist.");
    }

    // Images can only be located if a data folder was passed.
    $importImages = !empty($dataFolder);

    // These parameters are part of the common parser signature but unused here;
    // the assignments only mark them as intentionally ignored.
    $ignore = $visibility;
    $ignore = $sammlung_id;

    // Set up writers
    // NOTE(review): $collectionWriter, $literatureWriter, $linkWriter,
    // $exhibitionWriter and $objectRecordWriter are never referenced below.
    // They are kept because their constructors are not visible from here and
    // may have side effects - confirm and remove if they do not.
    $collectionWriter   = new MDCollectionWriter($version['mainDB']);
    $literatureWriter   = new MDLiteratureWriter($version['mainDB']);
    $linkWriter         = new MDLinkWriter($version['mainDB']);
    $seriesWriter       = new MDSeriesWriter($version['mainDB']);
    $exhibitionWriter   = new MDExhibitionWriter($version['mainDB']);
    $objectRecordWriter = new MDObjectRecordWriter($version['mainDB']);
    $tagWriter          = new MDTagWriter($version['nodaDB']);

    $outputHandler = new MDOutputHandler;
    $outputHandler->setVerbosity(2);

    $objectWriter = new MDObjectWriter($version['mainDB'], $version['nodaDB'], $version['link'], $version['filepath'], $version['dataFolderLink']);

    // Switches on the object writer, left here for partial re-imports.
    /*
    $objectWriter->disableImportImagesResources = true;
    $objectWriter->disableUpdateBaseData = true;
    $objectWriter->disableImportAdditionalData = true;
    $objectWriter->disableImportCollections = true;
    $objectWriter->disableImportEvents = true;
    $objectWriter->disableImportTags = true;
    $objectWriter->disableImportLiterature = true;
    $objectWriter->disableImportHyperlinks = true;
    // $objectWriter->disableImportSeries = true;
    $objectWriter->disableImportObjectRecords = true;
    $objectWriter->disableImportTranscriptions = true;
    $objectWriter->disableImportMarkings = true;
    $objectWriter->disableImportExhibitions = true;
    $objectWriter->disableImportReception = true;
    */

    $objectWriter->importObjectTypeAsTag = true;

    // Inventory numbers already used in this run, stored as array keys so the
    // duplicate check is a constant-time lookup. Shared across all CSV files.
    $importedInvNos = [];

    foreach (MD_STD::scandir($importFolder) as $xmlFile) {

        if (pathinfo($xmlFile, PATHINFO_EXTENSION) !== "csv") {
            continue;
        }

        $outputHandler->toLog("Attempting to load CSV file {$xmlFile}", 2);

        $handle = fopen("{$importFolder}/{$xmlFile}", "r");
        if ($handle === false) {
            throw new Exception("File {$xmlFile} cannot be opened");
        }

        // try/finally guarantees the handle is closed even if a row aborts
        // the import with an exception.
        try {

            $fileHeaders = fgetcsv($handle, 5000000, CSV_SEPARATOR);
            // fgetcsv() returns [null] for a blank line, which is no usable header row.
            if ($fileHeaders === false || $fileHeaders === [null]) {
                throw new Exception("Failed to read headers for file $xmlFile");
            }

            $i = 0;
            while (($data = fgetcsv($handle, 5000000, CSV_SEPARATOR)) !== false) {

                ++$i;
                if ($i < MUEHLENHAUPT_START_AT_FILE_NO) {
                    continue;
                }

                // A blank line is returned as [null]; trim(null) would raise
                // a TypeError under strict_types, so skip such rows.
                if ($data === [null]) {
                    continue;
                }

                $outputHandler->toLog("Starting to process line #{$i}", 2);

                // Create associative array (header => trimmed value) for easier parsing
                $objectData = [];
                foreach ($data as $key => $value) {
                    $objectData[$fileHeaders[$key]] = trim($value);
                }

                // Fall back to a generated inventory number, then make it unique
                // within this run by appending underscores.
                $inventory_number = $objectData['O_0002_Objektnummer02'] ?: "Import_obj_" . $i;
                while (isset($importedInvNos[$inventory_number])) {
                    $inventory_number .= '_';
                }
                $importedInvNos[$inventory_number] = true;

                $object = new MDObject(
                    $version['mainDB'],
                    $version['nodaDB'],
                    $version['language'],
                    $institution_id,
                    $inventory_number,
                    $objectData['O_0202_Gegenstand'],
                    $objectData['O_01122_Titel_Jahr'] ?: $inventory_number,
                    $objectData['O_02011_Beschreibung_1'],
                    $outputHandler
                );
                unset(
                    $objectData['O_0002_Objektnummer02'],
                    $objectData['O_02011_Beschreibung_1'],
                    $objectData['O_0202_Gegenstand'],
                    $objectData['O_01122_Titel_Jahr']
                );

                if (!empty($objectData["O_0600_Besitz"])) {
                    $object->set_string("nutzungsrechte", $objectData["O_0600_Besitz"]);
                }
                unset($objectData["O_0600_Besitz"]);

                if (!empty($objectData["P_0206_Technik"])) {
                    $object->set_string("technik2", $objectData["P_0206_Technik"]);
                    $object->set_objekt_material_technik($objectData["P_0206_Technik"]);
                }
                unset($objectData["P_0206_Technik"]);

                if (!empty($objectData["Versicherungswert"])) {
                    $object->set_string("wert2_zahl", $objectData["Versicherungswert"]);
                }
                unset($objectData["Versicherungswert"]);

                if (!empty($objectData["Objekt_Standort"])) {
                    $object->set_string("standort_aktuell", $objectData["Objekt_Standort"]);
                }
                unset($objectData["Objekt_Standort"]);

                // The regular location is composed of four archive columns.
                $object->set_string(
                    "standort_eigentlich",
                    $objectData["O_0301_Archiv_Ort"]
                    . ' / ' . $objectData["O_0302_Archiv_Haus"]
                    . ' / ' . $objectData["O_0303_Archiv_Ebene"]
                    . ' / ' . $objectData["O_01037_Code_Lager_Regal_2F"]
                );
                unset(
                    $objectData["O_0301_Archiv_Ort"],
                    $objectData["O_0302_Archiv_Haus"],
                    $objectData["O_0303_Archiv_Ebene"],
                    $objectData["O_01037_Code_Lager_Regal_2F"]
                );

                // The cycle is stored both as plain text and as a series link.
                if (!empty($objectData['P_0207_Zyklus'])) {
                    $object->set_string("teilvon", $objectData['P_0207_Zyklus']);
                    $object->appendSeriesByName('Zyklus: ' . $objectData['P_0207_Zyklus'], "", $seriesWriter);
                }
                unset($objectData['P_0207_Zyklus']);

                // Groups are a ", "-separated list; each entry becomes a tag.
                if (!empty($objectData['P_0204_Gruppe'])) {
                    foreach (explode(', ', $objectData['P_0204_Gruppe']) as $group) {
                        $object->appendTagByName($group, "", $tagWriter);
                    }
                }
                unset($objectData['P_0204_Gruppe']);

                // Keywords are appended to the description rather than imported as tags.
                if (!empty($objectData['O_0205_Stichworte'])) {
                    $object->append_objekt_beschreibung(PHP_EOL . PHP_EOL . "Stichworte: " . $objectData['O_0205_Stichworte']);
                }
                unset($objectData['O_0205_Stichworte']);

                // The size column is split on "x"; the first part is stored
                // as width and the second as height.
                $sizeRaw = $objectData['O_0113_Größe_BxH_neu'];
                $sizes   = explode('x', $sizeRaw);
                if (!empty($sizes[0])) {
                    $object->set_string("mass2_breite_wert", $sizes[0]);
                }
                if (!empty($sizes[1])) {
                    $object->set_string("mass2_hoehe_wert", $sizes[1]);
                }
                $object->set_length_unit("mass2_breite_einheit", "cm");
                $object->set_length_unit("mass2_hoehe_einheit", "cm");
                // Only write the free-text dimensions if a size exists;
                // otherwise the field would contain just " cm".
                if ($sizeRaw !== '') {
                    $object->set_objekt_masse(implode(' x ', $sizes) . ' cm');
                }
                unset($objectData['O_0113_Größe_BxH_neu']);

                if (!empty($objectData["P_0111_Jahr"])) {
                    // NOTE(review): event type 1 is hard-coded - presumably "created"; confirm.
                    $event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], 1, $outputHandler);
                    $event->set_zeiten_id($objectData["P_0111_Jahr"]);
                    // Only attach the event if a time ID was set.
                    if ($event->get_zeiten_id() !== 0) {
                        $object->appendEvent($event);
                    }
                }
                unset($objectData["P_0111_Jahr"]);

                // Other notes fields, each appended with its column name as label
                $notesFields = [
                    'Anmerkung_Übernahme',
                    'Anmerkung_Übernahme_ohne',
                    'Lager_Label_Tabelle',
                ];
                foreach ($notesFields as $fieldName) {
                    if (!empty($objectData[$fieldName])) {
                        $object->append_string("notizen_text1", PHP_EOL . $fieldName . ': ' . $objectData[$fieldName]);
                    }
                    unset($objectData[$fieldName]);
                }

                // Image: attach it if a data folder was given and image import
                // is not disabled on the writer. In every other case keep the
                // archive number in the notes so it is not lost.
                $archiveNo = $objectData["O_0101_Archivnummer1"] ?? '';
                if (!empty($archiveNo) && $importImages && $objectWriter->disableImportImagesResources !== true) {

                    $imgFolder      = MD_IMPORTER_CONF::$import_dir_files . $dataFolder . "/";
                    $archiveNoPlain = str_replace("-FM", "", $archiveNo);

                    // Candidate file names, tried in this order; first existing one wins.
                    $candidates = [
                        $archiveNo,
                        $archiveNoPlain . '.JPG',
                        $archiveNoPlain . '.jpg',
                        $archiveNo . '.JPG',
                        $archiveNo . '.jpg',
                    ];

                    $imgFilename = "";
                    foreach ($candidates as $candidate) {
                        if (file_exists($imgFolder . $candidate)) {
                            $imgFilename = $imgFolder . $candidate;
                            break;
                        }
                    }

                    if ($imgFilename !== '') {
                        $image = new MDImage($version['mainDB'], (string)$object->get_string("objekt_name"), $imgFilename);
                        $image->set_image_master_filename($archiveNo);
                        $object->appendImage($image);
                    } else {
                        $object->append_string("notizen_text2", $archiveNo);
                    }

                } elseif ($archiveNo !== '') {
                    $object->append_string("notizen_text2", $archiveNo);
                }
                unset($objectData["O_0101_Archivnummer1"]);

                // Remove unwanted entries
                unset($objectData['P_Summe']);

                // Any column still present was not handled above: abort.
                if (!empty($objectData)) {
                    throw new MDParserIncomplete(var_export($objectData, true));
                }

                $objectWriter->writeObject($object, true, $insertOnly, $outputHandler);

                // Throttle between objects; usleep() takes microseconds.
                usleep(IMPORTER_DELAY_PER_OBJECT);

            }

        } finally {
            fclose($handle);
        }

    }
}