This repository has been archived on 2023-01-23. You can view files and clone it, but cannot push or open issues or pull requests.
importer-parsers-archive-2022/parsers/csv_filemaker_muehlenhaupt.php
2023-01-23 15:21:35 +01:00

263 lines
11 KiB
PHP

<?php
/**
 * Parser for the filemaker CSV exports for the Kurt Mühlenhaupt-Museum in Berlin.
 *
 * NOTE(review): the link below names a different parser's page
 * (google-arts-and-culture.php) - confirm the intended URL.
 *
 * @author Joshua Ramon Enslin <joshua@museum-digital.de>
 * @link https://imports.museum-digital.org/parsers/google-arts-and-culture.php
 */
declare(strict_types=1);

// Delimiter handed to fgetcsv() when reading the export files.
const CSV_SEPARATOR = ',';

// Data rows whose 1-based counter is below this value are skipped.
// Despite the name, it is compared against the row counter, not a file index.
const MUEHLENHAUPT_START_AT_FILE_NO = 0;

// Per-object delay setting for this parser.
// NOTE(review): presumably microseconds for usleep() - confirm the unit and
// where the value is meant to be consumed.
const MUEHLENHAUPT_DELAY = 200;
/**
 * Parse function: imports every *.csv file found in the given import folder.
 *
 * Each data row is mapped onto one MDObject. Every known column is consumed
 * and removed from the row; if any column is left over afterwards, the import
 * aborts with MDParserIncomplete so that no data is silently dropped.
 *
 * @param array<mixed>     $version        Instance to import into.
 * @param integer          $institution_id Institution to import to.
 * @param non-empty-string $XMLFolder      Folder (below the configured import dir)
 *                                         containing the CSV files to import.
 * @param string           $dataFolder     Data folder holding the image files.
 *                                         If empty, no images are imported.
 * @param integer          $sammlung_id    Collection ID. Optional. Unused by this parser.
 * @param boolean          $visibility     Import objects to be directly visible?
 *                                         Unused by this parser.
 * @param boolean          $insertOnly     If set to true, only new objects are added,
 *                                         old are not updated.
 *
 * @throws MDFileDoesNotExist If the import folder does not exist.
 * @throws MDParserIncomplete If a row contains columns this parser does not handle.
 * @throws Exception          If a CSV file cannot be opened or its header row cannot be read.
 *
 * @return void
 */
function parseImportXML(array $version, int $institution_id, string $XMLFolder, string $dataFolder = "", int $sammlung_id = 0, bool $visibility = false, bool $insertOnly = false): void {

    $importFolder = MD_IMPORTER_CONF::$import_dir_xml . "{$XMLFolder}";
    if (!is_dir($importFolder)) throw new MDFileDoesNotExist("The folder to import from ($XMLFolder) does not exist.");

    // Images can only be looked up if a data folder has been provided.
    $importImages = !empty($dataFolder);

    // These parameters belong to the common parser signature but are not used here.
    $ignore = $visibility;
    $ignore = $sammlung_id;

    // Set up writers
    // NOTE(review): only the series, tag and object writers are referenced below;
    // the remaining ones are kept in case their constructors are relied upon.
    $collectionWriter   = new MDCollectionWriter($version['mainDB']);
    $literatureWriter   = new MDLiteratureWriter($version['mainDB']);
    $linkWriter         = new MDLinkWriter($version['mainDB']);
    $seriesWriter       = new MDSeriesWriter($version['mainDB']);
    $exhibitionWriter   = new MDExhibitionWriter($version['mainDB']);
    $objectRecordWriter = new MDObjectRecordWriter($version['mainDB']);
    $tagWriter          = new MDTagWriter($version['nodaDB']);

    $outputHandler = new MDOutputHandler;
    $outputHandler->setVerbosity(2);

    $objectWriter = new MDObjectWriter($version['mainDB'], $version['nodaDB'], $version['link'], $version['filepath'], $version['dataFolderLink']);

    // Switches for partial re-imports; uncomment the ones required.
    /*
    $objectWriter->disableImportImagesResources = true;
    $objectWriter->disableUpdateBaseData = true;
    $objectWriter->disableImportAdditionalData = true;
    $objectWriter->disableImportCollections = true;
    $objectWriter->disableImportEvents = true;
    $objectWriter->disableImportTags = true;
    $objectWriter->disableImportLiterature = true;
    $objectWriter->disableImportHyperlinks = true;
    // $objectWriter->disableImportSeries = true;
    $objectWriter->disableImportObjectRecords = true;
    $objectWriter->disableImportTranscriptions = true;
    $objectWriter->disableImportMarkings = true;
    $objectWriter->disableImportExhibitions = true;
    $objectWriter->disableImportReception = true;
    */

    $objectWriter->importObjectTypeAsTag = true;

    // Pause between objects, passed to usleep() (which takes microseconds).
    // A project-wide IMPORTER_DELAY_PER_OBJECT wins if it is defined; otherwise
    // the parser's own default is used instead of failing on an undefined constant.
    $delayPerObject = defined('IMPORTER_DELAY_PER_OBJECT')
        ? constant('IMPORTER_DELAY_PER_OBJECT')
        : MUEHLENHAUPT_DELAY;

    // Set of inventory numbers already used in this run (number => true),
    // shared across all files so that duplicates can be made unique.
    $importedInvNos = [];

    foreach (MD_STD::scandir($importFolder) as $xmlFile) {

        if (pathinfo($xmlFile, PATHINFO_EXTENSION) !== "csv") continue;

        $outputHandler->toLog("Attempting to load CSV file {$xmlFile}", 2);

        if (!($handle = fopen("{$importFolder}/{$xmlFile}", "r"))) {
            throw new Exception("File {$xmlFile} cannot be opened");
        }

        // try/finally guarantees the handle is released even if a row aborts the import.
        try {

            if (!($fileHeaders = fgetcsv($handle, 5000000, CSV_SEPARATOR))) {
                throw new Exception("Failed to read headers for file $xmlFile");
            }

            $i = 0;
            while ($data = fgetcsv($handle, 5000000, CSV_SEPARATOR)) {

                ++$i;

                if ($i < MUEHLENHAUPT_START_AT_FILE_NO) {
                    continue;
                }

                // fgetcsv() reports a blank line as [null]; there is nothing to import
                // and trim(null) would raise a TypeError under strict_types.
                if ($data === [null]) {
                    continue;
                }

                # if ($i > 20) break;

                $outputHandler->toLog("Starting to process line #{$i}", 2);

                // Create associative array for easier parsing
                $objectData = [];
                foreach ($data as $key => $value) {
                    $objectData[$fileHeaders[$key]] = trim($value);
                }

                // Inventory number: fall back to a generated one, then append
                // underscores until it is unique within this import run.
                $inventory_number = $objectData['O_0002_Objektnummer02'] ?: "Import_obj_" . $i;
                while (isset($importedInvNos[$inventory_number])) {
                    $inventory_number .= '_';
                }
                $importedInvNos[$inventory_number] = true;

                $object = new MDObject(
                    $version['mainDB'],
                    $version['nodaDB'],
                    $version['language'],
                    $institution_id,
                    $inventory_number,
                    $objectData['O_0202_Gegenstand'],
                    $objectData['O_01122_Titel_Jahr'] ?: $inventory_number,
                    $objectData['O_02011_Beschreibung_1'],
                    $outputHandler
                );
                unset(
                    $objectData['O_0002_Objektnummer02'],
                    $objectData['O_02011_Beschreibung_1'],
                    $objectData['O_0202_Gegenstand'],
                    $objectData['O_01122_Titel_Jahr']
                );

                if (!empty($objectData["O_0600_Besitz"])) {
                    $object->set_string("nutzungsrechte", $objectData["O_0600_Besitz"]);
                }
                unset($objectData["O_0600_Besitz"]);

                if (!empty($objectData["P_0206_Technik"])) {
                    $object->set_string("technik2", $objectData["P_0206_Technik"]);
                    $object->set_objekt_material_technik($objectData["P_0206_Technik"]);
                }
                unset($objectData["P_0206_Technik"]);

                if (!empty($objectData["Versicherungswert"])) {
                    $object->set_string("wert2_zahl", $objectData["Versicherungswert"]);
                }
                unset($objectData["Versicherungswert"]);

                if (!empty($objectData["Objekt_Standort"])) {
                    $object->set_string("standort_aktuell", $objectData["Objekt_Standort"]);
                }
                unset($objectData["Objekt_Standort"]);

                // Regular location: place / building / level / shelf code.
                $object->set_string("standort_eigentlich", $objectData["O_0301_Archiv_Ort"] . ' / ' . $objectData["O_0302_Archiv_Haus"] . ' / ' . $objectData["O_0303_Archiv_Ebene"] . ' / ' . $objectData["O_01037_Code_Lager_Regal_2F"]);
                unset($objectData["O_0301_Archiv_Ort"], $objectData["O_0302_Archiv_Haus"], $objectData["O_0303_Archiv_Ebene"], $objectData["O_01037_Code_Lager_Regal_2F"]);

                // The cycle is stored both as "part of" text and as a series.
                if (!empty($objectData['P_0207_Zyklus'])) {
                    $object->set_string("teilvon", $objectData['P_0207_Zyklus']);
                    $object->appendSeriesByName('Zyklus: ' . $objectData['P_0207_Zyklus'], "", $seriesWriter);
                }
                unset($objectData['P_0207_Zyklus']);

                // Groups are a ", "-separated list; each entry becomes a tag.
                if (!empty($objectData['P_0204_Gruppe'])) {
                    foreach (explode(', ', $objectData['P_0204_Gruppe']) as $group) {
                        $object->appendTagByName($group, "", $tagWriter);
                    }
                }
                unset($objectData['P_0204_Gruppe']);

                // Keywords are appended to the description rather than imported as tags.
                if (!empty($objectData['O_0205_Stichworte'])) {
                    $object->append_objekt_beschreibung(PHP_EOL . PHP_EOL . "Stichworte: " . $objectData['O_0205_Stichworte']);
                }
                unset($objectData['O_0205_Stichworte']);

                // Measurements are given as "<width>x<height>".
                // NOTE(review): the parts are not trimmed and an empty field still
                // yields the measurement text " cm" - confirm whether that is wanted.
                $sizes = explode('x', $objectData['O_0113_Größe_BxH_neu']);
                if (!empty($sizes[0])) {
                    $object->set_string("mass2_breite_wert", $sizes[0]);
                }
                if (!empty($sizes[1])) {
                    $object->set_string("mass2_hoehe_wert", $sizes[1]);
                }
                $object->set_length_unit("mass2_breite_einheit", "cm");
                $object->set_length_unit("mass2_hoehe_einheit", "cm");
                $object->set_objekt_masse(implode(' x ', $sizes) . ' cm');
                unset($objectData['O_0113_Größe_BxH_neu']);

                // Year: attach an event only if the time could be resolved to an ID.
                // NOTE(review): the literal 1 is passed as an MDEvent constructor
                // argument - presumably the event type; confirm against MDEvent.
                if (!empty($objectData["P_0111_Jahr"])) {
                    $event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], 1, $outputHandler);
                    $event->set_zeiten_id($objectData["P_0111_Jahr"]);
                    if ($event->get_zeiten_id() !== 0) {
                        $object->appendEvent($event);
                    }
                }
                unset($objectData["P_0111_Jahr"]);

                // Other notes fields
                $notesFields = [
                    'Anmerkung_Übernahme',
                    'Anmerkung_Übernahme_ohne',
                    'Lager_Label_Tabelle',
                ];
                foreach ($notesFields as $fieldName) {
                    if (!empty($objectData[$fieldName])) {
                        $object->append_string("notizen_text1", PHP_EOL . $fieldName . ': ' . $objectData[$fieldName]);
                    }
                    unset($objectData[$fieldName]);
                }

                // Image
                $archiveNo     = $objectData["O_0101_Archivnummer1"] ?? '';
                $imageAttached = false;

                // Only look for an image if a data folder was provided and image
                // import has not been switched off on the writer.
                if ($importImages && !empty($archiveNo) && $objectWriter->disableImportImagesResources !== true) {

                    $imgFolder      = MD_IMPORTER_CONF::$import_dir_files . $dataFolder . "/";
                    $archiveNoPlain = str_replace("-FM", "", $archiveNo);

                    // Candidate file names, in order of precedence.
                    $candidates = [
                        $archiveNo,
                        $archiveNoPlain . '.JPG',
                        $archiveNoPlain . '.jpg',
                        $archiveNo . '.JPG',
                        $archiveNo . '.jpg',
                    ];

                    foreach ($candidates as $candidate) {
                        if (!file_exists($imgFolder . $candidate)) {
                            continue;
                        }
                        $image = new MDImage($version['mainDB'], (string)$object->get_string("objekt_name"), $imgFolder . $candidate);
                        $image->set_image_master_filename($archiveNo);
                        $object->appendImage($image);
                        $imageAttached = true;
                        break;
                    }

                }

                // Without an image, keep the archive number in the notes instead.
                if ($imageAttached === false) {
                    $object->append_string("notizen_text2", $archiveNo);
                }
                unset($objectData["O_0101_Archivnummer1"]);

                // Remove unwanted entries
                unset($objectData['P_Summe']);

                // Anything still left is a column this parser does not know about.
                if (!empty($objectData)) {
                    throw new MDParserIncomplete(var_export($objectData, true));
                }

                $objectWriter->writeObject($object, true, $insertOnly, $outputHandler);

                // Short pause between objects
                usleep($delayPerObject);

            }

        }
        finally {
            fclose($handle);
        }

    }

}