263 lines
11 KiB
PHP
263 lines
11 KiB
PHP
|
<?PHP
|
||
|
/**
|
||
|
* Parser for the FileMaker CSV exports for the Kurt Mühlenhaupt-Museum in Berlin.
|
||
|
*
|
||
|
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
|
||
|
* @link https://imports.museum-digital.org/parsers/google-arts-and-culture.php
|
||
|
*/
|
||
|
declare(strict_types = 1);
|
||
|
|
||
|
/**
 * Field delimiter passed to fgetcsv() when reading the CSV exports.
 */
const CSV_SEPARATOR = ',';

/**
 * Data rows whose running number (1-based, header line excluded) is lower
 * than this value are skipped. Allows resuming an interrupted import.
 */
const MUEHLENHAUPT_START_AT_FILE_NO = 0;

/**
 * NOTE(review): not referenced by the parse function in this file, which
 * throttles using IMPORTER_DELAY_PER_OBJECT instead. Presumably a delay
 * value - confirm whether it is still needed.
 */
const MUEHLENHAUPT_DELAY = 200;
|
||
|
|
||
|
/**
 * Parse function: imports the CSV exports found in the given import folder.
 *
 * Every file with the extension "csv" in the folder is read row by row. The
 * first line of each file is taken as the list of column names; each further
 * row is mapped to an MDObject and written using MDObjectWriter. Columns are
 * removed from the row once handled, and any column left over at the end
 * aborts the import with MDParserIncomplete, so that no input data is
 * silently dropped.
 *
 * @param array<mixed>     $version        Instance to import into.
 * @param integer          $institution_id Institution to import to.
 * @param non-empty-string $XMLFolder      Folder of the CSV files to import
 *                                         (relative to the configured import directory).
 * @param string           $dataFolder     Data folder containing the image files.
 *                                         If empty, no images are attached.
 * @param integer          $sammlung_id    Collection ID. Optional. Not used by this parser.
 * @param boolean          $visibility     Import objects to be directly visible?
 *                                         Not used by this parser.
 * @param boolean          $insertOnly     If set to true, only new objects are added,
 *                                         old are not updated.
 *
 * @throws MDFileDoesNotExist If the import folder does not exist.
 * @throws MDParserIncomplete If a row contains columns this parser does not handle.
 * @throws Exception          If a CSV file cannot be opened or its header line cannot be read.
 *
 * @return void
 */
function parseImportXML(array $version, int $institution_id, string $XMLFolder, string $dataFolder = "", int $sammlung_id = 0, bool $visibility = false, bool $insertOnly = false): void {

    $importFolder = MD_IMPORTER_CONF::$import_dir_xml . "{$XMLFolder}";

    if (!is_dir($importFolder)) {
        throw new MDFileDoesNotExist("The folder to import from ($XMLFolder) does not exist.");
    }

    // Images can only be attached if a data folder has been provided.
    $importImages = !empty($dataFolder);

    // Both parameters belong to the shared parser signature but are not used
    // here; the assignments only mark them as deliberately ignored.
    $ignore = $visibility;
    $ignore = $sammlung_id;

    // Set up writers

    $collectionWriter   = new MDCollectionWriter($version['mainDB']);
    $literatureWriter   = new MDLiteratureWriter($version['mainDB']);
    $linkWriter         = new MDLinkWriter($version['mainDB']);
    $seriesWriter       = new MDSeriesWriter($version['mainDB']);
    $exhibitionWriter   = new MDExhibitionWriter($version['mainDB']);
    $objectRecordWriter = new MDObjectRecordWriter($version['mainDB']);
    $tagWriter          = new MDTagWriter($version['nodaDB']);

    $outputHandler = new MDOutputHandler;
    $outputHandler->setVerbosity(2);

    $objectWriter = new MDObjectWriter($version['mainDB'], $version['nodaDB'], $version['link'], $version['filepath'], $version['dataFolderLink']);

    // Switches for partial (re-)imports; enable individual lines as needed.
    /*
    $objectWriter->disableImportImagesResources = true;
    $objectWriter->disableUpdateBaseData = true;
    $objectWriter->disableImportAdditionalData = true;
    $objectWriter->disableImportCollections = true;
    $objectWriter->disableImportEvents = true;
    $objectWriter->disableImportTags = true;
    $objectWriter->disableImportLiterature = true;
    $objectWriter->disableImportHyperlinks = true;
    // $objectWriter->disableImportSeries = true;
    $objectWriter->disableImportObjectRecords = true;
    $objectWriter->disableImportTranscriptions = true;
    $objectWriter->disableImportMarkings = true;
    $objectWriter->disableImportExhibitions = true;
    $objectWriter->disableImportReception = true;
    */
    $objectWriter->importObjectTypeAsTag = true;

    // Set of inventory numbers already used in this run (number => true).
    // Kept across files so that duplicates are disambiguated globally.
    $importedInvNos = [];

    foreach (MD_STD::scandir($importFolder) as $xmlFile) {

        if (pathinfo($xmlFile, PATHINFO_EXTENSION) !== "csv") continue;

        $outputHandler->toLog("Attempting to load CSV file {$xmlFile}", 2);

        if (!($handle = fopen($importFolder . "/{$xmlFile}", "r"))) {
            throw new Exception("File {$xmlFile} cannot be opened");
        }

        // The handle must be released even if a row aborts the import.
        try {

            if (!($fileHeaders = fgetcsv($handle, 5000000, CSV_SEPARATOR))) {
                throw new Exception("Failed to read headers for file $xmlFile");
            }

            $i = 0;

            while ($data = fgetcsv($handle, 5000000, CSV_SEPARATOR)) {

                // fgetcsv() reports a blank line as an array holding a single
                // null; there is nothing to import from such a line.
                if ($data === [null]) {
                    continue;
                }

                ++$i;
                if ($i < MUEHLENHAUPT_START_AT_FILE_NO) {
                    continue;
                }

                $outputHandler->toLog("Starting to process line #{$i}", 2);

                // Create associative array for easier parsing. Cells without a
                // matching header end up under the key '' and are reported by
                // the completeness check below.
                $objectData = [];
                foreach ($data as $key => $value) {
                    $objectData[(string)($fileHeaders[$key] ?? '')] = trim((string)$value);
                }
                // Rows shorter than the header line get empty values for the
                // missing columns, so that every expected key exists.
                foreach ($fileHeaders as $header) {
                    if (!isset($objectData[(string)$header])) {
                        $objectData[(string)$header] = '';
                    }
                }

                $inventory_number = $objectData['O_0002_Objektnummer02'] ?: "Import_obj_" . $i;

                // Inventory numbers must be unique: append underscores until
                // the number has not been used before in this run.
                while (isset($importedInvNos[$inventory_number])) {
                    $inventory_number .= '_';
                }
                $importedInvNos[$inventory_number] = true;

                $object = new MDObject($version['mainDB'], $version['nodaDB'], $version['language'], $institution_id, $inventory_number, $objectData['O_0202_Gegenstand'], $objectData['O_01122_Titel_Jahr'] ?: $inventory_number, $objectData['O_02011_Beschreibung_1'], $outputHandler);

                unset($objectData['O_0002_Objektnummer02'], $objectData['O_02011_Beschreibung_1'],
                    $objectData['O_0202_Gegenstand'], $objectData['O_01122_Titel_Jahr']);

                if (!empty($objectData["O_0600_Besitz"])) {
                    $object->set_string("nutzungsrechte", $objectData["O_0600_Besitz"]);
                }
                unset($objectData["O_0600_Besitz"]);

                if (!empty($objectData["P_0206_Technik"])) {
                    $object->set_string("technik2", $objectData["P_0206_Technik"]);
                    $object->set_objekt_material_technik($objectData["P_0206_Technik"]);
                }
                unset($objectData["P_0206_Technik"]);

                if (!empty($objectData["Versicherungswert"])) {
                    $object->set_string("wert2_zahl", $objectData["Versicherungswert"]);
                }
                unset($objectData["Versicherungswert"]);

                if (!empty($objectData["Objekt_Standort"])) {
                    $object->set_string("standort_aktuell", $objectData["Objekt_Standort"]);
                }
                unset($objectData["Objekt_Standort"]);

                // The regular location is assembled from four archive columns.
                $object->set_string("standort_eigentlich", $objectData["O_0301_Archiv_Ort"] . ' / ' . $objectData["O_0302_Archiv_Haus"] . ' / ' . $objectData["O_0303_Archiv_Ebene"] . ' / ' . $objectData["O_01037_Code_Lager_Regal_2F"]);
                unset($objectData["O_0301_Archiv_Ort"], $objectData["O_0302_Archiv_Haus"], $objectData["O_0303_Archiv_Ebene"], $objectData["O_01037_Code_Lager_Regal_2F"]);

                // The cycle is stored both as free text and as a series.
                if (!empty($objectData['P_0207_Zyklus'])) {
                    $object->set_string("teilvon", $objectData['P_0207_Zyklus']);
                    $object->appendSeriesByName('Zyklus: ' . $objectData['P_0207_Zyklus'], "", $seriesWriter);
                }
                unset($objectData['P_0207_Zyklus']);

                // Groups are a ", "-separated list; each entry becomes a tag.
                if (!empty($objectData['P_0204_Gruppe'])) {
                    foreach (explode(', ', $objectData['P_0204_Gruppe']) as $group) {
                        $object->appendTagByName($group, "", $tagWriter);
                    }
                }
                unset($objectData['P_0204_Gruppe']);

                // Keywords are kept as free text at the end of the description
                // instead of being imported as individual tags.
                if (!empty($objectData['O_0205_Stichworte'])) {
                    $object->append_objekt_beschreibung(PHP_EOL . PHP_EOL . "Stichworte: " . $objectData['O_0205_Stichworte']);
                }
                unset($objectData['O_0205_Stichworte']);

                // Size is given as "<width>x<height>"; surrounding whitespace
                // of each part is dropped.
                $rawSize = $objectData['O_0113_Größe_BxH_neu'];
                $sizes   = array_map('trim', explode('x', $rawSize));

                if (!empty($sizes[0])) {
                    $object->set_string("mass2_breite_wert", $sizes[0]);
                }
                if (!empty($sizes[1])) {
                    $object->set_string("mass2_hoehe_wert", $sizes[1]);
                }
                $object->set_length_unit("mass2_breite_einheit", "cm");
                $object->set_length_unit("mass2_hoehe_einheit", "cm");

                // Without a size there is no textual dimension to write
                // (otherwise the field would contain just " cm").
                if ($rawSize !== '') {
                    $object->set_objekt_masse(implode(' x ', $sizes) . ' cm');
                }
                unset($objectData['O_0113_Größe_BxH_neu']);

                if (!empty($objectData["P_0111_Jahr"])) {
                    // NOTE(review): event type 1 is presumably "creation" - confirm.
                    $event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], 1, $outputHandler);
                    $event->set_zeiten_id($objectData["P_0111_Jahr"]);
                    // Only link the event if the year could be resolved to a time ID.
                    if ($event->get_zeiten_id() !== 0) {
                        $object->appendEvent($event);
                    }
                }
                unset($objectData["P_0111_Jahr"]);

                // Other notes fields

                $notesFields = [
                    'Anmerkung_Übernahme',
                    'Anmerkung_Übernahme_ohne',
                    'Lager_Label_Tabelle',
                ];
                foreach ($notesFields as $fieldName) {
                    if (!empty($objectData[$fieldName])) {
                        $object->append_string("notizen_text1", PHP_EOL . $fieldName . ': ' . $objectData[$fieldName]);
                    }
                    unset($objectData[$fieldName]);
                }

                // Image: look the archive number up in the data folder. Images
                // are only attached if a data folder was given and the writer
                // has not been told to skip images. If no image is attached,
                // the archive number is preserved in the notes instead.
                $archiveNumber = $objectData["O_0101_Archivnummer1"] ?? '';
                $imgFilename   = '';

                if (!empty($archiveNumber) && $importImages === true && $objectWriter->disableImportImagesResources !== true) {

                    $imgFolder             = MD_IMPORTER_CONF::$import_dir_files . $dataFolder . "/";
                    $archiveNumberStripped = str_replace("-FM", "", $archiveNumber);

                    // Candidate file names in order of preference.
                    $candidates = [
                        $archiveNumber,
                        $archiveNumberStripped . '.JPG',
                        $archiveNumberStripped . '.jpg',
                        $archiveNumber . '.JPG',
                        $archiveNumber . '.jpg',
                    ];
                    foreach ($candidates as $candidate) {
                        if (file_exists($imgFolder . $candidate)) {
                            $imgFilename = $imgFolder . $candidate;
                            break;
                        }
                    }

                }

                if ($imgFilename !== '') {
                    $image = new MDImage($version['mainDB'], (string)$object->get_string("objekt_name"), $imgFilename);
                    $image->set_image_master_filename($archiveNumber);
                    $object->appendImage($image);
                }
                else {
                    $object->append_string("notizen_text2", $archiveNumber);
                }
                unset($objectData["O_0101_Archivnummer1"]);

                // Remove unwanted entries
                unset($objectData['P_Summe']);

                // Any column still present has not been handled above: abort
                // rather than lose data.
                if (!empty($objectData)) {
                    throw new MDParserIncomplete(var_export($objectData, true));
                }

                $objectWriter->writeObject($object, true, $insertOnly, $outputHandler);

                // Throttle between objects (usleep() takes microseconds).
                usleep(IMPORTER_DELAY_PER_OBJECT);

            }

        }
        finally {
            fclose($handle);
        }

    }

}
|