170 lines
6.9 KiB
PHP
170 lines
6.9 KiB
PHP
|
<?php
/**
 * Generic CSV parser. Serves as a template that is adapted for each
 * individual import.
 *
 * @author Joshua Ramon Enslin <joshua@museum-digital.de>
 * @link https://imports.museum-digital.org/parsers/google-arts-and-culture.php
 */
declare(strict_types=1);

// Field delimiter handed to fgetcsv() when the CSV files are read.
const CSV_SEPARATOR = ',';
|
||
|
|
||
|
/**
 * Parse function.
 *
 * Reads every file with the extension "csv" from the import folder. The first
 * line of each file is used as the header row; every following line is turned
 * into an associative array (header => trimmed value, empty values dropped).
 * Rows without an "Inventarnummer" and rows whose inventory number is not
 * found in the `objekt` table for the given institution are skipped. For the
 * remaining rows an MDObject is built from the stored record and handed to
 * MDObjectWriter::writeObject().
 *
 * Any column that is still present in the row after the handled ones have
 * been removed aborts the import with MDParserIncomplete, so that no column
 * is ignored silently.
 *
 * @param array<mixed>     $version        Instance to import into.
 * @param integer          $institution_id Institution to import to.
 * @param non-empty-string $XMLFolder      Folder (below the XML import directory)
 *                                         containing the CSV files to import.
 * @param string           $dataFolder     Data folder.
 * @param integer          $sammlung_id    Collection ID. Optional.
 * @param boolean          $visibility     Import objects to be directly visible?.
 * @param boolean          $insertOnly     If set to true, only new objects are added,
 *                                         old are not updated.
 *
 * @throws MDFileDoesNotExist If the import folder does not exist.
 * @throws Exception          If a CSV file cannot be opened or its header row
 *                            cannot be read.
 * @throws MDParserIncomplete If a row contains columns that are not handled.
 *
 * @return void
 */
function parseImportXML(array $version, int $institution_id, string $XMLFolder, string $dataFolder = "", int $sammlung_id = 0, bool $visibility = false, bool $insertOnly = false): void {

    $importDir = MD_IMPORTER_CONF::$import_dir_xml . "{$XMLFolder}";

    if (!is_dir($importDir)) {
        throw new MDFileDoesNotExist("The folder to import from ($XMLFolder) does not exist.");
    }

    // Images are only meant to be imported if a data folder has been passed.
    // NOTE(review): not used by the active code below; kept for per-import adjustments.
    $importImages = !empty($dataFolder);

    // These parameters are not used by the active code; the assignments only
    // mark them as intentionally ignored.
    $ignore = $visibility;
    $ignore = $sammlung_id;

    // Set up writers
    // NOTE(review): only $objectWriter is used by the active code below. The
    // other writers are kept because their constructors may have side effects
    // and because the commented-out template code refers to them.

    $collectionWriter   = new MDCollectionWriter($version['mainDB']);
    $literatureWriter   = new MDLiteratureWriter($version['mainDB']);
    $linkWriter         = new MDLinkWriter($version['mainDB']);
    $seriesWriter       = new MDSeriesWriter($version['mainDB']);
    $exhibitionWriter   = new MDExhibitionWriter($version['mainDB']);
    $objectRecordWriter = new MDObjectRecordWriter($version['mainDB']);
    $tagWriter          = new MDTagWriter($version['nodaDB']);

    $outputHandler = new MDOutputHandler;
    $outputHandler->setVerbosity(2);

    $objectWriter = new MDObjectWriter($version['mainDB'], $version['nodaDB'], $version['link'], $version['filepath'], $version['dataFolderLink']);

    /*
    $objectWriter->disableImportAdditionalData = true;
     */
    $objectWriter->disableUpdateBaseData = true;
    $objectWriter->disableImportCollections = true;
    $objectWriter->disableImportEvents = true;
    $objectWriter->disableImportTags = true;
    $objectWriter->disableImportLiterature = true;
    $objectWriter->disableImportHyperlinks = true;
    // $objectWriter->disableImportSeries = true;
    $objectWriter->disableImportImagesResources = true;
    $objectWriter->disableImportObjectRecords = true;
    $objectWriter->disableImportTranscriptions = true;
    $objectWriter->disableImportMarkings = true;
    $objectWriter->disableImportExhibitions = true;
    $objectWriter->disableImportReception = true;
    $objectWriter->importObjectTypeAsTag = true;

    // Set up prepared statement for checking if a tag of a given name exists
    // NOTE(review): the statement is only prepared and closed here; no active
    // code executes it.
    $tagNameExistsStmt = $version['nodaDB']->do_prepare("SELECT `tag_id`
        FROM `tag`
        WHERE `tag_name` = ?
        LIMIT 1");

    // try/finally: the prepared statement must be closed even if one of the
    // files aborts the import with an exception.
    try {

        foreach (MD_STD::scandir($importDir) as $xmlFile) {

            if (pathinfo($xmlFile, PATHINFO_EXTENSION) !== "csv") {
                continue;
            }

            $outputHandler->toLog("Attempting to load CSV file {$xmlFile}", 2);

            $handle = fopen("{$importDir}/{$xmlFile}", "r");
            if ($handle === false) {
                throw new Exception("File {$xmlFile} cannot be opened");
            }

            // try/finally: release the file handle on every exit path,
            // including exceptions thrown while processing a row.
            try {

                if (!($fileHeaders = fgetcsv($handle, 5000000, CSV_SEPARATOR))) {
                    throw new Exception("Failed to read headers for file $xmlFile");
                }

                while (($data = fgetcsv($handle, 5000000, CSV_SEPARATOR)) !== false) {

                    // Create associative array for easier parsing
                    $objectData = [];
                    foreach ($data as $key => $value) {
                        // fgetcsv() returns a blank line as [null]. With
                        // strict_types enabled, trim(null) throws a TypeError,
                        // so null fields are skipped; the row then ends up
                        // empty and is skipped by the check below.
                        if ($value === null) {
                            continue;
                        }
                        $cur = trim($value);
                        // empty() also drops the literal value "0".
                        if (empty($cur)) {
                            continue;
                        }
                        $objectData[$fileHeaders[$key]] = $cur;
                    }

                    // Rows without an inventory number cannot be matched.
                    if (empty($objectData['Inventarnummer'])) {
                        continue;
                    }
                    $inventory_number = $objectData['Inventarnummer'];
                    unset($objectData['Inventarnummer'], $objectData['laufende Nummer']);

                    // Look up the stored record of the object.
                    $result = $version['mainDB']->query_by_stmt("SELECT *
                        FROM `objekt`
                        WHERE `institution_id` = ?
                            AND `objekt_inventarnr` = ?", "is", $institution_id, $inventory_number);

                    $origData = $result->fetch_assoc() ?: [];
                    $result->close();

                    // Only objects that already exist are processed.
                    if (empty($origData)) {
                        continue;
                    }

                    // Base data comes from the stored record; the inventory
                    // number serves as fallback for empty values.
                    $object = new MDObject($version['mainDB'], $version['nodaDB'], $version['language'], $institution_id, $inventory_number, $origData["objektart"] ?: $inventory_number, $origData["objekt_name"] ?: $inventory_number, $origData["objekt_beschreibung"] ?: $inventory_number, $outputHandler);

                    /*
                    if (isset($objectData['Standort'])) {
                        $object->set_string("standort_eigentlich", $objectData['Standort'] . ' / ' . $objectData['Zusatz']);
                        unset($objectData['Standort'], $objectData['Zusatz']);
                    }

                    // Object base data
                    $object = new MDObject($version['mainDB'], $version['nodaDB'], $version['language'], $institution_id, $objectData['Inventarnummer'], $objectData["Objektbezeichnung"], $objectData["Titel"] ?: $objectData['Inventarnummer'], $objectData["Bemerkung"], $outputHandler);

                    unset($objectData['Inventarnummer'], $objectData["Objektbezeichnung"], $objectData["Titel"], $objectData["Bemerkung"]);

                    $object->set_objekt_publik($visibility);

                    if (isset($objectData['Breite'])) {
                        $object->set_string("mass2_breite_wert", $objectData['Breite']);
                        $object->set_length_unit("mass2_breite_einheit", $objectData['Maßeinheit']);
                        unset($objectData['Breite'], $objectData['Maßeinheit']);
                    }

                    if (isset($objectData['Höhe'])) {
                        $object->set_string("mass2_hoehe_wert", $objectData['Höhe']);
                        $object->set_length_unit("mass2_hoehe_einheit", $objectData['Maßeinheit2']);
                        unset($objectData['Höhe'], $objectData['Maßeinheit2']);
                    }

                    if (isset($objectData['Standort'])) {
                        $object->set_string("standort_eigentlich", $objectData['Standort']);
                        unset($objectData['Standort']);
                    }

                    unset($objectData['Objektart']);
                    $object->appendSeriesByName("Schultafeln", "Schultafeln", $seriesWriter);

                     */

                    // Every handled column has been unset above. Anything
                    // left over is a column the parser does not cover yet.
                    if (!empty($objectData)) {
                        throw new MDParserIncomplete(var_export($objectData, true));
                    }

                    $newObjectID = $objectWriter->writeObject($object, true, $insertOnly, $outputHandler);

                    // Pause between objects. usleep() takes microseconds; the
                    // duration is whatever IMPORTER_DELAY_PER_OBJECT is set to.
                    usleep(IMPORTER_DELAY_PER_OBJECT);

                }

            }
            finally {
                fclose($handle);
            }

        }

    }
    finally {
        $tagNameExistsStmt->close();
        unset($tagNameExistsStmt);
    }

}
|