254 lines
9.3 KiB
PHP
254 lines
9.3 KiB
PHP
|
<?PHP
|
||
|
/**
|
||
|
* Parser for XML generated through CSVXML.
|
||
|
*
|
||
|
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
|
||
|
* @author Stefan Rohde-Enslin <s.rohde-enslin@museum-digital.de>
|
||
|
* @link https://imports.museum-digital.org/importer/parsers/csvxml.php
|
||
|
*/
|
||
|
declare(strict_types = 1);
|
||
|
|
||
|
/**
 * Splits a folder path into its object groups and its inventory number.
 *
 * The last path segment is taken as the inventory number, every segment
 * before it as an object group. The marker "jpg " is stripped from all
 * segments.
 *
 * @param string $folder_path Folder path, segments separated by "/".
 *
 * @return array{0: array<string>, 1: string}
 */
function parseFolderToObjGroupInvNo(string $folder_path):array {

    $parts = explode('/', $folder_path);

    // Remove the last segment by position rather than by value: comparing by
    // value (array_diff) fails to remove it once "jpg " has been stripped from
    // it, and wrongly removes earlier segments that happen to carry the same
    // name as the inventory number.
    $invNo = str_replace('jpg ', '', (string)array_pop($parts));

    $objGroups = [];
    foreach ($parts as $part) {
        $objGroups[] = str_replace('jpg ', '', $part);
    }

    return [$objGroups, $invNo];

}
|
||
|
|
||
|
/**
 * Walks a folder tree and collects every folder that directly contains at
 * least one JPEG file (determined by MIME type, not by file extension).
 *
 * Results of sub-folders are listed before the folder that contains them.
 *
 * @param string $folder_path Folder path.
 *
 * @return array<string>
 */
function getFoldersWithJpgs(string $folder_path):array {

    $found = [];
    $containsJpg = false;

    foreach (MD_STD::scandir($folder_path) as $entryName) {

        $entryPath = $folder_path . '/' . $entryName;

        if (!is_dir($entryPath)) {
            // Anything that is not a directory is checked by its MIME type.
            if (mime_content_type($entryPath) === 'image/jpeg') {
                $containsJpg = true;
            }
            continue;
        }

        // Directories are descended into; their results are appended in order.
        foreach (getFoldersWithJpgs($entryPath) as $subFolder) {
            $found[] = $subFolder;
        }

    }

    if ($containsJpg) {
        $found[] = $folder_path;
    }

    return $found;

}
|
||
|
|
||
|
/**
 * Imports objects from a folder tree of JPEG images.
 *
 * Every JPEG found below the data folder is grouped by inventory number (the
 * part of its file name before the first "_"). One object is written per
 * inventory number. It receives its images, one series per folder name on the
 * path to the images, a fixed tag and a fixed event, and literature references
 * derived from the image file names.
 *
 * @param array<mixed>     $version        Instance to import into.
 * @param integer          $institution_id Institution to import to.
 * @param non-empty-string $XMLFolder      Folder of the XML files to import.
 * @param string           $dataFolder     Data folder. Must not be empty,
 *                                         despite the default value.
 * @param integer          $sammlung_id    Collection ID. Optional.
 * @param boolean          $visibility     Import objects to be directly visible?.
 * @param boolean          $insertOnly     If set to true, only new objects are added,
 *                                         old are not updated.
 *
 * @throws MDFileDoesNotExist If the XML folder does not exist.
 * @throws Exception          If no data folder has been passed.
 *
 * @return void
 */
function parseImportXML(array $version, int $institution_id, string $XMLFolder, string $dataFolder = "", int $sammlung_id = 0, bool $visibility = false, bool $insertOnly = false):void {

    if (!is_dir(MD_IMPORTER_CONF::$import_dir_xml . "{$XMLFolder}")) throw new MDFileDoesNotExist("The folder to import from ($XMLFolder) does not exist.");

    if (empty($dataFolder)) {
        throw new Exception("You need to select a data folder to run image-only imports");
    }

    $dataDir = MD_IMPORTER_CONF::$import_dir_files . $dataFolder;

    // Set up writers

    $outputHandler = new MDOutputHandler;
    $outputHandler->setVerbosity(2);

    $objectWriter = new MDObjectWriter($version['mainDB'], $version['nodaDB'], $version['link'], $version['filepath'], $version['dataFolderLink']);

    /*
    Switches available on the object writer for partial imports:

    $objectWriter->disableUpdateBaseData = true;
    $objectWriter->disableImportAdditionalData = true;
    $objectWriter->disableImportImagesResources = true;
    $objectWriter->disableImportTags = true;
    $objectWriter->disableImportLiterature = true;
    $objectWriter->disableImportHyperlinks = true;
    $objectWriter->disableImportSeries = true;
    $objectWriter->disableImportCollections = true;
    $objectWriter->disableImportObjectRecords = true;
    $objectWriter->disableImportTranscriptions = true;
    $objectWriter->disableImportMarkings = true;
    $objectWriter->disableImportExhibitions = true;
    $objectWriter->disableImportReception = true;
    */

    // Manual knobs: objects numbered below $startAtCounter are skipped, as
    // are the first $foldersToSkip top-level folders of the data folder.
    $i = 0;
    $startAtCounter = 0;
    $foldersToSkip = 0;
    $foldersSkipped = 0;

    // Get folders with JPGS and group the images in them by inventory number

    $filesByInvNo = [];

    foreach (MD_STD::scandir($dataDir) as $tFolder) {

        if ($foldersSkipped < $foldersToSkip) {
            echo 'Skipped folder ' . $tFolder . PHP_EOL;
            ++$foldersSkipped;
            continue;
        }

        foreach (getFoldersWithJpgs($dataDir . '/' . $tFolder) as $folderToImport) {

            foreach (MD_STD::scandir($folderToImport) as $tFile) {

                $filePath = $folderToImport . '/' . $tFile;
                if (mime_content_type($filePath) !== 'image/jpeg') {
                    continue;
                }

                // The inventory number is everything before the first "_".
                $invNo = explode('_', $tFile)[0];
                $filesByInvNo[$invNo][] = $filePath;

            }

        }

    }

    // NOTE(review): $tagWriter is not referenced below; it is kept in case its
    // construction has side effects - confirm and remove.
    $tagWriter = new MDTagWriter($version['nodaDB']);
    $seriesWriter = new MDSeriesWriter($version['mainDB']);
    $literatureWriter = new MDLiteratureWriter($version['mainDB']);

    foreach ($filesByInvNo as $invNo => $imageFiles) {

        if (empty($invNo)) continue;

        // PHP stores numeric-string array keys as integers. Cast back so that
        // the inventory number is a string again for the calls below
        // (presumably MDObject expects strings under strict_types - confirm).
        $invNo = (string)$invNo;

        ++$i;
        // Skip only objects before the configured start. The former condition
        // ($i > $startAtCounter || $i < 20) was true for every object, so
        // nothing was ever imported.
        if ($i < $startAtCounter) {
            continue;
        }
        $outputHandler->toLog("Starting to process file #{$i}", 2);

        $object = new MDObject($version['mainDB'], $version['nodaDB'], $version['language'], $institution_id, $invNo, "Handschrift", $invNo, "Handschrift Friedrich von Hardenbergs.", $outputHandler);
        $images = [];

        foreach ($imageFiles as $cur_img) {

            // Image title is the file name without its extension.
            $image_title = pathinfo($cur_img, PATHINFO_FILENAME);
            $image = new MDImage($version['mainDB'], $image_title, $cur_img);
            $image->set_visible(true);
            $images[] = $image;

            $outputHandler->toLog("Added image $image_title", 2);
            usleep(1800);

        }

        // The first image of each object becomes its main image.
        if (!empty($images)) $images[0]->set_main_image(true);
        foreach ($images as $image) $object->appendImage($image);

        // Drain any pending result sets on both connections before continuing.
        while ($version['mainDB']->more_results()) $version['mainDB']->next_result();
        while ($version['nodaDB']->more_results()) $version['nodaDB']->next_result();
        $object->appendTagByID(576);

        // Get object groups to link: they are derived from the folder path
        // (relative to the data folder) of the object's first image.
        $folderName = str_replace($dataDir . '/', "", pathinfo($imageFiles[0], PATHINFO_DIRNAME));
        [$objectGroups] = parseFolderToObjGroupInvNo($folderName);

        foreach ($objectGroups as $objectGroup) {
            $object->appendSeriesByName($objectGroup, $objectGroup, $seriesWriter, ["serie_weitergabe" => "100"]);
            // Groups containing "Kiste" are additionally stored in the
            // "standort_eigentlich" field.
            if (strpos($objectGroup, 'Kiste') !== false) {
                $object->set_string("standort_eigentlich", $objectGroup);
            }
        }

        if ($sammlung_id !== 0) {
            $object->appendCollectionByID($sammlung_id);
        }

        $event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], 7, $outputHandler);
        $event->set_persinst_id("Georg Philipp Friedrich von Hardenberg", "1772", "1801");
        $event->set_ereignis_persinst_sicher(true);
        $object->appendEvent($event);

        // Get literature link: the last "_"-separated part of each image file
        // name (after removing "_000") is used if it starts with "HKA". Each
        // distinct value is linked once per object.
        $litAlreadyLinked = [];
        foreach ($imageFiles as $imageFile) {

            $imageFile = trim($imageFile, "_ &%");
            $imgFileEnd = explode('_', str_replace("_000", '', pathinfo($imageFile, PATHINFO_FILENAME)));
            $litLink = end($imgFileEnd);

            if (!$litLink || in_array($litLink, $litAlreadyLinked, true)) {
                continue;
            }
            $litAlreadyLinked[] = $litLink;

            if (substr($litLink, 0, 3) !== 'HKA') {
                continue;
            }

            $litNameEnd = strpos($litLink, ',');
            if ($litNameEnd === false) {
                $object->appendLiteratureByName($litLink, "", "", "", "", $literatureWriter);
                continue;
            }

            // "<name>,<place in literature>": link the literature entry and
            // add a series for the name and one for name plus place.
            $litName = substr($litLink, 0, $litNameEnd);
            $litInLit = substr($litLink, $litNameEnd + 1);
            $object->appendLiteratureByName($litName, "", "", "", $litInLit, $literatureWriter);
            $object->appendSeriesByName($litName, $litName, $seriesWriter, ["serie_weitergabe" => "100"]);
            $object->appendSeriesByName($litName . ' ' . $litInLit, $litName . ' ' . $litInLit, $seriesWriter, ["serie_weitergabe" => "100"]);

        }

        $object->set_objekt_publik($visibility);
        $outputHandler->toLog("Importing file #{$i}, inv. no: $invNo", 2);
        $objectWriter->writeObject($object, true, $insertOnly, $outputHandler);
        usleep(2000);

    }

}
|