* @author Stefan Rohde-Enslin * @link https://imports.museum-digital.org/importer/parsers/csvxml.php */
declare(strict_types = 1);

/**
 * Parses a folder name to object group and inventory number.
 *
 * The last path segment is taken as the inventory number, all segments
 * before it are returned as object groups. The literal prefix "jpg " is
 * stripped from every segment.
 *
 * @param string $folder_path Folder path, segments separated by "/".
 *
 * @return array{0: array, 1: string} Object groups and inventory number.
 */
function parseFolderToObjGroupInvNo(string $folder_path):array {

    $parts = explode('/', $folder_path);

    // explode() always returns at least one element, so there is always a
    // last segment to pop. Popping (instead of diffing by value) ensures
    // that exactly the last segment is removed, even if it carries the
    // "jpg " prefix or an earlier segment has the same name.
    $invNo = str_replace('jpg ', '', (string)array_pop($parts));

    $objGroups = [];
    foreach ($parts as $part) {
        $objGroups[] = str_replace('jpg ', '', $part);
    }

    return [$objGroups, $invNo];

}

/**
 * Recurses down a folder directory to list all folders with JPG files in them.
 *
 * Sub-folders are listed before the folder containing them.
 * NOTE(review): this relies on MD_STD::scandir() not returning "." and ".."
 * (otherwise the recursion would not terminate) - confirm.
 *
 * @param string $folder_path Folder path.
 *
 * @return array List of folder paths directly containing at least one JPG.
 */
function getFoldersWithJpgs(string $folder_path):array {

    $output = [];
    $containsJpg = false;

    foreach (MD_STD::scandir($folder_path) as $entryName) {

        $entryPath = $folder_path . '/' . $entryName;

        if (is_dir($entryPath)) {
            $output = array_merge($output, getFoldersWithJpgs($entryPath));
            continue;
        }

        // One JPG is enough to list the folder: skip the MIME sniffing
        // for the remaining files once one has been found.
        if ($containsJpg === false && mime_content_type($entryPath) === 'image/jpeg') {
            $containsJpg = true;
        }

    }

    if ($containsJpg === true) {
        $output[] = $folder_path;
    }

    return $output;

}

/**
 * Extracts the literature reference encoded in an image file name.
 *
 * The reference is the last "_"-separated segment of the file name
 * (without extension), after removing any "_000" parts.
 *
 * @param string $imageFile Path of the image file.
 *
 * @return string Last segment of the file name. May be empty.
 */
function parseLiteratureRefFromImageFile(string $imageFile):string {

    $fileName = pathinfo(trim($imageFile, "_ &%"), PATHINFO_FILENAME);
    $segments = explode('_', str_replace("_000", '', $fileName));

    return (string)end($segments);

}

/**
 * Parse function.
 *
 * Despite the name, this parser does not read XML: it collects all JPG files
 * below the data folder, groups them by inventory number (the part of the
 * file name before the first "_") and writes one object per inventory number
 * with the images attached.
 *
 * @param array            $version        Instance to import into. The keys mainDB, nodaDB,
 *                                         link, filepath, dataFolderLink and language are read.
 * @param integer          $institution_id Institution to import to.
 * @param non-empty-string $XMLFolder      Folder of the XML files to import.
 * @param string           $dataFolder     Data folder. Required for this parser.
 * @param integer          $sammlung_id    Collection ID. Optional.
 * @param boolean          $visibility     Import objects to be directly visible?.
 * @param boolean          $insertOnly     If set to true, only new objects are added,
 *                                         old are not updated.
 *
 * @throws MDFileDoesNotExist If the XML folder does not exist.
 * @throws Exception          If no data folder has been provided.
 *
 * @return void
 */
function parseImportXML(array $version, int $institution_id, string $XMLFolder, string $dataFolder = "", int $sammlung_id = 0, bool $visibility = false, bool $insertOnly = false):void {

    if (!is_dir(MD_IMPORTER_CONF::$import_dir_xml . "{$XMLFolder}")) {
        throw new MDFileDoesNotExist("The folder to import from ($XMLFolder) does not exist.");
    }

    if (empty($dataFolder)) {
        throw new Exception("You need to select a data folder to run image-only imports");
    }

    // Set up writers
    $outputHandler = new MDOutputHandler;
    $outputHandler->setVerbosity(2);

    $objectWriter = new MDObjectWriter($version['mainDB'], $version['nodaDB'], $version['link'], $version['filepath'], $version['dataFolderLink']);
    /*
    $objectWriter->disableUpdateBaseData = true;
    $objectWriter->disableImportAdditionalData = true;
    $objectWriter->disableImportImagesResources = true;
    $objectWriter->disableImportTags = true;
    $objectWriter->disableImportLiterature = true;
    $objectWriter->disableImportHyperlinks = true;
    $objectWriter->disableImportSeries = true;
    $objectWriter->disableImportCollections = true;
    $objectWriter->disableImportObjectRecords = true;
    $objectWriter->disableImportTranscriptions = true;
    $objectWriter->disableImportMarkings = true;
    $objectWriter->disableImportExhibitions = true;
    $objectWriter->disableImportReception = true;
    */

    // Manual knobs for resuming an interrupted import.
    $i = 0;
    $startAtCounter = 0; // Objects with a counter below this value are skipped.
    $foldersToSkip = 0;  // Number of top-level folders to skip when scanning.
    $foldersSkipped = 0;

    $dataRoot = MD_IMPORTER_CONF::$import_dir_files . $dataFolder;

    // Get folders with JPGs and group the JPGs by inventory number.
    $filesByInvNo = [];

    foreach (MD_STD::scandir($dataRoot) as $tFolder) {

        if ($foldersSkipped < $foldersToSkip) {
            echo 'Skipped folder ' . $tFolder . PHP_EOL;
            ++$foldersSkipped;
            continue;
        }

        foreach (getFoldersWithJpgs($dataRoot . '/' . $tFolder) as $folderToImport) {

            foreach (MD_STD::scandir($folderToImport) as $tFile) {

                $filePath = $folderToImport . '/' . $tFile;
                if (mime_content_type($filePath) !== 'image/jpeg') {
                    continue;
                }

                // The inventory number is the file name up to the first underscore.
                $invNo = explode('_', $tFile)[0];
                $filesByInvNo[$invNo][] = $filePath;

            }

        }

    }

    // NOTE(review): $tagWriter is not used below; kept in case constructing
    // it is relied upon elsewhere - confirm and remove.
    $tagWriter = new MDTagWriter($version['nodaDB']);
    $seriesWriter = new MDSeriesWriter($version['mainDB']);
    $literatureWriter = new MDLiteratureWriter($version['mainDB']);

    foreach ($filesByInvNo as $invNo => $imageFiles) {

        if (empty($invNo)) continue;

        // PHP turns integer-like string keys ("123") into integers. Cast back,
        // so that a string is passed on under strict_types.
        $invNo = (string)$invNo;

        ++$i;

        // Skip everything before the configured starting point. The previous
        // condition ($i > $startAtCounter || $i < 20) was true for every
        // object and thus made the rest of this loop unreachable.
        if ($i < $startAtCounter) {
            continue;
        }

        $outputHandler->toLog("Starting to process file #{$i}", 2);

        // Object type and description are hard-coded for this import.
        $object = new MDObject($version['mainDB'], $version['nodaDB'], $version['language'], $institution_id, $invNo, "Handschrift", $invNo, "Handschrift Friedrich von Hardenbergs.", $outputHandler);

        $images = [];
        foreach ($imageFiles as $cur_img) {

            // The image title is the file name without its extension.
            $image_title = pathinfo($cur_img, PATHINFO_FILENAME);

            $image = new MDImage($version['mainDB'], $image_title, $cur_img);
            $image->set_visible(true);
            $images[] = $image;

            $outputHandler->toLog("Added image $image_title", 2);
            usleep(1800);

        }

        // The first image found for the inventory number becomes the main image.
        if (!empty($images)) {
            $images[0]->set_main_image(true);
        }
        foreach ($images as $image) {
            $object->appendImage($image);
        }

        // Flush any pending result sets on both connections before continuing.
        while ($version['mainDB']->more_results()) $version['mainDB']->next_result();
        while ($version['nodaDB']->more_results()) $version['nodaDB']->next_result();

        // Hard-coded tag ID for this import.
        $object->appendTagByID(576);

        // Get object groups to link: they are derived from the folder of the
        // first image, relative to the data folder.
        $folderName = str_replace($dataRoot . '/', "", pathinfo($imageFiles[0], PATHINFO_DIRNAME));
        [$objectGroups] = parseFolderToObjGroupInvNo($folderName);

        foreach ($objectGroups as $objectGroup) {
            $object->appendSeriesByName($objectGroup, $objectGroup, $seriesWriter, ["serie_weitergabe" => "100"]);
            // Folders named after a box ("Kiste") also provide the location.
            if (strpos($objectGroup, 'Kiste') !== false) {
                $object->set_string("standort_eigentlich", $objectGroup);
            }
        }

        if ($sammlung_id !== 0) {
            $object->appendCollectionByID($sammlung_id);
        }

        // Hard-coded event (type 7) linked to a fixed person for all objects.
        $event = new MDEvent($version['mainDB'], $version['nodaDB'], $version['language'], 7, $outputHandler);
        $event->set_persinst_id("Georg Philipp Friedrich von Hardenberg", "1772", "1801");
        $event->set_ereignis_persinst_sicher(true);
        $object->appendEvent($event);

        // Get literature link: encoded in the last part of the image file names.
        $litAlreadyLinked = [];
        foreach ($imageFiles as $imageFile) {

            $litLink = parseLiteratureRefFromImageFile($imageFile);
            if (!$litLink || in_array($litLink, $litAlreadyLinked, true)) {
                continue;
            }
            $litAlreadyLinked[] = $litLink;

            // Only references starting with "HKA" are linked.
            if (substr($litLink, 0, 3) !== 'HKA') {
                continue;
            }

            $litNameEnd = strpos($litLink, ',');
            if ($litNameEnd === false) {
                $object->appendLiteratureByName($litLink, "", "", "", "", $literatureWriter);
                continue;
            }

            // "<name>,<place in literature>": link the literature and add
            // series for both the name and the name with the place.
            $litName = substr($litLink, 0, $litNameEnd);
            $litInLit = substr($litLink, $litNameEnd + 1);

            $object->appendLiteratureByName($litName, "", "", "", $litInLit, $literatureWriter);
            $object->appendSeriesByName($litName, $litName, $seriesWriter, ["serie_weitergabe" => "100"]);
            $object->appendSeriesByName($litName . ' ' . $litInLit, $litName . ' ' . $litInLit, $seriesWriter, ["serie_weitergabe" => "100"]);

        }

        $object->set_objekt_publik($visibility);

        $outputHandler->toLog("Importing file #{$i}, inv. no: $invNo", 2);
        $objectWriter->writeObject($object, true, $insertOnly, $outputHandler);

        usleep(2000);

    }

}