This repository has been archived on 2023-01-23. You can view files and clone it, but cannot push or open issues or pull requests.
importer-parsers-archive-2022/parsers/gos.php

95 lines
3.2 KiB
PHP
Raw Permalink Normal View History

2023-01-23 15:21:35 +01:00
<?PHP
/**
* Parser for GOS XML as provided by the MBK (Berlin).
*
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
*/
declare(strict_types = 1);
/**
 * Imports every XML file of an import folder as an object.
 *
 * For each file with an "xml" extension in the folder, the raw XML is made
 * digestible for SimpleXML, wrapped in a GosParser and the resulting object
 * is written using MDObjectWriter.
 *
 * @param array<mixed>     $version        Instance to import into. The keys
 *                                         mainDB, nodaDB, link, filepath,
 *                                         dataFolderLink and language are read.
 * @param integer          $institution_id Institution to import to.
 * @param non-empty-string $XMLFolder      Folder of the XML files to import,
 *                                         relative to the configured XML
 *                                         import directory.
 * @param string           $dataFolder     Data folder.
 * @param integer          $sammlung_id    Collection ID. Optional.
 * @param boolean          $visibility     Import objects to be directly visible?.
 * @param boolean          $insertOnly     If set to true, only new objects are added,
 *                                         old are not updated.
 *
 * @throws MDFileDoesNotExist If the folder to import from does not exist.
 * @throws Exception          If one of the XML files cannot be parsed.
 *
 * @return void
 */
function parseImportXML(array $version, int $institution_id, string $XMLFolder, string $dataFolder = "", int $sammlung_id = 0, bool $visibility = false, bool $insertOnly = false): void {

    // Resolve the import directory once; it is needed for the existence
    // check, the directory listing and for reading each file.
    $importDir = MD_IMPORTER_CONF::$import_dir_xml . "{$XMLFolder}";

    if (!is_dir($importDir)) {
        throw new MDFileDoesNotExist("The folder to import from ($XMLFolder) does not exist.");
    }

    // Set up writers
    $collectionWriter = new MDCollectionWriter($version['mainDB']);
    $seriesWriter     = new MDSeriesWriter($version['mainDB']);
    $literatureWriter = new MDLiteratureWriter($version['mainDB']);
    $tagWriter        = new MDTagWriter($version['nodaDB']);

    $outputHandler = new MDOutputHandler;
    $outputHandler->setVerbosity(2);

    $objectWriter = new MDObjectWriter($version['mainDB'], $version['nodaDB'], $version['link'], $version['filepath'], $version['dataFolderLink']);

    // Directory entries whose position is below $startFile are skipped.
    // Note that $counter counts every directory entry, not only XML files.
    // With the value 0 nothing is skipped.
    $startFile = 0;
    $counter   = 0;

    foreach (MD_STD::scandir($importDir) as $xmlFile) {

        ++$counter;

        if (pathinfo($xmlFile, PATHINFO_EXTENSION) !== "xml") {
            continue;
        }
        if ($counter < $startFile) {
            continue;
        }

        // LIDO contains : in tag names. SimpleXML can't handle those.
        // The "lido:" prefix is dropped and every remaining colon is replaced
        // by "___". Afterwards the colons of URL schemes and of the xmlns / xsi
        // declarations are restored.
        // NOTE(review): colons inside regular text content remain replaced by
        // "___" - confirm that GosParser accounts for this.
        $rawData = MD_STD::file_get_contents("{$importDir}/{$xmlFile}");
        $rawData = strtr(str_replace(":", "___", str_replace("lido:", "", $rawData)), [
            "http___//"  => "http://",
            "https___//" => "https://",
            "xmlns___"   => "xmlns:",
            "xsi___"     => "xsi:",
        ]);

        if (($xmlData = simplexml_load_string($rawData)) === false) {
            throw new Exception("XML couldn't be loaded");
        }

        $lidoObject = new GosParser($version['mainDB'], $version['nodaDB'],
            $version['filepath'],
            $version['language'], $institution_id,
            $xmlData, $dataFolder, $outputHandler,
            $collectionWriter,
            $seriesWriter,
            $literatureWriter,
            $tagWriter,
            $sammlung_id, $visibility,
        );

        // $insertOnly is passed through as given by the caller, so that
        // existing objects are only updated if the caller allows it.
        $objectWriter->writeObject($lidoObject->get_mdobject(), true, $insertOnly, $outputHandler);

        $outputHandler->toLog("Done with object $counter", 2);

        // Pause between objects. The duration (in microseconds) is
        // configured through IMPORTER_DELAY_PER_OBJECT.
        usleep(IMPORTER_DELAY_PER_OBJECT);

    }

}