116 lines
3.9 KiB
PHP
116 lines
3.9 KiB
PHP
<?PHP
|
|
/**
 * Parser for LIDO XML as provided by the Museumsstiftung Post und Telekommunikation.
 *
 * @author Joshua Ramon Enslin <joshua@museum-digital.de>
 * @link http://www.lido-schema.org
 */
|
|
declare(strict_types = 1);
|
|
|
|
/**
 * Imports all LIDO XML files of one folder below MD_IMPORTER_CONF::$import_dir_xml.
 *
 * Every file with the extension "xml" is read, its namespace prefixes are
 * flattened so that SimpleXML can address the elements by plain name, and each
 * <lido> record found is parsed by LidoParserMspt and passed on to
 * MDObjectWriter::writeObject().
 *
 * @param array<mixed>     $version        Instance to import into. The keys mainDB,
 *                                         nodaDB, link, filepath, dataFolderLink
 *                                         and language are read.
 * @param integer          $institution_id Institution to import to.
 * @param non-empty-string $XMLFolder      Folder of the XML files to import.
 * @param string           $dataFolder     Data folder. Currently not used by
 *                                         this parser; kept for a uniform
 *                                         signature.
 * @param integer          $sammlung_id    Collection ID. Optional.
 * @param boolean          $visibility     Import objects to be directly visible?.
 * @param boolean          $insertOnly     If set to true, only new objects are added,
 *                                         old are not updated.
 *
 * @throws MDFileDoesNotExist If the folder to import from does not exist.
 * @throws Exception          If a file cannot be loaded as XML.
 *
 * @return void
 */
function parseImportXML(array $version, int $institution_id, string $XMLFolder, string $dataFolder = "", int $sammlung_id = 0, bool $visibility = false, bool $insertOnly = false): void {

    $importDir = MD_IMPORTER_CONF::$import_dir_xml . $XMLFolder;
    if (!is_dir($importDir)) {
        throw new MDFileDoesNotExist("The folder to import from ($XMLFolder) does not exist.");
    }

    // Set up writers
    $collectionWriter = new MDCollectionWriter($version['mainDB']);
    $seriesWriter     = new MDSeriesWriter($version['mainDB']);
    $literatureWriter = new MDLiteratureWriter($version['mainDB']);
    $tagWriter        = new MDTagWriter($version['nodaDB']);

    $outputHandler = new MDOutputHandler();
    $outputHandler->setVerbosity(2);

    $objectWriter = new MDObjectWriter($version['mainDB'], $version['nodaDB'], $version['link'], $version['filepath'], $version['dataFolderLink']);

    // Position of the current directory entry. Non-XML entries are counted as
    // well; the value is only used in log messages.
    $counter = 0;
    foreach (MD_STD::scandir($importDir) as $xmlFile) {

        ++$counter;
        if (pathinfo($xmlFile, PATHINFO_EXTENSION) !== "xml") {
            continue;
        }

        // LIDO contains : in tag names. SimpleXML can't handle those.
        // The "lido:" prefix is dropped, every remaining colon becomes "___",
        // and the colons needed for well-formed XML and http URLs are restored.
        // NOTE(review): only "http://" is restored; other colons in text
        // content (e.g. "https://") stay as "___". Confirm that
        // LidoParserMspt expects this.
        $rawData = MD_STD::file_get_contents("{$importDir}/{$xmlFile}");
        $rawData = strtr(str_replace(":", "___", str_replace("lido:", "", $rawData)), [
            "http___//" => "http://", "xmlns___" => "xmlns:", "xsi___" => "xsi:"
        ]);

        if (($xmlData = simplexml_load_string($rawData)) === false) {
            throw new Exception("XML couldn't be loaded");
        }

        foreach ($xmlData->lido as $lidoNode) {

            try {

                $parser = new LidoParserMspt($version['mainDB'], $version['nodaDB'],
                                             $version['filepath'],
                                             $version['language'], $institution_id,
                                             $lidoNode, $outputHandler,
                                             $collectionWriter,
                                             $seriesWriter,
                                             $literatureWriter,
                                             $tagWriter,
                                             $sammlung_id, $visibility,
                );

                // $insertOnly is passed through as given by the caller.
                $objectWriter->writeObject($parser->get_mdobject(), true, $insertOnly, $outputHandler);

            }
            catch (MDImageCorrupt $e) {
                // A corrupt image is logged; the import continues with the next record.
                $outputHandler->toLog("Corrupt image with object $counter", 0);
            }

            $outputHandler->toLog("Done with object $counter", 2);

            // Throttle the import: usleep() takes microseconds.
            usleep(IMPORTER_DELAY_PER_OBJECT);

        }

    }

}
|
|
|