Improve wikidata fetcher

This commit is contained in:
Joshua Ramon Enslin 2023-08-31 16:09:21 +02:00
parent 107a4cd640
commit a68a03e628
Signed by: jrenslin
GPG Key ID: 46016F84501B70AE
2 changed files with 30 additions and 21 deletions

View File

@ -134,7 +134,7 @@ final class NodaWikidataFetcher {
$doc->loadXML('<section>' . trim($input) . '</section>'); $doc->loadXML('<section>' . trim($input) . '</section>');
} }
catch (Exception $e) { catch (Exception $e) {
throw new Exception("Failed to load DOMDocument." . PHP_EOL . $e->getMessage() . PHP_EOL . PHP_EOL . $input); throw new Exception("Failed to load DOMDocument." . PHP_EOL . $e->getMessage() . PHP_EOL . PHP_EOL . '---' . $input . '---');
} }
$list = $doc->getElementsByTagName("style"); $list = $doc->getElementsByTagName("style");
@ -672,7 +672,6 @@ final class NodaWikidataFetcher {
]; ];
} }
# print_r($descs);
} }
@ -1007,7 +1006,7 @@ final class NodaWikidataFetcher {
} }
/** /**
* Gets the current description of a place. * Returns the current description of a place.
* *
* @param integer $onum Place ID. * @param integer $onum Place ID.
* *
@ -1029,6 +1028,29 @@ final class NodaWikidataFetcher {
} }
/**
 * Returns the current description of a tag.
 *
 * Reads the `tag_anmerkung` column of the row in `tag` whose `tag_id`
 * matches the given ID. An empty string is returned when no row is found
 * or when the stored value is NULL.
 *
 * @param integer $tag_id Tag ID.
 *
 * @return string
 */
private function getTagDescription(int $tag_id):string {

    $result = $this->_mysqli_noda->query_by_stmt("SELECT `tag_anmerkung`
        FROM `tag`
        WHERE `tag_id` = ?", "i", $tag_id);

    // Fetch first, then release the result set once for both return paths.
    $cur = $result->fetch_row();
    $result->close();

    // No matching row: there is no description to report.
    if (!$cur) {
        return '';
    }

    // Presumably fetch_row() yields null for a NULL column (mysqli_result
    // semantics - confirm against the query_by_stmt wrapper). Returning
    // null would violate the declared string return type and raise a
    // TypeError, so it is normalized to an empty string here.
    return (string)($cur[0] ?? '');

}
/** /**
* Function for entering base information about a place from wikidata. * Function for entering base information about a place from wikidata.
* *
@ -1239,14 +1261,14 @@ final class NodaWikidataFetcher {
} }
if (isset($data['claims']['P625'])) { if (isset($data['claims']['P625'])) {
$latitude_wd = $data['claims']['P625'][0]['mainsnak']['datavalue']['value']['latitude']; $latitude_wd = \filter_var($data['claims']['P625'][0]['mainsnak']['datavalue']['value']['latitude'], FILTER_VALIDATE_FLOAT);
$longitude_wd = $data['claims']['P625'][0]['mainsnak']['datavalue']['value']['longitude']; $longitude_wd = \filter_var($data['claims']['P625'][0]['mainsnak']['datavalue']['value']['longitude'], FILTER_VALIDATE_FLOAT);
} }
NodaBatchInserter::linkNodaForPlace($this->_mysqli_noda, $onum, $nodaLinks, $erfasst_von); NodaBatchInserter::linkNodaForPlace($this->_mysqli_noda, $onum, $nodaLinks, $erfasst_von);
$this->_mysqli_noda->autocommit(false); $this->_mysqli_noda->autocommit(false);
if (!empty($tgn_id)) { if (!empty($tgn_id) and is_numeric($tgn_id)) {
$updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `orte` $updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `orte`
SET `ort_land` = ? SET `ort_land` = ?
@ -1257,7 +1279,7 @@ final class NodaWikidataFetcher {
unset($updateStmt); unset($updateStmt);
} }
if (!empty($geonames_id)) { if (!empty($geonames_id) and is_numeric($geonames_id)) {
$updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `orte` $updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `orte`
SET `ort_geonames` = ? SET `ort_geonames` = ?
@ -1342,17 +1364,7 @@ final class NodaWikidataFetcher {
$datafromwiki = '"' . $datafromwiki . '" - (Wikipedia (' . $lang . ') ' . date("d.m.Y") . ')'; $datafromwiki = '"' . $datafromwiki . '" - (Wikipedia (' . $lang . ') ' . date("d.m.Y") . ')';
$datafromwiki = str_replace("'", "´", MD_STD::preg_replace_str("/\&\#91\;[0-9]\&\#93\;/", '', $datafromwiki)); $datafromwiki = str_replace("'", "´", MD_STD::preg_replace_str("/\&\#91\;[0-9]\&\#93\;/", '', $datafromwiki));
$cergebnis = $this->_mysqli_noda->query_by_stmt("SELECT `tag_anmerkung` $tag_anmerkung = $this->getTagDescription($tag_id);
FROM `tag`
WHERE `tag_id` = ?", "i", $tag_id);
if (!($cinfo = $cergebnis->fetch_row())) {
$cergebnis->close();
return $output;
}
$cergebnis->close();
$tag_anmerkung = $cinfo[0];
$this->_mysqli_noda->autocommit(false); $this->_mysqli_noda->autocommit(false);

View File

@ -255,9 +255,6 @@ Transclusion expansion time report (%,ms,calls,template)
"Start of parsed Wikipedia text should be:" . PHP_EOL . PHP_EOL . $expected . PHP_EOL . PHP_EOL . 'Real start text is: ' . PHP_EOL . PHP_EOL . substr($output, 0, 250) "Start of parsed Wikipedia text should be:" . PHP_EOL . PHP_EOL . $expected . PHP_EOL . PHP_EOL . 'Real start text is: ' . PHP_EOL . PHP_EOL . substr($output, 0, 250)
); );
} }
/** /**