2021-03-17 16:10:49 +01:00
< ? PHP
/**
* This file contains tools for fetching data from Wikidata .
*
* @ file
* @ author Joshua Ramon Enslin < joshua @ museum - digital . de >
*/
declare ( strict_types = 1 );
/**
* Helps fetching information from Wikidata .
*/
2021-03-18 01:23:45 +01:00
final class NodaWikidataFetcher {
2021-03-17 16:10:49 +01:00
2024-07-19 00:49:04 +02:00
private const WIKIDATA_FETCH_HEADERS = [
2021-08-12 15:33:48 +02:00
'User-Agent: museum-digital-bot GND-to-Wikidata PHP/' . PHP_VERSION ,
'Accept: application/sparql-results+json' ,
];
2024-07-19 00:49:04 +02:00
public const LANGUAGES_MAIN_DESC = [ 'de' , 'da' , 'en' , 'es' , 'fr' , 'hu' , 'it' , 'jp' , 'nl' , 'pt' , 'ru' , 'sv' , 'sk' , 'uk' , 'zh' ];
public const LANGUAGES_TO_CHECK = [ 'ar' , 'bg' , 'bn' , 'cs' , 'da' , 'de' , 'el' , 'en' , 'es' , 'fa' , 'fi' , 'fr' , 'ha' , 'he' , 'hi' , 'hu' , 'id' , 'it' , 'ja' , 'ka' , 'ko' , 'nl' , 'pl' , 'pt' , 'ro' , 'ru' , 'sv' , 'sk' , 'sw' , 'ta' , 'th' , 'tl' , 'tr' , 'uk' , 'ur' , 'vi' , 'zh' ];
2021-03-17 16:10:49 +01:00
2024-07-19 00:49:04 +02:00
public const LANGUAGES_TO_CAPITALIZE = [ " cs " , " da " , " de " , " en " , " es " , " fr " , " fi " , " id " , " it " , " nl " , " pl " , " pt " , " ru " , " sv " , 'sk' , " tl " , " tr " ];
2021-03-17 16:10:49 +01:00
2024-07-19 00:59:21 +02:00
public const P_IDS_NODA_TAGS = [
2021-03-17 22:06:08 +01:00
'gnd' => 'P227' ,
'lcsh' => 'P244' ,
'aat' => 'P1014' ,
'iconclass' => 'P1256' ,
'osm' => 'P402' ,
'loc' => 'P244' ,
'nomisma' => 'P2950' ,
'cona' => 'P1669' ,
" rkd " => " P650 " ,
" ulan " => " P245 " ,
" viaf " => " P214 " ,
" bnf " => " P268 " ,
" pim " => " P3973 " ,
" ndl " => " P349 " , // National Diet Library (Japan)
" npg " => " P1816 " , // "National" portrait gallery
" bne " => " P950 " , // Espana National Library
2021-04-07 11:33:49 +02:00
" orcid " => " P496 " ,
2021-03-17 22:06:08 +01:00
];
2024-07-19 00:49:04 +02:00
private const WIKIPEDIA_REMOVE_LITERALS = [
2021-03-17 22:06:08 +01:00
" <p>Si vous disposez d'ouvrages ou d'articles de référence ou si vous " ,
'<p><b>En pratique :</b> <a href="/wiki/Wikip%C3%A9dia:Citez_vos_sources#Qualité_des_sources" title="Wikipédia:Citez vos sources">Quelles sources sont attendu' ,
'<pVous pouvez partager vos connaissances en l’ améliorant (' ,
'<p class="mw-empty-elt">' ,
'<p><small>Géolocalisation sur la carte' ,
'<p><b>Koordinaatit:</b>' ,
'<p><span class="executeJS" data-gadgetname="ImgToggle"></span' ,
'<p><span class="imgtoggleboxTitle">' ,
//'<div class="mw-parser-output"><p>',
'<p><span style="font-size: small;"><span id="coordinates">' ,
'<p><span></span></p>' ,
'<p><a rel="nofollow" class="external text" href="https://maps.gs' ,
'<p><span class="plainlinks nourlexpansion"><a class="external text" href="//tools.wmflabs.org/geohack/geohack.php?langu' ,
'<p><span style="display:none">' ,
'<p> </p>' ,
'<p><span class="geo noexcerpt"' ,
2021-03-17 16:10:49 +01:00
];
2024-07-19 00:49:04 +02:00
public const RETRIEVAL_MODES_ACCEPTED = [
2022-04-18 13:19:00 +02:00
'list' ,
'add' ,
'keep' ,
'replace' ,
];
2024-07-19 00:49:04 +02:00
public const RETRIEVAL_MODES_DEFAULT = 'list' ;
2022-04-18 13:19:00 +02:00
/** @var 'list'|'add'|'keep'|'replace' */
private string $_retrievalMode = self :: RETRIEVAL_MODES_DEFAULT ;
2021-03-17 16:10:49 +01:00
/** @var MDMysqli */
private MDMysqli $_mysqli_noda ;
2023-08-31 15:38:12 +02:00
/**
 * Builds the URL of the Wikipedia "parse" API call that returns the rendered
 * lead section (section 0) of a page as JSON.
 *
 * @param string $lang       Language / Wikipedia edition to query (used as subdomain).
 * @param string $searchTerm Title of the page to load.
 *
 * @return non-empty-string
 */
private static function _getWikipediaApiLink(string $lang, string $searchTerm): string {
    // Both dynamic parts are URL-encoded. The query string has to be free of
    // stray whitespace and must spell out "&section=0" literally.
    return 'https://' . urlencode($lang) . '.wikipedia.org/w/api.php?action=parse&page='
        . urlencode($searchTerm) . '&prop=text&section=0&format=json';
}
2024-10-03 15:56:31 +02:00
/**
 * Collects the Wikipedia sitelinks of a Wikidata entity for every language
 * listed in LANGUAGES_MAIN_DESC.
 *
 * A language is only included if both the sitelink URL and title are present
 * and are strings. Spaces in the title are replaced by underscores.
 *
 * NOTE(review): sitelinks are looked up as "<lang>wiki"; LANGUAGES_MAIN_DESC
 * lists 'jp' while LANGUAGES_TO_CHECK lists 'ja' - confirm which key Wikidata
 * uses for Japanese.
 *
 * @param array<mixed> $data Wikidata API output.
 *
 * @return array<string, array{url: string, title: string}>
 */
private static function _getWikipediaLinksFromWikidataOutput(array $data): array {
    $links = [];

    foreach (self::LANGUAGES_MAIN_DESC as $langCode) {
        $sitelink = $data['sitelinks'][$langCode . 'wiki'] ?? null;
        if (!isset($sitelink['url'], $sitelink['title'])) {
            continue;
        }

        $url   = $sitelink['url'];
        $title = $sitelink['title'];
        if (is_string($url) && is_string($title)) {
            $links[$langCode] = [
                'url'   => $url,
                'title' => str_replace(' ', '_', $title),
            ];
        }
    }

    return $links;
}
/**
 * Parses the coordinates (claim P625) from Wikidata API output.
 *
 * Only the first P625 claim is read. An empty array is returned if the claim
 * is missing, carries no datavalue, or if latitude / longitude cannot be
 * validated as floats.
 *
 * @param array<mixed> $data Wikidata API output.
 *
 * @return array{}|array{longitude: float, latitude: float}
 */
private static function _getPlaceCoordinatesFromWikidata(array $data): array {
    // A claim may exist without a datavalue, so the whole path is resolved
    // defensively instead of only checking for the P625 key.
    $value = $data['claims']['P625'][0]['mainsnak']['datavalue']['value'] ?? null;
    if (!is_array($value) || !isset($value['latitude'], $value['longitude'])) {
        return [];
    }

    $latitude_wd  = \filter_var($value['latitude'], FILTER_VALIDATE_FLOAT);
    $longitude_wd = \filter_var($value['longitude'], FILTER_VALIDATE_FLOAT);
    if ($latitude_wd === false || $longitude_wd === false) {
        return [];
    }

    return [
        'longitude' => $longitude_wd,
        'latitude'  => $latitude_wd,
    ];
}
/**
 * Loads the lead section of a Wikipedia page through the parse API and
 * returns it in cleaned form.
 *
 * An empty string is returned if the response is not valid JSON or does not
 * contain the rendered text (parse.text.*).
 *
 * @param string $lang  Language to load in.
 * @param string $title Title to load from.
 *
 * @return string
 */
private static function _getCleanedWikipediaSnippet(string $lang, string $title): string {
    $response = MD_STD::runCurl(self::_getWikipediaApiLink($lang, $title), 10000);

    $decoded = json_decode($response, true);
    // Resolve the full path in one go: error responses carry no "parse" key,
    // and a "parse" block without text must not trigger undefined-index access.
    $html = is_array($decoded) ? ($decoded['parse']['text']['*'] ?? null) : null;
    if (!is_string($html)) {
        return '';
    }

    return self::_cleanWikidataInput($html);
}
/**
 * Loads the data of a single entity from Wikidata's Special:EntityData
 * endpoint.
 *
 * @param string $wikidata_id Wikidata Q-ID.
 *
 * @return array<mixed>
 *
 * @throws MDhttpFailedException If the response cannot be decoded or does not
 *                               contain a non-empty entry for the requested ID.
 */
private static function _getWikidataEntity(string $wikidata_id): array {
    $url  = 'https://www.wikidata.org/wiki/Special:EntityData/' . urlencode($wikidata_id) . '.json';
    $data = json_decode(MD_STD::runCurl($url, 10000), true);

    // Covers undecodable JSON, scalar JSON, a missing entity and an empty one.
    $entity = is_array($data) ? ($data['entities'][$wikidata_id] ?? null) : null;
    if (!is_array($entity) || empty($entity)) {
        throw new MDhttpFailedException('Failed fetching from Wikidata. Try again later.');
    }

    return $entity;
}
2024-10-03 15:03:38 +02:00
/**
 * Turns the external-ID claims of a Wikidata result into MDNodaLink entries.
 *
 * The returned list always starts with the link to Wikidata itself. For every
 * property in P_IDS_NODA_TAGS the first claim's value is used, provided the
 * vocabulary is linkable for the given target type.
 *
 * @param 'tag'|'persinst'|'place' $target      Target vocabulary type.
 * @param string                   $wikidata_id Wikidata ID.
 * @param array<mixed>             $data        Wikidata result.
 *
 * @return list<MDNodaLink>
 */
public function _getNodaLinksFromWikidataResult(string $target, string $wikidata_id, array $data): array {
    $allowedRepositories = match ($target) {
        'tag'      => MDNodaRepositoriesSet::REPOSITORIES_TAG,
        'persinst' => MDNodaRepositoriesSet::REPOSITORIES_ACTOR,
        'place'    => MDNodaRepositoriesSet::REPOSITORIES_PLACE,
    };

    $links = [new MDNodaLink(MDNodaRepository::wikidata, $wikidata_id)];

    foreach (self::P_IDS_NODA_TAGS as $repoKey => $propertyId) {
        // Skips both missing claims and claims without a datavalue.
        if (empty($data['claims'][$propertyId][0]['mainsnak']['datavalue'])) {
            continue;
        }
        $externalId = $data['claims'][$propertyId][0]['mainsnak']['datavalue']['value'];

        // Wikidata has a single property for the Library of Congress authority
        // files; work out which of the two local repositories the ID belongs to.
        if (in_array($repoKey, ['loc', 'lcsh'], true)) {
            $repoKey = $this->_determineLocRefMode($externalId);
            if ($repoKey === '') {
                continue;
            }
        }

        if (in_array($repoKey, $allowedRepositories, true)) {
            $links[] = new MDNodaLink(MDNodaRepository::fromString($repoKey), $externalId);
        }
    }

    return $links;
}
2023-09-29 16:20:53 +02:00
/**
 * Decides whether a Library of Congress ID is a LOC ID, an LCSH ID or
 * neither.
 *
 * This is necessary since Wikidata only knows one type of link to the LOC
 * authority files, while museum-digital knows two. "loc" is tried first,
 * then "lcsh".
 *
 * @param string $url LOC ID to check.
 *
 * @return 'loc'|'lcsh'|''
 */
private function _determineLocRefMode(string $url): string {
    $candidates = [
        'loc'  => MDNodaRepository::loc,
        'lcsh' => MDNodaRepository::lcsh,
    ];

    foreach ($candidates as $mode => $repository) {
        try {
            if ($repository->validateId($url) !== false) {
                return $mode;
            }
        }
        catch (MDgenericInvalidInputsException | MDInvalidNodaLinkException | MDInvalidNodaLink $e) {
            // A validation failure only means the ID does not belong to this
            // repository; continue with the next candidate.
        }
    }

    return '';
}
2023-08-31 15:38:12 +02:00
/**
 * Reduces an HTML snippet to the plain text of its paragraphs.
 *
 * Everything before the first "<p" and after the last "</p>" is cut off, the
 * remainder is parsed as XML, <style>, <table> and <ol> elements are removed,
 * a leading paragraph mentioning "geohack" is dropped and the text content of
 * all remaining paragraphs is joined with blank lines in between.
 *
 * @param string $input Input string.
 *
 * @return string
 *
 * @throws Exception If parsing the snippet raises an exception.
 */
private static function _cleanWikidataInputHtml(string $input): string {
    // Clean off anything before the first <p. A match at offset 0 needs no cut.
    $pStartPos = strpos($input, '<p');
    if ($pStartPos !== false && $pStartPos > 0) {
        $input = substr($input, $pStartPos);
    }
    // Clean off anything after the last </p>.
    $pEndPos = strrpos($input, '</p>');
    if ($pEndPos !== false && $pEndPos > 0) {
        $input = substr($input, 0, $pEndPos + 4);
    }

    $doc = new DOMDocument();
    try {
        $doc->loadXML('<section>' . trim($input) . '</section>');
    }
    catch (Exception $e) {
        throw new Exception(
            'Failed to load DOMDocument.' . PHP_EOL . $e->getMessage() . PHP_EOL . PHP_EOL . '---' . $input . '---',
            0,
            $e,
        );
    }

    // The node list is live: removing item 0 shifts the next match to index 0.
    foreach (['style', 'table', 'ol'] as $tagName) {
        $nodes = $doc->getElementsByTagName($tagName);
        while ($nodes->length > 0) {
            $node = $nodes->item(0);
            if ($node === null || $node->parentNode === null) {
                break;
            }
            $node->parentNode->removeChild($node);
        }
    }

    // Drop a leading paragraph that mentions geohack.
    $firstP = $doc->getElementsByTagName('p')->item(0);
    if ($firstP !== null && $firstP->parentNode !== null) {
        $firstPhtml = $doc->saveHTML($firstP);
        if ($firstPhtml !== false && str_contains($firstPhtml, 'geohack')) {
            $firstP->parentNode->removeChild($firstP);
        }
    }

    $paragraphs = [];
    foreach ($doc->getElementsByTagName('p') as $p) {
        $paragraphs[] = trim($p->textContent);
    }

    // Paragraphs end up separated by an empty line.
    return str_replace(PHP_EOL, PHP_EOL . PHP_EOL, trim(implode(PHP_EOL, $paragraphs)));
}
/**
 * Removes numeric source markers ([0] to [99]) from a description text.
 *
 * @param string $input Input string.
 *
 * @return string
 */
private static function _cleanSourceBracketsOffTranslation(string $input): string {
    // The key has to be exactly "[<number>]" without padding, otherwise no
    // marker in the text is ever matched.
    $bracketsToRemove = [];
    for ($i = 0; $i < 100; $i++) {
        $bracketsToRemove["[{$i}]"] = '';
    }

    return strtr($input, $bracketsToRemove);
}
/**
 * Cleans contents parsed from Wikipedia (or plain Wikidata labels and
 * descriptions) into sanitized plain text.
 *
 * Steps, in order:
 *  - Surrounding double quotes are trimmed and every entry of
 *    WIKIPEDIA_REMOVE_LITERALS is replaced.
 *  - Input starting with "<" is handled by _cleanWikidataInputHtml(), cut
 *    at the first blank line found at or after offset 600 (if longer than
 *    600 characters), stripped of source brackets and normalized in its
 *    whitespace. The result of MD_STD_IN::sanitize_text() is returned.
 *  - Any other input goes through the string-based path below: leading
 *    infobox tables and paragraphs are cut, tags are stripped,
 *    ".mw-parser-output ... }" CSS residue is removed, whitespace is
 *    normalized, known boilerplate is removed, overly long text is cut and
 *    HTML entities are decoded before sanitizing.
 *
 * NOTE(review): several string literals in this copy of the file appear to
 * have had their whitespace altered; the notes below mark the places where
 * this affects the logic. Confirm against the upstream file.
 *
 * @param string $input Input string.
 *
 * @return string
 */
private static function _cleanWikidataInput ( string $input ) : string {
$input = trim ( $input , '"' );
foreach ( self :: WIKIPEDIA_REMOVE_LITERALS as $tToRemove ) $input = str_replace ( $tToRemove , " " , $input );
// HTML input: use the DOM-based cleaner.
if ( substr ( $input , 0 , strlen ( '<' )) === '<' ) {
$input = self :: _cleanWikidataInputHtml ( $input );
// NOTE(review): the length check counts characters (mb_strlen) while
// strpos() / substr() below work on byte offsets.
if ( mb_strlen ( $input ) > 600 ) {
if ( strpos ( $input , PHP_EOL . PHP_EOL , 600 ) !== false ) {
$input = substr ( $input , 0 , strpos ( $input , PHP_EOL . PHP_EOL , 600 ));
}
}
$input = self :: _cleanSourceBracketsOffTranslation ( $input );
$input = str_replace ( " \t " , " " , $input );
// Remove newlines with ensuing spaces
while ( strpos ( $input , PHP_EOL . " " ) !== false ) {
$input = str_replace ( PHP_EOL . " " , PHP_EOL , $input );
}
// Collapse three consecutive newlines into two until none are left
while ( strpos ( $input , PHP_EOL . PHP_EOL . PHP_EOL ) !== false ) {
$input = str_replace ( PHP_EOL . PHP_EOL . PHP_EOL , PHP_EOL . PHP_EOL , $input );
}
return MD_STD_IN :: sanitize_text ( $input );
}
// Everything below handles input that does not start with "<".
$input = str_replace ( PHP_EOL , '' , $input );
if ( empty ( $input )) return " " ;
// Remove infobox tables specifically
$firstParagraphPosition = strpos ( $input , '<p' , 1 );
$currentSearchPos = strpos ( $input , " <table> " );
if ( $currentSearchPos !== false && $currentSearchPos < $firstParagraphPosition ) {
if (( $tableEndPos = strpos ( $input , " </table> " )) !== false ) {
// Continue at the first paragraph that starts after the table's end.
if (( $pStartPos = strpos ( $input , '<p' , $tableEndPos + 6 )) !== false ) {
$input = substr ( $input , $pStartPos );
}
}
}
// Remove leftover unnecessary paragraphs before actual content
$removeFirstParagraph = false ;
$firstParagraphPosition = strpos ( $input , '<p' , 1 );
foreach ([ " </table> " , " <img " ] as $tagPart ) {
$currentSearchPos = strpos ( $input , $tagPart );
if ( $currentSearchPos !== false && $currentSearchPos < $firstParagraphPosition ) {
$removeFirstParagraph = true ;
break ;
}
}
if ( $removeFirstParagraph === true ) {
$input = substr ( $input , $firstParagraphPosition ? : 0 );
}
// Turn paragraph ends into newlines before the tags are stripped.
$input = str_replace ( '</p>' , '</p>' . PHP_EOL . PHP_EOL . PHP_EOL , $input );
# $input = str_replace('?/i', '', $input);
$input = strip_tags ( $input );
# for ($i = 150; $i < 1000; $i++) $input = str_replace("&#$i;", " ", $input);
// Cut out everything from ".mw-parser-output" up to the next "}".
// The counter limits this to 30 removals.
$i = 0 ;
while ( strpos ( $input , " .mw-parser-output " ) !== false and strpos ( $input , " } " , strpos ( $input , " .mw-parser-output " )) !== false ) {
$part1 = substr ( $input , 0 , strpos ( $input , " .mw-parser-output " ));
$part2 = substr ( $input , strpos ( $input , " } " , strpos ( $input , " .mw-parser-output " )) + 1 );
$input = $part1 . $part2 ;
++ $i ;
if ( $i === 30 ) break ;
}
$input = self :: _cleanSourceBracketsOffTranslation ( $input );
$input = str_replace ( " \t " , " " , $input );
// Remove double whitespaces
// NOTE(review): as written here, the searched literal and its replacement
// are identical, so this loop cannot make progress once it matches.
// Presumably the needle is two spaces - confirm against upstream.
while ( strpos ( $input , " " ) !== false ) {
$input = str_replace ( " " , " " , $input );
}
// Remove newlines with ensuing spaces
while ( strpos ( $input , PHP_EOL . " " ) !== false ) {
$input = str_replace ( PHP_EOL . " " , PHP_EOL , $input );
}
// Collapse three consecutive newlines into two until none are left
while ( strpos ( $input , PHP_EOL . PHP_EOL . PHP_EOL ) !== false ) {
$input = str_replace ( PHP_EOL . PHP_EOL . PHP_EOL , PHP_EOL . PHP_EOL , $input );
}
// Boilerplate sentences that are removed wherever they occur.
$stableToRemove = [
" Vous pouvez partager vos connaissances en l’ améliorant (comment ?) selon les recommandations des projets correspondants. " ,
];
foreach ( $stableToRemove as $tToRemove ) $input = str_replace ( $tToRemove , " " , $input );
// Markers after which the rest of the text is discarded.
$endings = [
" StubDenne artikel om et vandløb " ,
];
foreach ( $endings as $ending ) {
if ( strpos ( $input , $ending ) !== false ) $input = substr ( $input , 0 , strpos ( $input , $ending ));
}
$input = trim ( $input );
// Cut off overly long articles
if ( mb_strlen ( $input ) > 600 ) {
if ( strpos ( $input , PHP_EOL . PHP_EOL , 600 ) !== false ) {
$input = trim ( substr ( $input , 0 , strpos ( $input , PHP_EOL . PHP_EOL , 600 )));
}
}
if ( empty ( $input )) return '' ;
// Remove entity-encoded source brackets, then replace apostrophes.
// NOTE(review): the character class matches exactly one digit, so markers
// with two or more digits are not removed here.
$input = str_replace ( " ' " , " ´ " , MD_STD :: preg_replace_str ( " / \ & \ #91 \ ;[0-9] \ & \ #93 \ ;/ " , '' , $input ));
$input = html_entity_decode ( $input );
return MD_STD_IN :: sanitize_text ( $input );
}
/**
 * Public wrapper around _cleanWikidataInput(), intended for tests only.
 *
 * @param string $input Input string.
 *
 * @return string
 *
 * @throws Exception If called outside of the CLI SAPI.
 */
public static function cleanWikidataInput(string $input): string {
    if (PHP_SAPI !== 'cli') {
        throw new Exception('Use this function only for testing');
    }

    return self::_cleanWikidataInput($input);
}
2022-04-18 13:19:00 +02:00
/**
 * Sets the retrieval mode.
 *
 * @param string $retrievalMode New retrieval mode to set. Must be one of
 *                              RETRIEVAL_MODES_ACCEPTED.
 *
 * @return void
 *
 * @throws Exception If the mode is not an accepted retrieval mode.
 */
public function setRetrievalMode(string $retrievalMode): void {
    if (!in_array($retrievalMode, self::RETRIEVAL_MODES_ACCEPTED, true)) {
        throw new Exception(
            'Retrieval mode not in list of accepted retrieval modes: ' . implode(',', self::RETRIEVAL_MODES_ACCEPTED)
        );
    }

    $this->_retrievalMode = $retrievalMode;
}
2022-03-05 13:58:18 +01:00
/**
 * Validates a Wikidata ID. A Wikidata ID must start with a capital Q and
 * consist of nothing but decimal digits after it.
 *
 * @param string $wikidata_id Input ID to validate.
 *
 * @return void
 *
 * @throws MDgenericInvalidInputsException If the ID is malformed.
 */
public static function validateWikidataId(string $wikidata_id): void {
    if (!str_starts_with($wikidata_id, 'Q')) {
        throw new MDgenericInvalidInputsException('Wikidata IDs start with Q');
    }

    // is_numeric() would also accept floats, exponents, signs and leading
    // whitespace ("Q1.5", "Q1e3", "Q-5", "Q 12"); only plain digits are valid.
    if (preg_match('/\A[0-9]+\z/', substr($wikidata_id, 1)) !== 1) {
        throw new MDgenericInvalidInputsException('Wikidata IDs are numeric following the Q');
    }
}
2021-03-18 01:23:45 +01:00
/**
 * Attempts to determine a Wikidata ID from a provided URL.
 *
 * Links to Wikidata itself are resolved directly from the URL. Links to a
 * Wikipedia page are resolved through getWikidataIdFromWikipedia(). An empty
 * string is returned if no ID can be determined.
 *
 * @param non-empty-string $linkUrl Link to a page.
 *
 * @return string
 *
 * @throws MDExpectedException If $linkUrl is not a valid URL.
 */
public static function getWikidataIdFromLink(string $linkUrl): string {
    if (filter_var($linkUrl, FILTER_VALIDATE_URL) === false) {
        throw new MDExpectedException('Invalid URL');
    }

    $wikidataPrefixes = [
        'http://www.wikidata.org/entity/',
        'https://www.wikidata.org/entity/',
        'https://www.wikidata.org/wiki/',
    ];
    foreach ($wikidataPrefixes as $prefix) {
        if (str_contains($linkUrl, $prefix)) {
            $output = self::getWikidataIdFromWikidataLink($linkUrl);
            if ($output !== '') {
                return $output;
            }
            // The result does not depend on which prefix matched.
            break;
        }
    }

    if (str_contains($linkUrl, '.wikipedia.org/')) {
        $output = self::getWikidataIdFromWikipedia($linkUrl);
        if ($output !== '') {
            return $output;
        }
    }

    return '';
}
/**
 * Extracts the Wikidata ID from a link to a Wikidata page.
 *
 * The ID is the last path segment of the URL. A query string or fragment is
 * ignored. An empty string is returned for URLs that do not point to
 * Wikidata's /wiki/ or /entity/ paths.
 *
 * @param string $linkUrl Link to a Wikidata page.
 *
 * @return string
 */
public static function getWikidataIdFromWikidataLink(string $linkUrl): string {
    $wikidataPrefixes = [
        'https://www.wikidata.org/wiki/',
        'https://www.wikidata.org/entity/',
        'http://www.wikidata.org/entity/',
    ];

    $isWikidataLink = false;
    foreach ($wikidataPrefixes as $prefix) {
        if (str_contains($linkUrl, $prefix)) {
            $isWikidataLink = true;
            break;
        }
    }
    if ($isWikidataLink === false) {
        return '';
    }

    // Cut off "?..." and "#..." so that they do not end up in the ID.
    $withoutSuffix = substr($linkUrl, 0, strcspn($linkUrl, '?#'));

    $parts = explode('/', trim($withoutSuffix, '/ '));
    return (string)end($parts);
}
/**
 * Attempts to determine a Wikidata ID from a link to a Wikipedia page.
 *
 * The page is downloaded and the link inside the element with the ID
 * "t-wikibase" is read. The last path segment of that link is returned if it
 * starts with "Q". In all other cases an empty string is returned.
 *
 * @param non-empty-string $linkUrl Link to a Wikipedia page.
 *
 * @return string
 */
public static function getWikidataIdFromWikipedia(string $linkUrl): string {
    if (!str_contains($linkUrl, '.wikipedia.org/')) {
        return '';
    }

    $wikipedia_cont = MD_STD::runCurl($linkUrl);
    if (empty($wikipedia_cont)) {
        return '';
    }

    // Silence libxml parser errors while loading. The previous setting is
    // restored on every path, including a failed load.
    $previousErrorSetting = libxml_use_internal_errors(true);
    try {
        $doc    = new DOMDocument();
        $loaded = $doc->loadHTML($wikipedia_cont);
    }
    finally {
        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorSetting);
    }
    if ($loaded === false) {
        return '';
    }

    $wikidataLinkLi = $doc->getElementById('t-wikibase');
    if ($wikidataLinkLi === null) {
        return '';
    }

    // The first child may be a text node (whitespace), which has no
    // attributes. Fall back to the first <a> inside the list item then.
    $wikidataLink = $wikidataLinkLi->firstChild;
    if (!($wikidataLink instanceof DOMElement) || !$wikidataLink->hasAttribute('href')) {
        $wikidataLink = $wikidataLinkLi->getElementsByTagName('a')->item(0);
    }
    if (!($wikidataLink instanceof DOMElement)) {
        return '';
    }

    $t_wikibase = $wikidataLink->getAttribute('href');
    if ($t_wikibase === '') {
        return '';
    }

    $wikidata_id_end = strrpos($t_wikibase, '/');
    if ($wikidata_id_end === false) {
        return '';
    }

    $wikidata_id = trim(substr($t_wikibase, $wikidata_id_end + 1), '/');
    return str_starts_with($wikidata_id, 'Q') ? $wikidata_id : '';
}
2021-08-12 15:33:48 +02:00
/**
 * Runs a SPARQL query against the Wikidata SPARQL endpoint.
 *
 * @param string $sparqlQuery Query string.
 *
 * @return array<mixed>
 *
 * @throws MDhttpFailedException If the response cannot be decoded into an array.
 */
public static function sparqlQuery(string $sparqlQuery): array {
    $url    = 'https://query.wikidata.org/sparql?query=' . urlencode($sparqlQuery);
    $result = MD_STD::runCurl($url, 100000000, self::WIKIDATA_FETCH_HEADERS);

    // json_decode() returns null for empty or non-JSON responses. Returning
    // that would violate the declared return type.
    $decoded = json_decode($result, true);
    if (!is_array($decoded)) {
        throw new MDhttpFailedException('Failed fetching from Wikidata. Try again later.');
    }

    return $decoded;
}
/**
 * Formulates a SPARQL query string for fetching an entity from Wikidata
 * based on an external ID.
 *
 * @param string $repoName   Name of the repository.
 * @param string $externalId ID in the external repository.
 * @param string $repoPId    Optional P-ID of the external repository. Needed for
 *                           Geonames and TGN, obsolete otherwise. It is inserted
 *                           into the query verbatim.
 *
 * @return string
 *
 * @throws MDmainEntityNotExistentException If no P-ID is given and the
 *                                          repository name is unknown.
 */
public static function formulateWikidataQueryByExtId(string $repoName, string $externalId, string $repoPId = ''): string {
    if (empty($repoPId)) {
        if (empty(self::P_IDS_NODA_TAGS[$repoName])) {
            throw new MDmainEntityNotExistentException(
                'Unknown external repository. The following repositories are known with their Wikidata ID: '
                . implode(', ', array_keys(self::P_IDS_NODA_TAGS))
            );
        }
        $repoPId = self::P_IDS_NODA_TAGS[$repoName];
    }

    // The ID becomes a SPARQL string literal. Escape it so that quotes or
    // backslashes in the ID cannot terminate the literal.
    $escapedId = strtr($externalId, [
        '\\' => '\\\\',
        '"'  => '\\"',
        "\n" => '\\n',
        "\r" => '\\r',
        "\t" => '\\t',
    ]);

    return 'SELECT ?id ?idLabel WHERE {' . "\n"
        . '    ?id wdt:' . $repoPId . ' "' . $escapedId . '" .' . "\n"
        . '    SERVICE wikibase:label {' . "\n"
        . '        bd:serviceParam wikibase:language "en" .' . "\n"
        . '    }' . "\n"
        . '}';
}
/**
 * Gets the Wikidata ID based on a result from Wikidata's SPARQL endpoint.
 *
 * An ID is only returned if the result contains exactly one binding. It is
 * the part of the binding's "id" value after the last slash.
 *
 * @param array<mixed> $queryResult Query result.
 *
 * @return string
 */
public static function readWikidataIdFromSparqlResult(array $queryResult): string {
    // Error responses may lack results.bindings; count() must not be called on null.
    $bindings = $queryResult['results']['bindings'] ?? null;
    if (!is_array($bindings) || count($bindings) !== 1) {
        return '';
    }

    $wikidataLink = $bindings[0]['id']['value'] ?? '';
    if (!is_string($wikidataLink) || $wikidataLink === '') {
        return '';
    }

    $endSlashPos = strrpos($wikidataLink, '/');
    if ($endSlashPos === false) {
        return '';
    }

    return substr($wikidataLink, $endSlashPos + 1);
}
/**
 * Queries Wikidata by an external repository's ID and returns the matching
 * Q-ID if there is any.
 *
 * @param string $repoName   Name of the repository.
 * @param string $externalId ID in the external repository.
 * @param string $repoPId    Optional P-ID of the external repository. Needed for
 *                           Geonames and TGN, obsolete otherwise.
 *
 * @return string
 */
public static function getWikidataIdByExternalId(string $repoName, string $externalId, string $repoPId = ''): string {
    // Pass the caller's P-ID through unchanged. Assigning '' inside the call
    // would discard it and break lookups for repositories that are not
    // listed in P_IDS_NODA_TAGS.
    $sparqlQueryString = self::formulateWikidataQueryByExtId($repoName, $externalId, $repoPId);
    $queryResult       = self::sparqlQuery($sparqlQueryString);

    return self::readWikidataIdFromSparqlResult($queryResult);
}
2021-03-17 16:10:49 +01:00
/**
 * Gets translation source Wikipedia pages from Wikidata.
 *
 * For every requested language that has a label and a complete Wikipedia
 * sitelink (URL and title), the API link for loading the page and the link
 * to the page itself are returned.
 *
 * @param array<string> $checkagainstLanguage The languages to check against.
 * @param array<mixed>  $data                 Data fetched from Wikidata.
 *
 * @return array{0: array<string, non-empty-string>, 1: array<string, string>}
 */
public static function getWikidataWikipediaTranslationSources(array $checkagainstLanguage, array $data): array {
    $languagesToFetch = [];
    $wikilinks        = [];

    foreach ($checkagainstLanguage as $lang) {
        if (empty($data['labels'][$lang])) {
            continue;
        }

        // Check URL and title before reading them, so that an incomplete
        // sitelink is skipped instead of causing undefined-index access.
        $sitelink = $data['sitelinks'][$lang . 'wiki'] ?? null;
        if (empty($sitelink) || !isset($sitelink['url'], $sitelink['title'])) {
            continue;
        }
        if (!is_string($sitelink['url']) || !is_string($sitelink['title'])) {
            continue;
        }

        $wikilinkterm            = str_replace(' ', '_', $sitelink['title']);
        $languagesToFetch[$lang] = self::_getWikipediaApiLink($lang, $wikilinkterm);
        $wikilinks[$lang]        = $sitelink['url'];
    }

    return [$languagesToFetch, $wikilinks];
}
2022-11-14 00:51:56 +01:00
/**
 * Loads translations from Wikipedia pages through Wikidata and then merges
 * them with Wikidata's own translations into a usable array.
 *
 * For each requested language the cleaned Wikipedia lead section is
 * preferred as description. If there is no usable Wikipedia text, the
 * Wikidata label and description are used instead, with an empty link.
 * Languages with neither are left out.
 *
 * @param array<string> $checkagainstLanguage The languages to check against.
 * @param array<mixed>  $data                 Data fetched from Wikidata.
 *
 * @return array<string, array{label: string, description: string, link: string}>
 *
 * @throws MDExpectedException If the requests cannot be initialized.
 */
public static function listTranslationsFromWikidataWikipedia(array $checkagainstLanguage, array $data): array {
    [$languagesToFetch, $wikilinks] = self::getWikidataWikipediaTranslationSources($checkagainstLanguage, $data);
    if (empty($languagesToFetch)) {
        return [];
    }

    try {
        $contents = MD_STD::runCurlMulti($languagesToFetch, 10000);
    }
    catch (TypeError $e) {
        throw new MDExpectedException('Failed to initialize a request. Try pressing F5 to run the requests again.');
    }

    $output = [];
    foreach ($checkagainstLanguage as $lang) {
        $hasWikipediaSource = !empty($languagesToFetch[$lang])
            && !empty($data['sitelinks'][$lang . 'wiki'])
            && !empty($wikilinks[$lang]);

        if ($hasWikipediaSource) {
            // Extract the rendered text defensively: the response may be
            // empty, invalid JSON or an error object without "parse".
            $tDescription = '';
            if (!empty($contents[$lang])) {
                $decoded      = json_decode($contents[$lang], true);
                $descFromWiki = is_array($decoded) ? ($decoded['parse']['text']['*'] ?? null) : null;
                if (is_scalar($descFromWiki)) {
                    $tDescription = (string)$descFromWiki;
                }
            }

            if ($tDescription !== '' && !empty($desc_cleaned = self::_cleanWikidataInput($tDescription))) {
                $output[$lang] = [
                    'label'       => self::_cleanWikidataInput((string)$data['labels'][$lang]['value']),
                    'description' => '"' . $desc_cleaned . '" - (' . $data['labels'][$lang]['language'] . '.wikipedia.org ' . date('d.m.Y') . ')',
                    'link'        => $wikilinks[$lang],
                ];
                continue;
            }
        }

        // Fallback: use Wikidata's own label and description.
        if (!empty($data['labels'][$lang]['value']) && !empty($data['descriptions'][$lang])) {
            $output[$lang] = [
                'label'       => self::_cleanWikidataInput((string)$data['labels'][$lang]['value']),
                'description' => self::_cleanWikidataInput((string)($data['descriptions'][$lang]['value'] ?? '')),
                'link'        => '',
            ];
        }
    }

    return $output;
}
2021-03-17 16:10:49 +01:00
/**
 * Stores a description fetched from Wikipedia as the short description
 * (persinst_kurzinfo) of an actor.
 *
 * The text is wrapped in double quotes and suffixed with a source note
 * containing the language and the current date. If the actor already has a
 * description that does not start with "GND", the retrieval mode decides:
 *  - "add":     the new text is appended to the existing one,
 *  - "keep":    the existing text stays untouched,
 *  - "replace": the existing text is overwritten,
 *  - otherwise: an HTML page offering these three choices is printed and
 *               the script exits.
 * If there is no such description, the new text is written directly.
 * Unless the script exited, the edit metadata (persinst_erfasst_am,
 * persinst_erfasst_von) is updated afterwards - also in "keep" mode.
 *
 * @param integer $persinst_id  Person ID.
 * @param string  $wikidata_id  Wikidata ID.
 * @param string  $datafromwiki Data fetched from Wikipedia.
 * @param string  $preflang     The user's currently used language.
 * @param string  $lang         Currently queried language.
 * @param string  $erfasst_von  User who adds the info.
 *
 * @return boolean Whether the description was written.
 *
 * @throws Exception If there is no actor with the given ID.
 */
2024-10-03 15:56:31 +02:00
public function retrievePersinstDescFromWikipedia ( int $persinst_id , string $wikidata_id , string $datafromwiki , string $preflang , string $lang , string $erfasst_von ) : bool {
2021-03-17 16:10:49 +01:00
$output = false ;
// Wrap the text in quotes and append the source note.
$datafromwiki = '"' . $datafromwiki . '" - (Wikipedia (' . $lang . ') ' . date ( 'd.m.Y' ) . ')' ;
$cergebnis = $this -> _mysqli_noda -> query_by_stmt ( " SELECT `persinst_kurzinfo`, `persinst_anzeigename` AS `display_name`
FROM `persinst`
WHERE `persinst_id` = ? " , " i " , $persinst_id );
2022-09-15 21:29:07 +02:00
if ( ! ( $cinfo = $cergebnis -> fetch_row ())) {
throw new Exception ( " There is no actor of ID # " . $persinst_id );
}
$cergebnis -> close ();
$persinst_kurzinfo = $cinfo [ 0 ];
$display_name = $cinfo [ 1 ];
2021-03-17 16:10:49 +01:00
// Update persinst table
$updatePersinstStmt = $this -> _mysqli_noda -> do_prepare ( " UPDATE `persinst`
SET `persinst_kurzinfo` = ?
WHERE `persinst_id` = ? " );
2022-09-15 21:29:07 +02:00
// An existing description that does not start with "GND" is only changed
// according to the retrieval mode.
if ( ! empty ( $persinst_kurzinfo ) and substr ( $persinst_kurzinfo , 0 , 3 ) !== 'GND' ) {
2021-03-17 16:10:49 +01:00
2022-04-18 13:19:00 +02:00
switch ( $this -> _retrievalMode ) {
case " add " :
2021-03-17 16:10:49 +01:00
2022-09-15 21:29:07 +02:00
$newDesc = $persinst_kurzinfo . PHP_EOL . PHP_EOL . $datafromwiki ;
2021-03-17 16:10:49 +01:00
2022-04-18 13:19:00 +02:00
$updatePersinstStmt -> bind_param ( " si " , $newDesc , $persinst_id );
$updatePersinstStmt -> execute ();
$output = true ;
break ;
2021-03-17 16:10:49 +01:00
2022-04-18 20:45:32 +02:00
case " keep " :
break ;
2022-04-18 13:19:00 +02:00
case " replace " :
2021-03-17 16:10:49 +01:00
2022-04-18 13:19:00 +02:00
$updatePersinstStmt -> bind_param ( " si " , $datafromwiki , $persinst_id );
$updatePersinstStmt -> execute ();
2021-03-17 16:10:49 +01:00
$output = true ;
2022-04-18 13:19:00 +02:00
break ;
default :
// Any other mode: print a page showing the old and the new text with
// links for keeping, replacing or adding, then stop the script.
// NOTE(review): the existing description, the new text and the Wikidata ID
// are concatenated into the HTML output without escaping in this block -
// confirm that they are escaped elsewhere or escape them here.
2021-03-17 16:10:49 +01:00
$tlLoader = new MDTlLoader ( " wiki_getter_persinst " , $preflang );
echo self :: generateHTMLHeadForWikidataFetcher ( $lang );
2022-09-15 21:29:07 +02:00
echo self :: generateWikidataFetcherHeader ( $tlLoader , " " , $display_name );
2021-03-17 16:10:49 +01:00
echo '
< p class = " alert icons iconsAlert " > Es gibt schon einen Eintrag im Beschreibungsfeld </ p >
< div class = " wikiReplaceTTile " >
< h3 > Bisher vorhanden </ h3 >
2022-09-15 21:29:07 +02:00
< p > ' . nl2br($persinst_kurzinfo) . ' </ p >
2021-03-17 16:10:49 +01:00
</ div >
< div class = " wikiReplaceTTile " >
< h3 > Jetzt gefunden </ h3 >< p > ' . $datafromwiki . ' </ p >
</ div >
< a href = " get_wikidata_for_persinst.php?wikidata_id=' . $wikidata_id . write_get_vars(['suchbegriff', 'lang', 'persinst_id']) . '&keep=keep " class = " buttonLike icons iconsPin " > Keep old entry </ a > ' ;
echo '<br><a href="get_wikidata_for_persinst.php?wikidata_id=' . $wikidata_id . write_get_vars ([ 'suchbegriff' , 'lang' , 'persinst_id' ]) . '&keep=replace" class="buttonLike icons iconsPinOff">Replace with new entry</a>' ;
echo '<br><a href="get_wikidata_for_persinst.php?wikidata_id=' . $wikidata_id . write_get_vars ([ 'suchbegriff' , 'lang' , 'persinst_id' ]) . '&keep=add" class="buttonLike icons iconsPlusOne">Keep old and add new entry</a><br><br><br>' ;
2022-04-18 13:19:00 +02:00
exit ;
2021-03-17 16:10:49 +01:00
}
}
else {
// No description so far (or one starting with "GND"): write the new text.
$updatePersinstStmt -> bind_param ( " si " , $datafromwiki , $persinst_id );
$updatePersinstStmt -> execute ();
$output = true ;
}
$updatePersinstStmt -> close ();
// Update edit metadata
$updatePersinstEditInfoStmt = $this -> _mysqli_noda -> do_prepare ( " UPDATE `persinst`
SET `persinst_erfasst_am` = NOW (),
`persinst_erfasst_von` = ?
WHERE `persinst_id` = ? " );
$updatePersinstEditInfoStmt -> bind_param ( " si " , $erfasst_von , $persinst_id );
$updatePersinstEditInfoStmt -> execute ();
$updatePersinstEditInfoStmt -> close ();
return $output ;
}
/**
 * Fills in an actor's missing birth year, death year and gender from Wikidata claims.
 *
 * Each of the three columns is only written if it currently holds an empty
 * string; existing values are never overwritten. Years are taken from the
 * first P569 (birth) / P570 (death) claim. Claims with precision 7 (century)
 * are skipped for both dates, since they do not identify a usable year.
 * Gender is taken from the first P21 claim.
 *
 * @param array<mixed> $data        Data loaded from Wikidata.
 * @param integer      $persinst_id Actor ID.
 *
 * @throws MDmainEntityNotExistentException If the actor cannot be loaded.
 * @throws Exception                        If the P21 claim carries a Q-ID that is not handled here.
 *
 * @return void
 */
public function enterPersinstBirthDeathDatesFromWikidata(array $data, int $persinst_id): void {

    $result = $this->_mysqli_noda->query_by_stmt("SELECT `persinst_geburtsjahr`,
            `persinst_sterbejahr`, `persinst_gender`
        FROM `persinst`
        WHERE `persinst_id` = ?", "i", $persinst_id);
    $actor_dates = $result->fetch_assoc();
    // Close the result on every path, including the failure case below.
    $result->close();

    if (!$actor_dates) {
        throw new MDmainEntityNotExistentException("Failed to fetch actor information");
    }

    // Returns the year of the first claim of a date property, or null if the
    // claim is missing, has no time value or is only precise to the century.
    $yearFromClaim = static function (string $property) use ($data) {
        $value = $data['claims'][$property]['0']['mainsnak']['datavalue']['value'] ?? [];
        if (empty($value['time'])) {
            return null;
        }
        if (!empty($value['precision']) && (int)$value['precision'] === 7) {
            return null;
        }
        return self::wikidataBirthDeathToYear($value['time']);
    };

    // Runs a single-column update for the current actor.
    $writeValue = function (string $query, string $types, $value) use ($persinst_id): void {
        $updateStmt = $this->_mysqli_noda->do_prepare($query);
        $updateStmt->bind_param($types, $value, $persinst_id);
        $updateStmt->execute();
        $updateStmt->close();
    };

    if ($actor_dates['persinst_geburtsjahr'] === '') {
        // Try to get birth date
        $birth_date = $yearFromClaim('P569');
        if (!empty($birth_date)) {
            $writeValue("UPDATE `persinst`
                SET `persinst_geburtsjahr` = ?
                WHERE `persinst_id` = ?
                LIMIT 1", "ii", $birth_date);
        }
    }

    if ($actor_dates['persinst_sterbejahr'] === '') {
        // Try to get death date
        $death_date = $yearFromClaim('P570');
        if (!empty($death_date)) {
            $writeValue("UPDATE `persinst`
                SET `persinst_sterbejahr` = ?
                WHERE `persinst_id` = ?
                LIMIT 1", "ii", $death_date);
        }
    }

    if ($actor_dates['persinst_gender'] === '') {
        // Try to get gender
        $wikidata_gender_id = $data['claims']['P21']['0']['mainsnak']['datavalue']['value']['id'] ?? '';
        if (!empty($wikidata_gender_id)) {
            switch ($wikidata_gender_id) {
                case "Q6581097": // male
                case "Q44148":   // male organism
                case "Q2449503": // transgender man
                    $wikidata_gender = "male";
                    break;
                case "Q6581072": // female
                case "Q1052281": // transgender female
                case "Q43445":   // female organism
                    $wikidata_gender = "female";
                    break;
                case "Q48270":
                    $wikidata_gender = "other";
                    break;
                default:
                    throw new Exception("Unknown gender: Q-ID is " . $wikidata_gender_id);
            }

            $writeValue("UPDATE `persinst`
                SET `persinst_gender` = ?
                WHERE `persinst_id` = ?
                LIMIT 1", "si", $wikidata_gender);
        }
    }

}
/**
 * Synchronizes an actor with a Wikidata entity.
 *
 * Fetches the entity, tries to store a description taken from Wikipedia
 * (first in the user's language, then in each of LANGUAGES_MAIN_DESC until
 * one has been entered), fills in missing birth/death/gender data, links
 * further norm data identifiers, stores translations and logs the edit.
 *
 * @param string  $lang        The user's selected used language.
 * @param string  $wikidata_id Wikidata ID.
 * @param integer $persinst_id Actor ID.
 * @param string  $erfasst_von User name who's currently editing.
 *
 * @return void
 */
public function retrievePersinstInfoFromWikidataID(string $lang, string $wikidata_id, int $persinst_id, string $erfasst_von) {

    self::validateWikidataId($wikidata_id);

    $data = self::_getWikidataEntity($wikidata_id);

    // Get links to wikipedia
    $wikilinks = self::_getWikipediaLinksFromWikidataOutput($data);

    // The user's language is tried first, the main description languages
    // serve as fallbacks.
    $alreadyEntered = false;
    foreach (array_merge([$lang], self::LANGUAGES_MAIN_DESC) as $cur_lang) {

        if ($alreadyEntered === true) break;
        if (!isset($wikilinks[$cur_lang])) continue;

        // The article title belongs to the Wikipedia of $cur_lang, so the
        // snippet has to be requested from that same language edition.
        $datafromwiki = self::_getCleanedWikipediaSnippet($cur_lang, $wikilinks[$cur_lang]['title']);
        if (!empty($datafromwiki)) {
            $alreadyEntered = $this->retrievePersinstDescFromWikipedia($persinst_id, $wikidata_id, $datafromwiki, $lang, $cur_lang, $erfasst_von);
        }

    }

    $this->enterPersinstBirthDeathDatesFromWikidata($data, $persinst_id);

    // Get links to other norm data sources
    if (!empty($nodaLinks = $this->_getNodaLinksFromWikidataResult('persinst', $wikidata_id, $data))) {
        NodaBatchInserter::linkNodaForPersinst($this->_mysqli_noda, $persinst_id, $nodaLinks, $erfasst_von);
    }

    $this->getWikidataTranslationsForPersinst($data, $persinst_id);

    NodaLogEdit::logPersinstEdit($this->_mysqli_noda, $persinst_id, "wikidata-fetcher", $erfasst_von, 'update', 'synchronize');

}
2024-10-03 16:36:30 +02:00
/**
 * Retrieves only norm data links for an actor from Wikidata.
 *
 * The ID is passed through validateWikidataId(), the entity is fetched and
 * any norm data links found in it are handed to the batch inserter.
 *
 * @param string  $wikidata_id Wikidata ID.
 * @param integer $persinst_id Actor ID.
 * @param string  $erfasst_von User name who's currently editing.
 *
 * @return void
 */
public function retrievePersinstNormDataLinksFromWikidataID(string $wikidata_id, int $persinst_id, string $erfasst_von) {

    self::validateWikidataId($wikidata_id);

    $entity      = self::_getWikidataEntity($wikidata_id);
    $linksToSave = $this->_getNodaLinksFromWikidataResult('persinst', $wikidata_id, $entity);

    // Nothing to link
    if (empty($linksToSave)) {
        return;
    }

    NodaBatchInserter::linkNodaForPersinst($this->_mysqli_noda, $persinst_id, $linksToSave, $erfasst_von);

}
/**
 * Retrieves only norm data links for a place from Wikidata.
 *
 * The ID is passed through validateWikidataId(), the entity is fetched and
 * any norm data links found in it are handed to the batch inserter.
 *
 * @param string  $wikidata_id Wikidata Q-ID.
 * @param integer $onum        Place ID.
 * @param string  $erfasst_von User name of the current user.
 *
 * @return void
 */
public function retrievePlaceNormDataLinksFromWikidataID(string $wikidata_id, int $onum, string $erfasst_von) {

    self::validateWikidataId($wikidata_id);

    $entity      = self::_getWikidataEntity($wikidata_id);
    $linksToSave = $this->_getNodaLinksFromWikidataResult('place', $wikidata_id, $entity);

    // Nothing to link
    if (empty($linksToSave)) {
        return;
    }

    NodaBatchInserter::linkNodaForPlace($this->_mysqli_noda, $onum, $linksToSave, $erfasst_von);

}
2021-03-17 16:10:49 +01:00
/**
 * Stores the translations of an actor that can be derived from a Wikidata entity.
 *
 * The translations are listed by listTranslationsFromWikidataWikipedia() and
 * written in a single batch insert. Nothing is written if none are found.
 *
 * @param array<mixed> $data          Entity fetched from wikidata.
 * @param integer      $persinst_id   Actor ID.
 * @param string[]     $checkForLangs Languages to check for. Defaults to all
 *                                    languages generally loaded by the wikidata fetcher.
 *
 * @return void
 */
public function getWikidataTranslationsForPersinst(array $data, int $persinst_id, array $checkForLangs = self::LANGUAGES_TO_CHECK): void {

    $found = self::listTranslationsFromWikidataWikipedia($checkForLangs, $data);
    if (empty($found)) {
        return;
    }

    // One row per language, in the shape expected by the batch inserter.
    $rows = [];
    foreach ($found as $langCode => $entry) {
        $row = [
            'persinst_id' => $persinst_id,
            'lang'        => $langCode,
        ];
        $row['name']        = $entry['label'];
        $row['description'] = $entry['description'];
        $row['link']        = $entry['link'];
        $rows[] = $row;
    }

    NodaBatchInserter::insertPersinstTranslations($this->_mysqli_noda, $rows);

}
2023-08-31 15:38:12 +02:00
/**
 * Returns the current description (`ort_anmerkung`) of a place.
 *
 * @param integer $onum Place ID.
 *
 * @throws Exception If no place with the given ID exists.
 *
 * @return string
 */
private function getPlaceDescription(int $onum): string {

    $placeResult = $this->_mysqli_noda->query_by_stmt("SELECT `ort_anmerkung`
        FROM `orte`
        WHERE `ort_id` = ?", "i", $onum);

    // The row is read first so the result can be closed in one place.
    $row = $placeResult->fetch_row();
    $placeResult->close();

    if (!$row) {
        throw new Exception("This place does not exist");
    }

    return $row[0];

}
2023-08-31 16:09:21 +02:00
/**
 * Returns the current description (`tag_anmerkung`) of a tag.
 *
 * An unknown tag ID yields an empty string rather than an error.
 *
 * @param integer $tag_id Tag ID.
 *
 * @return string
 */
private function getTagDescription(int $tag_id): string {

    $tagResult = $this->_mysqli_noda->query_by_stmt("SELECT `tag_anmerkung`
        FROM `tag`
        WHERE `tag_id` = ?", "i", $tag_id);

    // The row is read first so the result can be closed in one place.
    $row = $tagResult->fetch_row();
    $tagResult->close();

    if (!$row) {
        return '';
    }

    return $row[0];

}
2021-03-17 16:10:49 +01:00
/**
 * Writes a description taken from Wikipedia into a place's description field.
 *
 * The new text is wrapped in quotes and suffixed with its source, language
 * and the current date. If the place already has a non-blank description
 * that does not start with "GND", $this->_retrievalMode decides what happens:
 * "add" appends the new text to the old one, "keep" writes the old text back
 * unchanged, "replace" overwrites it, and any other value prints an HTML page
 * offering these three choices and terminates the script.
 *
 * @param string  $cur_place_desc Description currently stored for the place.
 * @param string  $datafromwiki   Data parsed from wikidata.
 * @param string  $preflang       Language of the user interface in general.
 * @param string  $lang           Language of the main entry.
 * @param integer $placeID        ID of the place.
 * @param string  $erfasst_von    User name.
 *
 * @return boolean Always true if the method returns at all.
 */
2024-10-03 15:56:31 +02:00
public function enterPlaceDescFromWikidata ( string $cur_place_desc , string $datafromwiki , string $preflang , string $lang , int $placeID , string $erfasst_von ) : bool {
2021-03-17 16:10:49 +01:00
// Mark the text as a quote and record where and when it was fetched.
$datafromwiki = '"' . $datafromwiki . '" - (Wikipedia (' . $lang . ') ' . date ( " d.m.Y " ) . ')' ;
2023-08-31 15:38:12 +02:00
// Existing descriptions starting with "GND" are overwritten without asking.
if ( ! empty ( trim ( $cur_place_desc )) and substr ( $cur_place_desc , 0 , 3 ) !== 'GND' ) {
2021-03-17 16:10:49 +01:00
2022-04-18 13:19:00 +02:00
switch ( $this -> _retrievalMode ) {
case " add " :
2023-08-31 15:38:12 +02:00
$datafromwiki = $cur_place_desc . PHP_EOL . PHP_EOL . $datafromwiki ;
2022-04-18 13:19:00 +02:00
break ;
case " keep " :
2023-08-31 15:38:12 +02:00
$datafromwiki = $cur_place_desc ;
2022-04-18 13:19:00 +02:00
break ;
2022-04-18 20:45:32 +02:00
case " replace " :
break ;
2022-04-18 13:19:00 +02:00
default :
2021-03-17 16:10:49 +01:00
// No mode chosen yet: show old and new text side by side, let the user
// pick a mode via the links below and stop processing.
$tlLoader = new MDTlLoader ( " wiki_getter_place " , $preflang );
echo self :: generateHTMLHeadForWikidataFetcher ( $lang );
echo self :: generateWikidataFetcherHeader ( $tlLoader );
echo '
< p class = " alert icons iconsAlert " > There is already an entry for description ...</ p >
< div class = " wikiReplaceTTile " >
2023-08-31 15:38:12 +02:00
< h3 > Actual entry </ h3 >< p > ' . nl2br($cur_place_desc) . ' </ p >
2021-03-17 16:10:49 +01:00
</ div >
< div class = " wikiReplaceTTile " >
< h3 > Now found </ h3 >
< p > ' . $datafromwiki . ' </ p >
</ div >
< a href = " get_wikidata_for_ort.php?keep=keep' . write_get_vars(['suchbegriff', 'lang', 'wikidata_id', 'ort_id']) . ' " class = " buttonLike icons iconsPin " > Keep old entry </ a >
< br >< a href = " get_wikidata_for_ort.php?keep=replace' . write_get_vars(['suchbegriff', 'lang', 'wikidata_id', 'ort_id']) . ' " class = " buttonLike icons iconsPinOff " > Replace with new entry </ a >
< br >< a href = " get_wikidata_for_ort.php?keep=add' . write_get_vars(['suchbegriff', 'lang', 'wikidata_id', 'ort_id']) . ' " class = " buttonLike icons iconsPlusOne " > Keep old and add new entry </ a >< br >< br >< br >
' ;
exit ;
}
}
// Write description to DB
$updateStmt = $this -> _mysqli_noda -> do_prepare ( " UPDATE `orte`
SET `ort_anmerkung` = ? ,
`ort_erfasst_am` = NOW (),
`ort_erfasst_von` = ?
WHERE ort_id = ? " );
try {
$updateStmt -> bind_param ( " ssi " , $datafromwiki , $erfasst_von , $placeID );
$updateStmt -> execute ();
}
catch ( MDMysqliInvalidEncodingError $e ) {
// NOTE(review): the error is swallowed and the method still returns true,
// so callers cannot tell that nothing was written - confirm this is intended.
}
$updateStmt -> close ();
unset ( $updateStmt );
return true ;
}
2021-05-11 01:37:49 +02:00
/**
 * Links a place to its superordinate place, based on a Wikidata P131 claim.
 *
 * A relation is only inserted if the place has no superordinate yet and the
 * Wikidata ID named in the claim is already known in `noda_orte`.
 *
 * @param integer      $onum Place ID.
 * @param array<mixed> $data Wikidata information (P131 claim).
 *
 * @return void
 */
public function retrieveSuperordinateAdministrativePlace(int $onum, array $data): void {

    // Without a target entity in the claim there is nothing to link.
    if (empty($data[0]["mainsnak"]["datavalue"]["value"]["id"])) {
        return;
    }

    // Check if there already is a superordinate of the current place
    $existing = $this->_mysqli_noda->query_by_stmt("SELECT 1
        FROM `ort_relation`
        WHERE `ort_menor_id` = ?
        LIMIT 1", "i", $onum);
    $hasSuperordinate = ($existing->num_rows !== 0);
    $existing->close();

    if ($hasSuperordinate) {
        return;
    }

    // If there is no superordinate, check if the identified superordinate
    // is known in the noda DB.
    $wikidataIdOfParent = $data[0]["mainsnak"]["datavalue"]["value"]["id"];
    $lookup = $this->_mysqli_noda->query_by_stmt("SELECT `ort_id`
        FROM `noda_orte`
        WHERE `noda_source` = 'wikidata'
            AND `noda_nrinsource` = ?", "s", $wikidataIdOfParent);
    $parentRow = $lookup->fetch_row();
    $lookup->close();

    if (!$parentRow) {
        return;
    }

    $parentPlaceId = $parentRow[0];

    // Enter superordinate place by Wikidata
    $insertStmt = $this->_mysqli_noda->do_prepare("INSERT INTO `ort_relation`
        (`ort_mayor_id`, `ort_menor_id`, `ort_relation`)
        VALUES
        (?, ?, 1)");
    $insertStmt->bind_param("ii", $parentPlaceId, $onum);
    $insertStmt->execute();
    $insertStmt->close();

}
2021-03-17 16:10:49 +01:00
/**
 * Synchronizes a place with a Wikidata entity.
 *
 * Fetches the entity, links a superordinate place (P131), tries to store a
 * description taken from Wikipedia (first in the user's language, then in
 * each of LANGUAGES_MAIN_DESC until one has been entered), links further
 * norm data identifiers, writes the values of P1667 and P1566 as well as the
 * coordinates, stores translations and logs the edit.
 *
 * @param string  $lang        Language.
 * @param string  $wikidata_id Wikidata Q-ID.
 * @param integer $onum        Place ID.
 * @param string  $erfasst_von User name of the current user.
 *
 * @return void
 */
public function retrievePlaceInfoFromWikidataID(string $lang, string $wikidata_id, int $onum, string $erfasst_von) {

    self::validateWikidataId($wikidata_id);

    $data = self::_getWikidataEntity($wikidata_id);

    $wikilinks = self::_getWikipediaLinksFromWikidataOutput($data);

    // P131: Located in administrative unit
    if (isset($data['claims']['P131'])) {
        $this->retrieveSuperordinateAdministrativePlace($onum, $data['claims']['P131']);
    }

    // Get current description for overwriting
    $cur_place_desc = $this->getPlaceDescription($onum);

    // Loads the article text from the given Wikipedia and cleans it.
    // Returns an empty string if the API response does not contain the
    // expected parse.text.* structure (e.g. on an error response).
    $fetchSnippet = static function (string $wikiLang, $title) {
        $response = MD_STD::runCurl(self::_getWikipediaApiLink($wikiLang, $title), 10000);
        $html = json_decode($response, true)['parse']['text']['*'] ?? '';
        if (empty($html)) {
            return '';
        }
        return self::_cleanWikidataInput((string)$html);
    };

    $alreadyEntered = false;

    if (!empty($wikilinks[$lang])) {
        $datafromwiki = $fetchSnippet($lang, $wikilinks[$lang]['title']);
        if (!empty($datafromwiki)) {
            $alreadyEntered = $this->enterPlaceDescFromWikidata($cur_place_desc, $datafromwiki, $lang, $lang, $onum, $erfasst_von);
        }
    }

    foreach (self::LANGUAGES_MAIN_DESC as $cur_lang) {

        if ($alreadyEntered === true) break;
        if (!isset($wikilinks[$cur_lang]['url'])) continue;

        $datafromwiki = $fetchSnippet($cur_lang, $wikilinks[$cur_lang]['title']);
        if (!empty($datafromwiki)) {
            $alreadyEntered = $this->enterPlaceDescFromWikidata($cur_place_desc, $datafromwiki, $lang, $cur_lang, $onum, $erfasst_von);
        }

    }

    // A claim may exist without carrying a datavalue; filter_var() then
    // yields false and the value is skipped below.
    $geonames_id = filter_var($data['claims']['P1566'][0]['mainsnak']['datavalue']['value'] ?? null, FILTER_VALIDATE_INT);
    $tgn_id      = filter_var($data['claims']['P1667'][0]['mainsnak']['datavalue']['value'] ?? null, FILTER_VALIDATE_INT);

    if (!empty($nodaLinks = $this->_getNodaLinksFromWikidataResult('place', $wikidata_id, $data))) {
        NodaBatchInserter::linkNodaForPlace($this->_mysqli_noda, $onum, $nodaLinks, $erfasst_von);
    }

    $coordinates_wd = self::_getPlaceCoordinatesFromWikidata($data);

    // Group the column updates into one transaction.
    $this->_mysqli_noda->autocommit(false);

    if (!empty($tgn_id)) {
        // NOTE(review): the P1667 value is written to `ort_land` - confirm
        // that this column is indeed the one meant to hold it.
        $updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `orte`
            SET `ort_land` = ?
            WHERE `ort_id` = ?");
        $updateStmt->bind_param("ii", $tgn_id, $onum);
        $updateStmt->execute();
        $updateStmt->close();
        unset($updateStmt);
    }

    if (!empty($geonames_id)) {
        $updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `orte`
            SET `ort_geonames` = ?
            WHERE `ort_id` = ?");
        $updateStmt->bind_param("ii", $geonames_id, $onum);
        $updateStmt->execute();
        $updateStmt->close();
        unset($updateStmt);
    }

    if (!empty($coordinates_wd)) {
        $updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `orte`
            SET `ort_nord_sued` = ?, `ort_west_ost` = ?
            WHERE `ort_id` = ?");
        $updateStmt->bind_param("ddi", $coordinates_wd['latitude'], $coordinates_wd['longitude'], $onum);
        $updateStmt->execute();
        $updateStmt->close();
        unset($updateStmt);
    }

    $this->_mysqli_noda->commit();
    $this->_mysqli_noda->autocommit(true);

    $this->getWikidataTranslationsForPlace($data, $onum);

    NodaLogEdit::logPlaceEdit($this->_mysqli_noda, $onum, "wikidata-fetcher", $erfasst_von, 'update', 'synchronize');

}
/**
 * Stores the translations of a place that can be derived from a Wikidata entity.
 *
 * The translations are listed by listTranslationsFromWikidataWikipedia() and
 * written in a single batch insert. Nothing is written if none are found.
 *
 * @param array<mixed> $data          Entity data fetched from wikidata.
 * @param integer      $ort_id        Place ID.
 * @param string[]     $checkForLangs Languages to check for. Defaults to all
 *                                    languages generally loaded by the wikidata fetcher.
 *
 * @return void
 */
public function getWikidataTranslationsForPlace(array $data, int $ort_id, array $checkForLangs = self::LANGUAGES_TO_CHECK): void {

    $found = self::listTranslationsFromWikidataWikipedia($checkForLangs, $data);
    if (empty($found)) {
        return;
    }

    // One row per language, in the shape expected by the batch inserter.
    $rows = [];
    foreach ($found as $langCode => $entry) {
        $row = [
            'ort_id' => $ort_id,
            'lang'   => $langCode,
        ];
        $row['name']        = $entry['label'];
        $row['description'] = $entry['description'];
        $row['link']        = $entry['link'];
        $rows[] = $row;
    }

    NodaBatchInserter::insertPlaceTranslations($this->_mysqli_noda, $rows);

}
/**
 * Writes a description taken from Wikipedia into a tag's description field.
 *
 * The new text is wrapped in quotes, suffixed with its source, language and
 * the current date, stripped of numeric footnote markers and has straight
 * apostrophes replaced. If the tag already has a description that does not
 * start with "GND", $this->_retrievalMode decides what happens: "add"
 * appends the new text, "keep" leaves the description untouched, "replace"
 * overwrites it, and any other value prints an HTML page offering these
 * three choices and terminates the script. The tag's edit metadata is
 * updated in every case that returns.
 *
 * @param integer $tag_id       Tag ID.
 * @param string  $datafromwiki Data fetched from Wikipedia.
 * @param string  $preflang     The user's currently used language.
 * @param string  $lang         Currently queried language.
 * @param string  $erfasst_von  User who adds the info.
 *
 * @return boolean Always true if the method returns at all.
 */
public function retrieveTagDescFromWikipedia(int $tag_id, string $datafromwiki, string $preflang, string $lang, string $erfasst_von): bool {

    $datafromwiki = '"' . $datafromwiki . '" - (Wikipedia (' . $lang . ') ' . date("d.m.Y") . ')';
    // Remove footnote markers such as "[3]" or "[12]" (entity-encoded
    // brackets around one or more digits).
    $datafromwiki = str_replace("'", "´", MD_STD::preg_replace_str('/&#91;[0-9]+&#93;/', '', $datafromwiki));

    $tag_anmerkung = $this->getTagDescription($tag_id);

    $this->_mysqli_noda->autocommit(false);

    $updateTagDescStmt = $this->_mysqli_noda->do_prepare("UPDATE `tag`
        SET `tag_anmerkung` = ?
        WHERE `tag_id` = ?");

    if (!empty($tag_anmerkung) and substr($tag_anmerkung, 0, 3) !== 'GND') {

        switch ($this->_retrievalMode) {
            case "add":
                $newDesc = $tag_anmerkung . PHP_EOL . PHP_EOL . $datafromwiki;
                $updateTagDescStmt->bind_param("si", $newDesc, $tag_id);
                $updateTagDescStmt->execute();
                break;

            case "keep":
                break;

            case "replace":
                $updateTagDescStmt->bind_param("si", $datafromwiki, $tag_id);
                $updateTagDescStmt->execute();
                break;

            default:
                // No mode chosen yet: show old and new text side by side, let
                // the user pick a mode via the links below and stop processing.
                $tlLoader = new MDTlLoader("wiki_getter_tag", $preflang);
                echo self::generateHTMLHeadForWikidataFetcher($lang);
                echo self::generateWikidataFetcherHeader($tlLoader);
                echo '
            <p class="alert icons iconsAlert">Es gibt schon einen Eintrag im Beschreibungsfeld</p>
            <div class="wikiReplaceTTile">
                <h3>Bisher vorhanden</h3><p>' . nl2br($tag_anmerkung) . '</p>
            </div>
            <div class="wikiReplaceTTile">
                <h3>Jetzt gefunden</h3><p>' . $datafromwiki . '</p>
            </div>
            <a href="get_wikidata_for_tag.php?keep=keep' . write_get_vars(['suchbegriff', 'lang', 'wikidata_id', 'tag_id']) . '" class="buttonLike icons iconsPin">Keep old entry</a>';
                echo '<br><a href="get_wikidata_for_tag.php?keep=replace' . write_get_vars(['suchbegriff', 'lang', 'wikidata_id', 'tag_id']) . '" class="buttonLike icons iconsPinOff">Replace with new entry</a>';
                echo '<br><a href="get_wikidata_for_tag.php?keep=add' . write_get_vars(['suchbegriff', 'lang', 'wikidata_id', 'tag_id']) . '" class="buttonLike icons iconsPlusOne">Keep old and add new entry</a><br><br><br>';
                exit;
        }

    }
    else {
        $updateTagDescStmt->bind_param("si", $datafromwiki, $tag_id);
        $updateTagDescStmt->execute();
    }

    $updateTagDescStmt->close();

    $this->_mysqli_noda->commit();
    $this->_mysqli_noda->autocommit(true);

    // Update tag editing metadata
    $updateTagEditInfoStmt = $this->_mysqli_noda->do_prepare("UPDATE `tag`
        SET `tag_erfasst_am` = NOW(),
            `tag_erfasst_von` = ?
        WHERE `tag_id` = ?");
    $updateTagEditInfoStmt->bind_param("si", $erfasst_von, $tag_id);
    $updateTagEditInfoStmt->execute();
    $updateTagEditInfoStmt->close();

    return true;

}
2021-03-17 16:10:49 +01:00
/**
 * Synchronizes a tag with a Wikidata entity.
 *
 * Fetches the entity, tries to store a description taken from Wikipedia
 * (first in the user's language, then in each of LANGUAGES_MAIN_DESC until
 * one has been entered), links further norm data identifiers, stores
 * translations and logs the edit.
 *
 * @param string  $lang        The user's selected used language.
 * @param string  $wikidata_id Wikidata ID.
 * @param integer $tag_id      Tag ID.
 * @param string  $erfasst_von User name who's currently editing.
 *
 * @return void
 */
public function retrieveTagInfoFromWikidataID(string $lang, string $wikidata_id, int $tag_id, string $erfasst_von) {

    self::validateWikidataId($wikidata_id);

    $data = self::_getWikidataEntity($wikidata_id);

    $wikilinks = self::_getWikipediaLinksFromWikidataOutput($data);

    // The user's language is tried first, the main description languages
    // serve as fallbacks.
    $alreadyEntered = false;
    foreach (array_merge([$lang], self::LANGUAGES_MAIN_DESC) as $cur_lang) {

        if ($alreadyEntered === true) break;
        if (!isset($wikilinks[$cur_lang])) continue;

        $response = MD_STD::runCurl(self::_getWikipediaApiLink($cur_lang, $wikilinks[$cur_lang]['title']), 10000);
        // An error response lacks parse.text.*; treat it as "no text"
        // instead of reading offsets of a missing structure.
        $html = json_decode($response, true)['parse']['text']['*'] ?? '';

        # Process data retrieved from wikipedia
        if (!empty($datafromwiki = self::_cleanWikidataInput((string)$html))) {
            $alreadyEntered = $this->retrieveTagDescFromWikipedia($tag_id, $datafromwiki, $lang, $cur_lang, $erfasst_von);
        }

    }

    if (!empty($nodaLinks = $this->_getNodaLinksFromWikidataResult('tag', $wikidata_id, $data))) {
        NodaBatchInserter::linkNodaForTag($this->_mysqli_noda, $tag_id, $nodaLinks, $erfasst_von);
    }

    // Get translations
    if (!empty($data)) $this->getWikidataTranslationsForTag($data, $tag_id);

    NodaLogEdit::logTagEdit($this->_mysqli_noda, $tag_id, "wikidata-fetcher", $erfasst_von, 'update', 'synchronize');

}
/**
 * Stores the translations of a tag that can be derived from a Wikidata entity.
 *
 * The translations are listed by listTranslationsFromWikidataWikipedia() and
 * written in a single batch insert. For languages in LANGUAGES_TO_CAPITALIZE
 * the first character of label and description is upper-cased.
 *
 * @param array<mixed> $data          Entity data fetched from wikidata.
 * @param integer      $tag_id        Tag ID.
 * @param string[]     $checkForLangs Languages to check for. Defaults to all
 *                                    languages generally loaded by the wikidata fetcher.
 *
 * @return void
 */
public function getWikidataTranslationsForTag(array $data, int $tag_id, array $checkForLangs = self::LANGUAGES_TO_CHECK): void {

    if (empty($translations = self::listTranslationsFromWikidataWikipedia($checkForLangs, $data))) {
        return;
    }

    // ucfirst() works on single bytes and leaves multibyte initials
    // (e.g. Cyrillic letters or "ä") untouched, so capitalize per character.
    $capitalize = static function (string $text): string {
        return mb_strtoupper(mb_substr($text, 0, 1, 'UTF-8'), 'UTF-8') . mb_substr($text, 1, null, 'UTF-8');
    };

    $toInsert = [];

    foreach ($translations as $lang => $values) {

        $label       = $values['label'];
        $description = $values['description'];

        if (in_array($lang, self::LANGUAGES_TO_CAPITALIZE, true)) {
            $label       = $capitalize($label);
            $description = $capitalize($description);
        }

        $toInsert[] = [
            'tag_id'      => $tag_id,
            'lang'        => $lang,
            'name'        => $label,
            'description' => $description,
            'link'        => $values['link'],
        ];

    }

    NodaBatchInserter::insertTagTranslations($this->_mysqli_noda, $toInsert);

}
2021-06-30 22:55:37 +02:00
/**
 * Searches Wikidata for a string.
 *
 * Queries the wbsearchentities API (at most 20 hits) and returns the hits
 * that have a label and are not disambiguation pages. Each returned entry
 * has the keys `id`, `label`, `label_ext` (matched language and text, if
 * the API reported a match) and `description`.
 *
 * @param string $searchTerm Search string.
 * @param string $lang       Searched language. Defaults to German.
 *
 * @return array<mixed> Empty if the response cannot be decoded or has no hits.
 */
public static function searchWikidataForString(string $searchTerm, string $lang = "de"): array {

    $response = MD_STD::runCurl("https://www.wikidata.org/w/api.php?action=wbsearchentities&format=json&search=" . urlencode($searchTerm) . "&language=" . urlencode($lang) . "&limit=20", 10000);

    // json_decode() reports failure with null (not false); anything but an
    // array with search hits means there is nothing to return.
    $wikidata_data = json_decode($response, true);
    if (!is_array($wikidata_data) || empty($wikidata_data['search'])) {
        return [];
    }

    $output = [];
    foreach ($wikidata_data['search'] as $result) {

        if (empty($result['label'])
            or (!empty($result['description']) and in_array($result['description'], ['Wikipedia disambiguation page', 'Wikimedia disambiguation page'], true))
        ) continue;

        $cur = [
            'id'          => $result['id'],
            'label'       => $result['label'],
            'label_ext'   => '',
            'description' => '',
        ];

        if (!empty($result['match'])) {
            // Not every match carries both fields, so read them defensively.
            $cur['label_ext'] = ($result['match']['language'] ?? '') . ': ' . ($result['match']['text'] ?? '');
        }
        if (!empty($result['description'])) {
            $cur['description'] = $result['description'];
        }

        $output[] = $cur;

    }

    return $output;

}
2021-08-07 17:38:49 +02:00
/**
 * Generates the HTML for an entry in the general wikidata search results list.
 *
 * Entries without a label and disambiguation pages yield an empty string.
 * All values taken from $result are escaped before being written into the
 * markup, and the query string parameters are URL-encoded.
 *
 * @param string       $link       Link prefix the query parameters are appended to.
 *                                 It is emitted as passed in.
 * @param string       $searchTerm Search term.
 * @param string       $lang       Language.
 * @param array<mixed> $result     Single result to display.
 *
 * @return string
 */
public static function generateWikidataResultsListEntry(string $link, string $searchTerm, string $lang, array $result): string {

    if (empty($result['label']) or (isset($result['description']) and in_array($result['description'], ['Wikipedia disambiguation page', 'Wikimedia disambiguation page'], true))) {
        return '';
    }

    $rawId = (string)$result['id'];

    // Query values must be URL-encoded: HTML-escaping alone lets "&", "+"
    // or "#" in the search term break the query string.
    $output = '<div><a href="' . $link . 'suchbegriff=' . urlencode($searchTerm) . '&wikidata_id=' . urlencode($rawId) . '&lang=' . urlencode($lang) . '">
        <h4 class="icons iconsTag">' . htmlspecialchars($rawId) . '</h4>';

    // Label and description come from a remote API and are plain text.
    $output .= '<p class="wikidataSummary">' . htmlspecialchars((string)$result['label']);
    if (!empty($result['label_ext'])) $output .= " (<span class='icons iconsTranslate'>" . htmlspecialchars((string)$result['label_ext']) . "</span>)";
    $output .= '</p>';

    if (!empty($result['description'])) $output .= '<p>' . htmlspecialchars((string)$result['description']) . '</p>';

    $output .= '</a><a class="icons iconsEye" target="_blank" href="https://www.wikidata.org/wiki/' . urlencode($rawId) . '">Wikidata page</a></div>';

    return $output;

}
2021-03-17 16:10:49 +01:00
/**
 * Function for generating a wikidata results list.
 *
 * Searches Wikidata for the search term and renders one list entry per hit.
 * If nothing is found, an alert paragraph naming the search term is returned
 * instead.
 *
 * @param string $link       Link prefix passed on to the entries.
 * @param string $searchTerm Search term.
 * @param string $lang       Language. Only used for building the entry links.
 *
 * @return string
 */
public static function generateWikidataResultsList(string $link, string $searchTerm, string $lang):string {

    // NOTE(review): $lang is not forwarded to the search, so the search always
    // runs with the default language of searchWikidataForString(). Left as is,
    // since changing it alters which results are found - confirm the intent.
    if (empty($wikidata_data = self::searchWikidataForString($searchTerm))) {
        // The search term is user input and must not reach the page unescaped.
        return '<p class="icons iconsAlert alert"><b>' . htmlspecialchars(ucfirst($searchTerm)) . '</b> not found in Wikidata</p>';
    }

    $output = '
    <main id="wikidataResultsList">';

    foreach ($wikidata_data as $result) {
        // Descriptions are external data, and the entry renderer prints the
        // description without escaping it.
        if (!empty($result['description'])) {
            $result['description'] = htmlspecialchars((string)$result['description']);
        }
        $output .= self::generateWikidataResultsListEntry($link, $searchTerm, $lang, $result);
    }

    $output .= '
    </main>';

    return $output;

}
/**
 * Attempts to parse birth or death years from the data returned by wikidata.
 *
 * The year is read directly from the start of the time string (e.g.
 * "+1952-03-11T00:00:00Z") instead of going through strtotime(). strtotime()
 * reads a bare four-digit string like "1952" as the time 19:52 of the current
 * day, and rolls values with month and day set to "00" back into the
 * previous year.
 *
 * @param string $inputTime Input time in the format delivered by wikidata.
 *
 * @return string Year with at least four digits, prefixed with "-" for years
 *                with a negative sign. Empty string if no year could be read.
 */
public static function wikidataBirthDeathToYear(string $inputTime):string {

    // Optional sign, at least four digits of year, then the date separator.
    if (preg_match('/^([+-]?)(\d{4,})-/', $inputTime, $matches) !== 1) {
        return '';
    }

    // Years padded to more than four digits are cut back to the usual
    // four-digit minimum.
    $year = str_pad(ltrim($matches[2], '0'), 4, '0', STR_PAD_LEFT);

    if ($matches[1] === '-') {
        return '-' . $year;
    }
    return $year;

}
/**
 * Function for generating a wikidata results list for actors, keeping track of life dates.
 *
 * For each search hit the full entity data is fetched from Wikidata to read
 * the date of birth (P569) and the date of death (P570). The years found are
 * appended to the description of the hit. If both years equal the ones
 * passed in, the hit is marked as a suggestion.
 *
 * @param string  $link        Link prefix passed on to the entries.
 * @param string  $searchTerm  Search term.
 * @param string  $lang        Language. Only used for building the entry links.
 * @param integer $yearOfBirth Year of birth.
 * @param integer $yearOfDeath Year of death.
 *
 * @return string
 */
public static function generateWikidataResultsListForActors(string $link, string $searchTerm, string $lang, int $yearOfBirth, int $yearOfDeath):string {

    // NOTE(review): $lang is not forwarded to the search, so the search always
    // runs with the default language of searchWikidataForString(). Left as is,
    // since changing it alters which results are found - confirm the intent.
    if (empty($wikidata_data = self::searchWikidataForString($searchTerm))) {
        // The search term is user input and must not reach the page unescaped.
        return '<p class="icons iconsAlert alert"><b>' . htmlspecialchars(ucfirst($searchTerm)) . '</b> not found in Wikidata</p>';
    }

    // Fetch the full entity data of all hits in one go.
    $qLinksToCheck = [];
    foreach ($wikidata_data as $entry) {
        $qLinksToCheck[$entry['id']] = "https://www.wikidata.org/wiki/Special:EntityData/" . $entry['id'] . ".json";
    }
    $fetched = MD_STD::runCurlMulti($qLinksToCheck, 10000);

    // Reads the year from the first statement of the given property.
    // Returns 0 if there is no such statement or it has no time value.
    $readYear = static function(array $entity, string $property):int {
        $time = $entity['claims'][$property][0]['mainsnak']['datavalue']['value']['time'] ?? '';
        if (empty($time)) {
            return 0;
        }
        return (int)self::wikidataBirthDeathToYear((string)$time);
    };

    $yearsOfBirthList = $yearsOfDeathList = [];
    foreach ($fetched as $qId => $data) {

        $jsonData = json_decode($data, true);
        if (!is_array($jsonData) || empty($jsonData['entities'][$qId]) || !is_array($jsonData['entities'][$qId])) {
            continue;
        }
        $entity = $jsonData['entities'][$qId];

        if (($born = $readYear($entity, 'P569')) !== 0) {
            $yearsOfBirthList[$qId] = $born;
        }
        if (($died = $readYear($entity, 'P570')) !== 0) {
            $yearsOfDeathList[$qId] = $died;
        }

    }

    $output = '
    <main id="wikidataResultsList">';

    foreach ($wikidata_data as $result) {

        if (empty($result['id'])) continue;

        $born = $yearsOfBirthList[$result['id']] ?? 0;
        $died = $yearsOfDeathList[$result['id']] ?? 0;

        // The description delivered by Wikidata is external data and is
        // escaped before trusted markup is appended to it. The entry renderer
        // prints the description without escaping it.
        $descriptionParts = [];
        if (!empty($result['description'])) {
            $descriptionParts[] = htmlspecialchars((string)$result['description']);
        }
        if ($born !== 0) {
            $descriptionParts[] = 'Born: ' . $born;
        }
        if ($died !== 0) {
            $descriptionParts[] = 'Death: ' . $died;
        }
        if ($born !== 0 && $died !== 0 && $born === $yearOfBirth && $died === $yearOfDeath) {
            $descriptionParts[] = '<span class="buttonLike">Suggestion!</span>';
        }
        $result['description'] = implode('<br/>', $descriptionParts);

        $output .= self::generateWikidataResultsListEntry($link, $searchTerm, $lang, $result);

    }

    $output .= '
    </main>';

    return $output;

}
/**
 * Function generates HTML head for wikidata fetchers.
 *
 * Returns the document start up to and including the opening body tag, run
 * through MD_STD::minimizeHTMLString(). If the constant MAIN_CSS_FILE is
 * defined, it is linked as the stylesheet.
 *
 * @param string  $lang     User language. Used for the lang attribute of the
 *                          html element.
 * @param boolean $implyEnd If set to true, a shutdown function is registered
 *                          that echoes the return value of printHTMLEnd() at
 *                          the end of the script execution.
 *
 * @return string
 */
public static function generateHTMLHeadForWikidataFetcher(string $lang, bool $implyEnd = true):string {

    // $lang ends up in an attribute value and is escaped accordingly.
    $output = '<!DOCTYPE html><html class="getWikidata" lang="' . htmlspecialchars($lang) . '">
    <head>
        <title>Get Wikidata</title>
        <meta name="viewport" content="width=device-width, initial-scale=1" />
        <link rel="manifest" href="../manifest.webmanifest" />
        <meta name="theme-color" content="#0b1728" />
        <link rel="shortcut icon" sizes="16x16 32x32" href="../img/mdlogo-nodac.svg.png" />
        <link rel="apple-touch-icon" sizes="256x256" href="../img/mdterm-256px.png" />
        <script type="text/javascript" src="../js/wikidataGetter.min.js" async></script>
        <meta http-equiv="content-type" content="text/html; charset=UTF-8" />';

    if (defined("MAIN_CSS_FILE")) {
        $output .= '<link rel="stylesheet" type="text/css" href="' . htmlspecialchars(MAIN_CSS_FILE) . '">';
    }

    $output .= '
        <meta name="description" content="Fetch information from Wikidata." />
    </head>
    <body>';

    if ($implyEnd === true) {
        register_shutdown_function(static function() : void {
            echo printHTMLEnd();
        });
    }

    return MD_STD::minimizeHTMLString($output);

}
/**
 * Builds the header element for wikidata fetcher pages.
 *
 * The heading consists of the Wikidata logo, the translated title and the
 * HTML-escaped search term. If no search term is passed, the GET parameter
 * "suchbegriff" is used when it is set and not empty.
 *
 * @param MDTlLoader $tlLoader   Translation variable.
 * @param string     $additional Additional info. Appended after the heading
 *                               without any escaping.
 * @param string     $searchTerm Search term.
 *
 * @return string
 */
public static function generateWikidataFetcherHeader(MDTlLoader $tlLoader, string $additional = "", string $searchTerm = ""):string {

    // No explicit term given: take the one from the query string, if any.
    if (empty($searchTerm) && !empty($_GET['suchbegriff'])) {
        $searchTerm = (string)$_GET['suchbegriff'];
    }

    $heading = $tlLoader->tl("wiki", "wiki", "fetch_from_wikidata") . ': ' . htmlspecialchars($searchTerm);

    return '
    <header>
    <h1><img src="../img/wikidata.png" alt="Logo: Wikidata" />' . $heading . '</h1>' . $additional . '</header>';

}
/**
 * Constructor. Stores the passed DB connection in the instance so that
 * instance methods can access it as $this->_mysqli_noda.
 *
 * @param MDMysqli $mysqli_noda DB connection.
 *
 * @return void
 */
public function __construct ( MDMysqli $mysqli_noda ) {
$this -> _mysqli_noda = $mysqli_noda ;
}
}