2021-03-17 16:10:49 +01:00
< ? PHP
/**
* This file contains tools for fetching data from Wikidata .
*
* @ file
* @ author Joshua Ramon Enslin < joshua @ museum - digital . de >
*/
declare ( strict_types = 1 );
/**
* Helps fetching information from Wikidata .
*/
2021-03-18 01:23:45 +01:00
final class NodaWikidataFetcher {
2021-03-17 16:10:49 +01:00
2021-03-17 22:06:08 +01:00
/**
 * Languages whose Wikipedia article may supply the main description.
 *
 * Each code is combined with the suffix "wiki" to look up a sitelink in the
 * Wikidata entity and is used as the subdomain of wikipedia.org. Japanese is
 * listed as "ja" (as in LANGUAGES_TO_CHECK); "jp" is not a language code
 * and would never match a sitelink.
 */
const LANGUAGES_MAIN_DESC = ['de', 'da', 'en', 'es', 'fr', 'hu', 'it', 'ja', 'nl', 'pt', 'ru', 'sv', 'zh'];
2021-03-17 16:10:49 +01:00
/**
 * Language codes for which translations of an actor are looked up in
 * getWikidataTranslationsForPersinst().
 */
const LANGUAGES_TO_CHECK = [ 'ar' , 'bn' , 'cs' , 'da' , 'de' , 'el' , 'en' , 'es' , 'fa' , 'fi' , 'fr' , 'ha' , 'he' , 'hi' , 'hu' , 'id' , 'it' , 'ja' , 'ka' , 'ko' , 'nl' , 'pl' , 'pt' , 'ro' , 'ru' , 'sv' , 'sw' , 'ta' , 'th' , 'tl' , 'tr' , 'ur' , 'vi' , 'zh' ];
2021-03-17 22:06:08 +01:00
/**
 * Language codes that are subject to capitalization.
 *
 * NOTE(review): not referenced in the visible part of this file; presumably
 * lists languages in which fetched labels are capitalized - confirm against
 * the consumer. The codes are written without surrounding whitespace so that
 * they compare equal to the codes used in the other language lists.
 */
const LANGUAGES_TO_CAPITALIZE = ["cs", "da", "de", "en", "es", "fr", "fi", "id", "it", "nl", "pl", "pt", "ru", "sv", "tl", "tr"];
2021-03-17 16:10:49 +01:00
/**
 * URL prefixes of the supported norm data sources.
 *
 * Appending the identifier of an entry in the respective source to the
 * prefix yields the link to that entry. The prefixes are looked up by
 * vocabulary name, so keys must be spelled exactly like the names used
 * elsewhere (no surrounding whitespace), and the prefixes themselves must
 * not contain whitespace.
 */
const URL_PREFIXES_PLACES_NODA_SOURCE = [
    "gnd"       => "https://d-nb.info/gnd/",
    "nomisma"   => "http://nomisma.org/id/",
    "osm"       => "https://www.openstreetmap.org/relation/",
    "loc"       => "http://id.loc.gov/authorities/names/",
    "cona"      => "http://vocab.getty.edu/page/cona/",
    "aat"       => "http://vocab.getty.edu/page/aat/",
    "iconclass" => "http://iconclass.org/rkd/",
    "lcsh"      => "http://id.loc.gov/authorities/subjects/",
    "wikidata"  => "https://www.wikidata.org/wiki/",
    "bne"       => "http://datos.bne.es/persona/",
    "viaf"      => "https://viaf.org/viaf/",
    "bnf"       => "https://catalogue.bnf.fr/ark:/12148/cb",
    "ulan"      => "http://vocab.getty.edu/page/ulan/",
    "rkd"       => "https://rkd.nl/explore/artists/",
    "pim"       => "https://opac-nevter.pim.hu/en/record/-/record/",
    "ndl"       => "https://id.ndl.go.jp/auth/ndlna/",
    "npg"       => "https://www.npg.org.uk/collections/search/person/",
];
/**
 * Wikidata property IDs holding the identifier of an entity in a given
 * norm data source.
 *
 * Keys are vocabulary names, values the property IDs that are looked up in
 * the "claims" section of a Wikidata entity. Keys and IDs are written
 * without surrounding whitespace, as both are used verbatim as array keys.
 */
const P_IDS_NODA_TAGS = [
    'gnd'       => 'P227',
    'lcsh'      => 'P244',
    'aat'       => 'P1014',
    'iconclass' => 'P1256',
    'osm'       => 'P402',
    'loc'       => 'P244',
    'nomisma'   => 'P2950',
    'cona'      => 'P1669',
    'rkd'       => 'P650',
    'ulan'      => 'P245',
    'viaf'      => 'P214',
    'bnf'       => 'P268',
    'pim'       => 'P3973',
    'ndl'       => 'P349', // National Diet Library (Japan)
    'npg'       => 'P1816', // "National" portrait gallery
    //"bmpi" => "P6077", // British Museum Person-Institution
    'bne'       => 'P950', // National Library of Spain
];
/**
 * HTML fragments that _cleanWikidataInput() removes from text fetched from
 * Wikipedia (maintenance notices, coordinate boxes, empty paragraphs, ...)
 * before the first paragraph is extracted.
 *
 * NOTE(review): the first entry carries a leading and a trailing space,
 * unlike all other entries - confirm this is intended.
 */
const WIKIPEDIA_REMOVE_LITERALS = [
" <p>Si vous disposez d'ouvrages ou d'articles de référence ou si vous " ,
'<p><b>En pratique :</b> <a href="/wiki/Wikip%C3%A9dia:Citez_vos_sources#Qualité_des_sources" title="Wikipédia:Citez vos sources">Quelles sources sont attendu' ,
'<pVous pouvez partager vos connaissances en l’ améliorant (' ,
'<p class="mw-empty-elt">' ,
'<p><small>Géolocalisation sur la carte' ,
'<p><b>Koordinaatit:</b>' ,
'<p><span class="executeJS" data-gadgetname="ImgToggle"></span' ,
'<p><span class="imgtoggleboxTitle">' ,
//'<div class="mw-parser-output"><p>',
'<p><span style="font-size: small;"><span id="coordinates">' ,
'<p><span></span></p>' ,
'<p><a rel="nofollow" class="external text" href="https://maps.gs' ,
'<p><span class="plainlinks nourlexpansion"><a class="external text" href="//tools.wmflabs.org/geohack/geohack.php?langu' ,
'<p><span style="display:none">' ,
'<p> </p>' ,
'<p><span class="geo noexcerpt"' ,
2021-03-17 16:10:49 +01:00
];
/** @var MDMysqli Database connection through which the methods of this class read and write. */
private MDMysqli $_mysqli_noda ;
2021-03-18 01:23:45 +01:00
/**
 * Attempts to fetch a Wikidata ID from a provided URL.
 *
 * Downloads the page, looks up the element with the ID "t-wikibase" and
 * reads the Q-ID from the last path segment of the link it contains.
 *
 * @param string $linkUrl Link to a Wikipedia page.
 *
 * @return string Wikidata Q-ID, or an empty string if none could be determined.
 *
 * @throws MDExpectedException If $linkUrl is not a valid URL.
 */
public static function getWikidataIdFromWikipedia(string $linkUrl): string {

    if (!filter_var($linkUrl, FILTER_VALIDATE_URL)) {
        throw new MDExpectedException("Invalid URL");
    }

    $wikipedia_cont = (string)MD_STD::runCurl($linkUrl);
    if (trim($wikipedia_cont) === '') {
        // DOMDocument::loadHTML() does not accept empty input.
        return '';
    }

    // Real-world HTML regularly makes libxml report parse errors. Collect
    // them internally instead of emitting warnings, then restore the
    // previous setting.
    $previousLibxmlSetting = libxml_use_internal_errors(true);
    try {
        $doc = new DOMDocument();
        $loaded = $doc->loadHTML($wikipedia_cont);
    }
    finally {
        libxml_clear_errors();
        libxml_use_internal_errors($previousLibxmlSetting);
    }
    if ($loaded === false) {
        return '';
    }

    $wikidataLinkLi = $doc->getElementById("t-wikibase");
    if ($wikidataLinkLi === null) {
        return '';
    }

    // The first child of the list item may be a whitespace text node, which
    // has no attributes. Look for the link element explicitly.
    $wikidataLink = $wikidataLinkLi->getElementsByTagName('a')->item(0);
    if (!($wikidataLink instanceof DOMElement)) {
        return '';
    }

    $t_wikibase = $wikidataLink->getAttribute('href');
    $wikidata_id_end = strrpos($t_wikibase, '/');
    if ($wikidata_id_end === false) {
        return '';
    }

    $wikidata_id = trim(substr($t_wikibase, $wikidata_id_end + 1), '/');

    return substr($wikidata_id, 0, 1) === 'Q' ? $wikidata_id : '';

}
2021-03-17 16:10:49 +01:00
/**
 * Gets translation source Wikipedia pages from Wikidata.
 *
 * For every requested language that has both a label and a complete
 * Wikipedia sitelink (URL and title) in $data, the URL of the Wikipedia
 * parse API for section 0 of the article is built and the link to the
 * article itself is recorded.
 *
 * @param array<string> $checkagainstLanguage Language codes to check.
 * @param array<mixed>  $data                 Data fetched from Wikidata.
 *
 * @return array<array<string>> Two lists keyed by language code: index 0
 *                              holds the API URLs to fetch, index 1 the
 *                              links to the Wikipedia pages.
 */
public static function getWikidataWikipediaTranslationSources(array $checkagainstLanguage, array $data): array {

    $languagesToFetch = $wikilinks = [];

    foreach ($checkagainstLanguage as $lang) {

        if (empty($data['labels'][$lang])) {
            continue;
        }

        // Both URL and title are needed below; skip incomplete sitelinks
        // instead of reading missing keys.
        $sitelink = $data['sitelinks'][$lang . 'wiki'] ?? null;
        if (!isset($sitelink['url'], $sitelink['title'])) {
            continue;
        }

        $wikilinkterm = str_replace(' ', '_', (string)$sitelink['title']);

        $languagesToFetch[$lang] = "https://" . $lang . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode($wikilinkterm) . "&prop=text&section=0&format=json";
        $wikilinks[$lang] = $sitelink['url'];

    }

    return [$languagesToFetch, $wikilinks];

}
/**
 * Cleans contents parsed from Wikipedia.
 *
 * Input starting with "<" is loaded as XML: style, table and ol elements as
 * well as every div except the first are dropped, a first paragraph
 * mentioning "geohack" is removed, and the remaining text content is
 * returned after whitespace cleanup.
 * Any other input is cleaned by string operations: the fragments listed in
 * WIKIPEDIA_REMOVE_LITERALS are removed, a leading table / image block is
 * skipped, tags are stripped and ".mw-parser-output ... }" passages,
 * bracketed numbers and superfluous whitespace are removed.
 * In both cases, text longer than 600 characters is cut at the first double
 * line break found at or after byte offset 600.
 *
 * @param string $input Input string.
 *
 * @return string
 */
private static function _cleanWikidataInput ( string $input ) : string {
// Markup branch: anything that starts with "<" is handled via DOM and returns from within this block.
if ( substr ( $input , 0 , strlen ( '<' )) === '<' ) {
$doc = new DOMDocument ();
// NOTE(review): the return value of loadXML() is not checked; if parsing fails,
// the code below operates on an empty document.
$doc -> loadXML ( $input );
// The node lists are live: removing item 0 shrinks the list, hence the while loops.
$list = $doc -> getElementsByTagName ( " style " );
while ( $list -> length > 0 ) {
$p = $list -> item ( 0 );
$p -> parentNode -> removeChild ( $p );
}
$list = $doc -> getElementsByTagName ( " table " );
while ( $list -> length > 0 ) {
$p = $list -> item ( 0 );
$p -> parentNode -> removeChild ( $p );
}
// Keep the first div (item 0) and remove every further one.
$list = $doc -> getElementsByTagName ( " div " );
while ( $list -> length > 1 ) {
$p = $list -> item ( 1 );
$p -> parentNode -> removeChild ( $p );
}
$list = $doc -> getElementsByTagName ( " ol " );
while ( $list -> length > 0 ) {
$p = $list -> item ( 0 );
$p -> parentNode -> removeChild ( $p );
}
// Drop the first paragraph if it contains a geohack (coordinates) link.
// NOTE(review): item(0) is null when the document contains no p element; saveHTML(null)
// then serializes the whole document and the removal below would be attempted on null.
$firstP = $doc -> getElementsByTagName ( " p " ) -> item ( 0 );
if ( strpos ( $doc -> saveHTML ( $firstP ), 'geohack' ) !== false ) {
$firstP -> parentNode -> removeChild ( $firstP );
}
/*
if ( strpos ( $doc -> saveHTML (), 'Coordinates:' ) !== false ) {
echo $doc -> saveHTML ();
exit ;
}
*/
// Take the plain text and double every line break.
$input = str_replace ( PHP_EOL , PHP_EOL . PHP_EOL , trim ( $doc -> textContent ));
// Cut off overly long texts at the first double line break at or after offset 600.
// The length check counts characters, while strpos() / substr() work on bytes.
if ( mb_strlen ( $input ) > 600 ) {
if ( strpos ( $input , PHP_EOL . PHP_EOL , 600 ) !== false ) {
$input = substr ( $input , 0 , strpos ( $input , PHP_EOL . PHP_EOL , 600 ));
}
}
// Build a replacement map for bracketed numbers 0-99 (presumably footnote markers) and apply it.
$bracketsToRemove = [];
for ( $i = 0 ; $i < 100 ; $i ++ ) {
$bracketsToRemove [ " [ $i ] " ] = " " ;
}
$input = strtr ( $input , $bracketsToRemove );
$input = str_replace ( " \t " , " " , $input );
// Remove spaces that directly follow a line break
while ( strpos ( $input , PHP_EOL . " " ) !== false ) {
$input = str_replace ( PHP_EOL . " " , PHP_EOL , $input );
}
// Collapse three consecutive line breaks into two until none are left
while ( strpos ( $input , PHP_EOL . PHP_EOL . PHP_EOL ) !== false ) {
$input = str_replace ( PHP_EOL . PHP_EOL . PHP_EOL , PHP_EOL . PHP_EOL , $input );
}
return $input ;
}
// String-based branch: only reached for input that does not start with "<".
$input = str_replace ( PHP_EOL , '' , $input );
2021-03-17 22:06:08 +01:00
foreach ( self :: WIKIPEDIA_REMOVE_LITERALS as $tToRemove ) $input = str_replace ( $tToRemove , " " , $input );
2021-03-17 16:10:49 +01:00
// Reduce the input to the span from the first "<p>" to the last "</p>" (or to the end, if there is no "</p>").
$first_mention_of_paragraph = strpos ( $input , '<p>' );
if ( $first_mention_of_paragraph !== false ) $input = substr ( $input , $first_mention_of_paragraph , ( strrpos ( $input , '</p>' ) ? : strlen ( $input )) - $first_mention_of_paragraph );
// Remove infobox tables specifically
// If a table starts before the second paragraph, continue with the first paragraph after the end of that table.
$removeFirstParagraph = false ;
$firstParagraphPosition = strpos ( $input , '<p' , 1 );
$currentSearchPos = strpos ( $input , " <table> " );
if ( $currentSearchPos !== false && $currentSearchPos < $firstParagraphPosition ) {
if (( $tableEndPos = strpos ( $input , " </table> " )) !== false ) {
if (( $pStartPos = strpos ( $input , '<p' , $tableEndPos + 6 )) !== false ) {
$input = substr ( $input , $pStartPos );
}
}
}
// Remove leftover unnecessary paragraphs before actual content
// If a table end or an image appears before the second paragraph, the text is cut to start at that paragraph.
$removeFirstParagraph = false ;
$firstParagraphPosition = strpos ( $input , '<p' , 1 );
foreach ([ " </table> " , " <img " ] as $tagPart ) {
$currentSearchPos = strpos ( $input , $tagPart );
if ( $currentSearchPos !== false && $currentSearchPos < $firstParagraphPosition ) {
$removeFirstParagraph = true ;
break ;
}
}
if ( $removeFirstParagraph === true ) {
$input = substr ( $input , $firstParagraphPosition ? : 0 );
}
// Mark paragraph ends with line breaks before all tags are stripped.
$input = str_replace ( '</p>' , '</p>' . PHP_EOL . PHP_EOL . PHP_EOL , $input );
# $input = str_replace('?/i', '', $input);
$input = strip_tags ( $input );
# for ($i = 150; $i < 1000; $i++) $input = str_replace("&#$i;", " ", $input);
// Cut out passages reaching from ".mw-parser-output" to the next "}" (at most 30 of them).
$i = 0 ;
while ( strpos ( $input , " .mw-parser-output " ) !== false and strpos ( $input , " } " , strpos ( $input , " .mw-parser-output " )) !== false ) {
$part1 = substr ( $input , 0 , strpos ( $input , " .mw-parser-output " ));
$part2 = substr ( $input , strpos ( $input , " } " , strpos ( $input , " .mw-parser-output " )) + 1 );
$input = $part1 . $part2 ;
$i ++ ;
if ( $i === 30 ) break ;
}
// Build a replacement map for bracketed numbers 0-99 (presumably footnote markers) and apply it.
$bracketsToRemove = [];
for ( $i = 0 ; $i < 100 ; $i ++ ) {
$bracketsToRemove [ " [ $i ] " ] = " " ;
}
$input = strtr ( $input , $bracketsToRemove );
$input = str_replace ( " \t " , " " , $input );
// Remove double whitespaces
// NOTE(review): as written here, search and replacement strings are identical, so the loop
// could not terminate for input containing the search string. The search string was
// presumably two spaces - confirm against the original file.
while ( strpos ( $input , " " ) !== false ) {
$input = str_replace ( " " , " " , $input );
}
// Remove spaces that directly follow a line break
while ( strpos ( $input , PHP_EOL . " " ) !== false ) {
$input = str_replace ( PHP_EOL . " " , PHP_EOL , $input );
}
// Collapse three consecutive line breaks into two until none are left
while ( strpos ( $input , PHP_EOL . PHP_EOL . PHP_EOL ) !== false ) {
$input = str_replace ( PHP_EOL . PHP_EOL . PHP_EOL , PHP_EOL . PHP_EOL , $input );
}
// Remove boilerplate sentences that survive tag stripping.
$stableToRemove = [
" Vous pouvez partager vos connaissances en l’ améliorant (comment ?) selon les recommandations des projets correspondants. " ,
];
foreach ( $stableToRemove as $tToRemove ) $input = str_replace ( $tToRemove , " " , $input );
// Cut the text off where one of the known stub notices starts.
$endings = [
" StubDenne artikel om et vandløb " ,
];
foreach ( $endings as $ending ) {
if ( strpos ( $input , $ending ) !== false ) $input = substr ( $input , 0 , strpos ( $input , $ending ));
}
$input = trim ( $input );
// Cut off overly long articles
// The length check counts characters, while strpos() / substr() work on bytes.
if ( mb_strlen ( $input ) > 600 ) {
if ( strpos ( $input , PHP_EOL . PHP_EOL , 600 ) !== false ) {
$input = substr ( $input , 0 , strpos ( $input , PHP_EOL . PHP_EOL , 600 ));
}
}
// Trim again to make really, really no superfluous whitespaces remain
$input = trim ( $input );
// Remove HTML-encoded bracketed single digits, replace apostrophes by acute accents, then decode remaining entities.
$input = str_replace ( " ' " , " ´ " , MD_STD :: preg_replace_str ( " / \ & \ #91 \ ;[0-9] \ & \ #93 \ ;/ " , '' , $input ));
$input = html_entity_decode ( $input );
return $input ;
}
/**
 * Stores a description fetched from Wikipedia for an actor and records the
 * link to the Wikipedia article in the `noda` table.
 *
 * If the actor already has a description that does not start with "GND",
 * the behaviour depends on $_GET['keep']:
 * - not set: a comparison page with links for keeping, replacing or
 *   appending is printed and the script is terminated,
 * - empty or "replace": the description is overwritten,
 * - "add": the new text is appended to the existing description,
 * - any other value: the existing description is left unchanged.
 * Without such a description, the new text is written directly.
 *
 * If no actor with the given ID exists, the script is terminated via exit.
 *
 * @param integer $persinst_id  Person ID.
 * @param string  $wikidata_id  Wikidata ID.
 * @param string  $datafromwiki Data fetched from Wikipedia.
 * @param string  $wikilink     Link to wikipedia entry.
 * @param string  $preflang     The user's currently used language.
 * @param string  $lang         Currently queried language.
 * @param string  $erfasst_von  User who adds the info.
 *
 * @return boolean True whenever the function returns (every path that does
 *                 not terminate the script sets the result to true).
 */
public function retrievePersinstDescFromWikipedia ( int $persinst_id , string $wikidata_id , string $datafromwiki , string $wikilink , string $preflang , string $lang , string $erfasst_von ) : bool {
$output = false ;
// Wrap the text in quotes and append the source note (language and current date).
$datafromwiki = '"' . $datafromwiki . '" - (Wikipedia (' . $lang . ') ' . date ( 'd.m.Y' ) . ')' ;
$cergebnis = $this -> _mysqli_noda -> query_by_stmt ( " SELECT `persinst_kurzinfo`, `persinst_anzeigename` AS `display_name`
FROM `persinst`
WHERE `persinst_id` = ? " , " i " , $persinst_id );
// Update persinst table
$updatePersinstStmt = $this -> _mysqli_noda -> do_prepare ( " UPDATE `persinst`
SET `persinst_kurzinfo` = ?
WHERE `persinst_id` = ? " );
// NOTE(review): exits without closing the result and the prepared statement.
if ( $cergebnis -> num_rows === 0 ) exit ;
$cinfo = $cergebnis -> fetch_assoc ();
// An existing description is only protected if it does not start with "GND".
if ( ! empty ( $cinfo [ 'persinst_kurzinfo' ]) and substr ( $cinfo [ 'persinst_kurzinfo' ], 0 , 3 ) != 'GND' ) {
if ( isset ( $_GET [ 'keep' ])) {
// Empty value or "replace": overwrite the existing description.
if ( ! ( $_GET [ 'keep' ]) || $_GET [ 'keep' ] === 'replace' ) {
$updatePersinstStmt -> bind_param ( " si " , $datafromwiki , $persinst_id );
$updatePersinstStmt -> execute ();
}
// "add": append the new text after a blank line.
else if ( $_GET [ 'keep' ] === 'add' ) {
$newDesc = $cinfo [ 'persinst_kurzinfo' ] . PHP_EOL . PHP_EOL . $datafromwiki ;
$updatePersinstStmt -> bind_param ( " si " , $newDesc , $persinst_id );
$updatePersinstStmt -> execute ();
}
$output = true ;
}
else {
// No decision yet: print old and new text side by side with the three choices, then stop.
// NOTE(review): the stored description, the fetched text and $wikidata_id are written
// into the page without HTML escaping - confirm they cannot contain markup.
$tlLoader = new MDTlLoader ( " wiki_getter_persinst " , $preflang );
echo self :: generateHTMLHeadForWikidataFetcher ( $lang );
echo self :: generateWikidataFetcherHeader ( $tlLoader , " " , $cinfo [ 'display_name' ]);
echo '
< p class = " alert icons iconsAlert " > Es gibt schon einen Eintrag im Beschreibungsfeld </ p >
< div class = " wikiReplaceTTile " >
< h3 > Bisher vorhanden </ h3 >
< p > ' . nl2br($cinfo[' persinst_kurzinfo ']) . ' </ p >
</ div >
< div class = " wikiReplaceTTile " >
< h3 > Jetzt gefunden </ h3 >< p > ' . $datafromwiki . ' </ p >
</ div >
< a href = " get_wikidata_for_persinst.php?wikidata_id=' . $wikidata_id . write_get_vars(['suchbegriff', 'lang', 'persinst_id']) . '&keep=keep " class = " buttonLike icons iconsPin " > Keep old entry </ a > ' ;
echo '<br><a href="get_wikidata_for_persinst.php?wikidata_id=' . $wikidata_id . write_get_vars ([ 'suchbegriff' , 'lang' , 'persinst_id' ]) . '&keep=replace" class="buttonLike icons iconsPinOff">Replace with new entry</a>' ;
echo '<br><a href="get_wikidata_for_persinst.php?wikidata_id=' . $wikidata_id . write_get_vars ([ 'suchbegriff' , 'lang' , 'persinst_id' ]) . '&keep=add" class="buttonLike icons iconsPlusOne">Keep old and add new entry</a><br><br><br>' ;
exit ;
}
}
else {
// No protected description present: write the new text directly.
$updatePersinstStmt -> bind_param ( " si " , $datafromwiki , $persinst_id );
$updatePersinstStmt -> execute ();
$output = true ;
}
$cergebnis -> close ();
$updatePersinstStmt -> close ();
// Set link to Wikipedia in noda table
$insertNodaStmt = $this -> _mysqli_noda -> do_prepare ( " INSERT INTO `noda`
( `persinst_id` , `noda_source` , `noda_nrinsource` , `noda_link` , `noda_erfasst_von` )
VALUES
( ? , 'Wikipedia' , '' , ? , ? )
ON DUPLICATE KEY UPDATE `noda_link` = ? " );
$insertNodaStmt -> bind_param ( " isss " , $persinst_id , $wikilink , $erfasst_von , $wikilink );
$insertNodaStmt -> execute ();
$insertNodaStmt -> close ();
// Update edit metadata
$updatePersinstEditInfoStmt = $this -> _mysqli_noda -> do_prepare ( " UPDATE `persinst`
SET `persinst_erfasst_am` = NOW (),
`persinst_erfasst_von` = ?
WHERE `persinst_id` = ? " );
$updatePersinstEditInfoStmt -> bind_param ( " si " , $erfasst_von , $persinst_id );
$updatePersinstEditInfoStmt -> execute ();
$updatePersinstEditInfoStmt -> close ();
$updatePersinstEditInfoStmt = null ;
return $output ;
}
/**
 * Function for updating birth and death years as well as the gender of an
 * actor based on Wikidata information.
 *
 * Each of the three values is only written if it is currently stored as an
 * empty string. Birth (P569) and death (P570) years are read directly from
 * the year part of the Wikidata time string. Earlier, the year was derived
 * via strtotime(), which may read a bare four-digit string as a time of day
 * and rolls dates with month and day "00" back into the previous year.
 *
 * @param array<mixed> $data        Data loaded from Wikidata.
 * @param integer      $persinst_id Actor ID.
 *
 * @return void
 *
 * @throws MDmainEntityNotExistentException If the actor cannot be loaded.
 * @throws Exception                        If Wikidata names a gender (P21) not known here.
 */
public function enterPersinstBirthDeathDatesFromWikidata(array $data, int $persinst_id): void {

    $result = $this->_mysqli_noda->query_by_stmt("SELECT `persinst_geburtsjahr`,
            `persinst_sterbejahr`, `persinst_gender`
        FROM `persinst`
        WHERE `persinst_id` = ?", "i", $persinst_id);
    $actor_dates = $result->fetch_assoc();
    // Close the result before a possible throw, so it is released in any case.
    $result->close();
    $result = null;

    if (!$actor_dates) {
        throw new MDmainEntityNotExistentException("Failed to fetch actor information");
    }

    // Try to get the year of birth
    if ($actor_dates['persinst_geburtsjahr'] === '') {
        $birth_year = self::_getYearFromWikidataTimeClaim($data, 'P569');
        if ($birth_year !== null) {
            $this->_updatePersinstValueFromWikidata("UPDATE `persinst`
                SET `persinst_geburtsjahr` = ?
                WHERE `persinst_id` = ?
                LIMIT 1", "ii", $birth_year, $persinst_id);
        }
    }

    // Try to get the year of death
    if ($actor_dates['persinst_sterbejahr'] === '') {
        $death_year = self::_getYearFromWikidataTimeClaim($data, 'P570');
        if ($death_year !== null) {
            $this->_updatePersinstValueFromWikidata("UPDATE `persinst`
                SET `persinst_sterbejahr` = ?
                WHERE `persinst_id` = ?
                LIMIT 1", "ii", $death_year, $persinst_id);
        }
    }

    // Try to get the gender
    if ($actor_dates['persinst_gender'] === '') {

        $wikidata_gender_id = $data['claims']['P21'][0]['mainsnak']['datavalue']['value']['id'] ?? null;

        if (!empty($wikidata_gender_id)) {

            switch ($wikidata_gender_id) {
                case "Q6581097":
                    $wikidata_gender = "male";
                    break;
                case "Q6581072":
                case "Q1052281":
                    $wikidata_gender = "female";
                    break;
                case "Q48270":
                    $wikidata_gender = "other";
                    break;
                default:
                    throw new Exception("Unknown gender: Q-ID is " . $wikidata_gender_id);
            }

            $this->_updatePersinstValueFromWikidata("UPDATE `persinst`
                SET `persinst_gender` = ?
                WHERE `persinst_id` = ?
                LIMIT 1", "si", $wikidata_gender, $persinst_id);

        }

    }

}

/**
 * Reads the year from the first time value of a claim of a Wikidata entity.
 *
 * The time string is expected to start with an optional "+" followed by the
 * year and a hyphen.
 *
 * @param array<mixed> $data Data loaded from Wikidata.
 * @param string       $pId  Property ID of the claim to read.
 *
 * @return integer|null The year, or null if there is no such value or it
 *                      is not a positive year.
 */
private static function _getYearFromWikidataTimeClaim(array $data, string $pId): ?int {

    $time = $data['claims'][$pId][0]['mainsnak']['datavalue']['value']['time'] ?? null;
    if (!is_string($time)) {
        return null;
    }

    // Years before the common era (leading "-") are skipped on purpose:
    // how they are to be stored is not defined here.
    if (preg_match('/^\+?([0-9]+)-/', $time, $matches) !== 1) {
        return null;
    }

    $year = (int)$matches[1];

    return $year > 0 ? $year : null;

}

/**
 * Runs a single-value UPDATE on the persinst table.
 *
 * @param string         $query       Query with two placeholders: new value, actor ID.
 * @param string         $types       Type string for binding the two parameters.
 * @param integer|string $value       Value to write.
 * @param integer        $persinst_id Actor ID.
 *
 * @return void
 */
private function _updatePersinstValueFromWikidata(string $query, string $types, $value, int $persinst_id): void {

    $updateStmt = $this->_mysqli_noda->do_prepare($query);
    $updateStmt->bind_param($types, $value, $persinst_id);
    $updateStmt->execute();
    $updateStmt->close();

}
/**
 * Function for retrieving information on an actor from Wikidata.
 *
 * Loads the entity from Wikidata, then
 * - stores a description from Wikipedia (the user's language is tried
 *   first, followed by the other languages of LANGUAGES_MAIN_DESC),
 * - enters birth / death years and gender,
 * - stores links to other norm data sources found in the entity's claims,
 * - fetches translations.
 *
 * @param string  $lang        The user's selected used language.
 * @param string  $wikidata_id Wikidata ID.
 * @param integer $persinst_id Actor ID.
 * @param string  $erfasst_von User name who's currently editing.
 *
 * @return void
 *
 * @throws MDhttpFailedException If no data for the entity could be loaded.
 * @throws Exception             If a link to an unknown norm data source is to be stored.
 */
public function retrievePersinstInfoFromWikidataID(string $lang, string $wikidata_id, int $persinst_id, string $erfasst_von): void {

    $response = json_decode(MD_STD::runCurl("https://www.wikidata.org/wiki/Special:EntityData/" . urlencode($wikidata_id) . ".json", 10000), true);

    // The requested entity may be missing from an otherwise valid response.
    // Fail here instead of passing null on to the methods below.
    $data = is_array($response) ? ($response['entities'][$wikidata_id] ?? null) : null;
    if (!is_array($data)) {
        throw new MDhttpFailedException("Failed fetching from Wikidata. Try again later.");
    }

    // Get links to wikipedia
    $wikilink = $wikilinkterm = [];
    foreach (self::LANGUAGES_MAIN_DESC as $tLang) {
        $sitelink = $data['sitelinks'][$tLang . 'wiki'] ?? [];
        if (isset($sitelink['url'])) $wikilink[$tLang] = $sitelink['url'];
        if (isset($sitelink['title'])) $wikilinkterm[$tLang] = str_replace(' ', '_', $sitelink['title']);
    }

    // Try the user's language first, then the main languages in their listed
    // order. array_unique() prevents the user's language from being fetched
    // a second time.
    $languagesToTry = array_unique(array_merge([$lang], self::LANGUAGES_MAIN_DESC));
    foreach ($languagesToTry as $sprache) {

        if (!isset($wikilink[$sprache], $wikilinkterm[$sprache]) || !is_string($wikilinkterm[$sprache])) continue;

        $wikipediaResponse = json_decode(MD_STD::runCurl("https://" . $sprache . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode($wikilinkterm[$sprache]) . "&prop=text&section=0&format=json", 10000), true);

        # Process data retrieved from wikipedia
        $datafromwiki = self::_cleanWikidataInput((string)($wikipediaResponse['parse']['text']['*'] ?? ''));
        if (empty($datafromwiki)) continue;

        $alreadyEntered = $this->retrievePersinstDescFromWikipedia($persinst_id, $wikidata_id, $datafromwiki, $wikilink[$sprache], $lang, $sprache, $erfasst_von);
        if ($alreadyEntered === true) break;

    }

    $this->enterPersinstBirthDeathDatesFromWikidata($data, $persinst_id);

    // Get links to other norm data sources
    $nodaLinks = [
        "wikidata" => $wikidata_id,
    ];
    foreach (self::P_IDS_NODA_TAGS as $vocabName => $pId) {
        if ($vocabName === 'lcsh') continue;
        // Claims without a usable value are skipped.
        $nodaValue = $data['claims'][$pId][0]['mainsnak']['datavalue']['value'] ?? null;
        if (is_string($nodaValue) && $nodaValue !== '') $nodaLinks[$vocabName] = $nodaValue;
    }

    // Resolve all link URLs before the transaction is opened, so that an
    // unknown source cannot leave the connection with autocommit disabled.
    $nodaLinkUrls = [];
    foreach ($nodaLinks as $noda_source => $nodaId) {
        if (!isset(self::URL_PREFIXES_PLACES_NODA_SOURCE[$noda_source])) {
            throw new Exception("Unknown noda link: " . $noda_source);
        }
        $nodaLinkUrls[$noda_source] = self::URL_PREFIXES_PLACES_NODA_SOURCE[$noda_source] . $nodaId;
    }

    // Store links to other noda entries.
    $insertNodaLinkStmt = $this->_mysqli_noda->do_prepare("INSERT INTO `noda`
        (`persinst_id`, `noda_source`, `noda_nrinsource`, `noda_link`, `noda_erfasst_von`)
        VALUES
        (?, ?, ?, ?, ?)
        ON DUPLICATE KEY UPDATE `noda_nrinsource` = ?,
            `noda_link` = ?,
            `noda_erfasst_von` = ?");

    $this->_mysqli_noda->autocommit(false);
    foreach ($nodaLinkUrls as $noda_source => $noda_link_url) {

        $nodaId = $nodaLinks[$noda_source];

        // The last three parameters feed the ON DUPLICATE KEY UPDATE clause:
        // ID in the source, link, and - as the final one - the user name.
        $insertNodaLinkStmt->bind_param("isssssss", $persinst_id, $noda_source, $nodaId, $noda_link_url, $erfasst_von, $nodaId, $noda_link_url, $erfasst_von);
        $insertNodaLinkStmt->execute();

    }
    $this->_mysqli_noda->commit();
    $this->_mysqli_noda->autocommit(true);
    $insertNodaLinkStmt->close();

    $this->getWikidataTranslationsForPersinst($data, $persinst_id);

}
/**
 * Function for fetching translations from Wikipedia, based on Wikidata information.
 *
 * For every language of LANGUAGES_TO_CHECK, the label from Wikidata is
 * stored together with
 * - the cleaned first section of the Wikipedia article, if the entity has a
 *   sitelink for the language, or otherwise
 * - the description from Wikidata, if there is one.
 *
 * @param array<mixed> $data        Entity fetched from wikidata.
 * @param integer      $persinst_id Actor ID.
 *
 * @return void
 *
 * @throws MDExpectedException If the requests to Wikipedia cannot be initialized.
 */
public function getWikidataTranslationsForPersinst(array $data, int $persinst_id): void {

    $checkagainstLanguage = self::LANGUAGES_TO_CHECK;

    [$languagesToFetch, $wikilinks] = self::getWikidataWikipediaTranslationSources($checkagainstLanguage, $data);

    try {
        $contents = MD_STD::runCurlMulti($languagesToFetch, 10000);
    }
    catch (TypeError $e) {
        throw new MDExpectedException("Failed to initialize a request. Try pressing F5 to run the requests again.");
    }

    // Prepared only after the fetch, so a failed fetch does not leave an
    // open statement behind.
    $insertStmt = $this->_mysqli_noda->do_prepare("CALL nodaInsertPersinstTranslation(?, ?, ?, ?, ?)");

    $this->_mysqli_noda->autocommit(false);
    foreach ($checkagainstLanguage as $lang) {

        if (!empty($languagesToFetch[$lang]) && !empty($data['sitelinks'][$lang . 'wiki'])) {

            $wikilink = $wikilinks[$lang];

            $tDescription = "";
            if (!empty($contents[$lang])) {

                $descFromWiki = json_decode($contents[$lang], true)['parse']['text']['*'] ?? null;

                # Process data retrieved from wikipedia
                $cleanedDesc = $descFromWiki !== null ? self::_cleanWikidataInput((string)$descFromWiki) : "";
                if (substr($cleanedDesc, -1) === chr(10)) $cleanedDesc = substr($cleanedDesc, 0, -1);

                // Only add quotes and the source note if there is text to
                // attribute. An empty result stays an empty description.
                if ($cleanedDesc !== "") {
                    $tDescription = '"' . $cleanedDesc . '" - (' . $data['labels'][$lang]['language'] . '.wikipedia.org ' . date('d.m.Y') . ')';
                }

            }

            $tLang = self::_cleanWikidataInput((string)$data['labels'][$lang]['language']);
            $tLabel = self::_cleanWikidataInput((string)$data['labels'][$lang]['value']);

            try {
                $insertStmt->bind_param("issss", $persinst_id, $tLang, $tLabel, $tDescription, $wikilink);
                $insertStmt->execute();
            }
            catch (MDMysqliInvalidEncodingError $e) {
                // Deliberately ignored: a translation failing with an encoding
                // error is skipped, the remaining languages are still processed.
            }

        }
        // No Wikipedia article available: fall back to label and description from Wikidata
        else if (!empty($data['labels'][$lang]['value']) and !empty($data['descriptions'][$lang])) {

            $wikilink = "";
            $tLang = $data['labels'][$lang]['language'];
            $tLabel = $data['labels'][$lang]['value'];
            $tDescription = $data['descriptions'][$lang]['value'];

            $insertStmt->bind_param("issss", $persinst_id, $tLang, $tLabel, $tDescription, $wikilink);
            $insertStmt->execute();

        }

    }
    $this->_mysqli_noda->commit();
    $this->_mysqli_noda->autocommit(true);

    $insertStmt->close();
    unset($insertStmt);

}
/**
 * Function for entering base information about a place from wikidata.
 *
 * Writes the text fetched from Wikipedia into the place's description and
 * inserts or updates the link to the Wikipedia article in `noda_orte`.
 *
 * If the place already has a description that does not start with "GND",
 * the behaviour depends on $_GET['keep']:
 * - not set: a comparison page with links for keeping, replacing or
 *   appending is printed and the script is terminated,
 * - "add": the new text is appended to the existing description,
 * - "keep": the existing description is written back unchanged,
 * - any other value: the description is replaced by the new text.
 *
 * @param mysqli_result $currentPlaceResult Mysqli result pointing to the current place.
 * @param string        $datafromwiki       Data parsed from wikidata.
 * @param array<mixed>  $wikilink           Links to Wikipedia articles; the entry for $lang is stored.
 * @param string        $preflang           Language of the user interface in general.
 * @param string        $lang               Language of the main entry.
 * @param integer       $placeID            ID of the place.
 * @param string        $erfasst_von        User name.
 *
 * @return boolean False if no row could be fetched from $currentPlaceResult, true otherwise.
 */
public function enterPlaceDescFromWikidata ( mysqli_result $currentPlaceResult , string $datafromwiki , array $wikilink , string $preflang , string $lang , int $placeID , string $erfasst_von ) {
// Wrap the text in quotes and append the source note (language and current date).
$datafromwiki = '"' . $datafromwiki . '" - (Wikipedia (' . $lang . ') ' . date ( " d.m.Y " ) . ')' ;
if ( ! ( $curPlaceInfo = $currentPlaceResult -> fetch_assoc ())) return false ;
// An existing description is only protected if it is not blank and does not start with "GND".
if ( ! empty ( trim ( $curPlaceInfo [ 'ort_anmerkung' ])) and substr ( $curPlaceInfo [ 'ort_anmerkung' ], 0 , 3 ) !== 'GND' ) {
if ( isset ( $_GET [ 'keep' ])) {
// "add": append the new text after a blank line.
if ( $_GET [ 'keep' ] === 'add' ) {
$datafromwiki = $curPlaceInfo [ 'ort_anmerkung' ] . PHP_EOL . PHP_EOL . $datafromwiki ;
}
// "keep": the update below writes the old text back unchanged.
else if ( $_GET [ 'keep' ] === 'keep' ) {
$datafromwiki = $curPlaceInfo [ 'ort_anmerkung' ];
}
}
else {
// No decision yet: print old and new text side by side with the three choices, then stop.
// NOTE(review): the stored description and the fetched text are written into the page
// without HTML escaping - confirm they cannot contain markup.
$tlLoader = new MDTlLoader ( " wiki_getter_place " , $preflang );
echo self :: generateHTMLHeadForWikidataFetcher ( $lang );
echo self :: generateWikidataFetcherHeader ( $tlLoader );
echo '
< p class = " alert icons iconsAlert " > There is already an entry for description ...</ p >
< div class = " wikiReplaceTTile " >
< h3 > Actual entry </ h3 >< p > ' . nl2br($curPlaceInfo[' ort_anmerkung ']) . ' </ p >
</ div >
< div class = " wikiReplaceTTile " >
< h3 > Now found </ h3 >
< p > ' . $datafromwiki . ' </ p >
</ div >
< a href = " get_wikidata_for_ort.php?keep=keep' . write_get_vars(['suchbegriff', 'lang', 'wikidata_id', 'ort_id']) . ' " class = " buttonLike icons iconsPin " > Keep old entry </ a >
< br >< a href = " get_wikidata_for_ort.php?keep=replace' . write_get_vars(['suchbegriff', 'lang', 'wikidata_id', 'ort_id']) . ' " class = " buttonLike icons iconsPinOff " > Replace with new entry </ a >
< br >< a href = " get_wikidata_for_ort.php?keep=add' . write_get_vars(['suchbegriff', 'lang', 'wikidata_id', 'ort_id']) . ' " class = " buttonLike icons iconsPlusOne " > Keep old and add new entry </ a >< br >< br >< br >
' ;
exit ;
}
}
// Write description to DB
$updateStmt = $this -> _mysqli_noda -> do_prepare ( " UPDATE `orte`
SET `ort_anmerkung` = ? ,
`ort_erfasst_am` = NOW (),
`ort_erfasst_von` = ?
WHERE ort_id = ? " );
try {
$updateStmt -> bind_param ( " ssi " , $datafromwiki , $erfasst_von , $placeID );
$updateStmt -> execute ();
}
catch ( MDMysqliInvalidEncodingError $e ) {
// The error is only recorded in the session; processing continues.
$_SESSION [ " editHistory " ] = [ " changesStored " , " Error adding base description " ];
}
$updateStmt -> close ();
unset ( $updateStmt );
// Write link to wikipedia to relevant noda DB table
$wikiAlreadyResult = $this -> _mysqli_noda -> query_by_stmt ( " SELECT `noda_orte`.`noda_id`
FROM `noda_orte`
WHERE `noda_orte` . `ort_id` = ?
AND `noda_orte` . `noda_source` = 'Wikipedia' " , " i " , $placeID );
// Insert the link if none is stored yet, update it if exactly one exists.
// With more than one existing row, nothing is changed.
switch ( $wikiAlreadyResult -> num_rows ) {
case 0 :
$insertWikiStmt = $this -> _mysqli_noda -> do_prepare ( " INSERT INTO `noda_orte`
( `ort_id` , `noda_source` , `noda_nrinsource` , `noda_link` , `noda_erfasst_am` , `noda_erfasst_von` )
VALUES
( ? , 'Wikipedia' , '' , ? , NOW (), ? ) " );
$insertWikiStmt -> bind_param ( " iss " , $placeID , $wikilink [ $lang ], $erfasst_von );
$insertWikiStmt -> execute ();
$insertWikiStmt -> close ();
unset ( $insertWikiStmt );
break ;
case 1 :
if ( $wikiAlreadyData = $wikiAlreadyResult -> fetch_assoc ()) {
$wikischon_id = $wikiAlreadyData [ 'noda_id' ];
$updateStmt = $this -> _mysqli_noda -> do_prepare ( " UPDATE `noda_orte` SET `noda_link` = ? WHERE `noda_id` = ? " );
$updateStmt -> bind_param ( " si " , $wikilink [ $lang ], $wikischon_id );
$updateStmt -> execute ();
$updateStmt -> close ();
unset ( $updateStmt );
}
break ;
}
$wikiAlreadyResult -> close ();
unset ( $wikiAlreadyResult );
return true ;
}
/**
 * Retrieves information on a place from Wikidata and stores it in the database.
 *
 * Steps:
 * - Fetch the entity JSON for the given Q-ID from Wikidata.
 * - Fetch the intro section of the linked Wikipedia article, trying the
 *   user's language first and then each language of
 *   self::LANGUAGES_MAIN_DESC, and hand it to enterPlaceDescFromWikidata()
 *   until that method reports success.
 * - Write links to norm data sources to `noda_orte`.
 * - Store the values of the claims P1667 (in `ort_land`), P1566 (in
 *   `ort_geonames`) and P625 (coordinates) in `orte`.
 * - Fetch translations via getWikidataTranslationsForPlace().
 *
 * @param string  $lang        Language.
 * @param string  $wikidata_id Wikidata Q-ID.
 * @param integer $onum        Place ID.
 * @param string  $erfasst_von User name of the current user.
 *
 * @throws MDhttpFailedException If the response from Wikidata cannot be decoded.
 * @throws Exception             If no URL prefix is known for a norm data source.
 *
 * @return void
 */
public function retrievePlaceInfoFromWikidataID(string $lang, string $wikidata_id, int $onum, string $erfasst_von) {

    $data = MD_STD::runCurl("https://www.wikidata.org/wiki/Special:EntityData/" . urlencode($wikidata_id) . ".json", 10000);
    if (!$data = json_decode($data, true)) {
        throw new MDhttpFailedException("Failed fetching from Wikidata. Try again later.");
    }

    // The response may not contain the requested ID; continue with an empty
    // entity instead of passing null on to the translation fetcher.
    $data = $data['entities'][$wikidata_id] ?? [];
    if (!is_array($data)) $data = [];

    $wikilink = $wikilinkterm = [];
    foreach (self::LANGUAGES_MAIN_DESC as $tLang) {
        if (isset($data['sitelinks'][$tLang . 'wiki']['url'])) $wikilink[$tLang] = $data['sitelinks'][$tLang . 'wiki']['url'];
        if (isset($data['sitelinks'][$tLang . 'wiki']['title'])) $wikilinkterm[$tLang] = str_replace(' ', '_', $data['sitelinks'][$tLang . 'wiki']['title']);
    }

    $currentPlaceResult = $this->_mysqli_noda->query_by_stmt("SELECT `ort_anmerkung`
        FROM `orte`
        WHERE `ort_id` = ?", "i", $onum);

    // Try the user's language first, then every main language exactly once.
    $alreadyEntered = false;
    foreach (array_unique(array_merge([$lang], self::LANGUAGES_MAIN_DESC)) as $sprache) {

        if ($alreadyEntered === true) break;
        if (empty($wikilink[$sprache]) || !isset($wikilinkterm[$sprache])) continue;

        $datafromwiki = MD_STD::runCurl("https://" . urlencode($sprache) . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode($wikilinkterm[$sprache]) . "&prop=text&section=0&format=json", 10000);
        // An error response of the API carries no parse.text; treat it as "no description".
        $datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'] ?? null;

        if (!empty($datafromwiki) and $datafromwiki = self::_cleanWikidataInput((string)$datafromwiki)) {
            $alreadyEntered = $this->enterPlaceDescFromWikidata($currentPlaceResult, $datafromwiki, $wikilink, $lang, $sprache, $onum, $erfasst_von);
        }

    }

    $currentPlaceResult->close();
    unset($currentPlaceResult);

    // Returns the value of the first statement of a claim, or null if the
    // claim is missing or has no data value.
    $firstClaimValue = static function (string $pId) use ($data) {
        return $data['claims'][$pId][0]['mainsnak']['datavalue']['value'] ?? null;
    };

    $geonames_id  = $firstClaimValue('P1566');
    $tgn_id       = $firstClaimValue('P1667');
    $coordinates  = $firstClaimValue('P625');
    $latitude_wd  = $coordinates['latitude'] ?? null;
    $longitude_wd = $coordinates['longitude'] ?? null;

    $nodaLinks = [
        "wikidata" => $wikidata_id,
    ];
    foreach (self::P_IDS_NODA_TAGS as $vocabName => $pId) {
        if ($vocabName === 'lcsh') continue;
        $nodaId = $firstClaimValue((string)$pId);
        if (is_string($nodaId) && $nodaId !== '') $nodaLinks[$vocabName] = $nodaId;
    }

    // Resolve all link URLs before touching the database, so that an unknown
    // source cannot abort the method in the middle of the transaction.
    $nodaLinkUrls = [];
    foreach ($nodaLinks as $noda_source => $nodaId) {
        if (!isset(self::URL_PREFIXES_PLACES_NODA_SOURCE[$noda_source])) {
            throw new Exception("Unknown noda link: " . $noda_source);
        }
        $nodaLinkUrls[$noda_source] = self::URL_PREFIXES_PLACES_NODA_SOURCE[$noda_source] . $nodaId;
    }

    $this->_mysqli_noda->autocommit(false);

    $insertNodaLinkStmt = $this->_mysqli_noda->do_prepare("INSERT INTO `noda_orte`
        (`ort_id`, `noda_source`, `noda_nrinsource`, `noda_link`, `noda_erfasst_von`)
        VALUES
        (?, ?, ?, ?, ?)
        ON DUPLICATE KEY UPDATE `noda_nrinsource` = ?,
            `noda_link` = ?");

    foreach ($nodaLinkUrls as $noda_source => $noda_link_url) {
        $nodaId = $nodaLinks[$noda_source];
        $insertNodaLinkStmt->bind_param("issssss", $onum, $noda_source, $nodaId, $noda_link_url, $erfasst_von, $nodaId, $noda_link_url);
        $insertNodaLinkStmt->execute();
    }

    $insertNodaLinkStmt->close();
    unset($insertNodaLinkStmt);

    if (!empty($tgn_id)) {
        $updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `orte`
            SET `ort_land` = ?
            WHERE `ort_id` = ?");
        $updateStmt->bind_param("si", $tgn_id, $onum);
        $updateStmt->execute();
        $updateStmt->close();
        unset($updateStmt);
    }

    if (!empty($geonames_id)) {
        $updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `orte`
            SET `ort_geonames` = ?
            WHERE `ort_id` = ?");
        $updateStmt->bind_param("si", $geonames_id, $onum);
        $updateStmt->execute();
        $updateStmt->close();
        unset($updateStmt);
    }

    // is_numeric() instead of !empty(): a latitude or longitude of exactly 0
    // is a valid coordinate and must be stored as well.
    if (is_numeric($latitude_wd) && is_numeric($longitude_wd)) {
        $updateStmt = $this->_mysqli_noda->do_prepare("UPDATE `orte`
            SET `ort_nord_sued` = ?, `ort_west_ost` = ?, ort_zoom = '9'
            WHERE `ort_id` = ?");
        $updateStmt->bind_param("ssi", $latitude_wd, $longitude_wd, $onum);
        $updateStmt->execute();
        $updateStmt->close();
        unset($updateStmt);
    }

    $this->_mysqli_noda->commit();
    $this->_mysqli_noda->autocommit(true);

    if (!empty($data)) $this->getWikidataTranslationsForPlace($data, $onum);

}
/**
 * Fetches translations of a place from Wikidata / Wikipedia and stores them.
 *
 * For every language in self::LANGUAGES_TO_CHECK that has a Wikipedia
 * sitelink and a URL to fetch, the fetched article intro is stored as the
 * description and the article title (or, failing that, the Wikidata label)
 * as the name. For languages without a sitelink, Wikidata's own label and
 * description are stored if both exist.
 *
 * @param array<mixed> $data   Entity data fetched from wikidata.
 * @param integer      $ort_id Place ID.
 *
 * @throws MDExpectedException If the parallel requests cannot be initialized.
 *
 * @return void
 */
public function getWikidataTranslationsForPlace(array $data, int $ort_id) {

    $checkagainstLanguage = self::LANGUAGES_TO_CHECK;

    list($languagesToFetch, $wikilinks) = self::getWikidataWikipediaTranslationSources($checkagainstLanguage, $data);

    try {
        $contents = MD_STD::runCurlMulti($languagesToFetch, 10000);
    }
    catch (TypeError $e) {
        throw new MDExpectedException("Failed to initialize a request. Try pressing F5 to run the requests again.");
    }

    $insertStmt = $this->_mysqli_noda->do_prepare("CALL `nodaInsertOrtTranslation`(?, ?, ?, ?, ?)");

    $this->_mysqli_noda->autocommit(false);
    foreach ($checkagainstLanguage as $lang) {

        if (!empty($languagesToFetch[$lang]) && !empty($data['sitelinks'][$lang . 'wiki'])) {

            $wikilink = $wikilinks[$lang] ?? "";

            // Reset per language: without this, a language without fetched
            // content would be stored with the label of the previous one.
            $tLabel       = "";
            $tDescription = "";

            // A sitelink may exist without a label in the same language.
            $langCode = $data['labels'][$lang]['language'] ?? $lang;

            if (!empty($contents[$lang])) {

                if (!($wikiDataDecoded = json_decode($contents[$lang], true))) {
                    continue;
                }

                $tLabel       = $wikiDataDecoded['parse']['title'] ?? "";
                $descFromWiki = $wikiDataDecoded['parse']['text']['*'] ?? "";

                // Process data retrieved from wikipedia
                if (!empty($descFromWiki)) {
                    $tDescription = self::_cleanWikidataInput((string)$descFromWiki);
                    // Drop a single trailing line feed
                    if (substr($tDescription, -1) === chr(10)) $tDescription = substr($tDescription, 0, -1);
                    $tDescription = '"' . $tDescription . '" - (' . $langCode . '.wikipedia.org ' . date('d.m.Y') . ')';
                    // Remove encoded footnote markers ("[1]", "[12]", ...) and replace apostrophes
                    $tDescription = str_replace("'", "´", MD_STD::preg_replace_str('/&#91;[0-9]+&#93;/', '', $tDescription));
                }

            }

            $tLang = self::_cleanWikidataInput((string)$langCode);
            if (empty($tLabel)) $tLabel = self::_cleanWikidataInput((string)($data['labels'][$lang]['value'] ?? ""));

            // Neither an article title nor a Wikidata label: nothing worth storing.
            if ((string)$tLabel === '') continue;

            try {
                $insertStmt->bind_param("issss", $ort_id, $tLang, $tLabel, $tDescription, $wikilink);
                $insertStmt->execute();
            }
            catch (MDMysqliInvalidEncodingError $e) {
                $_SESSION["editHistory"] = ["changesStored", "Error adding translation for language $tLang"];
            }

        }
        else if (!empty($data['labels'][$lang]['value']) and !empty($data['descriptions'][$lang])) {

            $wikilink = "";
            $insertStmt->bind_param("issss", $ort_id, $data['labels'][$lang]['language'], $data['labels'][$lang]['value'], $data['descriptions'][$lang]['value'], $wikilink);
            $insertStmt->execute();

        }

    }
    $this->_mysqli_noda->commit();
    $this->_mysqli_noda->autocommit(true);

    $insertStmt->close();
    unset($insertStmt);

}
/**
 * Stores a description fetched from Wikipedia for a tag.
 *
 * If the tag already has a description that does not start with "GND", the
 * GET parameter "keep" decides what happens to it:
 * - empty or "replace": the existing text is overwritten,
 * - "add": the new text is appended to the existing one,
 * - any other value: the existing text is left untouched.
 * If the parameter is not set at all, a page asking the user to choose is
 * printed and the script exits.
 *
 * In every case that does not exit, the Wikipedia link is written to
 * `noda_tag` and the editing metadata of the tag is updated.
 *
 * @param integer $tag_id       Tag ID.
 * @param string  $datafromwiki Data fetched from Wikipedia.
 * @param string  $wikilink     Link to wikipedia entry.
 * @param string  $preflang     The user's currently used language.
 * @param string  $lang         Currently queried language.
 * @param string  $erfasst_von  User who adds the info.
 *
 * @return boolean False if the tag does not exist, true otherwise.
 */
public function retrieveTagDescFromWikipedia(int $tag_id, string $datafromwiki, string $wikilink, string $preflang, string $lang, string $erfasst_von):bool {

    $datafromwiki = '"' . $datafromwiki . '" - (Wikipedia (' . $lang . ') ' . date("d.m.Y") . ')';
    // Remove encoded footnote markers ("[1]", "[12]", ...) and replace apostrophes
    $datafromwiki = str_replace("'", "´", MD_STD::preg_replace_str('/&#91;[0-9]+&#93;/', '', $datafromwiki));

    $cergebnis = $this->_mysqli_noda->query_by_stmt("SELECT `tag_anmerkung`
        FROM `tag`
        WHERE `tag_id` = ?", "i", $tag_id);
    $cinfo = $cergebnis->fetch_assoc();
    $cergebnis->close();
    $cergebnis = null;

    // Unknown tag: nothing to update
    if (!$cinfo) return false;

    $existingDesc = (string)($cinfo['tag_anmerkung'] ?? '');

    // Description to write; null means "leave the existing one untouched".
    $descToStore = $datafromwiki;

    if (!empty($existingDesc) and substr($existingDesc, 0, 3) !== 'GND') {

        if (!isset($_GET['keep'])) {

            // Ask the user what to do. Both texts are escaped, as they stem
            // from the database and from a remote source respectively;
            // existing entities are not encoded a second time.
            $escapeFlags     = ENT_QUOTES | ENT_SUBSTITUTE;
            $existingDescOut = nl2br(htmlspecialchars($existingDesc, $escapeFlags, 'UTF-8', false));
            $newDescOut      = htmlspecialchars($datafromwiki, $escapeFlags, 'UTF-8', false);

            $tlLoader = new MDTlLoader("wiki_getter_tag", $preflang);
            echo self::generateHTMLHeadForWikidataFetcher($lang);
            echo self::generateWikidataFetcherHeader($tlLoader);
            echo '
                <p class="alert icons iconsAlert">Es gibt schon einen Eintrag im Beschreibungsfeld</p>
                <div class="wikiReplaceTTile">
                    <h3>Bisher vorhanden</h3><p>' . $existingDescOut . '</p>
                </div>
                <div class="wikiReplaceTTile">
                    <h3>Jetzt gefunden</h3><p>' . $newDescOut . '</p>
                </div>
                <a href="get_wikidata_for_tag.php?keep=keep' . write_get_vars(['suchbegriff', 'lang', 'wikidata_id', 'tag_id']) . '" class="buttonLike icons iconsPin">Keep old entry</a>';
            echo '<br><a href="get_wikidata_for_tag.php?keep=replace' . write_get_vars(['suchbegriff', 'lang', 'wikidata_id', 'tag_id']) . '" class="buttonLike icons iconsPinOff">Replace with new entry</a>';
            echo '<br><a href="get_wikidata_for_tag.php?keep=add' . write_get_vars(['suchbegriff', 'lang', 'wikidata_id', 'tag_id']) . '" class="buttonLike icons iconsPlusOne">Keep old and add new entry</a><br><br><br>';
            exit;

        }

        $keep = $_GET['keep'];
        if (!$keep || $keep === 'replace') {
            $descToStore = $datafromwiki;
        }
        else if ($keep === 'add') {
            $descToStore = $existingDesc . PHP_EOL . PHP_EOL . $datafromwiki;
        }
        else {
            $descToStore = null;
        }

    }

    // The transaction is only opened once it is clear that the script will
    // not exit to ask the user.
    $this->_mysqli_noda->autocommit(false);

    if ($descToStore !== null) {
        $updateTagDescStmt = $this->_mysqli_noda->do_prepare("UPDATE `tag`
            SET `tag_anmerkung` = ?
            WHERE `tag_id` = ?");
        $updateTagDescStmt->bind_param("si", $descToStore, $tag_id);
        $updateTagDescStmt->execute();
        $updateTagDescStmt->close();
        $updateTagDescStmt = null;
    }

    $insertNodaTagStmt = $this->_mysqli_noda->do_prepare("INSERT INTO `noda_tag`
        (`tag_id`, `noda_source`, `noda_nrinsource`, `noda_link`, `noda_erfasst_von`)
        VALUES
        (?, 'Wikipedia', '', ?, ?)
        ON DUPLICATE KEY UPDATE `noda_link` = ?");
    $insertNodaTagStmt->bind_param("isss", $tag_id, $wikilink, $erfasst_von, $wikilink);
    $insertNodaTagStmt->execute();
    $insertNodaTagStmt->close();

    // Update tag editing metadata
    $updateTagEditInfoStmt = $this->_mysqli_noda->do_prepare("UPDATE `tag`
        SET `tag_erfasst_am` = NOW(),
            `tag_erfasst_von` = ?
        WHERE `tag_id` = ?");
    $updateTagEditInfoStmt->bind_param("si", $erfasst_von, $tag_id);
    $updateTagEditInfoStmt->execute();
    $updateTagEditInfoStmt->close();
    $updateTagEditInfoStmt = null;

    $this->_mysqli_noda->commit();
    $this->_mysqli_noda->autocommit(true);

    return true;

}
2021-03-17 22:06:08 +01:00
/**
 * Writes relations to norm data sources to DB.
 *
 * Each entry of $nodaLinks maps the name of a vocabulary (a key of
 * self::URL_PREFIXES_PLACES_NODA_SOURCE) to the ID of the tag in that
 * vocabulary. The link URL is built from the vocabulary's URL prefix and
 * the ID. Existing rows are updated.
 *
 * @param array<string> $nodaLinks   Links to other noda sources.
 * @param integer       $tag_id      Tag ID.
 * @param string        $erfasst_von Name of the user to edit this.
 *
 * @throws Exception If no URL prefix is known for a vocabulary. In this case
 *                   nothing is written.
 *
 * @return void
 */
public function writeNodaLinksTag(array $nodaLinks, int $tag_id, string $erfasst_von):void {

    // Build all URLs first: an unknown vocabulary must not abort the method
    // in the middle of the transaction and leave autocommit disabled.
    $nodaLinkUrls = [];
    foreach ($nodaLinks as $vocabName => $nodaId) {
        if (empty(self::URL_PREFIXES_PLACES_NODA_SOURCE[$vocabName])) {
            throw new Exception("Unknown URL prefix for: " . $vocabName);
        }
        $nodaLinkUrls[$vocabName] = self::URL_PREFIXES_PLACES_NODA_SOURCE[$vocabName] . $nodaId;
    }

    $this->_mysqli_noda->autocommit(false);

    $insertNodaTagStmt = $this->_mysqli_noda->do_prepare("INSERT INTO `noda_tag`
        (`tag_id`, `noda_source`, `noda_nrinsource`, `noda_link`, `noda_erfasst_von`)
        VALUES
        (?, ?, ?, ?, ?)
        ON DUPLICATE KEY UPDATE
            `noda_nrinsource` = ?,
            `noda_link` = ?,
            `noda_erfasst_von` = ?");

    foreach ($nodaLinkUrls as $vocabName => $noda_link) {
        $nodaId = $nodaLinks[$vocabName];
        $insertNodaTagStmt->bind_param("isssssss", $tag_id, $vocabName, $nodaId, $noda_link, $erfasst_von, $nodaId, $noda_link, $erfasst_von);
        $insertNodaTagStmt->execute();
    }

    $insertNodaTagStmt->close();
    $insertNodaTagStmt = null;

    $this->_mysqli_noda->commit();
    $this->_mysqli_noda->autocommit(true);

}
2021-03-17 16:10:49 +01:00
/**
 * Retrieves information on a tag from Wikidata and stores it in the database.
 *
 * Steps:
 * - Fetch the entity JSON for the given Q-ID from Wikidata.
 * - Fetch the intro section of the linked Wikipedia article, trying the
 *   user's language first and then each language of
 *   self::LANGUAGES_MAIN_DESC, and hand it to retrieveTagDescFromWikipedia()
 *   until that method reports success.
 * - Write links to norm data sources via writeNodaLinksTag().
 * - Fetch translations via getWikidataTranslationsForTag().
 *
 * @param string  $lang        The user's selected used language.
 * @param string  $wikidata_id Wikidata ID.
 * @param integer $tag_id      Tag ID.
 * @param string  $erfasst_von User name who's currently editing.
 *
 * @throws MDhttpFailedException If the response from Wikidata cannot be decoded.
 *
 * @return void
 */
public function retrieveTagInfoFromWikidataID(string $lang, string $wikidata_id, int $tag_id, string $erfasst_von) {

    $data = MD_STD::runCurl("https://www.wikidata.org/wiki/Special:EntityData/" . urlencode($wikidata_id) . ".json", 10000);
    $data = json_decode($data, true);
    if ($data === null) {
        throw new MDhttpFailedException("Failed fetching from Wikidata. Try again later.");
    }

    // The response may not contain the requested ID; continue with an empty entity.
    $data = $data['entities'][$wikidata_id] ?? [];
    if (!is_array($data)) $data = [];

    $wikilink = $wikilinkterm = [];
    foreach (self::LANGUAGES_MAIN_DESC as $tLang) {
        if (isset($data['sitelinks'][$tLang . 'wiki']['url'])) $wikilink[$tLang] = $data['sitelinks'][$tLang . 'wiki']['url'];
        if (isset($data['sitelinks'][$tLang . 'wiki']['title'])) $wikilinkterm[$tLang] = str_replace(' ', '_', $data['sitelinks'][$tLang . 'wiki']['title']);
    }

    // Try the user's language first, then every main language exactly once.
    $alreadyEntered = false;
    foreach (array_unique(array_merge([$lang], self::LANGUAGES_MAIN_DESC)) as $sprache) {

        if ($alreadyEntered === true) break;
        if (!isset($wikilink[$sprache]) || !isset($wikilinkterm[$sprache]) || !is_string($wikilinkterm[$sprache])) continue;

        $datafromwiki = MD_STD::runCurl("https://" . urlencode($sprache) . ".wikipedia.org/w/api.php?action=parse&page=" . urlencode($wikilinkterm[$sprache]) . "&prop=text&section=0&format=json", 10000);
        // An error response of the API carries no parse.text; treat it as "no description".
        $datafromwiki = json_decode($datafromwiki, true)['parse']['text']['*'] ?? null;

        // Process data retrieved from wikipedia
        if ($datafromwiki = self::_cleanWikidataInput((string)$datafromwiki)) {
            $alreadyEntered = $this->retrieveTagDescFromWikipedia($tag_id, $datafromwiki, $wikilink[$sprache], $lang, $sprache, $erfasst_von);
        }

    }

    $nodaLinks = [
        "wikidata" => $wikidata_id,
    ];
    foreach (self::P_IDS_NODA_TAGS as $vocabName => $pId) {
        if ($vocabName === 'loc') continue;
        // Claims without a data value are skipped
        $nodaId = $data['claims'][$pId][0]['mainsnak']['datavalue']['value'] ?? null;
        if (is_string($nodaId) && $nodaId !== '') $nodaLinks[$vocabName] = $nodaId;
    }

    $this->writeNodaLinksTag($nodaLinks, $tag_id, $erfasst_von);

    // Get translations
    if (!empty($data)) $this->getWikidataTranslationsForTag($data, $tag_id);

}
/**
 * Fetches translations of a tag from Wikidata / Wikipedia and stores them.
 *
 * For every language in self::LANGUAGES_TO_CHECK that has a Wikipedia
 * sitelink and a URL to fetch, the Wikidata label is stored as the name and
 * the fetched article intro as the description. For languages without a
 * sitelink, Wikidata's own label and description are stored if both exist.
 * Names and descriptions in the languages listed in
 * self::LANGUAGES_TO_CAPITALIZE get an upper-case first letter.
 *
 * @param array<mixed> $data   Entity data fetched from wikidata.
 * @param integer      $tag_id Tag ID.
 *
 * @throws MDExpectedException If the parallel requests cannot be initialized.
 *
 * @return void
 */
public function getWikidataTranslationsForTag(array $data, int $tag_id) {

    $checkagainstLanguage = self::LANGUAGES_TO_CHECK;

    $insertStmt = $this->_mysqli_noda->do_prepare("CALL nodaInsertTagTranslation(?, ?, ?, ?, ?)");

    list($languagesToFetch, $wikilinks) = self::getWikidataWikipediaTranslationSources($checkagainstLanguage, $data);

    try {
        $contents = MD_STD::runCurlMulti($languagesToFetch, 10000);
    }
    catch (TypeError $e) {
        throw new MDExpectedException("Failed to initialize a request. Try pressing F5 to run the requests again.");
    }

    // Trims a text and upper-cases its first character. ucfirst() works on
    // single bytes and leaves non-ASCII initials (e.g. Cyrillic) untouched,
    // hence mbstring is used where available.
    $capitalize = static function (string $text) : string {
        $text = trim($text);
        if ($text === '' || !function_exists('mb_strtoupper')) return ucfirst($text);
        return mb_strtoupper(mb_substr($text, 0, 1, 'UTF-8'), 'UTF-8') . mb_substr($text, 1, null, 'UTF-8');
    };

    $this->_mysqli_noda->autocommit(false);
    foreach ($checkagainstLanguage as $lang) {

        if (!empty($languagesToFetch[$lang]) && !empty($data['sitelinks'][$lang . 'wiki'])) {

            $wikilink = $wikilinks[$lang] ?? "";

            // A sitelink may exist without a label in the same language.
            $langCode = $data['labels'][$lang]['language'] ?? $lang;

            $tDescription = "";
            if (!empty($contents[$lang])) {

                // An error response of the API carries no parse.text
                $descFromWiki = json_decode($contents[$lang], true)['parse']['text']['*'] ?? null;

                if (!empty($descFromWiki)) {
                    // Process data retrieved from wikipedia
                    $tDescription = self::_cleanWikidataInput((string)$descFromWiki);
                    // Drop a single trailing line feed
                    if (substr($tDescription, -1) === chr(10)) {
                        $tDescription = substr($tDescription, 0, -1);
                    }
                    $tDescription = '"' . $tDescription . '" - (' . $langCode . '.wikipedia.org ' . date('d.m.Y') . ')';
                    // Remove encoded footnote markers ("[1]", "[12]", ...) and replace apostrophes
                    $tDescription = str_replace("'", "´", MD_STD::preg_replace_str('/&#91;[0-9]+&#93;/', '', $tDescription));
                }

            }

            $tLang  = self::_cleanWikidataInput((string)$langCode);
            $tLabel = self::_cleanWikidataInput((string)($data['labels'][$lang]['value'] ?? ""));

            // Without a label there is no translation to store.
            if ((string)$tLabel === '') continue;

            if (in_array($tLang, self::LANGUAGES_TO_CAPITALIZE, true)) {
                $tLabel       = $capitalize((string)$tLabel);
                $tDescription = $capitalize((string)$tDescription);
            }

            try {
                $insertStmt->bind_param("issss", $tag_id, $tLang, $tLabel, $tDescription, $wikilink);
                $insertStmt->execute();
            }
            catch (MDMysqliInvalidEncodingError $e) {
                // Same handling as for place translations: note the failure and go on
                $_SESSION["editHistory"] = ["changesStored", "Error adding translation for language $tLang"];
            }

        }
        else if (!empty($data['labels'][$lang]['value']) and !empty($data['descriptions'][$lang])) {

            $wikilink = "";

            if (in_array($lang, self::LANGUAGES_TO_CAPITALIZE, true)) {
                $data['labels'][$lang]['value']       = $capitalize((string)$data['labels'][$lang]['value']);
                $data['descriptions'][$lang]['value'] = $capitalize((string)$data['descriptions'][$lang]['value']);
            }

            $insertStmt->bind_param("issss", $tag_id, $data['labels'][$lang]['language'], $data['labels'][$lang]['value'], $data['descriptions'][$lang]['value'], $wikilink);
            $insertStmt->execute();

        }

    }
    $this->_mysqli_noda->commit();
    $this->_mysqli_noda->autocommit(true);

    $insertStmt->close();
    unset($insertStmt);

}
/**
 * Prints a list of Wikidata search results for the user to select from.
 *
 * Results without a label, with an empty description, or describing a
 * disambiguation page are left out. Each listed result links to $link with
 * the search term, the result's Wikidata ID and the language appended as
 * query parameters. All values are escaped for their HTML / URL context.
 *
 * @param array<array<mixed>> $wikidata_data Wikidata data. The results are
 *                                           expected in the key "search".
 * @param string              $link          Link prefix to which the query
 *                                           parameters are appended.
 * @param string              $searchTerm    Search term.
 * @param string              $lang          Language.
 *
 * @return void
 */
public static function generateWikidataResultsList(array $wikidata_data, string $link, string $searchTerm, string $lang):void {

    $esc = static function ($value) : string {
        return htmlspecialchars((string)$value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    };

    if (empty($wikidata_data['search'])) {
        echo '<p class="icons iconsAlert alert"><b>' . $esc(ucfirst($searchTerm)) . '</b> not found in Wikidata</p>';
        return;
    }

    $skippedDescriptions = ['', 'Wikipedia disambiguation page', 'Wikimedia disambiguation page'];

    echo '
        <main id="wikidataResultsList">';

    foreach ($wikidata_data['search'] as $result) {

        if (!isset($result['label']) || $result['label'] == '') continue;
        if (isset($result['description']) && in_array($result['description'], $skippedDescriptions, true)) continue;

        $href = $link . 'suchbegriff=' . urlencode($searchTerm)
            . '&wikidata_id=' . urlencode((string)$result['id'])
            . '&lang=' . urlencode($lang);

        // $link may already contain encoded entities, hence no double encoding here
        echo '<div><a href="' . htmlspecialchars($href, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false) . '">
            <h4 class="icons iconsTag">' . $esc($result['id']) . '</h4>';
        echo '<p class="wikidataSummary">' . $esc($result['label']);
        if (!empty($result['match'])) {
            echo "(<span class='icons iconsTranslate'>" . $esc($result['match']['language'] ?? '') . ": " . $esc($result['match']['text'] ?? '') . "</span>)";
        }
        echo '</p>';
        if (!empty($result['description'])) echo '<p>' . $esc($result['description']) . '</p>';
        echo '</a><a class="icons iconsEye" target="_blank" href="https://www.wikidata.org/wiki/' . $esc($result['id']) . '">Wikidata page</a></div>';

    }

    echo '
        </main>';

}
/**
 * Generates the HTML head (up to and including the opening body tag) for
 * wikidata fetcher pages.
 *
 * The stylesheet link is only added if the constant MAIN_CSS_FILE is defined.
 * The result is passed through MD_STD::minimizeHTMLString().
 *
 * @param string  $lang     User language. Written to the lang attribute of
 *                          the html element (escaped).
 * @param boolean $implyEnd If set to true, a shutdown function is registered
 *                          that echoes the return value of printHTMLEnd()
 *                          at the end of the script execution.
 *
 * @return string
 */
public static function generateHTMLHeadForWikidataFetcher(string $lang, bool $implyEnd = true):string {

    $output = '<!DOCTYPE html><html class="getWikidata" lang="' . htmlspecialchars($lang, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . '">
        <head>
            <title>Get Wikidata</title>
            <meta name="viewport" content="width=device-width, initial-scale=1" />
            <link rel="manifest" href="../manifest.webmanifest" />
            <meta name="theme-color" content="#0b1728" />
            <link rel="shortcut icon" sizes="16x16 32x32" href="../img/mdlogo-nodac.svg.png" />
            <link rel="apple-touch-icon" sizes="256x256" href="../img/mdterm-256px.png" />
            <meta http-equiv="content-type" content="text/html;charset=UTF-8"/>';

    if (defined("MAIN_CSS_FILE")) {
        $output .= '<link rel="stylesheet" type="text/css" href="' . htmlspecialchars(MAIN_CSS_FILE) . '">';
    }

    $output .= '
            <meta name="description" content="Fetch information from Wikidata." />
        </head>
        <body>';

    if ($implyEnd === true) {
        register_shutdown_function(function () : void {
            echo printHTMLEnd();
        });
    }

    return MD_STD::minimizeHTMLString($output);

}
/**
 * Generates the header element for wikidata fetcher pages.
 *
 * The header consists of the Wikidata logo, the translated page title and
 * the search term. If no search term is passed, the GET parameter
 * "suchbegriff" is used, provided it is a string. The search term is
 * escaped for output; $additional is appended as it is.
 *
 * @param MDTlLoader $tlLoader   Translation variable.
 * @param string     $additional Additional info. Appended unescaped after
 *                               the headline.
 * @param string     $searchTerm Search term.
 *
 * @return string
 */
public static function generateWikidataFetcherHeader(MDTlLoader $tlLoader, string $additional = "", string $searchTerm = ""):string {

    if (empty($searchTerm) and !empty($_GET['suchbegriff']) and is_string($_GET['suchbegriff'])) {
        $searchTerm = $_GET['suchbegriff'];
    }

    $output = '
        <header>
            <h1><img src="../img/wikidata.png" alt="Logo: Wikidata" />' . $tlLoader->tl("wiki", "wiki", "fetch_from_wikidata");
    // The search term is user input; existing entities are not encoded twice
    $output .= ': ' . htmlspecialchars($searchTerm, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
    $output .= '</h1>';
    $output .= $additional;
    $output .= '</header>';

    return $output;

}
/**
 * Sets up the fetcher with the database connection its methods run their
 * queries on.
 *
 * @param MDMysqli $mysqli_noda DB connection.
 *
 * @return void
 */
public function __construct(MDMysqli $mysqli_noda) {

    $this->_mysqli_noda = $mysqli_noda;

}
}