Disallow fetching from Wikidata disambiguation pages

Close #23
This commit is contained in:
Joshua Ramon Enslin 2025-02-13 22:37:17 +01:00
parent 28f6db67ff
commit 9d7d53a858
Signed by: jrenslin
GPG Key ID: 46016F84501B70AE
3 changed files with 70 additions and 1 deletions

View File

@ -178,7 +178,20 @@ final class NodaWikidataFetcher {
if (empty($data['entities'][$wikidata_id])) {
throw new MDhttpFailedException("Failed fetching from Wikidata. Try again later.");
}
return $data['entities'][$wikidata_id];
$output = $data['entities'][$wikidata_id];
// Throw exception if this page is a dedicated disambiguation item.
// P31: Instance of; Q4167410: Wikimedia disambiguation page
if (isset($output['claims']) && isset($output['claims']['P31'])) {
foreach ($output['claims']['P31'] as $is_instance_of) {
if (isset($is_instance_of['mainsnak']['datavalue']['value']['id']) && $is_instance_of['mainsnak']['datavalue']['value']['id'] === 'Q4167410') {
throw new NodaWikidataFetcherDisambiguationIsDisallowedException("Loading wikidata disambiguation pages is disallowed");
}
}
}
return $output;
}

View File

@ -0,0 +1,27 @@
<?PHP
/**
* This file contains an exception class to be thrown if a user attempts to load
* data from a Wikidata item specifically established for a disambiguation page.
*
* @file
*
* @author Joshua Ramon Enslin <joshua@museum-digital.de>
*/
declare(strict_types = 1);
/**
* Exception class to be thrown if a user attempts to load
* data from a Wikidata item specifically established for a disambiguation page.
*/
final class NodaWikidataFetcherDisambiguationIsDisallowedException extends MDgenericInvalidInputsException {
    /**
     * Builds the error message for this exception: a fixed explanation that
     * a disambiguation page was requested, followed by the message the
     * exception was constructed with.
     *
     * @return string
     */
    public function errorMessage():string {
        return 'Attempted to load a disambiguation page. Please select the specific item you want to fetch to enrich the given entry: ' . $this->getMessage();
    }
}

View File

@ -11,6 +11,7 @@ use PHPUnit\Framework\Attributes\Medium;
use PHPUnit\Framework\Attributes\DataProvider;
require_once __DIR__ . '/../../MDMysqli/test_connections.conf.php';
require_once __DIR__ . '/../src/NodaWikidataFetcherDisambiguationIsDisallowedException.php';
/**
* This script contains tests for the Wikidata fetcher.
@ -37,6 +38,34 @@ final class NodaWikidataFetcherTest extends TestCase {
}
/**
* Data provider providing a Wikidata ID for a dedicated wikidata item for disambiguation pages.
*
* @return array<string, array{0: string}>
*/
public static function disambiguationPageProvider():array {
    // Data sets are keyed by a human-readable label; each value is the
    // argument list (a single Wikidata ID) handed to the consuming test.
    $datasets = [];
    $datasets['Disambiguation page for "Mochi" - Q6916210'] = ['Q6916210'];
    return $datasets;
}
/**
* Throw error when attempting to load a dedicated wikidata entry for a disambiguation page.
*
* @param string $wikidata_id Wikidata ID.
*
* @return void
*/
#[DataProvider('disambiguationPageProvider')]
public function testWikidataIdFromLinkFailsForDisambiguationPages(string $wikidata_id):void {
    // expectException() is an instance method of TestCase, so it is called
    // on $this rather than statically. The expectation has to be registered
    // before the call that is supposed to throw.
    $this->expectException(NodaWikidataFetcherDisambiguationIsDisallowedException::class);
    // NOTE(review): presumably this performs a live request against
    // Wikidata - confirm the test is grouped/sized accordingly.
    NodaWikidataFetcher::getWikidataEntity($wikidata_id);
}
/**
* Test to check whether the HTML page is correctly generated.
*