Hoo man has uploaded a new change for review. https://gerrit.wikimedia.org/r/260247
Change subject: snapshot: Deploy DCAT from operations/dumps/dcat ...................................................................... snapshot: Deploy DCAT from operations/dumps/dcat As a stopgap, until we have scap3 set up for this. Bug: T120932 Change-Id: I0a9a4a6dfc8ca5789b394bedcad98d064281f380 --- D modules/snapshot/files/dcat/DCAT.php D modules/snapshot/files/dcat/LICENSE D modules/snapshot/files/dcat/README.md D modules/snapshot/files/dcat/catalog.example.json D modules/snapshot/files/dcat/config.example.json D modules/snapshot/files/dcat/config.json D modules/snapshot/files/dcat/i18n/ar.json D modules/snapshot/files/dcat/i18n/ast.json D modules/snapshot/files/dcat/i18n/br.json D modules/snapshot/files/dcat/i18n/ca.json D modules/snapshot/files/dcat/i18n/ckb.json D modules/snapshot/files/dcat/i18n/de.json D modules/snapshot/files/dcat/i18n/en-ca.json D modules/snapshot/files/dcat/i18n/en.json D modules/snapshot/files/dcat/i18n/es.json D modules/snapshot/files/dcat/i18n/fa.json D modules/snapshot/files/dcat/i18n/fr.json D modules/snapshot/files/dcat/i18n/he.json D modules/snapshot/files/dcat/i18n/kn.json D modules/snapshot/files/dcat/i18n/lb.json D modules/snapshot/files/dcat/i18n/mk.json D modules/snapshot/files/dcat/i18n/nl.json D modules/snapshot/files/dcat/i18n/qqq.json D modules/snapshot/files/dcat/i18n/sv.json D modules/snapshot/files/dcat/i18n/tr.json D modules/snapshot/files/dcat/i18n/zh-hans.json D modules/snapshot/files/dcat/i18n/zh-hant.json M modules/snapshot/manifests/wikidatadumps/common.pp 28 files changed, 7 insertions(+), 1,150 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/47/260247/1 diff --git a/modules/snapshot/files/dcat/DCAT.php b/modules/snapshot/files/dcat/DCAT.php deleted file mode 100644 index e8b7904..0000000 --- a/modules/snapshot/files/dcat/DCAT.php +++ /dev/null @@ -1,664 +0,0 @@ -<?php -/** - * DCAT-AP generation for Wikibase - * - * @author Lokal_Profil - * @licence MIT - * - */ - -/** 
- * Validate that config is json and contains all necessary keys - * - * @param array $config json decoded config file - */ -function validateConfig( array $config ) { - // Later tests depend on these existing and being defined - $topBool = array( "api-enabled", "dumps-enabled" ); - foreach ( $topBool as $val ) { - if ( !array_key_exists( $val, $config ) ) { - throw new Exception( "$val is missing from the config file" ); - } elseif ( !is_bool( $config[$val] ) ) { - throw new Exception( "$val in the config file must be a boolean" ); - } - } - - // Always required - $top = array( - "directory", "uri", "themes", "keywords", "publisher", - "contactPoint", "ld-info", "catalog-license", "catalog-homepage", - "catalog-i18n", "catalog-issued" - ); - $sub = array( - "publisher" => array( "publisherType", "homepage", "name", "email" ), - "contactPoint" => array( "vcardType", "name", "email" ), - "ld-info" => array( "accessURL", "mediatype", "license" ) - ); - - // Dependent on topBool - if ( $config['api-enabled'] ) { - array_push( $top, "api-info" ); - $sub["api-info"] = array( "accessURL", "mediatype", "license" ); - } - if ( $config['dumps-enabled'] ) { - array_push( $top, "dump-info" ); - $sub["dump-info"] = array( "accessURL", "mediatype", "license" ); - } - - // Test - foreach ( $top as $val ) { - if ( !array_key_exists( $val, $config ) ) { - throw new Exception( "$val is missing from the config file" ); - } - } - foreach ( $sub as $key => $subArray ) { - foreach ( $subArray as $val ) { - if ( !array_key_exists( $val, $config[$key] ) ) { - throw new Exception( - $key . "[" . $val . 
"] is missing from the config file" - ); - } - } - } -} - -/** - * Load i18n files, local and remote, into an array - * - * @param array $langs array of langcode => filename - * @param array $config json decoded config file - * @return array: An i18n blob - */ -function makeI18nBlob( array $langs, array $config ) { - // load i18n files into i18n array - $i18n = array(); - foreach ( $langs as $langCode => $filename ) { - $i18n[$langCode] = json_decode( file_get_contents( $filename ), true ); - } - - // load catalog i18n info from URL and add to i18n object - $i18nJSON = json_decode( file_get_contents( $config['catalog-i18n'] ), true ); - if ( !isset( $i18nJSON ) ) { - throw new Exception( - "Could not read catalog-i18n. Are you sure " . - $config['catalog-i18n'] . - " exists and is valid json?" - ); - } - foreach ( array_keys( $i18n ) as $langCode ) { - if ( array_key_exists( "$langCode-title", $i18nJSON ) ) { - $i18n[$langCode]['catalog-title'] = $i18nJSON["$langCode-title"]; - } - if ( array_key_exists( "$langCode-description", $i18nJSON ) ) { - $i18n[$langCode]['catalog-description'] = $i18nJSON["$langCode-description"]; - } - } - - return $i18n; -} - -/** - * Construct a data blob as an easy way of passing data around - * - * @param string $config path to config file - * @return array: A data blob - */ -function makeDataBlob( $config ) { - // Open config file and languages - $config = json_decode( file_get_contents( $config ), true ); - validateConfig( $config ); - - // identify existing i18n files and load into array - $langs = array(); - foreach ( glob( __DIR__ . '/i18n/*.json' ) as $filename ) { - if ( $filename !== __DIR__ . '/i18n/qqq.json' ) { - $langcode = substr( $filename, - strlen( __DIR__ . 
'/i18n/' ), - -strlen( '.json' ) ); - $langs[$langcode] = $filename; - } - } - $i18n = makeI18nBlob( $langs, $config ); - - // hardcoded ids (for now at least) - // https://github.com/lokal-profil/DCAT/issues/2 - $ids = array( - 'publisher' => '_n42', - 'contactPoint' => '_n43', - 'liveDataset' => 'liveData', - 'dumpDatasetPrefix' => 'dumpData', - 'liveDistribLD' => 'liveDataLD', - 'liveDistribAPI' => 'liveDataAPI', - 'dumpDistribPrefix' => 'dumpDist', - ); - - // stick loaded data into blob - $data = array( - 'config' => $config, - 'dumps' => null, - 'i18n' => $i18n, - 'ids' => $ids, - ); - return $data; -} - -/** - * Add additional data to a distribution entry when dealing with a dump. - * Complement to writeDistribution() - * - * @param XmlWriter $xml XML stream to write to - * @param array $data data-blob of i18n and config variables - * @param string|null $dumpDate the date of the dumpfile, null for live data - * @param string $format the fileformat - */ -function dumpDistributionExtras( XMLWriter $xml, array $data, $dumpDate, $format ) { - $url = str_replace( - '$1', - $dumpDate . '/' . $data['dumps'][$dumpDate][$format]['filename'], - $data['config']['dump-info']['accessURL'] - ); - - $xml->startElementNS( 'dcat', 'accessURL', null ); - $xml->writeAttributeNS( 'rdf', 'resource', null, $url ); - $xml->endElement(); - - $xml->startElementNS( 'dcat', 'downloadURL', null ); - $xml->writeAttributeNS( 'rdf', 'resource', null, $url ); - $xml->endElement(); - - $xml->writeElementNS( 'dcterms', 'issued', null, - $data['dumps'][$dumpDate][$format]['timestamp'] ); - - $xml->startElementNS( 'dcat', 'byteSize', null ); - $xml->writeAttributeNS( 'rdf', 'datatype', null, - 'http://www.w3.org/2001/XMLSchema#decimal' ); - $xml->text( $data['dumps'][$dumpDate][$format]['byteSize'] ); - $xml->endElement(); -} - -/** - * Construct distribution entry for each format in which a distribution - * is available. 
The DCAT-specification requires each format to be a - * separate distribution. - * - * @param XmlWriter $xml XML stream to write to - * @param array $data data-blob of i18n and config variables - * @param string $distribId id for the distribution - * @param string $prefix prefix for corresponding entry in config file - * @param string|null $dumpDate the date of the dumpfile, null for live data - */ -function writeDistribution( XMLWriter $xml, array $data, $distribId, $prefix, $dumpDate ) { - $ids = array(); - - $allowedMediatypes = $data['config']["$prefix-info"]['mediatype']; - foreach ( $allowedMediatypes as $format => $mediatype ) { - // handle missing (and BETA) dump files - if ( !is_null( $dumpDate ) and !array_key_exists( $format, $data['dumps'][$dumpDate] ) ) { - continue; - } - - $id = $data['config']['uri'] . '#' . $distribId . $dumpDate . $format; - array_push( $ids, $id ); - - $xml->startElementNS( 'rdf', 'Description', null ); - $xml->writeAttributeNS( 'rdf', 'about', null, $id ); - - $xml->startElementNS( 'rdf', 'type', null ); - $xml->writeAttributeNS( 'rdf', 'resource', null, - 'http://www.w3.org/ns/dcat#Distribution' ); - $xml->endElement(); - - $xml->startElementNS( 'dcterms', 'license', null ); - $xml->writeAttributeNS( 'rdf', 'resource', null, - $data['config']["$prefix-info"]['license'] ); - $xml->endElement(); - - if ( is_null( $dumpDate ) ) { - $xml->startElementNS( 'dcat', 'accessURL', null ); - $xml->writeAttributeNS( 'rdf', 'resource', null, - $data['config']["$prefix-info"]['accessURL'] ); - $xml->endElement(); - } else { - dumpDistributionExtras( $xml, $data, $dumpDate, $format ); - } - - $xml->writeElementNS( 'dcterms', 'format', null, $mediatype ); - - // add description in each language - foreach ( $data['i18n'] as $langCode => $langData ) { - if ( array_key_exists( "distribution-$prefix-description", $langData ) ) { - $formatDescription = str_replace( - '$1', - $format, - $langData["distribution-$prefix-description"] - ); - 
$xml->startElementNS( 'dcterms', 'description', null ); - $xml->writeAttributeNS( 'xml', 'lang', null, $langCode ); - $xml->text( $formatDescription ); - $xml->endElement(); - } - } - - $xml->endElement(); - } - - return $ids; -} - -/** - * Add i18n title and description for a dataset - * - * @param XmlWriter $xml XML stream to write to - * @param array $data data-blob of i18n and config variables - * @param string|null $dumpDate the date of the dumpfile, null for live data - * @param string $type dump or live - */ -function writeDatasetI18n( XMLWriter $xml, array $data, $dumpDate, $type ) { - foreach ( $data['i18n'] as $langCode => $langData ) { - if ( array_key_exists( "dataset-$type-title", $langData ) ) { - $xml->startElementNS( 'dcterms', 'title', null ); - $xml->writeAttributeNS( 'xml', 'lang', null, $langCode ); - if ( $type === 'live' ) { - $xml->text( $langData['dataset-live-title'] ); - } else { - $xml->text( - str_replace( '$1', $dumpDate, $langData['dataset-dump-title'] ) - ); - } - $xml->endElement(); - } - if ( array_key_exists( "dataset-$type-description", $langData ) ) { - $xml->startElementNS( 'dcterms', 'description', null ); - $xml->writeAttributeNS( 'xml', 'lang', null, $langCode ); - $xml->text( $langData["dataset-$type-description"] ); - $xml->endElement(); - } - } -} - -/** - * Construct a dataset entry - * - * @param XmlWriter $xml XML stream to write to - * @param array $data data-blob of i18n and config variables - * @param string|null $dumpDate the date of the dumpfile, null for live data - * @param string $datasetId the id of the dataset - * @param string $publisher the nodeId of the publisher - * @param string $contactPoint the nodeId of the contactPoint - * @param array $distribution array of the distribution identifiers - */ -function writeDataset( XMLWriter $xml, array $data, $dumpDate, $datasetId, - $publisher, $contactPoint, array $distribution ) { - - $type = 'dump'; - if ( is_null( $dumpDate ) ) { - $type = 'live'; - } - - $id = 
$data['config']['uri'] . '#' . $datasetId . $dumpDate; - - $xml->startElementNS( 'rdf', 'Description', null ); - $xml->writeAttributeNS( 'rdf', 'about', null, $id ); - - $xml->startElementNS( 'rdf', 'type', null ); - $xml->writeAttributeNS( 'rdf', 'resource', null, - 'http://www.w3.org/ns/dcat#Dataset' ); - $xml->endElement(); - - $xml->startElementNS( 'adms', 'contactPoint', null ); - $xml->writeAttributeNS( 'rdf', 'nodeID', null, $contactPoint ); - $xml->endElement(); - - $xml->startElementNS( 'dcterms', 'publisher', null ); - $xml->writeAttributeNS( 'rdf', 'nodeID', null, $publisher ); - $xml->endElement(); - - if ( $type === 'live' ) { - $xml->startElementNS( 'dcterms', 'accrualPeriodicity', null ); - $xml->writeAttributeNS( 'rdf', 'resource', null, - 'http://purl.org/cld/freq/continuous' ); - $xml->endElement(); - } - - // add keywords - foreach ( $data['config']['keywords'] as $key => $keyword ) { - $xml->writeElementNS( 'dcat', 'keyword', null, $keyword ); - } - - // add themes - foreach ( $data['config']['themes'] as $key => $keyword ) { - $xml->startElementNS( 'dcat', 'theme', null ); - $xml->writeAttributeNS( 'rdf', 'resource', null, - "http://eurovoc.europa.eu/$keyword" ); - $xml->endElement(); - } - - // add title and description in each language - writeDatasetI18n( $xml, $data, $dumpDate, $type ); - - // add distributions - foreach ( $distribution as $key => $value ) { - $xml->startElementNS( 'dcat', 'distribution', null ); - $xml->writeAttributeNS( 'rdf', 'resource', null, $value ); - $xml->endElement(); - } - - $xml->endElement(); - return $id; -} - -/** - * Construct the publisher for the catalog and datasets with a given nodeId - * - * @param XmlWriter $xml XML stream to write to - * @param array $data data-blob of i18n and config variables - * @param string $publisher the nodeId of the publisher - */ -function writePublisher( XMLWriter $xml, array $data, $publisher ) { - $xml->startElementNS( 'rdf', 'Description', null ); - $xml->writeAttributeNS( 
'rdf', 'nodeID', null, $publisher ); - - $xml->startElementNS( 'rdf', 'type', null ); - $xml->writeAttributeNS( 'rdf', 'resource', null, - 'http://xmlns.com/foaf/0.1/Agent' ); - $xml->endElement(); - - $xml->writeElementNS( 'foaf', 'name', null, - $data['config']['publisher']['name'] ); - - $xml->startElementNS( 'dcterms', 'type', null ); - $xml->writeAttributeNS( 'rdf', 'resource', null, - 'http://purl.org/adms/publishertype/' . - $data['config']['publisher']['publisherType'] ); - $xml->endElement(); - - $xml->writeElementNS( 'foaf', 'homepage', null, - $data['config']['publisher']['homepage'] ); - - $xml->startElementNS( 'vcard', 'hasEmail', null ); - $xml->writeAttributeNS( 'rdf', 'resource', null, - 'mailto:' . $data['config']['publisher']['email'] ); - $xml->endElement(); - - $xml->endElement(); -} - -/** - * Construct a contactPoint for the datasets with a given nodeId - * - * @param XmlWriter $xml XML stream to write to - * @param array $data data-blob of i18n and config variables - * @param string $contactPoint the nodeId of the contactPoint - */ -function writeContactPoint( XMLWriter $xml, array $data, $contactPoint ) { - $xml->startElementNS( 'rdf', 'Description', null ); - $xml->writeAttributeNS( 'rdf', 'nodeID', null, $contactPoint ); - - $xml->startElementNS( 'rdf', 'type', null ); - $xml->writeAttributeNS( 'rdf', 'resource', null, - 'http://www.w3.org/2006/vcard/ns#' . - $data['config']['contactPoint']['vcardType'] ); - $xml->endElement(); - - $xml->startElementNS( 'vcard', 'hasEmail', null ); - $xml->writeAttributeNS( 'rdf', 'resource', null, - 'mailto:' . 
$data['config']['contactPoint']['email'] ); - $xml->endElement(); - - $xml->writeElementNS( 'vcard', 'fn', null, - $data['config']['contactPoint']['name'] ); - - $xml->endElement(); -} - -/** - * Add language and i18n title and description for the catalog entry - * - * @param XmlWriter $xml XML stream to write to - * @param array $data data-blob of i18n and config variables - */ -function writeCatalogI18n( XMLWriter $xml, array $data ) { - foreach ( $data['i18n'] as $langCode => $langData ) { - $xml->startElementNS( 'dcterms', 'language', null ); - $xml->writeAttributeNS( 'rdf', 'resource', null, - "http://id.loc.gov/vocabulary/iso639-1/$langCode" ); - $xml->endElement(); - - if ( array_key_exists( 'catalog-title', $langData ) ) { - $xml->startElementNS( 'dcterms', 'title', null ); - $xml->writeAttributeNS( 'xml', 'lang', null, $langCode ); - $xml->text( $langData['catalog-title'] ); - $xml->endElement(); - } - if ( array_key_exists( 'catalog-description', $langData ) ) { - $xml->startElementNS( 'dcterms', 'description', null ); - $xml->writeAttributeNS( 'xml', 'lang', null, $langCode ); - $xml->text( $langData['catalog-description'] ); - $xml->endElement(); - } - } -} - -/** - * Construct the catalog entry - * - * @param XmlWriter $xml XML stream to write to - * @param array $data data-blob of i18n and config variables - * @param string $publisher the nodeId of the publisher - * @param array $dataset array of the dataset identifiers - */ -function writeCatalog( XMLWriter $xml, array $data, $publisher, array $dataset ) { - $xml->startElementNS( 'rdf', 'Description', null ); - $xml->writeAttributeNS( 'rdf', 'about', null, - $data['config']['uri'] . 
'#catalog' ); - - $xml->startElementNS( 'rdf', 'type', null ); - $xml->writeAttributeNS( 'rdf', 'resource', null, - 'http://www.w3.org/ns/dcat#Catalog' ); - $xml->endElement(); - - $xml->startElementNS( 'dcterms', 'license', null ); - $xml->writeAttributeNS( 'rdf', 'resource', null, - $data['config']['catalog-license'] ); - $xml->endElement(); - - $xml->startElementNS( 'dcat', 'themeTaxonomy', null ); - $xml->writeAttributeNS( 'rdf', 'resource', null, - 'http://eurovoc.europa.eu/' ); - $xml->endElement(); - - $xml->writeElementNS( 'foaf', 'homepage', null, - $data['config']['catalog-homepage'] ); - $xml->writeElementNS( 'dcterms', 'modified', null, date( 'Y-m-d' ) ); - $xml->writeElementNS( 'dcterms', 'issued', null, - $data['config']['catalog-issued'] ); - - $xml->startElementNS( 'dcterms', 'publisher', null ); - $xml->writeAttributeNS( 'rdf', 'nodeID', null, $publisher ); - $xml->endElement(); - - // add language, title and description in each language - writeCatalogI18n( $xml, $data ); - - // add datasets - foreach ( $dataset as $key => $value ) { - $xml->startElementNS( 'dcat', 'dataset', null ); - $xml->writeAttributeNS( 'rdf', 'resource', null, $value ); - $xml->endElement(); - } - - $xml->endElement(); -} - -/** - * Construct the whole DCAT-AP document given an array of dump info - * - * @param array $data data-blob of i18n and config variables - * @return string: xmldata - */ -function outputXml( array $data ) { - // Initializing the XML Object - $xml = new XmlWriter(); - $xml->openMemory(); - $xml->setIndent( true ); - $xml->setIndentString( ' ' ); - - // set namespaces - $xml->startDocument( '1.0', 'UTF-8' ); - $xml->startElementNS( 'rdf', 'RDF', null ); - $xml->writeAttributeNS( 'xmlns', 'rdf', null, - 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' ); - $xml->writeAttributeNS( 'xmlns', 'dcterms', null, - 'http://purl.org/dc/terms/' ); - $xml->writeAttributeNS( 'xmlns', 'dcat', null, - 'http://www.w3.org/ns/dcat#' ); - $xml->writeAttributeNS( 'xmlns', 
'foaf', null, - 'http://xmlns.com/foaf/0.1/' ); - $xml->writeAttributeNS( 'xmlns', 'adms', null, - 'http://www.w3.org/ns/adms#' ); - $xml->writeAttributeNS( 'xmlns', 'vcard', null, - 'http://www.w3.org/2006/vcard/ns#' ); - - // Calls previously declared functions to construct xml - writePublisher( $xml, $data, $data['ids']['publisher'] ); - writeContactPoint( $xml, $data, $data['ids']['contactPoint'] ); - - $dataset = array(); - - // Live dataset and distributions - $liveDistribs = writeDistribution( $xml, $data, - $data['ids']['liveDistribLD'], 'ld', null ); - if ( $data['config']['api-enabled'] ) { - $liveDistribs = array_merge( $liveDistribs, - writeDistribution( $xml, $data, - $data['ids']['liveDistribAPI'], 'api', null ) - ); - } - array_push( $dataset, - writeDataset( $xml, $data, null, $data['ids']['liveDataset'], - $data['ids']['publisher'], $data['ids']['contactPoint'], - $liveDistribs ) - ); - - // Dump dataset and distributions - if ( $data['config']['dumps-enabled'] ) { - foreach ( $data['dumps'] as $key => $value ) { - $distIds = writeDistribution( $xml, $data, - $data['ids']['dumpDistribPrefix'], 'dump', $key ); - array_push( $dataset, - writeDataset( $xml, $data, $key, - $data['ids']['dumpDatasetPrefix'], - $data['ids']['publisher'], - $data['ids']['contactPoint'], $distIds ) - ); - } - } - - writeCatalog( $xml, $data, $data['ids']['publisher'], $dataset ); - - // Closing last XML node - $xml->endElement(); - - // Printing the XML - return $xml->outputMemory( true ); -} - -/** - * Given a dump directory produce array with data needed by outputXml() - * - * @param string $dirname directory name - * @param array $data data-blob of i18n and config variables - * @return array: of dumpdata, or empty array - */ -function scanDump( $dirname, array $data ) { - $testStrings = array(); - foreach ( $data['config']['dump-info']['mediatype'] as $fileEnding => $mediatype ) { - $testStrings[$fileEnding] = 'all.' . $fileEnding . 
'.gz'; - } - - $dumps = array(); - - // each valid subdirectory has the form YYYYMMDD and refers to a timestamp - foreach ( glob( $dirname . '/[0-9]*', GLOB_ONLYDIR ) as $subdir ) { - // $subdir = testdirNew/20150120 - $subDump = array(); - foreach ( glob( $subdir . '/*.gz' ) as $filename ) { - // match each file against an expected testString - foreach ( $testStrings as $fileEnding => $testString ) { - if ( substr( $filename, -strlen( $testString ) ) === $testString ) { - $info = stat( $filename ); - $filename = substr( $filename, strlen( $subdir . '/' ) ); - $subDump[$fileEnding] = array( - 'timestamp' => gmdate( 'Y-m-d', $info['mtime'] ), - 'byteSize' => $info['size'], - 'filename' => $filename - ); - } - } - } - // if files found then add to dumps - if ( count( $subDump ) > 0 ) { - $subdir = substr( $subdir, strlen( $dirname . '/' ) ); - $dumps[$subdir] = $subDump; - } - } - - return $dumps; -} - -/** - * Scan dump directory for dump files (if any) and - * create dcatap.rdf in the same directory - * - * @param array $options command line options to override defaults - */ -function run( array $options ) { - // Load config variables and i18n a data blob - if ( !isset( $options['config'] ) ) { - $options['config'] = 'config.json'; - } - if ( !is_file( $options['config'] ) ) { - throw new Exception( $options['config'] . " does not seem to exist" ); - } - $data = makeDataBlob( $options['config'] ); - - // Load directories from config/options and test for existence - if ( !isset( $options['dumpDir'] ) ) { - $options['dumpDir'] = $data['config']['directory']; - } - if ( !is_dir( $options['dumpDir'] ) or !is_readable( $options['dumpDir'] ) ) { - throw new Exception( - $options['dumpDir'] . " is not a valid readable directory" - ); - } - if ( !isset( $options['outputDir'] ) ) { - $options['outputDir'] = $data['config']['directory']; - } - if ( !is_dir( $options['outputDir'] ) or !is_writable( $options['outputDir'] ) ) { - throw new Exception( - $options['outputDir'] . 
" is not a valid writable directory" - ); - } - - // add dump data to data blob - $data['dumps'] = scanDump( $options['dumpDir'], $data ); - - // create xml string from data blob - $xml = outputXml( $data ); - - file_put_contents( $options['outputDir'] . "/dcatap.rdf", $xml ); -} - -// run from command-line with options -// Load options -$longOpts = array( - "config::", // Path to the config.json, default: config.json - "dumpDir::", // Path to the directory containing entity dumps, default: set in config - "outputDir::" // Path where dcat.rdf should be outputted, default: same as dumpDir -); -$options = getopt( '', $longOpts ); -try { - run( $options ); -} catch ( Exception $e ) { - die( $e->getMessage() ); -} diff --git a/modules/snapshot/files/dcat/LICENSE b/modules/snapshot/files/dcat/LICENSE deleted file mode 100644 index d7e0e57..0000000 --- a/modules/snapshot/files/dcat/LICENSE +++ /dev/null @@ -1,22 +0,0 @@ -The MIT License (MIT) - -Copyright (c) 2014 - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - diff --git a/modules/snapshot/files/dcat/README.md b/modules/snapshot/files/dcat/README.md deleted file mode 100644 index 0dbaa1e..0000000 --- a/modules/snapshot/files/dcat/README.md +++ /dev/null @@ -1,107 +0,0 @@ -DCAT-AP for Wikibase -================= - -A project aimed at generating a [DCAT-AP](https://joinup.ec.europa.eu/system/files/project/c3/22/18/DCAT-AP_Final_v1.00.html) -document for [Wikibase](http://wikiba.se) installations -in general and [Wikidata](http://wikidata.org) in particular. - -Takes into account access through: - -* Content negotiation (various formats) -* MediaWiki api (various formats) -* Entity dumps e.g. json, ttl (assumes that these are gziped) - -Current result can be found at [lokal-profil / dcatap.rdf](https://gist.github.com/lokal-profil/8086dc6bf2398d84a311) - - -## To use - -1. Copy `config.example.json` to `config.json` and change the contents - to match your installation. Refer to the *Config* section below for - an explanation of the individual configuration parameters. -2. Copy `catalog.example.json` to a suitable place (e.g. on-wiki) and - update the translations to fit your wikibase installation. Set this - value as `catalog-i18n` in the config file. -3. Create the dcatap.rdf file by running `php DCAT.php` or - `php DCAT.php --config="<path_1>" --dumpDir="<path_2>" --outputDir="<path_3>"` - where each of the options is optional and can be left out. - The options are: - 1. `--config` is the relative path to the json file containing the - configurations, defaults to `./config.json` - 2. `--dumpDir` is the relative path to the directory containing the - dumps (if any), defaults to the `directory` parameter in the - config file - 3. 
`--outputDir` is the relative path to the directory where the - `dcatap.rdf` file should be created, defaults to the `directory` - parameter in the config file - - -## Translations - -* Translations which are generic to the tool can be submitted as pull - requests and should be in the same format as the files in the `i18n` - directory. -* Translations which are specific to a project/catalog are added to - the location specified in the `catalog-i18n` parameter of the config - file. - - -## Config - -Below follows a key by key explanation of the config file. - -* `directory`: Relative path to the directory containing the dump - subcategories (if any) and for the final dcat file. -* `api-enabled`: (`Boolean`) Is API access activated for the MediaWiki - installation? -* `dumps-enabled`: (`Boolean`) Is JSON dump generation activated for the - WikiBase installation? -* `uri`: URL used as basis for rdf identifiers, - e.g. *http://www.example.org/about* -* `catalog-homepage`: URL for the homepage of the WikiBase installation, - e.g. *http://www.example.org* -* `catalog-issued`: ISO date at which the WikiBase installation was - first issued, e.g. *2000-12-24* -* `catalog-license`: License of the catalog, i.e. of the dcat file - itself (not the contents of the WikiBase installation), - e.g. *http://creativecommons.org/publicdomain/zero/1.0/* -* `catalog-i18n`: URL or path to json file containing i18n strings for - catalog title and description. Can be an on-wiki page, - e.g. *https://www.example.org/w/index.php?title=MediaWiki:DCAT.json&action=raw* -* `keywords`: (`array`) List of keywords applicable to all of the datasets -* `themes`: (`array`) List of thematic ids in accordance with - [Eurovoc](http://eurovoc.europa.eu/), e.g. *2191* for - http://eurovoc.europa.eu/2191 -* `publisher`: - * `name`: Name of the publisher - * `homepage`: URL for or the homepage of the publisher - * `email`: Contact e-mail for the publisher, should be a function - address, e.g. 
*i...@example.org* - * `publisherType`: Publisher type according to [ADMS](http://purl.org/adms/publishertype/1.0), - e.g. *NonProfitOrganisation* -* `contactPoint`: - * `name`: Name of the contact point - * `email`: E-mail for the contact point, should ideally be a - function address, e.g. *supp...@example.org* - * `vcardType`: Type of contact point, either `Organization` or - `Individual` -* `ld-info`: - * `accessURL`: URL to the content negotiation endpoint of the - WikiBase installation, e.g. *http://www.example.org/entity/* - * `mediatype`: (`object`) List of [IANA media types](http://www.iana.org/assignments/media-types/) - available through content negotiation in the format *file-ending:media-type* - * `license`: License of the data in the distribution, e.g. - *http://creativecommons.org/publicdomain/zero/1.0/* -* `api-info`: - * `accessURL`: URL to the MediaWiki API endpoint of the wiki, - e.g. *http://www.example.org/w/api.php* - * `mediatype`: (`object`) List of non-deprecated formats available - thorough the API, see ld-info:mediatype above for formatting - * `license`: See ld-info:license above -* `dump-info`: - * `accessURL`: URL to the directory where the *.json.gz* files - reside (`$1` is replaced on the fly by the actual filename), - e.g. *http://example.org/dumps/$1* - * `mediatype`: (`object`) List of media types. In practice this is - always `{"json": "application/json"}` ... for now - * `license`: See ld-info:license above diff --git a/modules/snapshot/files/dcat/catalog.example.json b/modules/snapshot/files/dcat/catalog.example.json deleted file mode 100644 index 04abdae..0000000 --- a/modules/snapshot/files/dcat/catalog.example.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "qqq-title": "The name of the catalog", - "qqq-description": "A description of the catalog", - "en-title": "Wikidata", - "en-description": "Wikidata is a free linked database that can be read and edited by both humans and machines. 
\nWikidata acts as central storage for the structured data of its Wikimedia sister projects including Wikipedia, Wikivoyage, Wikisource, and others.", - "sv-title": "Wikidata", - "sv-description": "Wikidata är en fri länkad databas som kan läsas och redigeras av både människor och maskiner. \nWikidata fungerar som ett centralt förvar av strukturerade data för sina systerprojekt vilka inkluderar Wikipedia, Wikivoyage, Wikisource, och fler." -} diff --git a/modules/snapshot/files/dcat/config.example.json b/modules/snapshot/files/dcat/config.example.json deleted file mode 100644 index e082fa9..0000000 --- a/modules/snapshot/files/dcat/config.example.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "directory": "testdir/test", - "api-enabled": true, - "dumps-enabled": true, - "uri": "https://www.wikidata.org/about", - "catalog-homepage": "https://www.wikidata.org", - "catalog-issued": "2012-10-30", - "catalog-license": "http://creativecommons.org/publicdomain/zero/1.0/", - "catalog-i18n": "https://www.wikidata.org/w/index.php?title=MediaWiki:DCAT.json&action=raw", - "keywords": ["data store", "semantic", "knowledgebase", "Wikimedia", "user generated content", "UGC", "Wikipedia", "Wikidata"], - "themes": ["1428", "441", "2191", "384", "7374"], - "publisher": { - "name": "Wikimedia Foundation", - "homepage": "http://wikimediafoundation.org/", - "email": "i...@wikimedia.org", - "publisherType": "NonProfitOrganisation" - }, - "contactPoint": { - "vcardType" : "Organization", - "name": "Wikidata information team", - "email": "i...@wikidata.org" - }, - "ld-info": { - "accessURL": "https://www.wikidata.org/entity/", - "mediatype": { - "json": "application/json", - "n3": "application/n-triples", - "rdf": "application/rdf+xml", - "ttl": "text/turtle", - "html": "text/html" - }, - "license": "http://creativecommons.org/publicdomain/zero/1.0/" - }, - "api-info": { - "accessURL": "https://www.wikidata.org/w/api.php", - "mediatype": { - "json": "application/json", - "xml": 
"application/xml" - }, - "license": "http://creativecommons.org/publicdomain/zero/1.0/" - }, - "dump-info": { - "accessURL": "https://dumps.wikimedia.org/wikidatawiki/entities/$1", - "mediatype": { - "json": "application/json", - "ttl": "text/turtle" - }, - "license": "http://creativecommons.org/publicdomain/zero/1.0/" - } -} diff --git a/modules/snapshot/files/dcat/config.json b/modules/snapshot/files/dcat/config.json deleted file mode 100644 index 6963b73..0000000 --- a/modules/snapshot/files/dcat/config.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "directory": null, - "api-enabled": true, - "dumps-enabled": true, - "uri": "https://www.wikidata.org/about", - "catalog-homepage": "https://www.wikidata.org", - "catalog-issued": "2012-10-30", - "catalog-license": "http://creativecommons.org/publicdomain/zero/1.0/", - "catalog-i18n": "https://www.wikidata.org/w/index.php?title=MediaWiki:DCAT.json&action=raw", - "keywords": ["data store", "semantic", "knowledgebase", "Wikimedia", "user generated content", "UGC", "Wikipedia", "Wikidata"], - "themes": ["1428", "441", "2191", "384", "7374"], - "publisher": { - "name": "Wikimedia Foundation", - "homepage": "http://wikimediafoundation.org/", - "email": "i...@wikimedia.org", - "publisherType": "NonProfitOrganisation" - }, - "contactPoint": { - "vcardType" : "Organization", - "name": "Wikidata information team", - "email": "i...@wikidata.org" - }, - "ld-info": { - "accessURL": "https://www.wikidata.org/entity/", - "mediatype": { - "json": "application/json", - "n3": "application/n-triples", - "rdf": "application/rdf+xml", - "ttl": "text/turtle", - "html": "text/html" - }, - "license": "http://creativecommons.org/publicdomain/zero/1.0/" - }, - "api-info": { - "accessURL": "https://www.wikidata.org/w/api.php", - "mediatype": { - "json": "application/json", - "xml": "application/xml" - }, - "license": "http://creativecommons.org/publicdomain/zero/1.0/" - }, - "dump-info": { - "accessURL": 
"https://dumps.wikimedia.org/wikidatawiki/entities/$1", - "mediatype": { - "json": "application/json", - "ttl": "text/turtle" - }, - "license": "http://creativecommons.org/publicdomain/zero/1.0/" - } -} diff --git a/modules/snapshot/files/dcat/i18n/ar.json b/modules/snapshot/files/dcat/i18n/ar.json deleted file mode 100644 index c07767c..0000000 --- a/modules/snapshot/files/dcat/i18n/ar.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "@metadata": { - "authors": [ - "Mervat Salman" - ] - }, - "distribution-dump-description": "ملف $1 مضغوط." -} diff --git a/modules/snapshot/files/dcat/i18n/ast.json b/modules/snapshot/files/dcat/i18n/ast.json deleted file mode 100644 index 9f31d02..0000000 --- a/modules/snapshot/files/dcat/i18n/ast.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "@metadata": { - "authors": [ - "Xuacu" - ] - }, - "dataset-live-title": "Accesu en vivo", - "dataset-live-description": "La versión en direuto de los datos, inclui entidaes y propiedaes. Sólo tán catalogaos como distribuciones los formatos que nun tán anticuaos.", - "dataset-dump-title": "Volcáu d'entidaes del $1", - "dataset-dump-description": "Un volcáu estáticu de toles entidaes n'una data determinada.", - "distribution-ld-description": "Puntu final de Datos enllazaos. El formatu resuélvese por negociación de conteníu.", - "distribution-api-description": "El puntu final de la API de MediaWiki. El formatu dase col parámetru «format».", - "distribution-dump-description": "Un archivu $1 comprimíu con gzip." -} diff --git a/modules/snapshot/files/dcat/i18n/br.json b/modules/snapshot/files/dcat/i18n/br.json deleted file mode 100644 index 60b338e..0000000 --- a/modules/snapshot/files/dcat/i18n/br.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "@metadata": { - "authors": [ - "Gwenn-Ael" - ] - }, - "dataset-live-title": "Mont war-eeun", - "distribution-dump-description": "Ur restr $1 er furmad gzip." 
-} diff --git a/modules/snapshot/files/dcat/i18n/ca.json b/modules/snapshot/files/dcat/i18n/ca.json deleted file mode 100644 index 174fea3..0000000 --- a/modules/snapshot/files/dcat/i18n/ca.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "@metadata": { - "authors": [ - "F3RaN" - ] - }, - "dataset-live-title": "Accés directe", - "dataset-live-description": "La versió en directe de les dades, inclou les entitats i propietats. Només formats no obsolets estan catalogats com a distribucions." -} diff --git a/modules/snapshot/files/dcat/i18n/ckb.json b/modules/snapshot/files/dcat/i18n/ckb.json deleted file mode 100644 index 04e97dc..0000000 --- a/modules/snapshot/files/dcat/i18n/ckb.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "@metadata": { - "authors": [ - "Pirehelokan" - ] - }, - "dataset-live-title": "دەستپێگەشتنی زیندوو", - "distribution-dump-description": "پەڕگەیەکی زیپکراو$1" -} diff --git a/modules/snapshot/files/dcat/i18n/de.json b/modules/snapshot/files/dcat/i18n/de.json deleted file mode 100644 index 149151e..0000000 --- a/modules/snapshot/files/dcat/i18n/de.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "@metadata": { - "authors": [ - "Metalhead64" - ] - }, - "dataset-live-title": "Live-Zugriff", - "dataset-live-description": "Die Live-Version der Daten, enthält Elemente und Eigenschaften. Nur nicht-veraltete Formate werden als Verteilungen gelistet.", - "dataset-dump-title": "Elementdump vom $1", - "dataset-dump-description": "Ein statischer Dump aller Elemente für das angegebene Datum.", - "distribution-ld-description": "Der Verlinkte-Daten-Endpunkt. Das Format wird durch Inhaltsverhandlung aufgelöst.", - "distribution-api-description": "Der MediaWiki-API-Endpunkt. Das Format wird durch den Parameter „format“ angegeben.", - "distribution-dump-description": "Eine $1-gzip-Datei." 
-} diff --git a/modules/snapshot/files/dcat/i18n/en-ca.json b/modules/snapshot/files/dcat/i18n/en-ca.json deleted file mode 100644 index 7be1c6f..0000000 --- a/modules/snapshot/files/dcat/i18n/en-ca.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "@metadata": { - "authors": [ - "Rbix" - ] - }, - "dataset-dump-title": "Entity dump of $1", - "distribution-dump-description": "A gzipped $1 file." -} diff --git a/modules/snapshot/files/dcat/i18n/en.json b/modules/snapshot/files/dcat/i18n/en.json deleted file mode 100644 index d767a75..0000000 --- a/modules/snapshot/files/dcat/i18n/en.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "@metadata": { - "authors": ["Lokal_Profil"] - }, - "dataset-live-title": "Live access", - "dataset-live-description": "The live version of the data, includes entities and properties. Only non-deprecated formats are listed as distributions.", - "dataset-dump-title": "Entity dump of $1", - "dataset-dump-description": "A static dump of all entites for the given date.", - "distribution-ld-description": "The Linked Data endpoint. Format is resolved through content negotiation.", - "distribution-api-description": "The MediaWiki API endpoint. Format is given through the \"format\" parameter.", - "distribution-dump-description": "A gziped $1 file." -} diff --git a/modules/snapshot/files/dcat/i18n/es.json b/modules/snapshot/files/dcat/i18n/es.json deleted file mode 100644 index 0924843..0000000 --- a/modules/snapshot/files/dcat/i18n/es.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "@metadata": { - "authors": [ - "Macofe" - ] - }, - "dataset-live-title": "Acceso en vivo", - "dataset-live-description": "La versión en vivo de los datos, incluyendo entidades y propiedades. Solo se muestran como distribuciones los formatos que no están en desuso.", - "dataset-dump-title": "Volcado de la entidad del $1", - "dataset-dump-description": "Un volcado estático de todas las entidades en el fecha dada.", - "distribution-dump-description": "Un archivo $1 comprimido con gzip." 
-} diff --git a/modules/snapshot/files/dcat/i18n/fa.json b/modules/snapshot/files/dcat/i18n/fa.json deleted file mode 100644 index 073e6fd..0000000 --- a/modules/snapshot/files/dcat/i18n/fa.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "@metadata": { - "authors": [ - "Reza1615", - "Leyth" - ] - }, - "dataset-live-title": "دسترسی زنده", - "dataset-live-description": "نسخهٔ زندهٔ دادهها که شامل موجودیتها و خصوصیات است. در اینجا تنها قالبهای غیرمردود به عنوان توزیعها فهرست شدهاند.", - "dataset-dump-title": "دامپ محتوی $1", - "distribution-dump-description": "فایل فشرده gziped $1" -} diff --git a/modules/snapshot/files/dcat/i18n/fr.json b/modules/snapshot/files/dcat/i18n/fr.json deleted file mode 100644 index 05001eb..0000000 --- a/modules/snapshot/files/dcat/i18n/fr.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "@metadata": { - "authors": [ - "Wladek92" - ] - }, - "dataset-live-title": "Accès direct", - "dataset-live-description": "La version courante des données, comprend les entités et les propriétés. Seuls les formats non obsolètes sont répertoriés comme des distributions.", - "dataset-dump-title": "Données de l'entité $1", - "dataset-dump-description": "Vidage statique de toutes les entités à la date donnée.", - "distribution-ld-description": "Le point de terminaison des données liées. Le Format est résolu par la négociation du contenu.", - "distribution-api-description": "Le point de terminaison de l'API MediaWiki. Le format est donné par le paramètre \"format\".", - "distribution-dump-description": "Un fichier $1 au format gzip." -} diff --git a/modules/snapshot/files/dcat/i18n/he.json b/modules/snapshot/files/dcat/i18n/he.json deleted file mode 100644 index 2022d87..0000000 --- a/modules/snapshot/files/dcat/i18n/he.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "@metadata": { - "authors": [ - "Amire80" - ] - }, - "dataset-live-title": "גישה חיה", - "dataset-live-description": "הגרסה החיה של הנתונים, כולל ישויות ומאפיינים. 
רק תסדירים שלא התיישנו רשומים כהפצות.", - "dataset-dump-title": "היטל ישויות של $1", - "dataset-dump-description": "היטל סטטי של כל הישויות עבור תאריך נתון.", - "distribution-ld-description": "נקודת קצה של נתונים מקושרים. התסדיר נפתר דרך משא ומתן של תוכן.", - "distribution-api-description": "נקודת קצה של API של מדיה־ויקי. התסדיר ניתן דרך הפרמטר \"format\".", - "distribution-dump-description": "קובץ $1 ב־gzip." -} diff --git a/modules/snapshot/files/dcat/i18n/kn.json b/modules/snapshot/files/dcat/i18n/kn.json deleted file mode 100644 index 7d1fd33..0000000 --- a/modules/snapshot/files/dcat/i18n/kn.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "@metadata": { - "authors": [ - "Ananth subray" - ] - }, - "dataset-live-title": "ನೇರ ಪ್ರವೇಶ" -} diff --git a/modules/snapshot/files/dcat/i18n/lb.json b/modules/snapshot/files/dcat/i18n/lb.json deleted file mode 100644 index 69842b7..0000000 --- a/modules/snapshot/files/dcat/i18n/lb.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "@metadata": { - "authors": [ - "Robby" - ] - }, - "distribution-dump-description": "E gezipte(n) $1-Fichier." -} diff --git a/modules/snapshot/files/dcat/i18n/mk.json b/modules/snapshot/files/dcat/i18n/mk.json deleted file mode 100644 index e9a0394..0000000 --- a/modules/snapshot/files/dcat/i18n/mk.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "@metadata": { - "authors": [ - "Bjankuloski06" - ] - }, - "dataset-live-title": "Пристап во живо", - "dataset-live-description": "Верзијата во живо на податоците, вклучувајќи ги единици и својства. Наведени се само незастарени формати како распределби.", - "dataset-dump-title": "Склад на единици на $1", - "dataset-dump-description": "Статичен склад на сите единици за дадениот датум.", - "distribution-ld-description": "Крајната точка на Сврзани податоци. Форматот се решава преку содржински прилагодувања.", - "distribution-api-description": "Крајна точка на извршникот на МедијаВики. 
Форматот се задава со параметот „format“.", - "distribution-dump-description": "$1-податотека збиена со gzip." -} diff --git a/modules/snapshot/files/dcat/i18n/nl.json b/modules/snapshot/files/dcat/i18n/nl.json deleted file mode 100644 index 4105c56..0000000 --- a/modules/snapshot/files/dcat/i18n/nl.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "@metadata": { - "authors": [ - "Siebrand" - ] - }, - "dataset-live-title": "Live toegang", - "dataset-live-description": "De live versie van de gegevens, bevat entiteiten en eigenschappen. Alleen niet-verouderde formaten worden weergegeven als distributies.", - "dataset-dump-title": "Entiteitendump van $1", - "dataset-dump-description": "Een statische dump van alle entiteiten op een bepaalde datum.", - "distribution-ld-description": "Het Linked Data-eindpunt. Het formaat wordt opgemaakt via contentonderhandeling.", - "distribution-api-description": "Het eindpunt voor de MediaWiki-API. De opmaak wordt opgegeven via de parameter \"format\".", - "distribution-dump-description": "Een met Gzip gecomprimeerd bestand van het type $1." -} diff --git a/modules/snapshot/files/dcat/i18n/qqq.json b/modules/snapshot/files/dcat/i18n/qqq.json deleted file mode 100644 index c669eb1..0000000 --- a/modules/snapshot/files/dcat/i18n/qqq.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "@metadata": { - "authors": [ - "Lokal_Profil" - ] - }, - "dataset-live-title": "The title for the live access dataset", - "dataset-live-description": "The description of the live access dataset. For terminology see https://www.wikidata.org/wiki/Wikidata:Glossary. For deprecation see https://en.wikipedia.org/wiki/Deprecation.", - "dataset-dump-title": "The title for the entity dump where $1 is the date of the dump in the format YYYYMMDD", - "dataset-dump-description": "The description of the entity dump for the given date.", - "distribution-ld-description": "The description of the Linked Data endpoint. 
For content negotiation see https://en.wikipedia.org/wiki/Content_negotiation", - "distribution-api-description": "The description of the MediaWiki API endpoint. Leave \"format\" untranslated.", - "distribution-dump-description": "The description of a dump file where $1 is the file format." -} diff --git a/modules/snapshot/files/dcat/i18n/sv.json b/modules/snapshot/files/dcat/i18n/sv.json deleted file mode 100644 index 2b0984d..0000000 --- a/modules/snapshot/files/dcat/i18n/sv.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "@metadata": { - "authors": [ - "Lokal_Profil" - ] - }, - "dataset-live-title": "Direkt åtkomst", - "dataset-live-description": "The aktuella versionen av datan, inkluderande objekt och egenskaper. Endast icke-utfasade format listas för distributionerna.", - "dataset-dump-title": "Objekt dump från $1", - "dataset-dump-description": "En statisk dump av alla objekt för det angivna datumet.", - "distribution-ld-description": "Länkade data-accesspunkten. Formatet ges genom content negotiation.", - "distribution-api-description": "MediaWiki API-accesspunkten. Formatet ges genom \"format\"-parametern.", - "distribution-dump-description": "En gzipad $1-fil." -} diff --git a/modules/snapshot/files/dcat/i18n/tr.json b/modules/snapshot/files/dcat/i18n/tr.json deleted file mode 100644 index fb0d3c6..0000000 --- a/modules/snapshot/files/dcat/i18n/tr.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "@metadata": { - "authors": [ - "Kincki", - "Ömer Berkay" - ] - }, - "dataset-live-title": "Canlı erişim", - "dataset-dump-title": "Varlık dökümü $1", - "distribution-dump-description": "Bir gziped $1 dosyası." 
-} diff --git a/modules/snapshot/files/dcat/i18n/zh-hans.json b/modules/snapshot/files/dcat/i18n/zh-hans.json deleted file mode 100644 index 03c30b2..0000000 --- a/modules/snapshot/files/dcat/i18n/zh-hans.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "@metadata": { - "authors": [ - "Liuxinyu970226" - ] - }, - "dataset-live-title": "在线访问", - "dataset-live-description": "数据的在线版本,包括实体和属性。只有未弃用的格式会被列为分布。", - "dataset-dump-title": "$1的实体转储", - "dataset-dump-description": "所有实体在指定日期的静态转储。", - "distribution-ld-description": "被链接数据的末端。格式会通过内容协商以解决。", - "distribution-api-description": "MediaWiki API末端。格式会通过“format”参数提供。", - "distribution-dump-description": "一个gzip格式的$1文件。" -} diff --git a/modules/snapshot/files/dcat/i18n/zh-hant.json b/modules/snapshot/files/dcat/i18n/zh-hant.json deleted file mode 100644 index 0c46e5e..0000000 --- a/modules/snapshot/files/dcat/i18n/zh-hant.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "@metadata": { - "authors": [ - "Cwlin0416" - ] - }, - "distribution-dump-description": "一個已用 gzip 壓縮的 $1 檔案。" -} diff --git a/modules/snapshot/manifests/wikidatadumps/common.pp b/modules/snapshot/manifests/wikidatadumps/common.pp index a25da13..0c0d4b8 100644 --- a/modules/snapshot/manifests/wikidatadumps/common.pp +++ b/modules/snapshot/manifests/wikidatadumps/common.pp @@ -13,14 +13,13 @@ group => 'www-data', } - file { '/usr/local/share/dcat': - ensure => 'directory', - mode => '0444', - owner => 'datasets', - group => 'www-data', - recurse => true, - purge => true, - source => 'puppet:///modules/snapshot/dcat', + git { 'DCAT-AP': + directory => '/usr/local/share/dcat', + origin => 'https://gerrit.wikimedia.org/r/operations/dumps/dcat', + branch => 'master', + ensure => 'present', # Don't automatically update. 
+ owner => 'datasets', + group => 'www-data', } } -- To view, visit https://gerrit.wikimedia.org/r/260247 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I0a9a4a6dfc8ca5789b394bedcad98d064281f380 Gerrit-PatchSet: 1 Gerrit-Project: operations/puppet Gerrit-Branch: production Gerrit-Owner: Hoo man <h...@online.de> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits