Smalyshev edited the task description. (Show Details)

EDIT DETAILS
...
Here is a Python 3 script that reproduces the problem. It executes 10 queries 5 seconds apart, printing the duplicated rows for the first query and only the duplicate count for the others:

#!/usr/bin/env python3
#-*- coding: UTF-8 -*-

import requests, time

def queryWikidata(query):
    """Run a SPARQL query against the Wikidata Query Service.

    Args:
        query: SPARQL query text, sent as the ``query`` URL parameter.

    Returns:
        A list with one dict per result row, mapping each bound variable
        name to its plain string value.

    Raises:
        requests.HTTPError: if the endpoint answers with an error status.
    """
    WIKIDATAQUERYURL = 'https://query.wikidata.org/sparql'
    response = requests.get(WIKIDATAQUERYURL, params={'format': 'json', 'query': query})
    # Fail on HTTP errors here rather than on an opaque JSON decoding error.
    response.raise_for_status()

    # SPARQL JSON results keep the rows under results.bindings; each row maps
    # a variable name to a {'type': ..., 'value': ...} object.
    data = response.json()['results']['bindings']
    return [{x: i[x]['value'] for x in i} for i in data]



def testQuery(echo):
    """Run the language/number-of-speakers query once and count duplicate rows.

    The query uses SELECT DISTINCT, so any row that appears more than once in
    the result is counted as a duplicate.

    Args:
        echo: when true, print each duplicated row once.

    Returns:
        The number of distinct rows that occur more than once in the result.
    """
    from collections import Counter

    QUERY = """SELECT DISTINCT ?language ?languageLabel ?nSpeakers ?Lx ?LxLabel ?time ?country
WHERE
{
?language wdt:P31/wdt:P279* wd:Q34770.
?language p:P1098 ?nSpeakersStatement.
?nSpeakersStatement ps:P1098 ?nSpeakers
optional {?nSpeakersStatement pq:P518 ?Lx}.
optional {?nSpeakersStatement pq:P585 ?time}.
optional {?nSpeakersStatement pq:P17 ?country}.
FILTER EXISTS {?wikipedia wdt:P407 ?language}.
SERVICE wikibase:label { bd:serviceParam wikibase:language "en" }

}"""
    data = queryWikidata(QUERY)
    # Sorting fixes the order in which duplicates are reported.
    data.sort(key=lambda x: (x['language'], x['nSpeakers']))

    # Rows are dicts (unhashable); count them through a hashable fingerprint
    # instead of calling data.count() once per row.
    fingerprints = [tuple(sorted(row.items())) for row in data]
    occurrences = Counter(fingerprints)

    foundDuplicates = []
    reported = set()
    for row, fingerprint in zip(data, fingerprints):
        if occurrences[fingerprint] > 1 and fingerprint not in reported:
            reported.add(fingerprint)
            if echo:
                print(row)
            foundDuplicates.append(row)
    return len(foundDuplicates)

# Run the query 10 times, 5 seconds apart, collecting the duplicate count of
# each run. Only the first run prints the duplicated rows themselves.
duplicateCount = []
duplicates = testQuery(True)
print('\nQuery', 1)
print('Duplicates:', duplicates)
duplicateCount.append(duplicates)
for i in range(2, 11):
    time.sleep(5)
    print('Query', i)
    duplicates = testQuery(False)
    print('Duplicates:', duplicates)
    duplicateCount.append(duplicates)
# Summary of all runs.
print(duplicateCount)
```
#!/usr/bin/env python3
#-*- coding: UTF-8 -*-

import requests, time

def queryWikidata(query):
    """Run a SPARQL query against the Wikidata Query Service.

    Args:
        query: SPARQL query text, sent as the ``query`` URL parameter.

    Returns:
        A list with one dict per result row, mapping each bound variable
        name to its plain string value.

    Raises:
        requests.HTTPError: if the endpoint answers with an error status.
    """
    WIKIDATAQUERYURL = 'https://query.wikidata.org/sparql'
    response = requests.get(WIKIDATAQUERYURL, params={'format': 'json', 'query': query})
    # Fail on HTTP errors here rather than on an opaque JSON decoding error.
    response.raise_for_status()

    # SPARQL JSON results keep the rows under results.bindings; each row maps
    # a variable name to a {'type': ..., 'value': ...} object.
    data = response.json()['results']['bindings']
    return [{x: i[x]['value'] for x in i} for i in data]



def testQuery(echo):
    """Run the language/number-of-speakers query once and count duplicate rows.

    The query uses SELECT DISTINCT, so any row that appears more than once in
    the result is counted as a duplicate.

    Args:
        echo: when true, print each duplicated row once.

    Returns:
        The number of distinct rows that occur more than once in the result.
    """
    from collections import Counter

    QUERY = """SELECT DISTINCT ?language ?languageLabel ?nSpeakers ?Lx ?LxLabel ?time ?country
WHERE
{
?language wdt:P31/wdt:P279* wd:Q34770.
?language p:P1098 ?nSpeakersStatement.
?nSpeakersStatement ps:P1098 ?nSpeakers
optional {?nSpeakersStatement pq:P518 ?Lx}.
optional {?nSpeakersStatement pq:P585 ?time}.
optional {?nSpeakersStatement pq:P17 ?country}.
FILTER EXISTS {?wikipedia wdt:P407 ?language}.
SERVICE wikibase:label { bd:serviceParam wikibase:language "en" }

}"""
    data = queryWikidata(QUERY)
    # Sorting fixes the order in which duplicates are reported.
    data.sort(key=lambda x: (x['language'], x['nSpeakers']))

    # Rows are dicts (unhashable); count them through a hashable fingerprint
    # instead of calling data.count() once per row.
    fingerprints = [tuple(sorted(row.items())) for row in data]
    occurrences = Counter(fingerprints)

    foundDuplicates = []
    reported = set()
    for row, fingerprint in zip(data, fingerprints):
        if occurrences[fingerprint] > 1 and fingerprint not in reported:
            reported.add(fingerprint)
            if echo:
                print(row)
            foundDuplicates.append(row)
    return len(foundDuplicates)

# Run the query 10 times, 5 seconds apart, collecting the duplicate count of
# each run. Only the first run prints the duplicated rows themselves.
duplicateCount = []
duplicates = testQuery(True)
print('\nQuery', 1)
print('Duplicates:', duplicates)
duplicateCount.append(duplicates)
for i in range(2, 11):
    time.sleep(5)
    print('Query', i)
    duplicates = testQuery(False)
    print('Duplicates:', duplicates)
    duplicateCount.append(duplicates)
# Summary of all runs.
print(duplicateCount)
```



TASK DETAIL
https://phabricator.wikimedia.org/T153108

EMAIL PREFERENCES
https://phabricator.wikimedia.org/settings/panel/emailpreferences/

To: Smalyshev
Cc: Smalyshev, Aklapper, Tcp-ip, EBjune, mschwarzer, Avner, debt, Gehel, D3r1ck01, Jonas, FloNight, Xmlizer, Izno, jkroll, Wikidata-bugs, Jdouglas, aude, Deskana, Manybubbles, Mbch331
_______________________________________________
Wikidata-bugs mailing list
Wikidata-bugs@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/wikidata-bugs

Reply via email to