Add 1st version of search
This commit is contained in:
parent
8c193f1e7f
commit
803b07a4f4
|
|
@ -0,0 +1,110 @@
|
|||
#! venv/bin/python
|
||||
import sys
|
||||
import json
|
||||
|
||||
from datetime import datetime
|
||||
from pygbif import species as species
|
||||
from pygbif import occurrences as occ
|
||||
import urllib.request
|
||||
|
||||
# Path of the TAXREF export opened by taxref_species().
TAXREF = "TaxRef/TAXREFv17.txt"
|
||||
|
||||
|
||||
def download_image(url, save_as):
    """Download ``url`` to the local path ``save_as`` on a best-effort basis.

    Failures to fetch the resource are reported on stdout instead of being
    raised, so that one bad URL does not abort a batch of downloads.

    Args:
        url: Address of the resource to fetch.
        save_as: Destination file path.

    Returns:
        None, whether or not the download succeeded.
    """
    try:
        urllib.request.urlretrieve(url, save_as)
    except urllib.error.HTTPError as e:
        # The server answered, but with an error status.
        print("Error : {} {} {}".format(e.code, e.reason, e.url))
    except urllib.error.URLError as e:
        # No usable answer at all (DNS failure, refused connection, missing
        # file:// target, truncated transfer, ...). HTTPError is a subclass of
        # URLError, so this clause must come second.
        print("Error : {} {}".format(e.reason, url))
|
||||
|
||||
|
||||
|
||||
#o = occ.get(key = 4507978353)
|
||||
#print(o)
|
||||
|
||||
# Zero-based positions of the fields read from each tab-separated TAXREF row.
FAMILLE = 4  # field searched for the family name given to taxref_species()
RANG = 14  # field compared with "ES" by taxref_species()
LB_NOM = 15
NOM_COMPLET = 17
NOM_VALIDE = 19  # field whose values taxref_species() collects
FR = 23  # field compared with "P", "E" and "I" by taxref_species()
|
||||
|
||||
def taxref_species(p):
    """Collect the distinct ``NOM_VALIDE`` values of the rows matching ``p``.

    The file ``TAXREF`` is read as tab-separated text. A row is kept when its
    ``RANG`` field equals ``"ES"``, its ``FR`` field is ``"P"``, ``"E"`` or
    ``"I"``, and its ``FAMILLE`` field contains ``p`` (substring match).

    The collected names are also written, one per line, to
    ``TAXREF_<p>.txt`` in the current working directory.

    Args:
        p: Text looked for inside the ``FAMILLE`` field.

    Returns:
        The matching ``NOM_VALIDE`` values, without duplicates, in the order
        they first appear in the file.
    """
    print(p)

    accepted_fr = ("P", "E", "I")
    # Rows shorter than this cannot be indexed safely (e.g. a blank last line).
    min_fields = max(FAMILLE, RANG, NOM_VALIDE, FR) + 1

    splist = []
    seen = set()  # O(1) duplicate check; splist keeps the first-seen order
    # NOTE(review): the file is opened with the platform default encoding, as
    # before - confirm the encoding of the TAXREF export and pass it explicitly.
    with open(TAXREF) as f:
        for line in f:
            # Drop the line terminator first so the last field is clean too.
            fields = [x.strip("\"") for x in line.rstrip("\r\n").split("\t")]
            if len(fields) < min_fields:
                continue
            if fields[RANG] != "ES":
                continue
            if fields[FR] not in accepted_fr:
                continue
            if p not in fields[FAMILLE]:
                continue
            name = fields[NOM_VALIDE]
            if name not in seen:
                seen.add(name)
                splist.append(name)

    with open("TAXREF_" + p + ".txt", "w") as f:
        f.writelines(s + "\n" for s in splist)

    return splist
|
||||
|
||||
def write_gbif(fname, mode, data):
    """Write ``data`` to the file ``fname`` opened with ``mode``.

    Args:
        fname: Path of the file to write.
        mode: Mode string handed to ``open`` (for example ``"w"`` or ``"a"``).
        data: A string or an iterable of strings; it is passed to
            ``writelines``, so no separators are added between items.
    """
    # The with-statement closes the file; no explicit close() is needed.
    with open(fname, mode) as f:
        f.writelines(data)
|
||||
|
||||
|
||||
|
||||
|
||||
def _media_extension(media_format):
    """Return the file extension used for a media item of the given format."""
    extensions = {"image/jpeg": "jpeg"}
    return extensions.get(media_format, "unknown")


def _fetch_occurrences(taxon_key, total, page_size=100):
    """Return every StillImage occurrence record of one taxon, page by page."""
    records = []
    for offset in range(0, total, page_size):
        page = occ.search(taxonKey=taxon_key, limit=page_size, offset=offset,
                          mediaType='StillImage')
        records += page['results']
    return records


def main(argv):
    """Search GBIF for pictures of the species of a family and download them.

    Steps:
      1. list the species of the family ``argv[1]`` with ``taxref_species``;
      2. resolve each processed species to a GBIF ``usageKey`` and count its
         occurrences that carry a still image;
      3. fetch those occurrences, most documented species first, and dump
         them as JSON into ``gbif.txt``;
      4. download every media item into the ``StillImage/`` directory as
         ``<occurrence key>_<NNN>.<ext>``.

    Args:
        argv: Command-line arguments; ``argv[1]`` is the family name.

    Raises:
        SystemExit: If no family name is given.
    """
    import os  # local import: the module does not import os at file level

    if len(argv) < 2:
        sys.exit("usage: {} FAMILY".format(argv[0] if argv else "search"))

    splist = taxref_species(argv[1])
    print("NB species " + str(len(splist)))

    # NOTE(review): only the first species is processed, exactly as in the
    # original code (splist[:1]) - presumably a limit kept for development;
    # widen the slice to cover the whole family.
    taxon_keys = {}
    counts = {}
    for name in splist[:1]:
        match = species.name_backbone(name, verbose=True)
        key = match.get('usageKey')
        if key is None:
            # No usable match for this name; skip it instead of crashing.
            print("No GBIF match for {}".format(name))
            continue
        taxon_keys[name] = key
        counts[name] = occ.search(taxonKey=key, limit=0,
                                  mediaType='StillImage')['count']

    # Most documented species first (the original computed this order but
    # threw the result away).
    ranked = sorted(counts.items(), key=lambda z: z[1], reverse=True)
    print(dict(ranked))

    data_gbif = {}
    data_gbif["last_update"] = format(datetime.now())
    data_gbif["results"] = []

    for name, total_occurences in ranked:
        print(name, '->', total_occurences, ' occurences')
        # Query the key of *this* species only, not every key on every page.
        data_gbif["results"] += _fetch_occurrences(taxon_keys[name],
                                                   total_occurences)

    write_gbif("gbif.txt", "w", json.dumps(data_gbif, indent=2))

    # urlretrieve does not create missing directories.
    os.makedirs("StillImage", exist_ok=True)
    for r in data_gbif['results']:
        for i, m in enumerate(r.get('media', []), start=1):
            url = m.get("identifier")
            if not url:
                # Nothing to download for this media entry.
                continue
            ext = _media_extension(m.get('format'))
            save_as = "{}_{:03d}.{}".format(r['key'], i, ext)
            save_as = "StillImage/" + save_as
            print(save_as)
            download_image(url, save_as)
|
||||
|
||||
|
||||
|
||||
# Run the search only when the file is executed as a script.
if __name__ == "__main__":
    sys.exit(main(sys.argv))
|
||||
Loading…
Reference in New Issue