GOV/Webservice/Python-suds

aus GenWiki, dem genealogischen Lexikon zum Mitmachen.
< GOV‎ • Webservice
Zur Navigation springen Zur Suche springen

GOV-Typen nach SQLite Importieren

Create Table f_gov_type

-- Objekttypen-Tabelle in SQLite anlegen
DROP TABLE IF EXISTS f_gov_type;

CREATE TABLE f_gov_type (
type_id       integer,
type_desc_1   text,
type_desc_2   text
);

Import nach f_gov_type

Dieses importiert alle Objektypen-Texte in eine SQLite-Tabelle.

from SimpleServiceImpl_services import *
# Datenbank mit Pfad
sql_db = r'C:\temp\Demo\data\gov_demo.db'

from suds.client import Client
simple_service_url = 'http://gov.genealogy.net/services/SimpleService?wsdl'
read_client = Client(simple_service_url)

# Zugriff auf SQLlite
import sqlite3
# https://docs.python.org/3/library/sqlite3.html

# Zugriff auf SQLite-DB mit Autocommit
conn = sqlite3.connect(sql_db, isolation_level=None)

# Tabelle leeren
c = conn.cursor()
c.execute('DELETE FROM f_gov_type')
# Anzahl der Typen eintragen (Wert schätzen)
j = 0
for i in range(1, 290):
    try:
        gov_eintrag = read_client.service.getTypeDescription(i, 'deu')
        j = j + 1
        (desc_1, desc_2) = (gov_eintrag[0])
        if (desc_2 == None):
            desc_2 = ''
        ins_sql = 'INSERT INTO f_gov_type(type_id, type_desc_1, type_desc_2) ' + \
                  'VALUES (' + str(i) + \
                  ",'" + desc_1 + \
                  "','" + desc_2 + "')"
        # print('SQL:' + ins_sql)
        c.execute(ins_sql)
    # wenn es die Nummer nicht gibt - überspringen
    except:
        continue
print ('Anzahl Objekt-Typen: ' + str(j))

GOV-Einträge über Koordinaten nach SQLite als json importieren

Create Table

DROP TABLE IF EXISTS f_gov_data;

CREATE TABLE f_gov_data (
lfd_id        integer primary key autoincrement, -- Schlüssel in der Tabelle
gov_id        text, -- GOV-Kennung
item_data     text, -- GOV-Eintrag als Json
item_childs   text -- Liste der Kind-Objekte als Json
);

Import über Koordinaten

 # Umrechnung laufende Tage pro Jahr
import juliandate

from suds.client import Client
simple_service_url = 'http://gov.genealogy.net/services/SimpleService?wsdl'
read_client = Client(simple_service_url)

# Datenspeicherung als json
import json

# Zugriff auf SQLlite
import sqlite3
# https://docs.python.org/3/library/sqlite3.html

# Serialisieren WSDL-Suds-Objekt als Dictionary
from suds.sudsobject import asdict

def recursive_asdict(d):
    out = {}
    for k, v in asdict(d).items():
        # Zeitumrechnung
        if k == '_jd':
            dt = juliandate.to_gregorian(v)
            vc = str(10000 * dt[0] + 100 * dt[1] + dt[2])
            v = vc[0:4] + '-' + vc[4:6] + '-' + vc[6:8]
        if k == '_last-modification':
            v = str(v)
        if hasattr(v, '__keylist__'):
            out[k] = recursive_asdict(v)
        elif isinstance(v, list):
            out[k] = []
            for item in v:
                if hasattr(item, '__keylist__'):
                    out[k].append(recursive_asdict(item))
                else:
                    out[k].append(item)
        else:
            out[k] = v
    return out

sql_db = r'C:\temp\Demo\data\gov_demo.db'
conn = sqlite3.connect(sql_db, isolation_level=None)

# Da nur die ersten 500 Treffer geliefert werden, muss der Abfrage-Bereich klein genug gewählt werden.
# Ein Raster von 0.1 Dezimal-Grad ist geeignet.
latitude0 = 50.65
latitude1 = 50.75
longitude0 = 5.80
longitude1 = 6.90

# Liste der GOV-IDs über Koordinaten (max. 500)
item_list = read_client.service.searchByBoundingBox(latitude0, latitude1, longitude0, longitude1)

for i, gov_item in enumerate (item_list.item):
    gov_eintrag = read_client.service.getObject(gov_item)
    gov_eintrag_dict = recursive_asdict(gov_eintrag)
    gov_eintrag_json = json.dumps (gov_eintrag_dict, sort_keys=True, default=str, ensure_ascii=False)
    gov_eintrag_txt = gov_eintrag_json.replace("'", "''")

    gov_eintrag_child = read_client.service.getChildObjects(gov_item)
    gov_eintrag_child_dict = recursive_asdict(gov_eintrag_child)
    gov_eintrag_child_json = json.dumps(gov_eintrag_child_dict, sort_keys=True, default=str, ensure_ascii=False)
    gov_eintrag_child_txt = gov_eintrag_child_json.replace("'", "''")
    # print ('GOV-ID: ' + gov_item )

    sql_ins = 'insert into f_gov_data (gov_id,  item_data, item_childs) values (' + \
               "'" + gov_item + "','" + gov_eintrag_txt + "','" +  gov_eintrag_child_txt + "')"
    rc = c.execute(sql_ins)

Json- Views

Abfrage auf Arrays

Es können alle Arrays per SQL-Selektion abgefragt werden. Hier ein Beispiel.

drop view if exists v_gov_part_of_govid_1_3;

create view v_gov_part_of_govid_1_3 as 
select gov_id, item_data,
       breite, laenge, hoehe,
       plz_1, plz_1_ab, plz_1_bis,
       plz_2, plz_2_ab, plz_2_bis,
       name_1, sprache_1, name_2, sprache_2, 
       part_of_govid_1, part_of_govid_2, part_of_govid_3, text,
       case when (type_desc_2="") then f_gov_type.type_desc_1 else type_desc_1 || ' (' || type_desc_2 || ')' end as typ
from
(
  select  gov_id, item_data,
          json_extract(item_data,'$.position._lat') as breite,
          json_extract(item_data,'$.position._lon') as laenge,
          json_extract(item_data,'$.position._height') as hoehe,
--
          json_extract(item_data,'$.postal-code[0]._value') as plz_1,
          json_extract(item_data,'$.postal-code[0].timespan.begin._jd') as plz_1_ab,
          json_extract(item_data,'$.postal-code[0].timespan.end._jd') as plz_1_bis,
--
          json_extract(item_data,'$.postal-code[1]._value') as plz_2,
          json_extract(item_data,'$.postal-code[1].timespan.begin._jd') as plz_2_ab,
          json_extract(item_data,'$.postal-code[1].timespan.end._jd') as plz_2_bis,
--
          json_extract(item_data,'$.name[0]._value') as name_1,
          json_extract(item_data,'$.name[0]._lang') as sprache_1,
          json_extract(item_data,'$.name[1]._value') as name_2,
          json_extract(item_data,'$.name[1]._lang') as sprache_2,
--
          json_extract(item_data,'$.part-of[0]._ref') AS part_of_govid_1,
          json_extract(item_data,'$.part-of[1]._ref') as part_of_govid_2,
          json_extract(item_data,'$.part-of[2]._ref') as part_of_govid_3,
          json_extract(item_data,'$.note[0].text') as text,
          json_extract(item_data,'$.type[0]._value') as t_1
  from f_gov_data
) a
left join f_gov_type
on a.t_1 = f_gov_type.type_id
;

Abfrage auf Externe Kennungen

Auf diese Weise können Dubletten ermittelt werden.

drop view if exists v_gov_external_ref_1_6;

create view v_gov_external_ref_1_6 as 
select  gov_id, 
        json_extract(item_data,'$.external-reference[0]._value') AS extref
from f_gov_data 
where json_extract(item_data,'$.external-reference[0]') IS NOT NULL 
 union all
select  gov_id, 
        json_extract(item_data,'$.external-reference[1]._value') AS extref
from f_gov_data 
where json_extract(item_data,'$.external-reference[1]') IS NOT NULL 
 union all
select  gov_id, 
        json_extract(item_data,'$.external-reference[2]._value') AS extref
from f_gov_data 
where json_extract(item_data,'$.external-reference[2]') IS NOT NULL 
 union all
select  gov_id, 
        json_extract(item_data,'$.external-reference[3]._value') AS extref
from f_gov_data 
where json_extract(item_data,'$.external-reference[3]') IS NOT NULL 
 union all
select  gov_id, 
        json_extract(item_data,'$.external-reference[4]._value') AS extref
from f_gov_data 
where json_extract(item_data,'$.external-reference[4]') IS NOT NULL 
 union all
select  gov_id, 
        json_extract(item_data,'$.external-reference[5]._value') AS extref
from f_gov_data 
where json_extract(item_data,'$.external-reference[5]') IS NOT NULL 
order by gov_id, extref
;

Weblinks