Post by Uncle Buddy on May 19, 2022 19:33:41 GMT -8
Based on my most recent posts in this category, here is what seems like a good start on some code to import GEDCOM data directly into Treebard's data structure.
# gedcom_lines_to_nests_only_2_choices
import sqlite3
from re import sub
from query_strings_gedcom import (
update_gender_default_person, insert_person, insert_finding_birth,
delete_name_all, insert_source, insert_name, select_name_newest,
insert_name_default_person, insert_citation, select_citation_newest,
delete_finding_all, insert_links_citations_names,
insert_finding_default_person,
delete_source_all,
delete_person_all,
delete_places_places_all,
delete_place_all)
import dev_tools as dt
from dev_tools import looky, seeline
# Path of the SQLite database file that this module opens with
# `sqlite3.connect()`.
current_file = "d:/treebard_gps/app/python/test.db"
class GedcomImporter():
    """Import the parts of a GEDCOM file handled so far into the database.

    Instantiating the class does all the work: the file is read, each
    level-0 INDI or SOUR record gets its primary key inserted, and then the
    subordinate NAME, SOUR and PAGE lines are processed in file order.
    """

    def __init__(self, import_file):
        """Run the whole import for the GEDCOM file at `import_file`.

        The database cursor and connection are closed when the import
        finishes, including when it stops because of an exception.
        """
        # Lines following the header, each as `[level, tag_or_xref, data]`.
        self.lines = []
        # One dict per entry of `self.lines` (same index), collecting the
        # database IDs that lines nested below it need to refer to.
        self.instrux = []
        # Lines belonging to the HEAD record, same shape as `self.lines`.
        self.head = []
        # Maps a level number to the index of the latest line seen at it.
        self.most_recent_n = {}
        self.conn = sqlite3.connect(current_file)
        self.cur = self.conn.cursor()
        try:
            self.read_gedcom(import_file)
            self.instrux = [{} for _ in self.lines]
            # Primary keys go in first so that nested lines processed by
            # `parse_lines()` can find the ID of the record they belong to.
            for idx, line in enumerate(self.lines):
                if line[0] == 0:
                    self.insert_primary_key(line, idx)
            self.parse_lines()
        finally:
            self.cur.close()
            self.conn.close()
        # Development output; `looky` and `seeline` come from `dev_tools`
        # and presumably report the current source line -- TODO confirm.
        print("line", looky(seeline()).lineno, "self.most_recent_n:", self.most_recent_n)
        print("line", looky(seeline()).lineno, "self.instrux:", self.instrux)

    def insert_primary_key(self, line, idx):
        """Insert the row for one level-0 INDI or SOUR record.

        `line` is `[0, xref, tag]`, e.g. `[0, "@I12@", "INDI"]`. The digits
        of the xref become the primary key, which is also stored in
        `self.instrux[idx]` as `person_id` or `source_id`. Other record
        types (FAM, OBJE, NOTE, REPO, SUBM...) are not imported yet and are
        ignored, as are records whose xref contains no digits.
        """
        _, pk, tag = line
        if tag not in ("INDI", "SOUR"):
            return
        digits = sub(r"\D", "", pk or "")
        if not digits:
            # `int("")` would raise; an xref such as `@SUBM@` has no digits.
            print("Skipping record with no digits in its ID:", pk)
            return
        pk = int(digits)
        if tag == "INDI":
            self.cur.execute(insert_person, (pk,))
            self.conn.commit()
            self.cur.execute(insert_finding_birth, (pk,))
            self.conn.commit()
            self.instrux[idx]["person_id"] = pk
        else:
            self.cur.execute(insert_source, (pk,))
            self.conn.commit()
            self.instrux[idx]["source_id"] = pk

    def read_gedcom(self, file):
        """Split the file at path `file` into `self.head` and `self.lines`.

        Every non-blank line becomes `[level, tag_or_xref, data]` with the
        level converted to `int` and missing parts filled with `None`. Lines
        from `0 HEAD` up to the next level-0 line go to `self.head`; the
        rest go to `self.lines`. A final `TRLR` line is dropped, and a
        message is printed if it is missing.

        The `utf-8-sig` encoding strips the byte order mark (BOM) that some
        programs write at the front of the first line.
        """
        in_head = True
        with open(file, "r", encoding="utf-8-sig") as f:
            for raw in f:
                raw = raw.rstrip("\n")
                if not raw.strip():
                    # A blank line has no level number to convert.
                    continue
                parts = raw.split(" ", 2)
                while len(parts) < 3:
                    parts.append(None)
                parts[0] = int(parts[0])
                if in_head is False:
                    self.lines.append(parts)
                elif parts[1] == "HEAD":
                    self.head.append(parts)
                elif parts[0] == 0:
                    # First level-0 line after HEAD: the header is over.
                    in_head = False
                    self.lines.append(parts)
                else:
                    self.head.append(parts)
        if self.lines and self.lines[-1][1] == "TRLR":
            self.lines.pop()
        else:
            print("TRLR tag missing from end of file.")

    def parse_lines(self):
        """Process every nested (level above 0) line in file order.

        `self.most_recent_n` is updated for each line so that a nested line
        can find the line it is subordinate to.
        """
        for idx, line in enumerate(self.lines):
            n, tag, data = line
            self.most_recent_n[n] = idx
            if n != 0:
                self.build_on_branch(n, tag, data, idx)

    def build_on_branch(self, n, tag, data, idx):
        """Dispatch one nested line to the handler for its tag.

        `ref` is the `self.instrux` dict of the latest line seen one level
        up, i.e. the line this one is subordinate to. Tags other than NAME,
        SOUR and PAGE are ignored, as is a line whose parent level has not
        been seen.
        """
        parent_idx = self.most_recent_n.get(n - 1)
        if parent_idx is None:
            return
        ref = self.instrux[parent_idx]
        if tag == "NAME":
            self.add_name(ref, data, idx)
        elif tag == "SOUR":
            self.save_source(ref, data, idx)
        elif tag == "PAGE":
            self.add_citation(ref, data, idx)

    def add_citation(self, ref, data, idx):
        """Insert a citation for a PAGE line and link it to a name.

        Nothing is written unless the parent line's dict `ref` holds both a
        `source_id` and a `name_id`, so a citation is never inserted
        without the name it is meant to be linked to.
        """
        print("line", looky(seeline()).lineno, "ref, data, idx:", ref, data, idx)
        source_id = ref.get("source_id")
        name_id = ref.get("name_id")
        if not source_id or not name_id:
            return
        self.cur.execute(insert_citation, (source_id, data))
        self.conn.commit()
        self.cur.execute(select_citation_newest)
        citation_id = self.cur.fetchone()[0]
        self.cur.execute(insert_links_citations_names, (citation_id, name_id))
        self.conn.commit()

    def save_source(self, ref, data, idx):
        """Record which source and name a nested SOUR line refers to.

        Only SOUR lines whose parent dict `ref` has a `name_id` are
        handled. The digits of `data` (e.g. `@S3@`) are stored as
        `source_id` in `self.instrux[idx]` together with the `name_id`, for
        a following PAGE line to use. A SOUR line without digits in its
        data is ignored.
        """
        print("line", looky(seeline()).lineno, "data, idx:", data, idx)
        print("line", looky(seeline()).lineno, "ref:", ref)
        name_id = ref.get("name_id")
        if not name_id:
            return
        digits = sub(r"\D", "", data or "")
        if not digits:
            return
        fk = int(digits)
        print("line", looky(seeline()).lineno, "fk:", fk)
        print("line", looky(seeline()).lineno, "name_id:", name_id)
        self.instrux[idx]["source_id"] = fk
        self.instrux[idx]["name_id"] = name_id

    def add_name(self, ref, data, idx):
        """Insert the name on a NAME line for the person it belongs to.

        The new name's ID is stored as `name_id` in `self.instrux[idx]`.
        NAME lines that are empty, or whose parent is not a person record
        (SUBM and REPO records also have NAME lines), are ignored.
        """
        person_id = ref.get("person_id")
        if person_id is None or not data:
            return
        name, sorter = self.sort_name(data)
        self.cur.execute(insert_name, (person_id, name, sorter))
        self.conn.commit()
        self.cur.execute(select_name_newest)
        name_id = self.cur.fetchone()[0]
        self.instrux[idx]["name_id"] = name_id

    def sort_name(self, data):
        """Return `(name, sorter)` for a GEDCOM name value.

        The surname is the text between the first pair of slashes, e.g.
        `John /Smith/ Jr.` gives `("John Smith Jr.", "Smith, John Jr.")`.
        `name` is the value with the slashes removed and `sorter` puts the
        surname first, followed by a comma. When there is no surname both
        values are the plain name, and an empty value gives `("", "")`.
        """
        if not data:
            return "", ""
        name = " ".join(data.replace("/", " ").split())
        before, slash, rest = data.partition("/")
        if not slash:
            return name, name
        surname, _, after = rest.partition("/")
        surname = " ".join(surname.split())
        if not surname:
            return name, name
        others = " ".join(
            "{} {}".format(before, after).replace("/", " ").split())
        # `strip()` drops the trailing space when the surname stands alone.
        sorter = "{}, {}".format(surname, others).strip()
        return name, sorter
if __name__ == "__main__":
    test_tree = "d:/treebard_gps/etc/todd_boyett_connections_fixed.ged"

    def reset_tree():
        """Run the reset queries against the database, one commit each.

        The queries run in the order listed; presumably this returns the
        test database to its starting state before an import -- confirm
        against `query_strings_gedcom`.
        """
        reset_queries = (
            update_gender_default_person,
            delete_name_all,
            insert_name_default_person,
            delete_finding_all,
            insert_finding_default_person,
            delete_source_all,
            delete_person_all,
            delete_places_places_all,
            delete_place_all,
        )
        db = sqlite3.connect(current_file)
        cursor = db.cursor()
        for query in reset_queries:
            cursor.execute(query)
            db.commit()
        cursor.close()
        db.close()

    reset_tree()
    GedcomImporter(test_tree)