Essential start-over code for a GEDCOM import parser
Sept 2, 2023 20:09:46 GMT -8
Post by Uncle Buddy on Sept 2, 2023 20:09:46 GMT -8
In the process of writing code to import GEDCOM, I've started over so many times that it has become useful to have a starter kit. Here are the basic elements I always use when I start over.
Each reincarnation of this code gets closer to being finished before design mistakes pile up to a critical mass that buries me in problems too complex to bother with. At this point I either put the project on the shelf, start over, or vow to never try again. The current state of computer genealogy can't live without GEDCOM, so here is the code I use to start over with:
# gedcom_import_parser_model_blank_for_starting_over.py
import sqlite3
from datetime import datetime
# Path of the GEDCOM file that is passed to GedKanDu when this file is run
# as a script (see the __main__ guard at the bottom).
import_file = "d:/treebard_gps/app/python/r_a_neal_gedcomx.ged"
class GedKanDu():
    """Starter skeleton for a GEDCOM import parser.

    Constructing an instance runs the whole pipeline on `import_file`:

    1. check whether the file's last line is the `0 TRLR` trailer,
    2. split the file into a HEAD copy and a DATA copy,
    3. read the HEAD copy, recording the vendor and the submitter,
    4. print every DATA line (minus the trailer, if there was one).

    Attributes:
        import_file: path of the GEDCOM file being imported.
        output_dir: directory the HEAD and DATA copies are written to.
        head: header lines in file order, trailing newline removed.
        vendor: value of the header's `SOUR` line, "" if none was seen.
        submitter: digits of the header's `SUBM` pointer as an int, ""
            if no such line (or no digits in the pointer) was seen.
        head_file: path of the HEAD copy (set by `split_file`).
        data_file: path of the DATA copy (set by `split_file`).
    """

    # Where the split files go unless the caller passes `output_dir`.
    DEFAULT_OUTPUT_DIR = "d:/treebard_gps/app/python"

    def __init__(self, import_file, output_dir=None):
        """Run the import pipeline on `import_file`.

        Args:
            import_file: path of the GEDCOM file to read.
            output_dir: directory for the HEAD/DATA copies; defaults to
                `DEFAULT_OUTPUT_DIR` when omitted.
        """
        self.import_file = import_file
        if output_dir is None:
            output_dir = self.DEFAULT_OUTPUT_DIR
        self.output_dir = output_dir
        self.head = []
        self.vendor = ""
        self.submitter = ""

        trim_last_line = self.trim_trlr()
        self.split_file(self.import_file)
        self.save_head()
        self.save_lines(trim=trim_last_line)

    def save_lines(self, trim=False):
        """Print each line of the DATA file, stripped of outer whitespace.

        Args:
            trim: when true, the last line of the file is not printed.
        """
        with open(self.data_file, "r", encoding="utf-8-sig") as data:
            # Stay one line behind the reader so the final line can be
            # dropped without loading the whole file into memory.
            pending = None
            for raw in data:
                if pending is not None:
                    print(pending.strip())
                pending = raw
            if pending is not None and not trim:
                print(pending.strip())

    def split_file(self, import_file):
        """Copy `import_file` into a HEAD file and a DATA file.

        Every line before the first one that starts with `0 @` is written
        to `self.head_file`; that line and all following lines are written
        to `self.data_file`. If no line starts with `0 @`, the DATA file is
        left empty. Both paths are built from the import file's base name
        plus a minute-resolution timestamp and are stored on the instance.
        """
        # Take the base name by hand so both "/" and "\\" separators work,
        # and drop only a trailing ".ged" rather than any occurrence of it.
        base_name = import_file.replace("\\", "/").rsplit("/", 1)[-1]
        if base_name.lower().endswith(".ged"):
            stem = base_name[:-4]
        else:
            stem = base_name
        stamp = datetime.now().strftime("%Y%m%d%H%M")
        self.head_file = f"{self.output_dir}/{stem}_{stamp}_HEAD.ged"
        self.data_file = f"{self.output_dir}/{stem}_{stamp}_DATA.ged"

        with open(import_file, mode="r", encoding="utf-8-sig") as gedcom, \
                open(self.head_file, mode="w", encoding="utf-8-sig") as head, \
                open(self.data_file, mode="w", encoding="utf-8-sig") as data:
            target = head
            for line in gedcom:
                # Switch once, at the first "0 @" line; later lines follow.
                if target is head and line.startswith("0 @"):
                    target = data
                target.write(line)

    def save_head(self):
        """Load the HEAD file into `self.head` and pick out key values.

        A line whose tag (second token) is `SOUR` sets `self.vendor`; one
        whose tag is `SUBM` sets `self.submitter`. Lines with fewer than
        three tokens carry no value and are skipped. If a tag occurs more
        than once, the last occurrence wins.
        """
        with open(self.head_file, "r", encoding="utf-8-sig") as head:
            self.head.extend(line.rstrip("\n") for line in head)

        handlers = {"SOUR": self._set_vendor, "SUBM": self._set_submitter}
        for line in self.head:
            tokens = line.split()
            if len(tokens) < 3:
                continue
            # Compare the tag token itself; a substring test would also
            # fire on free text that merely contains "SOUR" or "SUBM".
            handler = handlers.get(tokens[1])
            if handler is not None:
                handler(tokens[2])

    def _set_vendor(self, value):
        """Record the value of a header `SOUR` line as the vendor."""
        self.vendor = value

    def _set_submitter(self, pointer):
        """Record the digits of a header `SUBM` pointer as an int.

        A pointer without any digits leaves `self.submitter` unchanged.
        """
        digits = "".join(c for c in pointer if c.isdigit())
        if digits:
            self.submitter = int(digits)

    def trim_trlr(self):
        """Report whether the import file's last line starts with `0 T`.

        Ignore `0 TRLR` at end of GEDCOM so we don't have to check every
        line for it. Do this without storing the entire file in memory.

        Returns:
            True if the last line starts with `0 T`, otherwise False
            (including when the file is empty).
        """
        last_line = ""
        # Same encoding as every other read of this file, so a BOM does
        # not end up glued to the front of a one-line file.
        with open(self.import_file, encoding="utf-8-sig") as gedcom:
            for last_line in gedcom:
                pass
        return last_line.startswith("0 T")
if __name__ == "__main__":
    # Only run the import when this file is executed as a script, not when
    # it is imported as a module.
    GEDKANDU = GedKanDu(import_file)