DataIntegrationGroup · TylerAdamMartinez · Nov 5, 2025 · Nov 4, 2025 · Nov 4, 2025 · Nov 4, 2025
diff --git a/transfers/seed.py b/transfers/seed.py
@@ -6,47 +6,80 @@
 """
 
 import random
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, timezone
 from faker import Faker
 from db.engine import session_ctx
 from sqlalchemy import select
+from geoalchemy2.elements import WKTElement
 
 # Core models
 from db.contact import Contact, ThingContactAssociation
 from db.location import Location, LocationThingAssociation
-from db.thing import Thing
+from db.thing import Thing, ThingIdLink
 from db.sensor import Sensor
 from db.deployment import Deployment
+from db.field import FieldEvent, FieldActivity
 from db.sample import Sample
 from db.observation import Observation
 from db.parameter import Parameter
 from db.analysis_method import AnalysisMethod
-from db.regulatory_limit import RegulatoryLimit
-from db.transducer import TransducerObservation
-from db.status_history import StatusHistory
+from db.lexicon import (
+    LexiconTerm,
+    LexiconCategory,
+    LexiconTermCategoryAssociation,
+)
 
 fake = Faker()
 Faker.seed(42)
 random.seed(42)
 
 
-def seed_all(n=5):
+def get_terms_by_category(s, category_name: str) -> list[LexiconTerm]:
+    """Return the LexiconTerm rows joined to the category named `category_name`."""
+    stmt = (
+        select(LexiconTerm)
+        .join(LexiconTermCategoryAssociation)
+        .join(LexiconCategory)
+        .where(LexiconCategory.name == category_name)
+    )
+    return list(s.scalars(stmt))
+
+
+def seed_all(n: int = 5):
     """Seed roughly `n` of each main entity and connect them."""
+    new_mexico_bounds = [
+        (36.9, -106.6),  # Chama area
+        (35.1, -106.6),  # Albuquerque
+        (32.3, -106.8),  # Las Cruces
+        (34.4, -103.2),  # Clovis
+        (36.7, -108.2),  # Farmington
+    ]
+
     with session_ctx() as s:
-        contacts = []
-        locations = []
-        things = []
-        sensors = []
-        parameters = []
-        methods = []
-        samples = []
-        observations = []
+        contacts: list[Contact] = []
+        locations: list[Location] = []
+        things: list[Thing] = []
+        sensors: list[Sensor] = []
+        parameters: list[Parameter] = []
+        methods: list[AnalysisMethod] = []
+        field_events: list[FieldEvent] = []
+        field_activities: list[FieldActivity] = []
+        samples: list[Sample] = []
+        observations: list[Observation] = []
+
+        # 0. Lexicons
+        organization_terms = get_terms_by_category(s, "organization")
+        relation_terms = get_terms_by_category(s, "relation")
+        analysis_method_type_terms = get_terms_by_category(s, "analysis_method_type")
+        sample_method_terms = get_terms_by_category(s, "sample_method")
+        activity_type_terms = get_terms_by_category(s, "activity_type")
+        sensor_type_terms = get_terms_by_category(s, "sensor_type")
 
         # 1. Contacts
         for _ in range(n):
             c = Contact(
                 name=fake.name(),
-                organization=fake.company(),
+                organization=random.choice(organization_terms).term,
                 role=random.choice(["Hydrologist", "Technician", "Geologist"]),
                 contact_type="Primary",
             )
@@ -55,11 +88,17 @@ def seed_all(n=5):
 
         # 2. Locations
         for _ in range(n):
+            # Pick an anchor point in New Mexico and jitter it by up to 0.3 degrees
+            base_lat, base_lon = random.choice(new_mexico_bounds)
+            lat = round(base_lat + random.uniform(-0.3, 0.3), 6)
+            lon = round(base_lon + random.uniform(-0.3, 0.3), 6)
+
             loc = Location(
+                point=WKTElement(f"POINT({lon} {lat})", srid=4326),
                 elevation=round(fake.random_number(digits=3), 2),
-                county=fake.city(),
-                latitude=round(fake.latitude(), 6),
-                longitude=round(fake.longitude(), 6),
+                notes=fake.sentence(),
+                elevation_accuracy=random.uniform(0.1, 5.0),
+                coordinate_accuracy=random.uniform(0.1, 10.0),
                 release_status="public",
             )
             s.add(loc)
@@ -70,7 +109,7 @@ def seed_all(n=5):
         # If the environment variable MODE=development is set
         # then it will initialize both the parameter and lexicon tables.
         # See core/app.py for details
-        parameters = s.scalars(select(Parameter)).all()
+        parameters = list(s.scalars(select(Parameter)).all())
         if not parameters:
             raise RuntimeError("No parameters found — ensure init_parameter() ran.")
 
@@ -79,8 +118,8 @@ def seed_all(n=5):
             am = AnalysisMethod(
                 analysis_method_code=m,
                 analysis_method_name=f"Method {m}",
-                analysis_method_type="Lab",
-                source_organization="NMED",
+                analysis_method_type=random.choice(analysis_method_type_terms).term,
+                source_organization=random.choice(organization_terms).term,
             )
             s.add(am)
             methods.append(am)
@@ -100,11 +139,6 @@ def seed_all(n=5):
                 well_casing_depth=random.uniform(10, 50),
                 release_status="public",
             )
-
-            # link to random location
-            loc = random.choice(locations)
-            if hasattr(t, "locations"):
-                t.locations.append(loc)
             s.add(t)
             things.append(t)
 
@@ -125,31 +159,65 @@ def seed_all(n=5):
                 assoc = LocationThingAssociation(
                     location_id=loc.id,
                     thing_id=t.id,
-                    effective_start=datetime.utcnow(),
+                    effective_start=datetime.now(timezone.utc),
                     effective_end=None,
                 )
                 s.add(assoc)
 
-        # 5. Sensors & Deployments
+        for t in things:
+            for _ in range(random.randint(1, 3)):
+                chosen_org = random.choice(organization_terms)
+                link = ThingIdLink(
+                    thing_id=t.id,
+                    relation=random.choice(relation_terms).term,
+                    alternate_id=chosen_org.id,
+                    alternate_organization=chosen_org.term,
+                    release_status="public",
+                )
+                s.add(link)
+
+        # 5. FieldEvent, FieldActivity, Sensors & Deployments
+        for t in things:
+            fe = FieldEvent(
+                thing_id=t.id,
+                event_date=datetime.now(timezone.utc),
+                notes=f"Auto-generated field event for {t.name}",
+                release_status="public",
+            )
+            s.add(fe)
+            field_events.append(fe)
+
+        s.flush()
+
+        for fe in field_events:
+            fa = FieldActivity(
+                field_event_id=fe.id,
+                activity_type=random.choice(activity_type_terms).term,
+                notes=f"Auto-generated activity for event {fe.id}",
+                release_status="public",
+            )
+            s.add(fa)
+            field_activities.append(fa)
+
+        s.flush()
+
         for i in range(n):
             sn = Sensor(
                 name=f"Sensor-{i + 1}",
-                sensor_type=random.choice(
-                    ["Pressure Transducer", "Barometer", "Acoustic Sounder"]
-                ),
+                sensor_type=random.choice(sensor_type_terms).term,
                 serial_no=fake.unique.bothify(text="SN-####"),
             )
             sensors.append(sn)
             s.add(sn)
 
         s.flush()
-        deployments = []
+        deployments: list[Deployment] = []
         for t in things:
             sn = random.choice(sensors)
             d = Deployment(
                 thing=t,
                 sensor=sn,
-                installation_date=datetime.utcnow()
+                installation_date=datetime.now(timezone.utc)
                 - timedelta(days=random.randint(30, 180)),
                 removal_date=None,
             )
@@ -159,11 +227,10 @@ def seed_all(n=5):
         # 6. Samples & Observations
         for i in range(n):
             samp = Sample(
+                field_activity_id=random.choice(field_activities).id,
                 sample_name=f"SMPL-{fake.random_int(1000, 9999)}",
                 sample_matrix="water",
-                sample_method=fake.choice(
-                    ["Electric tape measurement (E-probe)", "Steel-tape measurement"]
-                ),
+                sample_method=random.choice(sample_method_terms).term,
                 sample_date=fake.date_time_this_year(),
             )
             t = random.choice(things)
@@ -184,46 +251,12 @@ def seed_all(n=5):
             )
             observations.append(obs)
             s.add(obs)
-
-        # 7. Regulatory Limits
-        for prm in parameters:
-            rl = RegulatoryLimit(
-                parameter=prm,
-                limit_value=random.uniform(50, 1000),
-                limit_unit="mg/L",
-            )
-            s.add(rl)
-
-        # 8. Status History (for Things)
-        for t in things:
-            st = StatusHistory(
-                status_type="Use Status",
-                status_value=random.choice(["Active", "Inactive", "Decommissioned"]),
-                start_date=datetime.utcnow() - timedelta(days=random.randint(100, 500)),
-                statusable_id=t.id,
-                statusable_type="Thing",
-                reason="Initial test seed status",
-            )
-            s.add(st)
-
-        # 9. Transducer Observations
-        for d in deployments:
-            for _ in range(3):
-                tobs = TransducerObservation(
-                    parameter=random.choice(parameters),
-                    deployment_id=d.id,
-                    observation_datetime=datetime.utcnow()
-                    - timedelta(hours=random.randint(1, 500)),
-                    value=round(random.uniform(10, 100), 2),
-                )
-                s.add(tobs)
-
         s.commit()
 
         print(
             f"Seed complete: {len(contacts)} contacts, {len(locations)} locations, "
-            f"{len(things)} things, {len(sensors)} sensors, {len(samples)} samples, "
-            f"{len(observations)} observations."
+            + f"{len(things)} things, {len(sensors)} sensors, {len(samples)} samples, "
+            + f"{len(observations)} observations."
         )