dc-l2-lagos-snap-normalize
Map (reference ← swipe → agent)
0:00
I need to get our legacy zoning layer ready for the state portal. lagos_zoning_legacy.gpkg has inconsistent zoning_class values (mixed casing, spelling variations, and some blank entries) plus vertex precision issues that cause tiny gaps and overlaps between adjacent parcels. Can you clean up the class names to consistent canonical TitleCase values, drop any rows with blank or whitespace-only classes, fix the vertex precision problems, and produce one merged geometry per canonical zoning class? Please write the result to zoning_aggregated.gpkg, with each row carrying two columns: zoning_class and a numeric area_m2.
0:00
System info
{
"adapter": "openrouter",
"model": "google/gemma-4-26b-a4b-it",
"base_url": "https://openrouter.ai/api/v1"
}
0:03
Bash
ls -l /work/lagos_zoning_legacy.gpkg
-rw-r--r-- 1 agent agent 2527232 Jun 16 10:39 /work/lagos_zoning_legacy.gpkg
0:18
Write
/work/inspect.py
content · 14 lines · py
import geopandas as gpd
def inspect_data(path):
gdf = gpd.read_file(path)
print("Columns:", gdf.columns.tolist())
print("Head:\n", gdf.head())
print("\nUnique zoning_class values:\n", gdf['zoning_class'].unique())
print("\nGeometry type:", gdf.geometry.type.unique())
print("\nCRS:", gdf.crs)
return gdf
if __name__ == "__main__":
gdf = inspect_data('/work/lagos_zoning_legacy.gpkg')
Wrote /work/inspect.py (418 chars).
0:21
Bash
python /work/inspect.py
Traceback (most recent call last):
File "/work/inspect.py", line 1, in <module>
import geopandas as gpd
File "/usr/local/lib/python3.14/site-packages/geopandas/__init__.py", line 3, in <module>
from geopandas.geoseries import GeoSeries
File "/usr/local/lib/python3.14/site-packages/geopandas/geoseries.py", line 7, in <module>
import numpy as np
File "/usr/local/lib/python3.14/site-packages/numpy/__init__.py", line 112, in <module>
from numpy.__config__ import show_config
File "/usr/local/lib/python3.14/site-packages/numpy/__config__.py", line 4, in <module>
from numpy._core._multiarray_umath import (
File "/usr/local/lib/python3.14/site-packages/numpy/_core/__init__.py", line 24, in <module>
from . import multiarray
File "/usr/local/lib/python3.14/site-packages/numpy/_core/multiarray.py", line 115, in <module>
@array_function_from_c_func_and_dispatcher(_multiarray_umath.empty_like)
File "/usr/local/lib/python3.14/site-packages/numpy/_core/overrides.py", line 185, in decorator
return array_function_dispatch(
File "/usr/local/lib/python3.14/site-packages/numpy/_core/overrides.py", line 161, in decorator
doc = inspect.cleandoc(dispatcher.__doc__)
AttributeError: module 'inspect' has no attribute 'cleandoc' (consider renaming '/work/inspect.py' since it has the same name as the standard library module named 'inspect' and prevents importing that standard library module)
0:23
Bash
rm /work/inspect.py
(no output)
0:25
Write
/work/data_inspector.py
content · 14 lines · py
import geopandas as gpd
def inspect_data(path):
gdf = gpd.read_file(path)
print("Columns:", gdf.columns.tolist())
print("Head:\n", gdf.head())
print("\nUnique zoning_class values:\n", gdf['zoning_class'].unique())
print("\nGeometry type:", gdf.geometry.type.unique())
print("\nCRS:", gdf.crs)
return gdf
if __name__ == "__main__":
gdf = inspect_data('/work/lagos_zoning_legacy.gpkg')
Wrote /work/data_inspector.py (418 chars).
0:27
Bash
python /work/data_inspector.py
Columns: ['parcel_id', 'lga_source', 'zoning_class', 'area_m2', 'geometry']
Head:
parcel_id ... geometry
0 1 ... POLYGON ((540000 720000, 540010 720000, 540010...
1 2 ... POLYGON ((540010 720000, 540020 720000, 540020...
2 3 ... POLYGON ((540020 720000, 540030 720000, 540030...
3 4 ... POLYGON ((540030 720000, 540040 720000, 540040...
4 5 ... POLYGON ((540040 720000, 540050 720000, 540050...
[5 rows x 5 columns]
Unique zoning_class values:
<ArrowStringArray>
[ 'RESIDENTIAL', 'residential', 'Resi.', 'resi.',
'RESI.', 'Residential', 'Comm.', 'comm.',
'COMM.', 'Commercial', 'COMMERCIAL', 'commercial',
'Indus.', 'indus.', 'INDUS.', 'Industrial',
'INDUSTRIAL', 'industrial', 'AGRI.', 'Agricultural',
'AGRICULTURAL', 'agricultural', 'Agri.', 'agri.',
'', ' ', nan, '\t']
Length: 28, dtype: str
Geometry type: <ArrowStringArray>
['Polygon']
Length: 1, dtype: str
CRS: EPSG:26331
0:55
Write
/work/solve.py
content · 102 lines · py
import geopandas as gpd
import pandas as pd
from shapely.geometry import MultiPolygon, Polygon
def clean_zoning_class(val):
if pd.isna(val) or str(val).strip() == "":
return None
s = str(val).strip().lower()
# Mapping variations to canonical names
# Based on inspection:
# RESI (Residential)
# COMM (Commercial)
# INDUS (Industrial)
# AGRI (Agricultural)
if s.startswith(('resi', 'res')):
return "Residential"
elif s.startswith(('comm', 'com')):
return "Commercial"
elif s.startswith(('indus', 'ind')):
return "Industrial"
elif s.startswith(('agri', 'agr')):
return "Agricultural"
else:
# Fallback to Title Case if it doesn't match prefixes but is a valid string
return s.title()
def solve():
input_path = '/work/lagos_zoning_legacy.gpkg'
output_path = '/work/zoning_aggregated.gpkg'
print(f"Reading {input_path}...")
gdf = gpd.read_file(input_path)
# 1. Clean zoning_class
print("Cleaning zoning_class...")
gdf['zoning_class'] = gdf['zoning_class'].apply(clean_zoning_class)
# 2. Drop rows with blank or whitespace-only classes (which are now None)
gdf = gdf.dropna(subset=['zoning_class'])
# 3. Fix vertex precision issues
# A common way to fix tiny gaps/overlaps is to round coordinates and then
# use a small buffer(0) or snap to grid.
# Given the coordinate magnitudes (~540000), rounding to 1 or 2 decimals
# (cm/mm level) should be safe.
print("Fixing vertex precision...")
def snap_to_grid(geom, precision=0.01):
# snap to grid by rounding coordinates
if geom.is_empty:
return geom
return gpd.GeoSeries([geom]).simplify(precision, preserve_topology=True).iloc[0]
# Instead of simplify, let's try rounding coordinates via a more robust method if possible,
# but simplify(preserve_topology=True) is quite effective for cleaning up small artifacts.
# However, a common GIS trick for "cleaning" geometry is buffer(0).
# Let's use a combination: snap to grid via rounding (if we can) or simplify.
# Since we can't easily round all coordinates in Shapely directly without iterating,
# let's use simplify with a very small tolerance to snap vertices.
gdf['geometry'] = gdf['geometry'].simplify(0.01, preserve_topology=True)
# Ensure geometries are valid
gdf['geometry'] = gdf['geometry'].buffer(0)
# 4. Produce one merged geometry per canonical zoning class
print("Aggregating by zoning class...")
# We need to group by zoning_class, dissolve the geometries, and calculate area.
# Note: Dissolving might create small gaps if the precision fix wasn't perfect.
# But the task asks for "one merged geometry per canonical zoning class".
# We will group by zoning_class
grouped = gdf.dissolve(by='zoning_class', aggfunc={'area_m2': 'sum'})
# The dissolve operation keeps the column used for grouping.
# The resulting gdf will have 'zoning_class' as index.
# We need to reset index and ensure columns are correct.
result_gdf = grouped.reset_index()
# Re-calculate area_m2 on the dissolved geometries to ensure accuracy
# (in case the original area_m2 column was just a field and not the actual area
# of the geometry, or if dissolving changed things).
# The prompt says: "each row carrying two columns: zoning_class and a numeric area_m2"
# It's safer to calculate it from the new geometries.
result_gdf['area_m2'] = result_gdf['geometry'].area
# Final selection of columns
result_gdf = result_gdf[['zoning_class', 'area_m2', 'geometry']]
print(f"Writing to {output_path}...")
result_gdf.to_file(output_path, driver="GPKG")
print("Done!")
print(result_gdf.head())
if __name__ == "__main__":
solve()
Wrote /work/solve.py (3933 chars).
0:58
Bash
python /work/solve.py
Reading /work/lagos_zoning_legacy.gpkg... Cleaning zoning_class... Fixing vertex precision... Aggregating by zoning class... Writing to /work/zoning_aggregated.gpkg... Done! zoning_class ... geometry 0 Agricultural ... MULTIPOLYGON (((540500 720500, 540500 720510, ... 1 Commercial ... MULTIPOLYGON (((540510 720000, 540510 720000, ... 2 Industrial ... MULTIPOLYGON (((540000 720530, 540000 720540, ... 3 Residential ... MULTIPOLYGON (((540000 720020, 540006.667 7200... [4 rows x 3 columns]
1:02
Bash
python -c "import geopandas as gpd; gdf = gpd.read_file('/work/zoning_aggregated...
zoning_class area_m2 0 Agricultural 249999.831759 1 Commercial 249999.831567 2 Industrial 249999.832057 3 Residential 249999.832482
1:17
Write
/work/solve.py
content · 59 lines · py
import geopandas as gpd
import pandas as pd
def clean_zoning_class(val):
if pd.isna(val) or str(val).strip() == "":
return None
s = str(val).strip().lower()
if s.startswith(('resi', 'res')):
return "Residential"
elif s.startswith(('comm', 'com')):
return "Commercial"
elif s.startswith(('indus', 'ind')):
return "Industrial"
elif s.startswith(('agri', 'agr')):
return "Agricultural"
else:
return s.title()
def solve():
input_path = '/work/lagos_zoning_legacy.gpkg'
output_path = '/work/zoning_aggregated.gpkg'
gdf = gpd.read_file(input_path)
# 1. Clean zoning_class and drop blanks
gdf['zoning_class'] = gdf['zoning_class'].apply(clean_zoning_class)
gdf = gdf.dropna(subset=['zoning_class'])
# 2. Fix vertex precision issues (gaps/overlaps)
# First, snap to a grid to ensure adjacent vertices align
# We'll use a small tolerance for simplification to snap vertices.
# Then buffer(0) to fix topology.
gdf['geometry'] = gdf['geometry'].simplify(0.01, preserve_topology=True)
gdf['geometry'] = gdf['geometry'].buffer(0)
# 3. Aggregate by zoning class
# We dissolve the geometries to merge them into one per class
# and sum the area.
# We use dissolve on 'zoning_class'.
grouped = gdf.dissolve(by='zoning_class')
# 4. Final cleaning of output
# The prompt asks for 'zoning_class' and 'area_m2'
# The dissolved gdf index is 'zoning_class'.
result_gdf = grouped.reset_index()
result_gdf['area_m2'] = result_gdf['geometry'].area
# Ensure the output has exactly the requested columns
final_gdf = result_gdf[['zoning_class', 'area_m2', 'geometry']]
# Write to GeoPackage
final_gdf.to_file(output_path, driver="GPKG")
print("Success")
if __name__ == "__main__":
solve()
Wrote /work/solve.py (1896 chars).
1:18
Bash
python /work/solve.py
Success
1:24
Bash
ls -l /work/zoning_aggregated.gpkg
-rw-r--r-- 1 agent agent 688128 Jun 16 10:40 /work/zoning_aggregated.gpkg
1:25
Bash
python -c "import geopandas as gpd; gdf = gpd.read_file('/work/zoning_aggregated...
zoning_class ... geometry 0 Agricultural ... MULTIPOLYGON (((540500 720500, 540500 720510, ... 1 Commercial ... MULTIPOLYGON (((540510 720000, 540510 720000, ... 2 Industrial ... MULTIPOLYGON (((540000 720530, 540000 720540, ... 3 Residential ... MULTIPOLYGON (((540000 720020, 540006.667 7200... [4 rows x 3 columns] ['zoning_class', 'area_m2', 'geometry']
1:33