dc-l2-lagos-snap-normalize
Map (reference ← swipe → agent)
0:00
I need to get our legacy zoning layer ready for the state portal. lagos_zoning_legacy.gpkg has inconsistent zoning_class values (mixed casing, spelling variations, and some blank entries) plus vertex precision issues that cause tiny gaps and overlaps between adjacent parcels. Can you clean up the class names to consistent canonical TitleCase values, drop any rows with blank or whitespace-only classes, fix the vertex precision problems, and produce one merged geometry per canonical zoning class? Please write the result to zoning_aggregated.gpkg, with each row carrying two columns: zoning_class and a numeric area_m2.
0:00
System info
{
"adapter": "openrouter",
"model": "google/gemma-4-26b-a4b-it",
"base_url": "https://openrouter.ai/api/v1"
}
0:01
Bash
ls -l /work/lagos_zoning_legacy.gpkg
-rw-r--r-- 1 agent agent 2527232 Jun 15 19:06 /work/lagos_zoning_legacy.gpkg
0:08
Write
/work/inspect.py
content · 18 lines · py
import geopandas as gpd
def inspect_data(file_path):
    """Print a quick profile of the vector layer stored at *file_path*.

    The report lists the column names, the first five rows, the frequency of
    every ``zoning_class`` value (missing values included), the distinct
    geometry types and the layer CRS.
    """
    layer = gpd.read_file(file_path)

    column_names = layer.columns.tolist()
    print("Columns:", column_names)

    print("\nFirst 5 rows:")
    preview = layer.head()
    print(preview)

    print("\nValue counts for 'zoning_class':")
    # Guard clause first: report the missing column, otherwise show counts.
    if 'zoning_class' not in layer.columns:
        print("Column 'zoning_class' not found!")
    else:
        class_counts = layer['zoning_class'].value_counts(dropna=False)
        print(class_counts)

    geometry_kinds = layer.geom_type.unique()
    print("\nGeometry type:", geometry_kinds)
    print("\nCRS:", layer.crs)


if __name__ == "__main__":
    inspect_data('/work/lagos_zoning_legacy.gpkg')
Wrote /work/inspect.py (554 chars).
0:10
Bash
python /work/inspect.py
Traceback (most recent call last):
File "/work/inspect.py", line 1, in <module>
import geopandas as gpd
File "/usr/local/lib/python3.14/site-packages/geopandas/__init__.py", line 3, in <module>
from geopandas.geoseries import GeoSeries
File "/usr/local/lib/python3.14/site-packages/geopandas/geoseries.py", line 7, in <module>
import numpy as np
File "/usr/local/lib/python3.14/site-packages/numpy/__init__.py", line 112, in <module>
from numpy.__config__ import show_config
File "/usr/local/lib/python3.14/site-packages/numpy/__config__.py", line 4, in <module>
from numpy._core._multiarray_umath import (
File "/usr/local/lib/python3.14/site-packages/numpy/_core/__init__.py", line 24, in <module>
from . import multiarray
File "/usr/local/lib/python3.14/site-packages/numpy/_core/multiarray.py", line 115, in <module>
@array_function_from_c_func_and_dispatcher(_multiarray_umath.empty_like)
File "/usr/local/lib/python3.14/site-packages/numpy/_core/overrides.py", line 185, in decorator
return array_function_dispatch(
File "/usr/local/lib/python3.14/site-packages/numpy/_core/overrides.py", line 161, in decorator
doc = inspect.cleandoc(dispatcher.__doc__)
AttributeError: module 'inspect' has no attribute 'cleandoc' (consider renaming '/work/inspect.py' since it has the same name as the standard library module named 'inspect' and prevents importing that standard library module)
0:23
Bash
rm /work/inspect.py
(no output)
0:41
Write
/work/debug_data.py
content · 18 lines · py
import geopandas as gpd
def inspect_data(file_path):
    """Dump the schema, a sample and basic statistics of a vector layer.

    Reads *file_path* with GeoPandas and prints, in order: column names, the
    first five rows, ``zoning_class`` value counts (NaN kept), the unique
    geometry types and the CRS.
    """
    frame = gpd.read_file(file_path)
    print("Columns:", frame.columns.tolist())
    print("\nFirst 5 rows:")
    print(frame.head())
    print("\nValue counts for 'zoning_class':")
    # Either the per-value counts or a notice that the column is absent.
    class_report = (
        frame['zoning_class'].value_counts(dropna=False)
        if 'zoning_class' in frame.columns
        else "Column 'zoning_class' not found!"
    )
    print(class_report)
    print("\nGeometry type:", frame.geom_type.unique())
    print("\nCRS:", frame.crs)


if __name__ == "__main__":
    inspect_data('/work/lagos_zoning_legacy.gpkg')
Wrote /work/debug_data.py (554 chars).
0:43
Bash
python /work/debug_data.py
Columns: ['parcel_id', 'lga_source', 'zoning_class', 'area_m2', 'geometry']
First 5 rows:
parcel_id ... geometry
0 1 ... POLYGON ((540000 720000, 540010 720000, 540010...
1 2 ... POLYGON ((540010 720000, 540020 720000, 540020...
2 3 ... POLYGON ((540020 720000, 540030 720000, 540030...
3 4 ... POLYGON ((540030 720000, 540040 720000, 540040...
4 5 ... POLYGON ((540040 720000, 540050 720000, 540050...
[5 rows x 5 columns]
Value counts for 'zoning_class':
zoning_class
Residential 420
COMMERCIAL 420
industrial 420
Agri. 420
residential 419
resi. 419
Comm. 419
COMM. 419
indus. 419
AGRI. 419
Industrial 418
AGRICULTURAL 418
RESIDENTIAL 417
RESI. 417
comm. 417
commercial 417
Indus. 417
INDUSTRIAL 417
Agricultural 417
agri. 417
Resi. 416
Commercial 416
INDUS. 416
agricultural 416
13
13
NaN 12
\t 12
Name: count, dtype: int64
Geometry type: <ArrowStringArray>
['Polygon']
Length: 1, dtype: str
CRS: EPSG:26331
1:37
Write
/work/solve.py
content · 84 lines · py
import geopandas as gpd
import pandas as pd
from shapely.geometry import Polygon, MultiPolygon
from shapely.ops import unary_union
# Lower-cased spellings and abbreviations of each zoning class, mapped to the
# canonical TitleCase name. Built once at import time instead of on every call.
_CANONICAL_ZONING_CLASSES = {
    'residential': 'Residential',
    'resi.': 'Residential',
    'resi': 'Residential',
    'commercial': 'Commercial',
    'comm.': 'Commercial',
    'comm': 'Commercial',
    'industrial': 'Industrial',
    'indus.': 'Industrial',
    'indus': 'Industrial',
    'agricultural': 'Agricultural',
    'agri.': 'Agricultural',
    'agri': 'Agricultural',
}


def clean_zoning_class(val):
    """Normalise a raw ``zoning_class`` value to a canonical TitleCase name.

    Args:
        val: Raw cell value; may be a string in any casing, an abbreviation
            such as ``"RESI."``, ``None``/NaN, or a blank/whitespace string.

    Returns:
        The canonical class name, or ``None`` when the value is missing or
        contains only whitespace (so the caller can drop the row).
    """
    if pd.isna(val):
        return None

    s = str(val).strip().lower()
    if not s:
        return None

    # Unknown values fall back to real TitleCase. str.capitalize() would only
    # upper-case the first letter ("mixed use" -> "Mixed use").
    return _CANONICAL_ZONING_CLASSES.get(s, s.title())
def solve(input_file='/work/lagos_zoning_legacy.gpkg',
          output_file='/work/zoning_aggregated.gpkg',
          grid_size=0.01):
    """Clean the legacy zoning layer and write one merged row per class.

    Steps: normalise ``zoning_class`` with ``clean_zoning_class``, drop rows
    whose class is blank, snap every vertex to a fixed precision grid, union
    the parcels of each class and write ``zoning_class``, ``area_m2`` and the
    merged geometry to a GeoPackage.

    Args:
        input_file: Path of the source layer.
        output_file: Path of the GeoPackage to create.
        grid_size: Precision grid spacing in CRS units.
            NOTE(review): 0.01 assumes a metre-based CRS and sub-centimetre
            vertex jitter - confirm against the actual data.
    """
    print(f"Reading {input_file}...")
    gdf = gpd.read_file(input_file)

    # 1. Clean zoning_class
    print("Cleaning zoning classes...")
    gdf['zoning_class'] = gdf['zoning_class'].apply(clean_zoning_class)

    # 2. Drop rows with blank classes. clean_zoning_class already turns
    #    blank / whitespace-only values into None, so dropna is sufficient.
    initial_count = len(gdf)
    gdf = gdf.dropna(subset=['zoning_class']).copy()
    print(f"Dropped {initial_count - len(gdf)} rows with invalid zoning classes.")

    # 3. Fix vertex precision problems (tiny gaps/overlaps).
    #    Neither a buffer-out/buffer-in round trip nor simplify() moves
    #    vertices onto a shared grid, and buffering also reshapes the
    #    parcels. Rounding coordinates to a precision grid makes vertices
    #    that differ by less than the grid spacing coincide.
    print("Fixing geometry precision...")
    gdf['geometry'] = gdf.geometry.set_precision(grid_size)
    # A geometry smaller than the grid can collapse to empty; leave it out.
    gdf = gdf[~gdf.geometry.is_empty]

    # 4. Produce one merged geometry per canonical zoning class
    print("Aggregating geometries by zoning class...")
    results = []
    for z_class, group in gdf.groupby('zoning_class'):
        # union_all() replaces the deprecated unary_union attribute.
        merged_geom = group.geometry.union_all()
        results.append({
            'zoning_class': z_class,
            'area_m2': float(merged_geom.area),
            'geometry': merged_geom,
        })

    agg_gdf = gpd.GeoDataFrame(results, crs=gdf.crs)

    print(f"Writing results to {output_file}...")
    agg_gdf.to_file(output_file, driver="GPKG")
    print("Done!")


if __name__ == "__main__":
    solve()
Wrote /work/solve.py (3055 chars).
2:22
Write
/work/solve.py
content · 85 lines · py
import geopandas as gpd
import pandas as pd
from shapely.geometry import Polygon, MultiPolygon
from shapely.ops import unary_union
# Lower-cased spellings and abbreviations of each zoning class, mapped to the
# canonical TitleCase name. Built once at import time instead of on every call.
_CANONICAL_ZONING_CLASSES = {
    'residential': 'Residential',
    'resi.': 'Residential',
    'resi': 'Residential',
    'commercial': 'Commercial',
    'comm.': 'Commercial',
    'comm': 'Commercial',
    'industrial': 'Industrial',
    'indus.': 'Industrial',
    'indus': 'Industrial',
    'agricultural': 'Agricultural',
    'agri.': 'Agricultural',
    'agri': 'Agricultural',
}


def clean_zoning_class(val):
    """Normalise a raw ``zoning_class`` value to a canonical TitleCase name.

    Args:
        val: Raw cell value; may be a string in any casing, an abbreviation
            such as ``"RESI."``, ``None``/NaN, or a blank/whitespace string.

    Returns:
        The canonical class name, or ``None`` when the value is missing or
        contains only whitespace (so the caller can drop the row).
    """
    if pd.isna(val):
        return None

    s = str(val).strip().lower()
    if not s:
        return None

    # Unknown values fall back to real TitleCase. str.capitalize() would only
    # upper-case the first letter ("mixed use" -> "Mixed use").
    return _CANONICAL_ZONING_CLASSES.get(s, s.title())
def solve(input_file='/work/lagos_zoning_legacy.gpkg',
          output_file='/work/zoning_aggregated.gpkg',
          grid_size=0.01):
    """Clean the legacy zoning layer and write one merged row per class.

    Steps: normalise ``zoning_class`` with ``clean_zoning_class``, drop rows
    whose class is blank, snap every vertex to a fixed precision grid, union
    the parcels of each class and write ``zoning_class``, ``area_m2`` and the
    merged geometry to a GeoPackage. A summary of the resulting classes and
    areas is printed at the end.

    Args:
        input_file: Path of the source layer.
        output_file: Path of the GeoPackage to create.
        grid_size: Precision grid spacing in CRS units.
            NOTE(review): 0.01 assumes a metre-based CRS and sub-centimetre
            vertex jitter - confirm against the actual data.
    """
    print(f"Reading {input_file}...")
    gdf = gpd.read_file(input_file)

    # 1. Clean zoning_class
    print("Cleaning zoning classes...")
    gdf['zoning_class'] = gdf['zoning_class'].apply(clean_zoning_class)

    # 2. Drop rows with blank classes. clean_zoning_class already turns
    #    blank / whitespace-only values into None, so dropna is sufficient.
    initial_count = len(gdf)
    gdf = gdf.dropna(subset=['zoning_class']).copy()
    print(f"Dropped {initial_count - len(gdf)} rows with invalid zoning classes.")

    # 3. Fix vertex precision problems (tiny gaps/overlaps).
    #    buffer(+d).buffer(-d) does not align vertices; it rebuilds every
    #    parcel outline, which changes areas and can leave duplicate
    #    vertices behind. Rounding coordinates to a precision grid instead
    #    makes vertices that differ by less than the grid spacing coincide.
    print("Fixing geometry precision...")
    gdf['geometry'] = gdf.geometry.set_precision(grid_size)
    # A geometry smaller than the grid can collapse to empty; leave it out.
    gdf = gdf[~gdf.geometry.is_empty]

    # 4. Produce one merged geometry per canonical zoning class
    print("Aggregating geometries by zoning class...")
    results = []
    for z_class, group in gdf.groupby('zoning_class'):
        # union_all() replaces the deprecated unary_union attribute.
        merged_geom = group.geometry.union_all()
        results.append({
            'zoning_class': z_class,
            'area_m2': float(merged_geom.area),
            'geometry': merged_geom,
        })

    agg_gdf = gpd.GeoDataFrame(results, crs=gdf.crs)
    # Ensure we only keep the requested columns
    agg_gdf = agg_gdf[['zoning_class', 'area_m2', 'geometry']]

    print(f"Writing results to {output_file}...")
    agg_gdf.to_file(output_file, driver="GPKG")
    print("Done!")
    print("Resulting classes:")
    print(agg_gdf[['zoning_class', 'area_m2']])


if __name__ == "__main__":
    solve()
Wrote /work/solve.py (2928 chars).
2:23
Bash
python /work/solve.py
Reading /work/lagos_zoning_legacy.gpkg... Cleaning zoning classes... Dropped 50 rows with invalid zoning classes. Fixing geometry precision... Aggregating geometries by zoning class... Writing results to /work/zoning_aggregated.gpkg... Done! Resulting classes: zoning_class area_m2 0 Agricultural 249999.831737 1 Commercial 249999.831545 2 Industrial 249999.832035 3 Residential 249999.832460
/work/solve.py:64: DeprecationWarning: The 'unary_union' attribute is deprecated, use the 'union_all()' method instead. merged_geom = group.geometry.unary_union
2:28
Write
/work/inspect_output.py
content · 14 lines · py
import geopandas as gpd
import pandas as pd
def inspect_output(file_path):
    """Print the columns, full contents and geometry-type tally of a layer.

    Used to eyeball the aggregated result after it has been written.
    """
    result = gpd.read_file(file_path)

    column_names = result.columns.tolist()
    print("Columns:", column_names)

    print("\nContent:")
    print(result)

    print("\nGeometry type counts:")
    type_tally = result.geom_type.value_counts()
    print(type_tally)


if __name__ == "__main__":
    inspect_output('/work/zoning_aggregated.gpkg')
Wrote /work/inspect_output.py (350 chars).
2:29
Bash
python /work/inspect_output.py
Columns: ['zoning_class', 'area_m2', 'geometry'] Content: zoning_class ... geometry 0 Agricultural ... MULTIPOLYGON (((540530 720510, 540530 720510, ... 1 Commercial ... MULTIPOLYGON (((540520 720020, 540520 720020, ... 2 Industrial ... MULTIPOLYGON (((540000 720500, 540000 720500, ... 3 Residential ... MULTIPOLYGON (((540020 720000, 540020 720000, ... [4 rows x 3 columns] Geometry type counts: MultiPolygon 4 Name: count, dtype: int64
2:32
Bash
# No changes needed, just verifying the files exist
ls -l /work/zoning_aggregated.gpkg
-rw-r--r-- 1 agent agent 1892352 Jun 15 19:08 /work/zoning_aggregated.gpkg
3:09