dd-l2-bangkok-multicrs-audit
Map (reference ← swipe → agent)
0:00
A contractor just shipped `bangkok_contractor_delivery.gpkg` and I need a defect cite-sheet before I sign off. Please walk every layer in the file and write one row per layer to `crs_audit.csv` with these columns: layer_name, declared_crs (formatted as `EPSG:NNNN`), geometry_type, feature_count, sample_x and sample_y for a sample coordinate taken from the layer as it sits on disk, and encoding_detected. For encoding_detected, look at the text attributes for anomalies and report either `latin1-mojibake` or `utf-8`.
0:00
System info
{
"adapter": "openrouter",
"model": "google/gemma-4-26b-a4b-it",
"base_url": "https://openrouter.ai/api/v1"
}
0:03
Bash
ls -F
bangkok_contractor_delivery.gpkg
0:34
Write
/work/solve.py
content · 117 lines · py
# /// script
# requires-python = ">=3.12"
# dependencies = ["geopandas", "pyogrio", "pandas", "shapely"]
# ///
import geopandas as gpd
import pandas as pd
import fiona
from shapely.geometry import Point
def detect_encoding(gdf):
"""
Heuristic to check if text attributes look like mojibake.
We look for characters that often appear when UTF-8 is read as Latin-1.
Common mojibake patterns: Ã, ©, etc.
"""
# Identify all string columns
str_cols = gdf.select_dtypes(include=['object']).columns
if len(str_cols) == 0:
return "utf-8"
for col in str_cols:
# Sample some values
samples = gdf[col].dropna().astype(str).head(100).tolist()
for s in samples:
# Check for common mojibake patterns (high frequency of accented chars in weird places)
# A simple check: if it contains specific sequences often associated with UTF-8 misinterpreted as Latin-1
# e.g. 'Ã' followed by something else.
if "Ã" in s:
return "latin1-mojibake"
return "utf-8"
def audit_gpkg(file_path):
audit_results = []
# Get layer names using fiona
layers = fiona.listlayers(file_path)
for layer in layers:
print(f"Processing layer: {layer}")
try:
gdf = gpd.read_file(file_path, layer=layer)
# 1. layer_name
layer_name = layer
# 2. declared_crs
# format as EPSG:NNNN
crs = gdf.crs
if crs is None:
declared_crs = "None"
elif crs.is_epsg:
declared_crs = f"EPSG:{crs.to_epsg()}"
else:
# Fallback if not a standard EPSG code but has a name/wkt
declared_crs = str(crs)
# 3. geometry_type
# GeoPandas geometry type is usually the predominant one
geom_type = gdf.geom_type.iloc[0] if not gdf.empty else "None"
# 4. feature_count
feature_count = len(gdf)
# 5. sample_x and sample_y
sample_x = None
sample_y = None
if not gdf.empty:
# Get the first geometry
first_geom = gdf.geometry.iloc[0]
if first_geom is not None and not first_geom.is_empty:
# We use centroid for points, or representative_point/centroid for others
# The prompt asks for "sample coordinate taken from the layer as it sits on disk"
# Usually implies a point from the geometry.
if first_geom.geom_type == 'Point':
sample_x = first_geom.x
sample_y = first_geom.y
else:
centroid = first_geom.centroid
sample_x = centroid.x
sample_y = centroid.y
# 6. encoding_detected
encoding = detect_encoding(gdf)
audit_results.append({
"layer_name": layer_name,
"declared_crs": declared_crs,
"geometry_type": geom_type,
"feature_count": feature_count,
"sample_x": sample_x,
"sample_y": sample_y,
"encoding_detected": encoding
})
except Exception as e:
print(f"Error processing layer {layer}: {e}")
# Even on error, try to record what we can or at least mark it
audit_results.append({
"layer_name": layer,
"declared_crs": "ERROR",
"geometry_type": "ERROR",
"feature_count": 0,
"sample_x": None,
"sample_y": None,
"encoding_detected": "ERROR"
})
# Create DataFrame and save
audit_df = pd.DataFrame(audit_results)
audit_df.to_csv("crs_audit.csv", index=False)
print("Audit complete. Results saved to crs_audit.csv")
if __name__ == "__main__":
audit_gpkg("bangkok_contractor_delivery.gpkg")
Wrote /work/solve.py (4183 chars).
0:36
Bash
uv run /work/solve.py
Installed 10 packages in 280ms
Traceback (most recent call last):
File "/work/solve.py", line 8, in <module>
import fiona
ModuleNotFoundError: No module named 'fiona'
0:43
Edit
/work/solve.py
Edited /work/solve.py.
0:49
Bash
uv run /work/solve.py
Processing layer: parcels Error processing layer parcels: 'CRS' object has no attribute 'is_epsg' Processing layer: roads Error processing layer roads: 'CRS' object has no attribute 'is_epsg' Processing layer: markets Error processing layer markets: 'CRS' object has no attribute 'is_epsg' Audit complete. Results saved to crs_audit.csv
Installed 5 packages in 27ms
1:15
Write
/work/solve.py
content · 104 lines · py
# /// script
# requires-python = ">=3.12"
# dependencies = ["geopandas", "pyogrio", "pandas", "shapely", "fiona"]
# ///
import geopandas as gpd
import pandas as pd
import fiona
from shapely.geometry import Point
def detect_encoding(gdf):
"""
Heuristic to check if text attributes look like mojibake.
We look for characters that often appear when UTF-8 is read as Latin-1.
"""
str_cols = gdf.select_dtypes(include=['object']).columns
if len(str_cols) == 0:
return "utf-8"
for col in str_cols:
# Sample some values
samples = gdf[col].dropna().astype(str).head(100).tolist()
for s in samples:
# Check for common mojibake patterns:
# 'Ã' is a very strong indicator of UTF-8 encoded multibyte chars read as Latin-1
if "Ã" in s:
return "latin1-mojibake"
return "utf-8"
def audit_gpkg(file_path):
audit_results = []
layers = fiona.listlayers(file_path)
for layer in layers:
print(f"Processing layer: {layer}")
try:
gdf = gpd.read_file(file_path, layer=layer)
layer_name = layer
# 2. declared_crs
crs = gdf.crs
declared_crs = "None"
if crs is not None:
epsg_code = crs.to_epsg()
if epsg_code is not None:
declared_crs = f"EPSG:{epsg_code}"
else:
declared_crs = str(crs)
# 3. geometry_type
geom_type = gdf.geom_type.iloc[0] if not gdf.empty else "None"
# 4. feature_count
feature_count = len(gdf)
# 5. sample_x and sample_y
sample_x = None
sample_y = None
if not gdf.empty:
first_geom = gdf.geometry.iloc[0]
if first_geom is not None and not first_geom.is_empty:
if first_geom.geom_type == 'Point':
sample_x = first_geom.x
sample_y = first_geom.y
else:
centroid = first_geom.centroid
sample_x = centroid.x
sample_y = centroid.y
# 6. encoding_detected
encoding = detect_encoding(gdf)
audit_results.append({
"layer_name": layer_name,
"declared_crs": declared_crs,
"geometry_type": geom_type,
"feature_count": feature_count,
"sample_x": sample_x,
"sample_y": sample_y,
"encoding_detected": encoding
})
except Exception as e:
print(f"Error processing layer {layer}: {e}")
audit_results.append({
"layer_name": layer,
"declared_crs": "ERROR",
"geometry_type": "ERROR",
"feature_count": 0,
"sample_x": None,
"sample_y": None,
"encoding_detected": "ERROR"
})
audit_df = pd.DataFrame(audit_results)
audit_df.to_csv("crs_audit.csv", index=False)
print("Audit complete. Results saved to crs_audit.csv")
if __name__ == "__main__":
audit_gpkg("bangkok_contractor_delivery.gpkg")
Wrote /work/solve.py (3406 chars).
1:20
Bash
uv run /work/solve.py
Processing layer: parcels Processing layer: roads Processing layer: markets Audit complete. Results saved to crs_audit.csv
/work/solve.py:16: Pandas4Warning: For backward compatibility, 'str' dtypes are included by select_dtypes when 'object' dtype is specified. This behavior is deprecated and will be removed in a future version. Explicitly pass 'str' to `include` to select them, or to `exclude` to remove them and silence this warning. See https://pandas.pydata.org/docs/user_guide/migration-3-strings.html#string-migration-select-dtypes for details on how to write code that works with pandas 2 and 3. str_cols = gdf.select_dtypes(include=['object']).columns /work/solve.py:16: Pandas4Warning: For backward compatibility, 'str' dtypes are included by select_dtypes when 'object' dtype is specified. This behavior is deprecated and will be removed in a future version. Explicitly pass 'str' to `include` to select them, or to `exclude` to remove them and silence this warning. See https://pandas.pydata.org/docs/user_guide/migration-3-strings.html#string-migration-select-dtypes for details on how to write code that works with pandas 2 and 3. str_cols = gdf.select_dtypes(include=['object']).columns /work/solve.py:16: Pandas4Warning: For backward compatibility, 'str' dtypes are included by select_dtypes when 'object' dtype is specified. This behavior is deprecated and will be removed in a future version. Explicitly pass 'str' to `include` to select them, or to `exclude` to remove them and silence this warning. See https://pandas.pydata.org/docs/user_guide/migration-3-strings.html#string-migration-select-dtypes for details on how to write code that works with pandas 2 and 3. str_cols = gdf.select_dtypes(include=['object']).columns
1:23
Bash
cat crs_audit.csv
layer_name,declared_crs,geometry_type,feature_count,sample_x,sample_y,encoding_detected parcels,EPSG:24047,Polygon,4000,657706.4571303247,1509274.702150785,utf-8 roads,EPSG:32647,LineString,5000,656996.3371533038,1509487.963830215,utf-8 markets,EPSG:4326,Point,1000,100.4533,13.655000000000001,utf-8
1:32