fio-l1-vienna-shapefile-recovery
Map (reference ← swipe → agent)
0:00
I'm migrating an old parcel shapefile (`parcels.shp`) into our web viewer, and a reference file `column_map.csv` is provided alongside it. Can you produce a clean GeoJSON `parcels.geojson` with the correct full column names and proper character encoding? The features should come through as Polygon or MultiPolygon.
0:00
System info
{
"adapter": "openrouter",
"model": "google/gemma-4-26b-a4b-it",
"base_url": "https://openrouter.ai/api/v1"
}
0:01
Bash
ls -F
column_map.csv parcels.cpg parcels.dbf parcels.prj parcels.shp parcels.shx
0:02
Read
column_map.csv
1 truncated,original
2 KATASTRALG,KATASTRALGEMEINDE_NAME
3 GRUNDSTUEC,GRUNDSTUECKSNUMMER
4 EIGENTUEME,EIGENTUEMER_NAME
5 WIDMUNG_BE,WIDMUNG_BEZEICHNUNG
6 STRASSE_NA,STRASSE_NAME
7 FLAECHE_M2,FLAECHE_M2
0:21
Write
solve.py
content · 118 lines · py
import geopandas as gpd
import pandas as pd
import json
def migrate_parcels(shapefile_path, column_map_path, output_path):
# 1. Load column map
# The CSV looks like it might have some leading whitespace or strange formatting based on the Read output
# It has: index (skipped?), truncated, original
# Let's try reading it with pandas and cleaning it
try:
# The Read output showed: " 1\ttruncated,original"
# It looks like a tab-separated or space-separated header followed by comma-separated values.
# Actually, looking closely at:
# " 1 truncated,original"
# " 2 KATASTRALG,KATASTRALGEMEINDE_NAME"
# It seems it's tab-separated for the first part and comma-separated for the rest?
# Or maybe it's just a standard CSV where the first column is index and there's a tab?
# Let's try reading it as a CSV with whitespace issues.
df_map = pd.read_csv(column_map_path, sep=None, engine='python')
print("Column map loaded successfully:")
print(df_map)
except Exception as e:
print(f"Error reading column map: {e}")
return
# Clean column map to create a dictionary {truncated: original}
# Based on Read output:
# Col 0: Index?
# Col 1: truncated
# Col 2: original
# We need to handle the column names correctly.
# Let's inspect columns of df_map
print(f"Columns in df_map: {df_map.columns.tolist()}")
# Depending on how read_csv parsed it, we might need to rename or select
# Let's assume it has columns like [' ', 'truncated', 'original'] or similar.
# Let's standardize the mapping.
mapping_dict = {}
# Find the columns that correspond to truncated and original
# Based on the content:
# 2 KATASTRALG,KATASTRALGEMEINDE_NAME
# It looks like 'truncated' and 'original' are the headers.
# We'll use the values from 'truncated' and 'original' columns
# We need to handle potential NaNs or incorrect parsing
for _, row in df_map.iterrows():
# Using index-based access in case names are weird
# We want the mapping: row[truncated] -> row[original]
# In the Read output, 'truncated' and 'original' are headers.
# Let's try to find them.
# It's safer to find the column names by checking if 'truncated' is in the columns
trunc_col = None
orig_col = None
for col in df_map.columns:
if 'truncated' in col.lower():
trunc_col = col
if 'original' in col.lower():
orig_col = col
if trunc_col and orig_col:
mapping_dict[str(row[trunc_col]).strip()] = str(row[orig_col]).strip()
break # we found the loop logic inside? no, the loop should be outside.
# Redoing the mapping extraction logic properly
mapping_dict = {}
trunc_col = None
orig_col = None
for col in df_map.columns:
if 'truncated' in col.lower():
trunc_col = col
if 'original' in col.lower():
orig_col = col
if trunc_col and orig_col:
for _, row in df_map.iterrows():
val_trunc = str(row[trunc_col]).strip()
val_orig = str(row[orig_col]).strip()
if val_trunc != 'nan' and val_orig != 'nan':
mapping_dict[val_trunc] = val_orig
print(f"Generated mapping dictionary: {mapping_dict}")
# 2. Load Shapefile
print(f"Loading shapefile: {shapefile_path}")
gdf = gpd.read_file(shapefile_path)
print(f"Initial columns: {gdf.columns.tolist()}")
# 3. Rename columns
# We only rename columns that are in our mapping dictionary
current_cols = gdf.columns.tolist()
new_cols = []
for col in current_cols:
if col in mapping_dict:
new_cols.append(mapping_dict[col])
else:
new_cols.append(col)
gdf.columns = new_cols
print(f"Renamed columns: {gdf.columns.tolist()}")
# 4. Ensure Geometry is Polygon or MultiPolygon
# GeoPandas usually handles this, but let's be sure.
# Filter out non-polygon geometries if any (though usually shapefiles are consistent)
gdf = gdf[gdf.geometry.type.isin(['Polygon', 'MultiPolygon'])]
print(f"Geometry types present: {gdf.geometry.type.unique()}")
# 5. Save to GeoJSON
print(f"Saving to {output_path}")
# Using encoding='utf-8' for proper character encoding
gdf.to_file(output_path, driver='GeoJSON', encoding='utf-8')
print("Successfully saved GeoJSON.")
if __name__ == "__main__":
migrate_parcels('parcels.shp', 'column_map.csv', 'parcels.geojson')
Wrote solve.py (4695 chars).
0:22
Bash
python solve.py
Column map loaded successfully:
truncated original
0 KATASTRALG KATASTRALGEMEINDE_NAME
1 GRUNDSTUEC GRUNDSTUECKSNUMMER
2 EIGENTUEME EIGENTUEMER_NAME
3 WIDMUNG_BE WIDMUNG_BEZEICHNUNG
4 STRASSE_NA STRASSE_NAME
5 FLAECHE_M2 FLAECHE_M2
Columns in df_map: ['truncated', 'original']
Generated mapping dictionary: {'KATASTRALG': 'KATASTRALGEMEINDE_NAME', 'GRUNDSTUEC': 'GRUNDSTUECKSNUMMER', 'EIGENTUEME': 'EIGENTUEMER_NAME', 'WIDMUNG_BE': 'WIDMUNG_BEZEICHNUNG', 'STRASSE_NA': 'STRASSE_NAME', 'FLAECHE_M2': 'FLAECHE_M2'}
Loading shapefile: parcels.shp
Initial columns: ['KATASTRALG', 'GRUNDSTUEC', 'EIGENTUEME', 'WIDMUNG_BE', 'STRASSE_NA', 'FLAECHE_M2', 'geometry']
Renamed columns: ['KATASTRALGEMEINDE_NAME', 'GRUNDSTUECKSNUMMER', 'EIGENTUEMER_NAME', 'WIDMUNG_BEZEICHNUNG', 'STRASSE_NAME', 'FLAECHE_M2', 'geometry']
Geometry types present: <ArrowStringArray>
['Polygon']
Length: 1, dtype: str
Saving to parcels.geojson
Successfully saved GeoJSON.
0:24
Bash
ls -F
column_map.csv parcels.cpg parcels.dbf parcels.geojson parcels.prj parcels.shp parcels.shx solve.py
0:29
Write
solve.py
content · 62 lines · py
import geopandas as gpd
import pandas as pd
import json
def migrate_parcels(shapefile_path, column_map_path, output_path):
# 1. Load column map
# The CSV might have some strange leading whitespace or formatting.
# Based on the previous attempt, it worked, but let's be more robust.
try:
# Let's read it with pandas and handle the whitespace.
# Using sep=None with engine='python' handles various delimiters.
df_map = pd.read_csv(column_map_path, sep=None, engine='python')
except Exception as e:
print(f"Error reading column map: {e}")
return
# Clean column names in the mapping file itself
df_map.columns = [c.strip() for c in df_map.columns]
mapping_dict = {}
# Find columns that resemble 'truncated' and 'original'
trunc_col = None
orig_col = None
for col in df_map.columns:
if 'truncated' in col.lower():
trunc_col = col
if 'original' in col.lower():
orig_col = col
if trunc_col and orig_col:
for _, row in df_map.iterrows():
val_trunc = str(row[trunc_col]).strip()
val_orig = str(row[orig_col]).strip()
# Check for non-null/non-nan values
if val_trunc != 'nan' and val_orig != 'nan' and val_trunc != '':
mapping_dict[val_trunc] = val_orig
print(f"Generated mapping dictionary: {mapping_dict}")
# 2. Load Shapefile
print(f"Loading shapefile: {shapefile_path}")
gdf = gpd.read_file(shapefile_path)
# 3. Rename columns
# The instruction asks for 'correct full column names'.
# We rename them using the map.
gdf = gdf.rename(columns=mapping_dict)
print(f"Columns after renaming: {gdf.columns.tolist()}")
# 4. Ensure Geometry is Polygon or MultiPolygon
# This is usually true for parcel datasets, but we filter to be safe.
gdf = gdf[gdf.geometry.type.isin(['Polygon', 'MultiPolygon'])]
# 5. Save to GeoJSON
# GeoJSON is standard UTF-8.
print(f"Saving to {output_path}")
gdf.to_file(output_path, driver='GeoJSON', encoding='utf-8')
print("Successfully saved GeoJSON.")
if __name__ == "__main__":
migrate_parcels('parcels.shp', 'column_map.csv', 'parcels.geojson')
Wrote solve.py (2273 chars).
0:30
Bash
python solve.py
Generated mapping dictionary: {'KATASTRALG': 'KATASTRALGEMEINDE_NAME', 'GRUNDSTUEC': 'GRUNDSTUECKSNUMMER', 'EIGENTUEME': 'EIGENTUEMER_NAME', 'WIDMUNG_BE': 'WIDMUNG_BEZEICHNUNG', 'STRASSE_NA': 'STRASSE_NAME', 'FLAECHE_M2': 'FLAECHE_M2'}
Loading shapefile: parcels.shp
Columns after renaming: ['KATASTRALGEMEINDE_NAME', 'GRUNDSTUECKSNUMMER', 'EIGENTUEMER_NAME', 'WIDMUNG_BEZEICHNUNG', 'STRASSE_NAME', 'FLAECHE_M2', 'geometry']
Saving to parcels.geojson
Successfully saved GeoJSON.
0:32
Bash
python -c "import geopandas as gpd; gdf = gpd.read_file('parcels.geojson'); prin...
KATASTRALGEMEINDE_NAME ... geometry 0 Innere Stadt ... POLYGON ((625700 483400, 625700 483425, 625730... 1 Mariahilf ... POLYGON ((625732 483400, 625732 483425, 625762... 2 Währing ... POLYGON ((625764 483400, 625764 483425, 625794... 3 Döbling ... POLYGON ((625796 483400, 625796 483425, 625826... 4 Hütteldorf ... POLYGON ((625828 483400, 625828 483425, 625858... [5 rows x 7 columns] <ArrowStringArray> ['Polygon'] Length: 1, dtype: str
0:37