Preview: CSV Data Cleaner
Upload messy CSV files and get cleaned data with AI-powered suggestions. Handles duplicates, missing values, format standardization, and custom transformations.
Preview Mode
This is a preview with sample data. The template uses placeholders like {{agent_name}},
which will be replaced with actual agent data.
Template Preview
Template Metadata
- Slug
- csv-data-cleaner
- Created By
- ozzo
- Created
- Nov 04, 2025
- Usage Count
- 0
Tags
csv
data-cleaning
data-quality
ai
transformation
Code Statistics
- HTML Lines
- 170
- CSS Lines
- 0
- JS Lines
- 0
- Python Lines
- 677
Source Code
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{{agent_name}}</title>
<script src="https://cdn.tailwindcss.com"></script>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
</head>
<body class="bg-gradient-to-br from-blue-50 to-indigo-100 min-h-screen">
<div class="container mx-auto px-4 py-8 max-w-6xl">
<!-- Header -->
<div class="bg-white rounded-lg shadow-lg p-6 mb-6">
<div class="flex items-center justify-between">
<div>
<h1 class="text-3xl font-bold text-gray-800 mb-2">
<i class="fas fa-broom text-indigo-600 mr-2"></i>{{agent_name}}
</h1>
<p class="text-gray-600">{{description}}</p>
</div>
<div class="text-right">
<div class="text-sm text-gray-500">Provider: <span class="font-semibold">{{provider}}</span></div>
<div class="text-sm text-gray-500">Model: <span class="font-semibold">{{model_name}}</span></div>
</div>
</div>
</div>
<!-- Main Content -->
<div class="grid grid-cols-1 lg:grid-cols-2 gap-6">
<!-- Left Panel: File Upload & Preview -->
<div class="bg-white rounded-lg shadow-lg p-6">
<h2 class="text-xl font-bold text-gray-800 mb-4">
<i class="fas fa-upload mr-2"></i>Upload CSV File
</h2>
<!-- File Upload Area -->
<div class="mb-6">
<label class="flex flex-col items-center justify-center w-full h-32 border-2 border-dashed border-indigo-300 rounded-lg cursor-pointer bg-indigo-50 hover:bg-indigo-100 transition">
<div class="flex flex-col items-center justify-center pt-5 pb-6">
<i class="fas fa-cloud-upload-alt text-4xl text-indigo-500 mb-2"></i>
<p class="text-sm text-gray-600"><span class="font-semibold">Click to upload</span> or drag and drop</p>
<p class="text-xs text-gray-500">CSV files only</p>
</div>
<input id="fileInput" type="file" accept=".csv" class="hidden" />
</label>
<div id="fileName" class="mt-2 text-sm text-gray-600 hidden"></div>
</div>
<!-- Data Preview -->
<div id="previewSection" class="hidden">
<h3 class="text-lg font-semibold text-gray-700 mb-3">Data Preview</h3>
<div class="bg-gray-50 rounded-lg p-4 max-h-64 overflow-auto">
<table id="previewTable" class="min-w-full text-sm">
<thead id="previewHead" class="bg-indigo-100"></thead>
<tbody id="previewBody" class="divide-y divide-gray-200"></tbody>
</table>
</div>
<div class="mt-3 text-sm text-gray-600">
<span id="rowCount" class="font-semibold"></span> rows,
<span id="colCount" class="font-semibold"></span> columns
</div>
</div>
<!-- Issues Detected -->
<div id="issuesSection" class="mt-6 hidden">
<h3 class="text-lg font-semibold text-gray-700 mb-3">
<i class="fas fa-exclamation-triangle text-yellow-500 mr-2"></i>Issues Detected
</h3>
<div id="issuesList" class="space-y-2"></div>
</div>
</div>
<!-- Right Panel: Cleaning Operations & Results -->
<div class="bg-white rounded-lg shadow-lg p-6">
<h2 class="text-xl font-bold text-gray-800 mb-4">
<i class="fas fa-magic mr-2"></i>Cleaning Operations
</h2>
<!-- Quick Actions -->
<div class="mb-6">
<h3 class="text-sm font-semibold text-gray-700 mb-2">Quick Actions</h3>
<div class="grid grid-cols-2 gap-2">
<button onclick="handleQuickAction('remove_duplicates')" class="btn-action">
<i class="fas fa-copy mr-1"></i>Remove Duplicates
</button>
<button onclick="handleQuickAction('fill_missing')" class="btn-action">
<i class="fas fa-fill-drip mr-1"></i>Fill Missing Values
</button>
<button onclick="handleQuickAction('standardize_formats')" class="btn-action">
<i class="fas fa-align-left mr-1"></i>Standardize Formats
</button>
<button onclick="handleQuickAction('clean_whitespace')" class="btn-action">
<i class="fas fa-eraser mr-1"></i>Clean Whitespace
</button>
</div>
</div>
<!-- AI-Powered Suggestions -->
<div class="mb-6">
<button id="getSuggestionsBtn" onclick="getAISuggestions()"
class="w-full bg-gradient-to-r from-purple-600 to-indigo-600 text-white px-4 py-3 rounded-lg hover:from-purple-700 hover:to-indigo-700 transition font-semibold">
<i class="fas fa-brain mr-2"></i>Get AI Suggestions
</button>
</div>
<!-- Suggestions Display -->
<div id="suggestionsSection" class="hidden mb-6">
<h3 class="text-lg font-semibold text-gray-700 mb-3">AI Suggestions</h3>
<div id="suggestionsList" class="space-y-2 max-h-64 overflow-y-auto"></div>
</div>
<!-- Custom Query -->
<div class="mb-6">
<h3 class="text-sm font-semibold text-gray-700 mb-2">Custom Cleaning Task</h3>
<textarea id="customQuery"
placeholder="Describe what you want to clean or transform... (e.g., 'Convert all dates to YYYY-MM-DD format')"
class="w-full px-3 py-2 border border-gray-300 rounded-lg focus:ring-2 focus:ring-indigo-500 focus:border-transparent"
rows="3"></textarea>
<button onclick="executeCustomCleaning()"
class="mt-2 w-full bg-indigo-600 text-white px-4 py-2 rounded-lg hover:bg-indigo-700 transition">
<i class="fas fa-play mr-2"></i>Execute
</button>
</div>
<!-- Download Cleaned Data -->
<div id="downloadSection" class="hidden">
<button id="downloadBtn" onclick="downloadCleanedCSV()"
class="w-full bg-green-600 text-white px-4 py-3 rounded-lg hover:bg-green-700 transition font-semibold">
<i class="fas fa-download mr-2"></i>Download Cleaned CSV
</button>
</div>
</div>
</div>
<!-- Activity Log -->
<div class="bg-white rounded-lg shadow-lg p-6 mt-6">
<h2 class="text-xl font-bold text-gray-800 mb-4">
<i class="fas fa-history mr-2"></i>Activity Log
</h2>
<div id="activityLog" class="space-y-2 max-h-64 overflow-y-auto">
<div class="text-gray-500 text-sm italic">No activities yet. Upload a CSV file to begin.</div>
</div>
</div>
</div>
<!-- Loading Overlay -->
<div id="loadingOverlay" class="hidden fixed inset-0 bg-black bg-opacity-50 flex items-center justify-center z-50">
<div class="bg-white rounded-lg p-6 flex flex-col items-center">
<div class="animate-spin rounded-full h-16 w-16 border-b-4 border-indigo-600 mb-4"></div>
<p id="loadingText" class="text-gray-700 font-semibold">Processing...</p>
</div>
</div>
<style type="text/tailwindcss">
/* The Tailwind Play CDN only processes @apply inside style blocks marked
   type="text/tailwindcss"; in a plain <style> the browser ignores these
   rules and the buttons/badges render unstyled. */
.btn-action {
@apply bg-gray-100 hover:bg-gray-200 text-gray-700 px-3 py-2 rounded-lg text-sm transition border border-gray-300;
}
.issue-badge {
@apply inline-block px-2 py-1 rounded text-xs font-semibold;
}
.issue-high { @apply bg-red-100 text-red-800; }
.issue-medium { @apply bg-yellow-100 text-yellow-800; }
.issue-low { @apply bg-blue-100 text-blue-800; }
</style>
<script type="text/python">
{{python_code}}
</script>
</body>
</html>
import sys
import json
import csv
import io
import re
from datetime import datetime
from js import document, console, Blob, URL, window
import pyodide
from pyodide.ffi import create_proxy
# ============================================================================
# Configuration
# ============================================================================
# NOTE(review): the [[[NAME|default]]] tokens look like template placeholders
# that are substituted before this script runs -- confirm with the template
# engine. Until substituted, the unquoted ones below are not valid Python.
# Provider selector; get_ai_response compares it to "openai" / "anthropic".
PROVIDER = "[[[PROVIDER|openai]]]"
# Credential handed to the provider client.
# NOTE(review): this value ends up in page source -- confirm that is intended.
API_KEY = "[[[API_KEY]]]"
# Model identifier passed as `model` on every AI request.
MODEL_NAME = "[[[MODEL_NAME|gpt-4]]]"
# Passed as `temperature`; unquoted, so it must render to a numeric literal.
TEMPERATURE = [[[TEMPERATURE|0.3]]]
# Passed as `max_tokens`; unquoted, so it must render to a numeric literal.
MAX_TOKENS = [[[MAX_TOKENS|2000]]]
# ============================================================================
# Global State
# ============================================================================
# Working copy of the loaded rows (list of dicts); None until a file is
# uploaded, then replaced by each cleaning operation.
current_data = None
# Per-row copies of the rows as first parsed, taken at upload time.
original_data = None
# Header names from the uploaded CSV, in file order.
column_names = []
# Names of the quick actions applied so far, in order.
cleaning_history = []
# ============================================================================
# CSV Data Handler
# ============================================================================
class CSVDataHandler:
    """Parse CSV text and report common data-quality problems."""

    # Regexes used to recognise which date layout a value starts with.
    _DATE_PATTERNS = (
        r'\d{4}-\d{2}-\d{2}',  # YYYY-MM-DD
        r'\d{2}/\d{2}/\d{4}',  # MM/DD/YYYY
        r'\d{2}-\d{2}-\d{4}',  # DD-MM-YYYY
    )

    @staticmethod
    def _is_blank(value):
        """Return True for None, empty, or whitespace-only values."""
        return not value or str(value).strip() == ''

    @staticmethod
    def _row_key(row):
        """Return a hashable, order-independent fingerprint of a row.

        csv.DictReader stores surplus fields of a ragged row as a list under
        the key None. Sorting the items would compare None with str and the
        list is unhashable, so a frozenset with lists frozen to tuples is
        used instead.
        """
        return frozenset(
            (name, tuple(value) if isinstance(value, list) else value)
            for name, value in row.items()
        )

    @staticmethod
    def parse_csv(csv_content):
        """Parse CSV text into rows and header names.

        Args:
            csv_content: The decoded text of a CSV file.

        Returns:
            (rows, columns): a list of dicts and the list of header names,
            or (None, None) if parsing failed (the error is logged).
        """
        try:
            reader = csv.DictReader(io.StringIO(csv_content))
            data = list(reader)
            columns = reader.fieldnames if reader.fieldnames else []
            return data, columns
        except Exception as e:
            log_activity(f"Error parsing CSV: {str(e)}", "error")
            return None, None

    @staticmethod
    def detect_issues(data, columns):
        """Detect common data quality issues.

        Checks, in order: missing values per column, duplicate rows, mixed
        date layouts per column (first 100 non-empty values), and
        leading/trailing whitespace (first 100 rows).

        Args:
            data: List of row dicts.
            columns: Column names to inspect.

        Returns:
            A list of issue dicts with 'type', 'severity' and 'message' keys
            plus type-specific details; empty when data is empty.
        """
        issues = []
        if not data:
            return issues
        is_blank = CSVDataHandler._is_blank

        # Check for missing values
        for col in columns:
            missing_count = sum(1 for row in data if is_blank(row.get(col)))
            if missing_count > 0:
                percentage = (missing_count / len(data)) * 100
                issues.append({
                    'type': 'missing_values',
                    'severity': 'high' if percentage > 20 else 'medium',
                    'column': col,
                    'count': missing_count,
                    'message': f"{col}: {missing_count} missing values ({percentage:.1f}%)"
                })

        # Check for duplicates
        unique_rows = set()
        duplicate_count = 0
        for row in data:
            key = CSVDataHandler._row_key(row)
            if key in unique_rows:
                duplicate_count += 1
            else:
                unique_rows.add(key)
        if duplicate_count > 0:
            issues.append({
                'type': 'duplicates',
                'severity': 'medium',
                'count': duplicate_count,
                'message': f"Found {duplicate_count} duplicate rows"
            })

        # Check for inconsistent date formats
        for col in columns:
            values = [str(row.get(col, '')).strip() for row in data if row.get(col)]
            date_formats = set()
            for val in values[:100]:  # Sample first 100
                for pattern in CSVDataHandler._DATE_PATTERNS:
                    if re.match(pattern, val):
                        date_formats.add(pattern)
            if len(date_formats) > 1:
                issues.append({
                    'type': 'inconsistent_format',
                    'severity': 'low',
                    'column': col,
                    'message': f"{col}: Inconsistent date formats detected"
                })

        # Check for whitespace issues. Any leading/trailing whitespace counts
        # (tabs included), matching what str.strip() removes when cleaning.
        whitespace_issues = []
        for col in columns:
            for row in data[:100]:
                value = row.get(col)
                if not value:
                    continue
                text = str(value)
                if text != text.strip():
                    whitespace_issues.append(col)
                    break
        if whitespace_issues:
            issues.append({
                'type': 'whitespace',
                'severity': 'low',
                'columns': whitespace_issues,
                'message': f"Whitespace issues in: {', '.join(whitespace_issues[:3])}"
            })
        return issues
# ============================================================================
# AI Integration
# ============================================================================
async def get_ai_response(prompt, system_message=None):
    """Send a prompt to the configured AI provider and return its text reply.

    Args:
        prompt: The user message.
        system_message: Optional system instruction.

    Returns:
        The reply text, or None if the provider is unsupported or the
        request failed (the error is written to the browser console).
    """
    try:
        if PROVIDER == "openai":
            # The async client is required here: the synchronous client's
            # create() returns a plain response object, which cannot be awaited.
            from openai import AsyncOpenAI
            client = AsyncOpenAI(api_key=API_KEY)
            messages = []
            if system_message:
                messages.append({"role": "system", "content": system_message})
            messages.append({"role": "user", "content": prompt})
            response = await client.chat.completions.create(
                model=MODEL_NAME,
                messages=messages,
                temperature=TEMPERATURE,
                max_tokens=MAX_TOKENS
            )
            return response.choices[0].message.content
        elif PROVIDER == "anthropic":
            from anthropic import AsyncAnthropic
            client = AsyncAnthropic(api_key=API_KEY)
            request = {
                "model": MODEL_NAME,
                "max_tokens": MAX_TOKENS,
                "temperature": TEMPERATURE,
                "messages": [{"role": "user", "content": prompt}],
            }
            # Only send a system prompt when one was supplied, rather than
            # passing an empty string.
            if system_message:
                request["system"] = system_message
            response = await client.messages.create(**request)
            return response.content[0].text
        # Surface misconfiguration instead of silently returning None.
        console.error(f"AI Error: unsupported provider '{PROVIDER}'")
        return None
    except Exception as e:
        console.error(f"AI Error: {str(e)}")
        return None
async def generate_cleaning_suggestions(data_sample, columns, issues):
    """Ask the AI provider for cleaning suggestions.

    Args:
        data_sample: List of row dicts; the first five rows are sent.
        columns: Column names.
        issues: Issue dicts as produced by CSVDataHandler.detect_issues.

    Returns:
        A list of suggestion dicts (possibly empty). Non-dict entries in the
        model's JSON array are dropped because callers use .get() on each item.
    """
    system_message = """You are a data cleaning expert. Analyze CSV data and provide specific, actionable cleaning suggestions.
Focus on: data quality issues, standardization opportunities, and data transformation recommendations.
Return suggestions as a JSON array with this format:
[
{"action": "action_name", "description": "what it does", "priority": "high|medium|low", "target": "column_name or 'all'"}
]
"""
    prompt = f"""Analyze this CSV data and provide cleaning suggestions:
Columns: {', '.join(columns)}
Row count: {len(data_sample)}
Sample data (first 5 rows):
{json.dumps(data_sample[:5], indent=2)}
Detected issues:
{json.dumps(issues, indent=2)}
Provide 5-8 specific cleaning suggestions as JSON."""
    response = await get_ai_response(prompt, system_message)
    if not response:
        return []
    # The model may wrap the array in prose; take the outermost [...] span.
    json_match = re.search(r'\[.*\]', response, re.DOTALL)
    if not json_match:
        return []
    try:
        parsed = json.loads(json_match.group())
    except ValueError as e:  # json.JSONDecodeError is a ValueError subclass
        console.error(f"Could not parse AI suggestions: {str(e)}")
        return []
    if not isinstance(parsed, list):
        return []
    return [item for item in parsed if isinstance(item, dict)]
async def execute_ai_cleaning(data, instruction):
    """Ask the AI for transformation code and run it against the data.

    Args:
        data: List of row dicts to transform.
        instruction: Natural-language description of the cleaning task.

    Returns:
        The list bound to 'cleaned_data' by the generated code, or `data`
        itself when no code was returned, execution failed, or the result
        is not a list.
    """
    system_message = """You are a data transformation expert. Given a cleaning instruction and CSV data,
generate Python code to perform the transformation. Return ONLY the Python code, no explanations.
The code should work with a list of dictionaries and return the cleaned list.
Use variable name 'data' for input and 'cleaned_data' for output."""
    data_sample = data[:10] if len(data) > 10 else data
    prompt = f"""Data structure: List of dictionaries with keys: {list(data[0].keys()) if data else []}
Sample data: {json.dumps(data_sample, indent=2)}
Cleaning instruction: {instruction}
Generate Python code to perform this cleaning operation."""
    code = await get_ai_response(prompt, system_message)
    if not code:
        return data
    try:
        # Extract the first fenced block if the model used markdown.
        code_match = re.search(r'```(?:python|py)?[ \t]*\n(.*?)\n```', code, re.DOTALL)
        if code_match:
            code = code_match.group(1)
        # SECURITY: this executes model-generated code with full access to the
        # page's Python environment. Review before using with untrusted
        # prompts or data.
        # A single namespace is used so that helper functions and imports
        # defined by the generated code can see each other; with separate
        # globals/locals, names bound at the top level are invisible inside
        # nested functions. The copy keeps generated code from rebinding
        # this module's globals.
        namespace = dict(globals())
        namespace['data'] = data
        exec(code, namespace)
        result = namespace.get('cleaned_data', data)
        if not isinstance(result, list):
            log_activity("Cleaning code did not produce a list; data left unchanged", "warning")
            return data
        return result
    except Exception as e:
        console.error(f"Code execution error: {str(e)}")
        log_activity(f"Error executing cleaning: {str(e)}", "error")
        return data
# ============================================================================
# Data Cleaning Operations
# ============================================================================
def remove_duplicates(data):
    """Return the rows of `data` with exact duplicates removed.

    The first occurrence of each row is kept and order is preserved. Two
    rows are duplicates when they hold the same key/value pairs, regardless
    of key order.

    Args:
        data: List of row dicts.

    Returns:
        A new list containing the unique rows (the row dicts themselves are
        not copied).
    """
    def row_key(row):
        # csv.DictReader puts surplus fields of a ragged row in a list under
        # the key None. Sorting items would compare None with str, and lists
        # are unhashable, so build a frozenset with lists frozen to tuples.
        return frozenset(
            (name, tuple(value) if isinstance(value, list) else value)
            for name, value in row.items()
        )

    seen = set()
    cleaned = []
    for row in data:
        key = row_key(row)
        if key not in seen:
            seen.add(key)
            cleaned.append(row)
    return cleaned
def fill_missing_values(data, columns):
    """Fill blank cells in place, column by column.

    A column is treated as numeric when more than half of its non-blank
    values parse as finite numbers; blanks are then filled with the mean
    formatted to two decimals. Otherwise blanks receive the most common
    non-blank value. Columns with no non-blank values are left untouched.

    Args:
        data: List of row dicts; modified in place.
        columns: Column names to process.

    Returns:
        The same `data` list.
    """
    import math
    from collections import Counter

    def is_blank(value):
        return not value or str(value).strip() == ''

    def as_number(value):
        # Parse each value on its own so that one malformed entry such as
        # "1.2.3" no longer forces the whole column into text mode.
        try:
            number = float(str(value).strip())
        except ValueError:
            return None
        # "nan" / "inf" parse as floats but would poison the mean.
        return number if math.isfinite(number) else None

    for col in columns:
        values = [row.get(col) for row in data if not is_blank(row.get(col))]
        if not values:
            continue
        numbers = [n for n in map(as_number, values) if n is not None]
        if len(numbers) > len(values) * 0.5:  # Mostly numeric
            fill_value = f"{sum(numbers) / len(numbers):.2f}"  # Mean
        else:
            fill_value = Counter(values).most_common(1)[0][0]
        for row in data:
            if is_blank(row.get(col)):
                row[col] = fill_value
    return data
def standardize_formats(data, columns):
    """Rewrite recognised date values as ISO dates (YYYY-MM-DD), in place.

    Each non-blank cell is tried against a fixed list of date layouts in
    order; the first that parses wins. Values matching none of them are
    left unchanged. Only dates are handled.

    Args:
        data: List of row dicts; modified in place.
        columns: Column names to process.

    Returns:
        The same `data` list.
    """
    # Order matters for ambiguous values: day-first is tried before
    # month-first for dash-separated dates.
    date_formats = ('%m/%d/%Y', '%d-%m-%Y', '%Y-%m-%d', '%m-%d-%Y')
    for col in columns:
        for row in data:
            val = str(row.get(col, '')).strip()
            if not val:
                continue
            for fmt in date_formats:
                try:
                    parsed = datetime.strptime(val, fmt)
                except ValueError:
                    continue
                row[col] = parsed.strftime('%Y-%m-%d')
                break
    return data
def clean_whitespace(data, columns):
    """Strip surrounding whitespace from every truthy cell, in place.

    Falsy cells (None, empty string) and missing columns are skipped.
    Returns the same `data` list.
    """
    for record in data:
        for name in columns:
            value = record.get(name)
            if not value:
                continue
            record[name] = str(value).strip()
    return data
# ============================================================================
# UI Helper Functions
# ============================================================================
def log_activity(message, level="info"):
    """Append a timestamped entry to the activity log panel.

    Args:
        message: Text to show. It is HTML-escaped because it can contain
            file names, user input, or exception text.
        level: One of "info", "success", "error", "warning"; anything else
            falls back to neutral styling.
    """
    from html import escape

    log_div = document.getElementById("activityLog")
    # Drop the "No activities yet" placeholder on first use.
    if log_div.children.length == 1 and "No activities" in log_div.innerHTML:
        log_div.innerHTML = ""
    colors = {
        "info": "text-blue-600",
        "success": "text-green-600",
        "error": "text-red-600",
        "warning": "text-yellow-600"
    }
    icons = {
        "info": "fa-info-circle",
        "success": "fa-check-circle",
        "error": "fa-exclamation-circle",
        "warning": "fa-exclamation-triangle"
    }
    timestamp = datetime.now().strftime("%H:%M:%S")
    color = colors.get(level, "text-gray-600")
    icon = icons.get(level, "fa-circle")
    safe_message = escape(str(message))
    entry = document.createElement("div")
    entry.className = "text-sm py-1"
    entry.innerHTML = f'<span class="text-gray-400">[{timestamp}]</span> <i class="fas {icon} {color} mr-1"></i><span class="{color}">{safe_message}</span>'
    log_div.appendChild(entry)
    # Keep the newest entry in view.
    log_div.scrollTop = log_div.scrollHeight
def show_loading(message="Processing..."):
    """Reveal the loading overlay and set its caption to `message`."""
    overlay = document.getElementById("loadingOverlay")
    overlay.classList.remove("hidden")
    caption = document.getElementById("loadingText")
    caption.textContent = message
def hide_loading():
    """Put the loading overlay back into its hidden state."""
    overlay = document.getElementById("loadingOverlay")
    overlay.classList.add("hidden")
def update_preview():
    """Redraw the preview table and the row/column counters.

    Does nothing while there is no data or no column list. Only the first
    ten rows are rendered and each cell is cut to 50 characters.
    """
    if not (current_data and column_names):
        return

    # Counters under the table
    document.getElementById("rowCount").textContent = str(len(current_data))
    document.getElementById("colCount").textContent = str(len(column_names))

    # Start from an empty table
    head = document.getElementById("previewHead")
    body = document.getElementById("previewBody")
    head.innerHTML = ""
    body.innerHTML = ""

    # Header row
    header_tr = document.createElement("tr")
    for name in column_names:
        header_cell = document.createElement("th")
        header_cell.className = "px-3 py-2 text-left text-xs font-semibold text-gray-700"
        header_cell.textContent = name
        header_tr.appendChild(header_cell)
    head.appendChild(header_tr)

    # Data rows (first 10 only)
    for record in current_data[:10]:
        tr = document.createElement("tr")
        for name in column_names:
            cell = document.createElement("td")
            cell.className = "px-3 py-2 text-xs text-gray-600"
            text = str(record.get(name, ''))
            cell.textContent = text[:50]  # Truncate long values
            tr.appendChild(cell)
        body.appendChild(tr)

    document.getElementById("previewSection").classList.remove("hidden")
def display_issues(issues):
    """Render the detected issues and reveal the issues panel.

    Args:
        issues: List of issue dicts with 'severity' and 'message' keys.
            Nothing happens when the list is empty.

    Messages embed column names taken from the uploaded file, so all
    interpolated text is HTML-escaped before being assigned to innerHTML.
    """
    from html import escape

    if not issues:
        return
    issues_list = document.getElementById("issuesList")
    issues_list.innerHTML = ""
    for issue in issues:
        div = document.createElement("div")
        div.className = "bg-gray-50 rounded p-3 border-l-4 border-yellow-400"
        severity = escape(str(issue['severity']))
        message = escape(str(issue['message']))
        severity_class = f"issue-{severity}"
        div.innerHTML = f'''
<div class="flex items-start">
<span class="issue-badge {severity_class} mr-2">{severity.upper()}</span>
<div>
<p class="text-sm text-gray-700">{message}</p>
</div>
</div>
'''
        issues_list.appendChild(div)
    document.getElementById("issuesSection").classList.remove("hidden")
    log_activity(f"Detected {len(issues)} data quality issues", "warning")
def display_suggestions(suggestions):
    """Render AI cleaning suggestions, each with an Apply button.

    Args:
        suggestions: List of dicts with optional 'action', 'description'
            and 'priority' keys. Nothing happens when the list is empty.

    The text comes from the model, so it is coerced to str and HTML-escaped
    before being assigned to innerHTML. The Apply button passes the
    suggestion's list index to window.applySuggestion.
    """
    from html import escape

    if not suggestions:
        return
    suggestions_div = document.getElementById("suggestionsList")
    suggestions_div.innerHTML = ""
    priority_colors = {
        "high": "text-red-600",
        "medium": "text-yellow-600",
        "low": "text-blue-600"
    }
    for i, suggestion in enumerate(suggestions):
        # Missing or null fields fall back to defaults instead of raising.
        action = str(suggestion.get('action') or '')
        description = str(suggestion.get('description') or '')
        priority = str(suggestion.get('priority') or 'low')
        priority_class = priority_colors.get(priority.lower(), 'text-gray-600')
        title = escape(action.replace('_', ' ').title())
        div = document.createElement("div")
        div.className = "bg-gradient-to-r from-purple-50 to-indigo-50 rounded-lg p-3 border border-indigo-200"
        div.innerHTML = f'''
<div class="flex items-start justify-between">
<div class="flex-1">
<h4 class="font-semibold text-sm text-gray-800">{title}</h4>
<p class="text-xs text-gray-600 mt-1">{escape(description)}</p>
<span class="text-xs {priority_class} font-semibold mt-1 inline-block">Priority: {escape(priority.upper())}</span>
</div>
<button onclick="applySuggestion({i})" class="ml-2 bg-indigo-600 hover:bg-indigo-700 text-white text-xs px-3 py-1 rounded">
Apply
</button>
</div>
'''
        suggestions_div.appendChild(div)
    document.getElementById("suggestionsSection").classList.remove("hidden")
    log_activity(f"Generated {len(suggestions)} AI suggestions", "success")
# ============================================================================
# Event Handlers
# ============================================================================
async def handle_file_upload(event):
    """Load the selected CSV file, show a preview, and report issues.

    Args:
        event: The file input's "change" event.

    On success the module-level current_data, original_data and
    column_names are replaced. Errors are logged to the activity panel; the
    loading overlay is always hidden on exit.
    """
    global current_data, original_data, column_names
    show_loading("Loading CSV file...")
    try:
        file = event.target.files.item(0)
        if not file:
            return
        # Read file content
        array_buffer = await file.arrayBuffer()
        bytes_data = array_buffer.to_bytes()
        try:
            # utf-8-sig drops a leading byte-order mark, which would
            # otherwise become part of the first column name.
            csv_content = bytes_data.decode('utf-8-sig')
        except UnicodeDecodeError:
            # Latin-1 maps every byte, so this fallback cannot fail.
            csv_content = bytes_data.decode('latin-1')
            log_activity("File is not valid UTF-8; decoded as Latin-1", "warning")
        # Parse CSV
        data, columns = CSVDataHandler.parse_csv(csv_content)
        if data is None:
            return
        current_data = data
        original_data = [row.copy() for row in data]
        column_names = columns
        # Update UI
        file_label = document.getElementById("fileName")
        file_label.textContent = f"Loaded: {file.name}"
        file_label.classList.remove("hidden")
        update_preview()
        # Detect and display issues
        issues = CSVDataHandler.detect_issues(data, columns)
        if issues:
            display_issues(issues)
        else:
            # Clear findings left over from a previously loaded file.
            document.getElementById("issuesList").innerHTML = ""
            document.getElementById("issuesSection").classList.add("hidden")
        document.getElementById("downloadSection").classList.remove("hidden")
        log_activity(f"Loaded {file.name}: {len(data)} rows, {len(columns)} columns", "success")
    except Exception as e:
        console.error(f"Upload error: {str(e)}")
        log_activity(f"Error loading file: {str(e)}", "error")
    finally:
        hide_loading()
async def handle_quick_action(action):
    """Run one of the built-in cleaning operations on the loaded data.

    `action` is the operation name sent by the quick-action buttons. The
    preview is refreshed and the action recorded in cleaning_history
    afterwards; failures are logged and the overlay is always hidden.
    """
    global current_data, column_names
    if not current_data:
        log_activity("Please upload a CSV file first", "warning")
        return

    show_loading(f"Applying {action.replace('_', ' ')}...")
    try:
        rows_before = len(current_data)
        outcome = None
        if action == "remove_duplicates":
            current_data = remove_duplicates(current_data)
            dropped = rows_before - len(current_data)
            outcome = f"Removed {dropped} duplicate rows"
        elif action == "fill_missing":
            current_data = fill_missing_values(current_data, column_names)
            outcome = "Filled missing values"
        elif action == "standardize_formats":
            current_data = standardize_formats(current_data, column_names)
            outcome = "Standardized data formats"
        elif action == "clean_whitespace":
            current_data = clean_whitespace(current_data, column_names)
            outcome = "Cleaned whitespace"
        if outcome is not None:
            log_activity(outcome, "success")
        update_preview()
        cleaning_history.append(action)
    except Exception as err:
        console.error(f"Action error: {str(err)}")
        log_activity(f"Error during {action}: {str(err)}", "error")
    finally:
        hide_loading()
async def get_ai_suggestions_handler():
    """Fetch AI cleaning suggestions for the loaded data and show them.

    The suggestions are also stored on window.aiSuggestions so the Apply
    buttons can look them up by index.
    """
    if not current_data:
        log_activity("Please upload a CSV file first", "warning")
        return

    show_loading("Getting AI suggestions...")
    try:
        detected = CSVDataHandler.detect_issues(current_data, column_names)
        proposals = await generate_cleaning_suggestions(current_data, column_names, detected)
        if not proposals:
            log_activity("Could not generate suggestions", "warning")
        else:
            display_suggestions(proposals)
            # Keep them reachable for the apply handler
            window.aiSuggestions = proposals
    except Exception as err:
        console.error(f"Suggestion error: {str(err)}")
        log_activity(f"Error getting suggestions: {str(err)}", "error")
    finally:
        hide_loading()
async def execute_custom_cleaning_handler():
    """Run the free-text cleaning instruction typed into the custom box.

    Requires loaded data and a non-empty instruction. The result replaces
    current_data, the preview is refreshed, and the text box is cleared.
    """
    global current_data
    if not current_data:
        log_activity("Please upload a CSV file first", "warning")
        return

    instruction = document.getElementById("customQuery").value.strip()
    if not instruction:
        log_activity("Please enter a cleaning instruction", "warning")
        return

    show_loading("Executing custom cleaning...")
    try:
        current_data = await execute_ai_cleaning(current_data, instruction)
        update_preview()
        log_activity(f"Executed: {instruction}", "success")
        document.getElementById("customQuery").value = ""
    except Exception as err:
        console.error(f"Custom cleaning error: {str(err)}")
        log_activity(f"Error executing cleaning: {str(err)}", "error")
    finally:
        hide_loading()
async def apply_suggestion_handler(index):
    """Apply the AI suggestion stored at `index` in window.aiSuggestions.

    The suggestion's action and description are joined into an instruction
    and executed as a custom cleaning task. Returns silently when no
    suggestions are stored or the index is past the end of the list.
    """
    global current_data
    if not hasattr(window, 'aiSuggestions'):
        return
    stored = window.aiSuggestions
    if index >= len(stored):
        return

    chosen = stored[index]
    show_loading(f"Applying: {chosen.get('action', '')}")
    try:
        # Run the suggestion through the custom-cleaning path
        task = f"{chosen.get('action', '')}: {chosen.get('description', '')}"
        current_data = await execute_ai_cleaning(current_data, task)
        update_preview()
        log_activity(f"Applied suggestion: {chosen.get('action', '')}", "success")
    except Exception as err:
        console.error(f"Apply suggestion error: {str(err)}")
        log_activity(f"Error applying suggestion: {str(err)}", "error")
    finally:
        hide_loading()
def download_cleaned_csv():
    """Serialize the current data to CSV and trigger a browser download.

    The file is named cleaned_data_<timestamp>.csv. Columns are written in
    column_names order, followed by any extra string-keyed columns found in
    the rows. Errors are logged to the activity panel.
    """
    global current_data, column_names
    if not current_data:
        log_activity("No data to download", "warning")
        return
    try:
        from js import Object
        from pyodide.ffi import to_js

        # Rows can carry keys outside column_names; DictWriter would raise
        # on them. Append extra string keys as columns and ignore the None
        # key csv.DictReader uses for surplus fields.
        fieldnames = list(column_names)
        known = set(fieldnames)
        for row in current_data:
            for key in row:
                if isinstance(key, str) and key not in known:
                    known.add(key)
                    fieldnames.append(key)
        # Convert data back to CSV
        output = io.StringIO()
        writer = csv.DictWriter(output, fieldnames=fieldnames, extrasaction='ignore')
        writer.writeheader()
        writer.writerows(current_data)
        csv_content = output.getvalue()
        # Create download. The Blob constructor needs a real JS array and a
        # JS options object with a string "type" key, so both arguments are
        # converted explicitly.
        blob = Blob.new(
            to_js([csv_content]),
            to_js({"type": "text/csv"}, dict_converter=Object.fromEntries),
        )
        url = URL.createObjectURL(blob)
        link = document.createElement("a")
        link.href = url
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        link.download = f"cleaned_data_{timestamp}.csv"
        link.click()
        URL.revokeObjectURL(url)
        log_activity("Downloaded cleaned CSV file", "success")
    except Exception as e:
        console.error(f"Download error: {str(e)}")
        log_activity(f"Error downloading file: {str(e)}", "error")
# ============================================================================
# Setup Event Listeners
# ============================================================================
def setup():
    """Wire the page to the Python handlers.

    Logs the start-up message, listens for file selection, and publishes
    the handlers on `window` under the names used by the inline onclick
    attributes.
    """
    log_activity("CSV Data Cleaner initialized", "info")

    # File input
    upload_proxy = create_proxy(handle_file_upload)
    document.getElementById("fileInput").addEventListener("change", upload_proxy)

    # Handlers reachable from the HTML onclick attributes
    exposed = (
        ("handleQuickAction", handle_quick_action),
        ("getAISuggestions", get_ai_suggestions_handler),
        ("executeCustomCleaning", execute_custom_cleaning_handler),
        ("applySuggestion", apply_suggestion_handler),
        ("downloadCleanedCSV", download_cleaned_csv),
    )
    for js_name, handler in exposed:
        setattr(window, js_name, create_proxy(handler))


# Run setup
setup()