Python for resume comparison
Python for resume comparison
A Python script that compares 4 resumes across 6 specific categories by counting how often each category's keywords appear in each resume:
1. Visualization
2. Data Engineering
3. Python
4. Power BI
5. Storytelling
6. Experience
import re
import unicodedata
from pathlib import Path

import pandas as pd
# ========== CONFIGURATION ==========
# Define categories and related keywords (you can adjust these)
# Maps each scoring category to the lowercase keywords/phrases counted for it.
# Keywords are matched as whole words against lowercased resume text, so every
# entry here must be lowercase. Product names that are commonly written with a
# space ("Looker Studio", "Power Query") are listed in both spellings, since
# the joined form alone never matches the spaced one.
CATEGORY_KEYWORDS = {
    "Visualization": [
        "visualization", "dashboard", "tableau", "power bi", "lookerstudio",
        "looker studio", "data visualization", "charts", "plots",
        "infographic",
    ],
    "Data Engineer": [
        "data pipeline", "etl", "data warehouse", "data lake", "airflow",
        "snowflake", "databricks", "spark", "dbt", "bigquery", "redshift",
    ],
    "Python": [
        "python", "pandas", "numpy", "scikit", "tensorflow", "matplotlib",
        "flask", "django",
    ],
    "Power BI": [
        "power bi", "dax", "powerquery", "power query", "m language",
        "data model", "report builder",
    ],
    "Storytelling": [
        "storytelling", "data storytelling", "insight", "narrative",
        "business story", "data-driven decision", "presentation",
    ],
    "Experience": [
        "years", "experience", "lead", "managed", "delivered", "designed",
        "implemented", "developed", "strategic", "team", "project",
    ],
}
# ========== HELPER FUNCTIONS ==========
def normalize_text(text):
    """Return *text* lowercased and reduced to a plain ASCII token stream.

    Steps, in order:

    1. Apply Unicode NFKD decomposition and drop combining marks, so that
       accented letters keep their base letter (an "e" with an acute accent
       becomes "e") and compatibility forms such as the "fi" ligature expand
       to ordinary letters. Without this step the ASCII filter below would
       split such words apart.
    2. Lowercase the text.
    3. Replace every character other than a-z, 0-9, whitespace, "+", "-"
       and "." with a single space.
    4. Collapse whitespace runs to one space and strip both ends.

    Args:
        text: The raw text to normalize.

    Returns:
        The normalized string (empty if *text* has no retained characters).
    """
    decomposed = unicodedata.normalize("NFKD", text)
    text = "".join(ch for ch in decomposed if not unicodedata.combining(ch))
    text = text.lower()
    text = re.sub(r'[^a-z0-9\s\+\-\.]', ' ', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text
def score_resume(text, categories):
    """Count whole-word keyword matches per category.

    Args:
        text: Raw resume text. It is passed through normalize_text() before
            matching.
        categories: Mapping of category name to an iterable of keywords or
            phrases.

    Returns:
        A dict with one entry per category holding the summed number of
        non-overlapping occurrences of that category's keywords, plus a
        "Total" entry holding the sum over all categories.
    """
    text = normalize_text(text)
    scores = {}
    for cat, keywords in categories.items():
        count = 0
        for keyword in keywords:
            # Normalize the keyword exactly like the text; otherwise a
            # keyword written with capitals could never match.
            needle = normalize_text(keyword)
            if not needle:
                # An empty pattern would match at every position.
                continue
            # Lookarounds instead of \b: \b requires a word character at the
            # keyword's edge, so it can never match a keyword that starts or
            # ends with "+", "-" or "." (e.g. "c++"). For keywords with
            # alphanumeric edges the two forms are equivalent.
            pattern = r'(?<!\w)' + re.escape(needle) + r'(?!\w)'
            count += len(re.findall(pattern, text))
        scores[cat] = count
    # Computed after the loop so it sums the final per-category counts.
    scores["Total"] = sum(scores.values())
    return scores
# ========== SAMPLE INPUT (Replace with your resume text or file reads) ==========
# Placeholder resume snippets, keyed by the label shown in the report.
resume_texts = {
    "Resume A": (
        "Head of Data with 15+ years... "
        "Skilled in Power BI, Python, Tableau, and data storytelling..."
    ),
    "Resume B": (
        "Senior Data Engineer experienced with "
        "Airflow, dbt, Databricks, and Snowflake..."
    ),
    "Resume C": (
        "Analytics professional focusing on visualization "
        "using Power BI and Looker Studio..."
    ),
    "Resume D": (
        "Python developer and data storyteller with hands-on experience "
        "in ETL, data pipelines, and BI tools..."
    ),
}
# ========== COMPUTE SCORES ==========
# Build one row per resume: its label plus the per-category and total counts.
results = []
for name, text in resume_texts.items():
    row = {"Resume": name}
    row.update(score_resume(text, CATEGORY_KEYWORDS))
    results.append(row)
# Explicit columns keep the frame well-formed even when there are no resumes.
df = pd.DataFrame(results, columns=["Resume", *CATEGORY_KEYWORDS, "Total"])
# Normalize scores (relative to max per category)
for cat in CATEGORY_KEYWORDS:
    top = df[cat].max() if len(df) else 0
    if top > 0:
        df[f"{cat} Score (%)"] = (df[cat] / top * 100).round(1)
    else:
        # No resume matched this category: report 0% rather than the NaN
        # that 0 / 0 would produce.
        df[f"{cat} Score (%)"] = 0.0
# Sort by total score; mergesort is stable, so resumes with equal totals
# keep their input order.
df = df.sort_values("Total", ascending=False, kind="mergesort").reset_index(drop=True)
# ========== OUTPUT ==========
print("\n===== Resume Comparison Summary =====\n")
try:
    # DataFrame.to_markdown() relies on the optional "tabulate" package.
    print(df.to_markdown(index=False))
except ImportError:
    # tabulate is not installed: fall back to a plain-text table.
    print(df.to_string(index=False))
# Save to HTML and CSV (both files are written to the current directory)
output_path_html = Path("resume_comparison_4resumes.html")
output_path_csv = Path("resume_comparison_4resumes.csv")
df.to_html(output_path_html, index=False)
df.to_csv(output_path_csv, index=False)
print(f"\nReport saved as:\n - {output_path_html.resolve()}\n - {output_path_csv.resolve()}")
Sample Report